Minor code style updates to MinimalEncoder and a few other files ; minor dep updates

This commit is contained in:
Sean Owen 2021-10-12 21:06:05 -05:00
parent 3909ebe294
commit 831ebac6e7
7 changed files with 162 additions and 363 deletions

View file

@ -63,7 +63,7 @@
<plugin> <plugin>
<groupId>biz.aQute.bnd</groupId> <groupId>biz.aQute.bnd</groupId>
<artifactId>bnd-maven-plugin</artifactId> <artifactId>bnd-maven-plugin</artifactId>
<version>5.3.0</version> <version>6.0.0</version>
<executions> <executions>
<execution> <execution>
<goals> <goals>

View file

@ -113,7 +113,7 @@ public enum EncodeHintType {
/** /**
* Specifies whether to use compact mode for QR code (type {@link Boolean}, or "true" or "false" * Specifies whether to use compact mode for QR code (type {@link Boolean}, or "true" or "false"
* {@link String} value). * {@link String} value).
* When compaction is performed the value for {@link CHARACTER_SET} is ignored. * When compaction is performed the value for {@link #CHARACTER_SET} is ignored.
*/ */
QR_COMPACT, QR_COMPACT,

View file

@ -90,7 +90,6 @@ final class State {
// Create a new state representing this state with a latch to a (not // Create a new state representing this state with a latch to a (not
// necessarily different) mode, and then a code. // necessarily different) mode, and then a code.
State latchAndAppend(int mode, int value) { State latchAndAppend(int mode, int value) {
//assert binaryShiftByteCount == 0;
int bitCount = this.bitCount; int bitCount = this.bitCount;
Token token = this.token; Token token = this.token;
if (mode != this.mode) { if (mode != this.mode) {
@ -106,7 +105,6 @@ final class State {
// Create a new state representing this state, with a temporary shift // Create a new state representing this state, with a temporary shift
// to a different mode to output a single value. // to a different mode to output a single value.
State shiftAndAppend(int mode, int value) { State shiftAndAppend(int mode, int value) {
//assert binaryShiftByteCount == 0 && this.mode != mode;
Token token = this.token; Token token = this.token;
int thisModeBitCount = this.mode == HighLevelEncoder.MODE_DIGIT ? 4 : 5; int thisModeBitCount = this.mode == HighLevelEncoder.MODE_DIGIT ? 4 : 5;
// Shifts exist only to UPPER and PUNCT, both with tokens size 5. // Shifts exist only to UPPER and PUNCT, both with tokens size 5.
@ -122,7 +120,6 @@ final class State {
int mode = this.mode; int mode = this.mode;
int bitCount = this.bitCount; int bitCount = this.bitCount;
if (this.mode == HighLevelEncoder.MODE_PUNCT || this.mode == HighLevelEncoder.MODE_DIGIT) { if (this.mode == HighLevelEncoder.MODE_PUNCT || this.mode == HighLevelEncoder.MODE_DIGIT) {
//assert binaryShiftByteCount == 0;
int latch = HighLevelEncoder.LATCH_TABLE[mode][HighLevelEncoder.MODE_UPPER]; int latch = HighLevelEncoder.LATCH_TABLE[mode][HighLevelEncoder.MODE_UPPER];
token = token.add(latch & 0xFFFF, latch >> 16); token = token.add(latch & 0xFFFF, latch >> 16);
bitCount += latch >> 16; bitCount += latch >> 16;
@ -147,7 +144,6 @@ final class State {
} }
Token token = this.token; Token token = this.token;
token = token.addBinaryShift(index - binaryShiftByteCount, binaryShiftByteCount); token = token.addBinaryShift(index - binaryShiftByteCount, binaryShiftByteCount);
//assert token.getTotalBitCount() == this.bitCount;
return new State(token, mode, 0, this.bitCount); return new State(token, mode, 0, this.bitCount);
} }

View file

@ -1,5 +1,5 @@
/* /*
* Copyright 2008 ZXing authors * Copyright 2021 ZXing authors
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@ -80,8 +80,6 @@ import java.nio.charset.UnsupportedCharsetException;
*/ */
final class MinimalEncoder { final class MinimalEncoder {
// static final boolean DEBUG = false;
private enum VersionSize { private enum VersionSize {
SMALL("version 1-9"), SMALL("version 1-9"),
MEDIUM("version 10-26"), MEDIUM("version 10-26"),
@ -103,7 +101,6 @@ final class MinimalEncoder {
private final boolean isGS1; private final boolean isGS1;
private final CharsetEncoder[] encoders; private final CharsetEncoder[] encoders;
/** /**
* Encoding is optional (default ISO-8859-1) and version is optional (minimal version is computed if not specified). * Encoding is optional (default ISO-8859-1) and version is optional (minimal version is computed if not specified).
*/ */
@ -142,13 +139,16 @@ final class MinimalEncoder {
isoEncoders[j] = ce; isoEncoders[j] = ce;
break; break;
} }
} catch (UnsupportedCharsetException e) { } } catch (UnsupportedCharsetException e) {
// continue
}
} }
} }
if (j >= 15) { if (j >= 15) {
if (!StandardCharsets.UTF_16BE.newEncoder().canEncode(stringToEncode.charAt(i))) { if (!StandardCharsets.UTF_16BE.newEncoder().canEncode(stringToEncode.charAt(i))) {
throw new WriterException("Can not encode character \\u" + String.format("%04X", throw new WriterException("Can not encode character \\u" +
(int) stringToEncode.charAt(i)) + " at position " + i + " in input \"" + stringToEncode + "\""); String.format("%04X", (int) stringToEncode.charAt(i)) + " at position " + i +
" in input \"" + stringToEncode + "\"");
} }
needUnicodeEncoder = true; needUnicodeEncoder = true;
} }
@ -174,8 +174,8 @@ final class MinimalEncoder {
} }
} }
encoders[index++] = StandardCharsets.UTF_8.newEncoder(); encoders[index] = StandardCharsets.UTF_8.newEncoder();
encoders[index++] = StandardCharsets.UTF_16BE.newEncoder(); encoders[index + 1] = StandardCharsets.UTF_16BE.newEncoder();
} }
} }
@ -231,9 +231,6 @@ final class MinimalEncoder {
int count = mode.getCharacterCountBits(version); int count = mode.getCharacterCountBits(version);
return count == 0 ? 0 : 1 << count; return count == 0 ? 0 : 1 << count;
} }
static int getMaximumNumberOfEncodeableCharacters(VersionSize versionSize, Mode mode) {
return getMaximumNumberOfEncodeableCharacters(getVersion(versionSize), mode);
}
boolean canEncode(Mode mode, char c) { boolean canEncode(Mode mode, char c) {
switch (mode) { switch (mode) {
@ -268,9 +265,9 @@ final class MinimalEncoder {
static ResultList smallest(ResultList[] results) { static ResultList smallest(ResultList[] results) {
ResultList smallestResult = null; ResultList smallestResult = null;
for (int i = 0; i < results.length; i++) { for (ResultList result : results) {
if (smallestResult == null || (results[i] != null && results[i].getSize() < smallestResult.getSize())) { if (smallestResult == null || (result != null && result.getSize() < smallestResult.getSize())) {
smallestResult = results[i]; smallestResult = result;
} }
} }
return smallestResult; return smallestResult;
@ -282,8 +279,8 @@ final class MinimalEncoder {
if (first != null) { if (first != null) {
if (first.mode != Mode.ECI) { if (first.mode != Mode.ECI) {
boolean haveECI = false; boolean haveECI = false;
for (Iterator<ResultList.ResultNode> it = result.iterator(); it.hasNext();) { for (ResultList.ResultNode resultNode : result) {
if (it.next().mode == Mode.ECI) { if (resultNode.mode == Mode.ECI) {
haveECI = true; haveECI = true;
break; break;
} }
@ -305,77 +302,43 @@ final class MinimalEncoder {
} }
} }
//Add TERMINATOR according to "8.4.8 Terminator" //Add TERMINATOR according to "8.4.8 Terminator"
//TODO: The terminiator can be omitted if there are less than 4 bit in the capacity of the symbol. //TODO: The terminator can be omitted if there are less than 4 bit in the capacity of the symbol.
result.add(result.new ResultNode(Mode.TERMINATOR, stringToEncode.length(), 0, 0)); result.add(result.new ResultNode(Mode.TERMINATOR, stringToEncode.length(), 0, 0));
return result; return result;
} }
int getEdgeCharsetEncoderIndex(ResultList edge) { int getEdgeCharsetEncoderIndex(ResultList edge) {
ResultList.ResultNode last = edge.getLast(); ResultList.ResultNode last = edge.getLast();
assert last != null;
return last != null ? last.charsetEncoderIndex : 0; return last != null ? last.charsetEncoderIndex : 0;
} }
Mode getEdgeMode(ResultList edge) { Mode getEdgeMode(ResultList edge) {
ResultList.ResultNode last = edge.getLast(); ResultList.ResultNode last = edge.getLast();
assert last != null;
return last != null ? last.mode : Mode.BYTE; return last != null ? last.mode : Mode.BYTE;
} }
int getEdgePosition(ResultList edge) { int getEdgePosition(ResultList edge) {
//The algorithm appends an edge at some point (in the method addEdge()) with a minimal solution. // The algorithm appends an edge at some point (in the method addEdge()) with a minimal solution.
//This function works regardless if the concatenation has already taken place or not. // This function works regardless if the concatenation has already taken place or not.
ResultList.ResultNode last = edge.getLast(); ResultList.ResultNode last = edge.getLast();
assert last != null;
return last != null ? last.position : 0; return last != null ? last.position : 0;
} }
int getEdgeLength(ResultList edge) { int getEdgeLength(ResultList edge) {
//The algorithm appends an edge at some point (in the method addEdge()) with a minimal solution. // The algorithm appends an edge at some point (in the method addEdge()) with a minimal solution.
//This function works regardless if the concatenation has already taken place or not. // This function works regardless if the concatenation has already taken place or not.
ResultList.ResultNode last = edge.getLast(); ResultList.ResultNode last = edge.getLast();
assert last != null;
return last != null ? last.getCharacterLength() : 0; return last != null ? last.getCharacterLength() : 0;
} }
ResultList.ResultNode getEdgePrevious(ResultList edge) {
Iterator<ResultList.ResultNode> it = edge.descendingIterator();
assert it.hasNext();
if (!it.hasNext()) {
return null;
}
it.next();
if (!it.hasNext()) {
return null;
}
ResultList.ResultNode result = it.next();
if (result.mode == Mode.ECI) {
if (!it.hasNext()) {
return null;
}
result = it.next();
}
return result;
}
void addEdge(ArrayList<ResultList>[][][] vertices, ResultList edge, ResultList previous) { void addEdge(ArrayList<ResultList>[][][] vertices, ResultList edge, ResultList previous) {
int vertexIndex = getEdgePosition(edge) + getEdgeLength(edge); int vertexIndex = getEdgePosition(edge) + getEdgeLength(edge);
if (vertices[vertexIndex][getEdgeCharsetEncoderIndex(edge)][getCompactedOrdinal(getEdgeMode(edge))] == null) { if (vertices[vertexIndex][getEdgeCharsetEncoderIndex(edge)][getCompactedOrdinal(getEdgeMode(edge))] == null) {
vertices[vertexIndex][getEdgeCharsetEncoderIndex(edge)][getCompactedOrdinal(getEdgeMode(edge))] = new vertices[vertexIndex][getEdgeCharsetEncoderIndex(edge)][getCompactedOrdinal(getEdgeMode(edge))] = new
ArrayList<ResultList>(); ArrayList<>();
} }
vertices[vertexIndex][getEdgeCharsetEncoderIndex(edge)][getCompactedOrdinal(getEdgeMode(edge))].add(edge); vertices[vertexIndex][getEdgeCharsetEncoderIndex(edge)][getCompactedOrdinal(getEdgeMode(edge))].add(edge);
// if (DEBUG) {
// if (previous == null) {
// System.err.println("DEBUG adding edge " + edge + " from " + edge.getPosition() + " to " + vertexIndex +
// " with an accumulated size of " + edge.getSize());
// } else {
// System.err.println("DEBUG adding edge " + edge + " from " + vertexToString(previous.getPosition(), previous)
// + " to " + vertexToString(vertexIndex, edge) + " with an accumulated size of " + edge.getSize());
// }
// }
if (previous != null) { if (previous != null) {
edge.addFirst(previous); edge.addFirst(previous);
} }
@ -416,241 +379,136 @@ final class MinimalEncoder {
} }
} }
// String vertexToString(int position, ResultList rl) {
// return (position >= stringToEncode.length() ? "end vertex" : "vertex for character '" +
// stringToEncode.charAt(position) + "' at position " + position) + " with encoding " +
// encoders[getEdgeCharsetEncoderIndex(rl)].charset().name() + " and mode " + getEdgeMode(rl);
// }
// void printEdges(ArrayList<ResultList>[][][] vertices) {
//
// final boolean showCompacted = true;
//
// boolean willHaveECI = encoders.length > 1;
// ArrayList<String> edgeStrings = new ArrayList<String>();
// int inputLength = stringToEncode.length();
// for (int i = 1; i <= inputLength; i++) {
// for (int j = 0; j < encoders.length; j++) {
// for (int k = 0; k < 4; k++) {
// if (vertices[i][j][k] != null) {
// ArrayList<ResultList> edges = vertices[i][j][k];
// assert edges.size() > 0;
// if (edges.size() > 0) {
// ResultList edge = edges.get(0);
// String vertexKey = "" + i + "_" + getEdgeMode(edge) + (willHaveECI ? "_" +
// encoders[getEdgeCharsetEncoderIndex(edge)].charset().name() : "");
// int fromPosition = getEdgePosition(edge);
// ResultList.ResultNode previous = getEdgePrevious(edge);
// String fromKey = previous == null ? "initial" : "" + fromPosition + "_" + previous.mode +
// (willHaveECI ? "_" + encoders[previous.charsetEncoderIndex].charset().name() : "");
// int toPosition = fromPosition + getEdgeLength(edge);
// edgeStrings.add("(" + fromKey + ") -- " + getEdgeMode(edge) + (toPosition -
// fromPosition > 0 ? "(" + stringToEncode.substring(fromPosition, toPosition) +
// ")" : "") + " (" + edge.getSize() + ")" + " --> " + "(" + vertexKey + ")");
// }
// }
// }
// }
// }
//
// if (showCompacted) {
// boolean modifiedSomething;
// do {
// modifiedSomething = false;
// for (Iterator<String> it = edgeStrings.iterator(); it.hasNext();) {
// String edge = it.next();
// if (edge.startsWith("(initial)")) {
// int pos = edge.lastIndexOf("--> (");
// String toKey = edge.substring(pos + 4);
// int cnt = 0;
// for (Iterator<String> it1 = edgeStrings.iterator(); it1.hasNext();) {
// String edge1 = it1.next();
// String fromKey = edge1.substring(0, edge1.indexOf(')') + 1);
// if (fromKey.equals(toKey)) {
// cnt++;
// }
// }
// for (Iterator<String> it1 = edgeStrings.iterator(); it1.hasNext();) {
// String edge1 = it1.next();
// String fromKey = edge1.substring(0, edge1.indexOf(')') + 1);
// if (fromKey.equals(toKey)) {
// modifiedSomething = true;
// if (cnt == 1) {
// edgeStrings.remove(edgeStrings.indexOf(edge));
// }
// edgeStrings.remove(edgeStrings.indexOf(edge1));
// edgeStrings.add(edge.substring(0, pos + 4) + edge1);
// break;
// }
// }
// if (modifiedSomething) {
// break;
// }
// }
// }
// } while (modifiedSomething);
// }
//
// for (Iterator<String> it = edgeStrings.iterator(); it.hasNext();) {
// System.err.println("DEBUG " + it.next());
// }
// }
ResultList encode(Version version) throws WriterException { ResultList encode(Version version) throws WriterException {
/* A vertex represents a tuple of a position in the input, a mode and a character encoding where position 0 @SuppressWarnings("checkstyle:lineLength")
* denotes the position left of the first character, 1 the position left of the second character and so on. /* A vertex represents a tuple of a position in the input, a mode and a character encoding where position 0
* Likewise the end vertices are located after the last character at position stringToEncode.length(). * denotes the position left of the first character, 1 the position left of the second character and so on.
* * Likewise the end vertices are located after the last character at position stringToEncode.length().
* An edge leading to such a vertex encodes one or more of the characters left of the position that the vertex *
* represents and encodes it in the same encoding and mode as the vertex on which the edge ends. In other words, * An edge leading to such a vertex encodes one or more of the characters left of the position that the vertex
* all edges leading to a particular vertex encode the same characters in the same mode with the same character * represents and encodes it in the same encoding and mode as the vertex on which the edge ends. In other words,
* encoding. They differ only by their source vertices who are all located at i+1 minus the number of encoded * all edges leading to a particular vertex encode the same characters in the same mode with the same character
* characters. * encoding. They differ only by their source vertices who are all located at i+1 minus the number of encoded
* * characters.
* The edges leading to a vertex are stored in such a way that there is a fast way to enumerate the edges ending on a *
* particular vertex. * The edges leading to a vertex are stored in such a way that there is a fast way to enumerate the edges ending
* * on a particular vertex.
* The algorithm processes the vertices in order of their position therby performing the following: *
* * The algorithm processes the vertices in order of their position thereby performing the following:
* For every vertex at position i the algorithm enumerates the edges ending on the vertex and removes all but the *
* shortest from that list. * For every vertex at position i the algorithm enumerates the edges ending on the vertex and removes all but the
* Then it processes the vertices for the position i+1. If i+1 == stringToEncode.length() then the algorithm ends * shortest from that list.
* and chooses the edge with the smallest size from any of the edges leading to vertices at this position. * Then it processes the vertices for the position i+1. If i+1 == stringToEncode.length() then the algorithm ends
* Otherwise the algorithm computes all possible outgoing edges for the vertices at the position i+1 * and chooses the edge with the smallest size from any of the edges leading to vertices at this position.
* * Otherwise the algorithm computes all possible outgoing edges for the vertices at the position i+1
* Examples: *
* The process is illustrated by showing the graph (edges) after each iteration from left to right over the input: * Examples:
* An edge is drawn as follows "(" + fromVertex + ") -- " + encodingMode + "(" + encodedInput + ") (" + * The process is illustrated by showing the graph (edges) after each iteration from left to right over the input:
* accumulatedSize + ") --> (" + toVertex + ")" * An edge is drawn as follows "(" + fromVertex + ") -- " + encodingMode + "(" + encodedInput + ") (" +
* * accumulatedSize + ") --> (" + toVertex + ")"
* The coding conventions of this project require lines to not exceed 120 characters. In order to view the examples *
* below join lines that end with a backslash. This can be achieved by running the command * The coding conventions of this project require lines to not exceed 120 characters. In order to view the examples
* sed -e ':a' -e 'N' -e '$!ba' -e 's/\\\n *[*]/ /g' on this file. * below join lines that end with a backslash. This can be achieved by running the command
* * sed -e ':a' -e 'N' -e '$!ba' -e 's/\\\n *[*]/ /g' on this file.
* Example 1 encoding the string "ABCDE": *
* * Example 1 encoding the string "ABCDE":
* Initial situation *
* (initial) -- BYTE(A) (20) --> (1_BYTE) * Initial situation
* (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) * (initial) -- BYTE(A) (20) --> (1_BYTE)
* * (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC)
* Situation after adding edges to vertices at position 1 *
* (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) * Situation after adding edges to vertices at position 1
* (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC) * (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE)
* (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) * (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC)
* * (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC)
* Situation after adding edges to vertices at position 2 *
* (initial) -- BYTE(A) (20) --> (1_BYTE) * Situation after adding edges to vertices at position 2
* (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) * (initial) -- BYTE(A) (20) --> (1_BYTE)
* (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) * (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC)
* (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC) * (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE)
* (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) -- BYTE(C) (44) --> (3_BYTE) * (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC)
* (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) \ * (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) -- BYTE(C) (44) --> (3_BYTE)
* (35) --> (4_ALPHANUMERIC) * (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) (35) --> (4_ALPHANUMERIC)
* *
* Situation after adding edges to vertices at position 3 * Situation after adding edges to vertices at position 3
* (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C) (36) --> (3_BYTE) * (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C) (36) --> (3_BYTE)
* (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC) -- \ * (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC) -- BYTE(D) (64) --> (4_BYTE)
*BYTE(D) (64) --> (4_BYTE) * (3_ALPHANUMERIC) -- ALPHANUMERIC(DE) (55) --> (5_ALPHANUMERIC)
* (3_ALPHANUMERIC) -- \ * (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) (35) --> (4_ALPHANUMERIC)
*ALPHANUMERIC(DE) (55) --> (5_ALPHANUMERIC) * (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) (35) --> (4_ALPHANUMERIC)
* (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) \ *
* (35) --> (4_ALPHANUMERIC) * Situation after adding edges to vertices at position 4
* (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) \ * (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C) (36) --> (3_BYTE) -- BYTE(D) (44) --> (4_BYTE)
* (35) --> (4_ALPHANUMERIC) * (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC) -- ALPHANUMERIC(DE) (55) --> (5_ALPHANUMERIC)
* * (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) (35) --> (4_ALPHANUMERIC) -- BYTE(E) (55) --> (5_BYTE)
* Situation after adding edges to vertices at position 4 *
* (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C) (36) --> (3_BYTE) -- BYTE(D) \ * Situation after adding edges to vertices at position 5
*(44) --> (4_BYTE) * (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C) (36) --> (3_BYTE) -- BYTE(D) (44) --> (4_BYTE) -- BYTE(E) (52) --> (5_BYTE)
* (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC) -- \ * (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC) -- ALPHANUMERIC(DE) (55) --> (5_ALPHANUMERIC)
*ALPHANUMERIC(DE) (55) --> (5_ALPHANUMERIC) * (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) (35) --> (4_ALPHANUMERIC)
* (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) \ *
* (35) --> (4_ALPHANUMERIC) -- BYTE(E) (55) --> (5_BYTE) * Encoding as BYTE(ABCDE) has the smallest size of 52 and is hence chosen. The encodation ALPHANUMERIC(ABCD),
* * BYTE(E) is longer with a size of 55.
* Situation after adding edges to vertices at position 5 *
* (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C) (36) --> (3_BYTE) -- BYTE(D) \ * Example 2 encoding the string "XXYY" where X denotes a character unique to character set ISO-8859-2 and Y a
* (44) --> (4_BYTE) -- BYTE(E) (52) --> (5_BYTE) * character unique to ISO-8859-3. Both characters encode as double byte in UTF-8:
* (1_BYTE) -- ALPHANUMERIC(BC) (44) --> (3_ALPHANUMERIC) -- \ *
*ALPHANUMERIC(DE) (55) --> (5_ALPHANUMERIC) * Initial situation
* (initial) -- ALPHANUMERIC(AB) (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD) \ * (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2)
* (35) --> (4_ALPHANUMERIC) * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8)
* * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE)
* Encoding as BYTE(ABCDE) has the smallest size of 52 and is hence chosen. The encodation ALPHANUMERIC(ABCD), BYTE(E) *
* is longer with a size of 55. * Situation after adding edges to vertices at position 1
* * (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2)
* Example 2 encoding the string "XXYY" where X denotes a character unique to character set ISO-8859-2 and Y a * (1_BYTE_ISO-8859-2) -- BYTE(X) (72) --> (2_BYTE_UTF-8)
* character unique to ISO-8859-3. Both characters encode as double byte in UTF-8: * (1_BYTE_ISO-8859-2) -- BYTE(X) (72) --> (2_BYTE_UTF-16BE)
* * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8)
* Initial situation * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE)
* (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) *
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) * Situation after adding edges to vertices at position 2
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) * (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2)
* * (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYTE_ISO-8859-3)
* Situation after adding edges to vertices at position 1 * (2_BYTE_ISO-8859-2) -- BYTE(Y) (80) --> (3_BYTE_UTF-8)
* (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2) * (2_BYTE_ISO-8859-2) -- BYTE(Y) (80) --> (3_BYTE_UTF-16BE)
* (1_BYTE_ISO-8859-2) -- BYTE(X) (72) --> (2_BYTE_UTF-8) * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8)
* (1_BYTE_ISO-8859-2) -- BYTE(X) (72) --> (2_BYTE_UTF-16BE) * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE)
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) *
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) * Situation after adding edges to vertices at position 3
* * (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYTE_ISO-8859-3)
* Situation after adding edges to vertices at position 2 * (3_BYTE_ISO-8859-3) -- BYTE(Y) (80) --> (4_BYTE_ISO-8859-3)
* (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2) * (3_BYTE_ISO-8859-3) -- BYTE(Y) (112) --> (4_BYTE_UTF-8)
* (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYT\ * (3_BYTE_ISO-8859-3) -- BYTE(Y) (112) --> (4_BYTE_UTF-16BE)
*E_ISO-8859-3) * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8) -- BYTE(Y) (72) --> (3_BYTE_UTF-8)
* (2_BYTE_ISO-8859-2) -- BYTE(Y) (80) --> (3_BYT\ * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE) -- BYTE(Y) (72) --> (3_BYTE_UTF-16BE)
*E_UTF-8) *
* (2_BYTE_ISO-8859-2) -- BYTE(Y) (80) --> (3_BYT\ * Situation after adding edges to vertices at position 4
*E_UTF-16BE) * (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYTE_ISO-8859-3) -- BYTE(Y) (80) --> (4_BYTE_ISO-8859-3)
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8) * (3_BYTE_UTF-8) -- BYTE(Y) (88) --> (4_BYTE_UTF-8)
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE) * (3_BYTE_UTF-16BE) -- BYTE(Y) (88) --> (4_BYTE_UTF-16BE)
* * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8) -- BYTE(Y) (72) --> (3_BYTE_UTF-8)
* Situation after adding edges to vertices at position 3 * (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE) -- BYTE(Y) (72) --> (3_BYTE_UTF-16BE)
* (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYT\ *
*E_ISO-8859-3) * Encoding as ECI(ISO-8859-2),BYTE(XX),ECI(ISO-8859-3),BYTE(YY) has the smallest size of 80 and is hence chosen.
* (3_BYT\ * The encodation ECI(UTF-8),BYTE(XXYY) is longer with a size of 88.
*E_ISO-8859-3) -- BYTE(Y) (80) --> (4_BYTE_ISO-8859-3) */
* (3_BYT\
*E_ISO-8859-3) -- BYTE(Y) (112) --> (4_BYTE_UTF-8)
* (3_BYT\
*E_ISO-8859-3) -- BYTE(Y) (112) --> (4_BYTE_UTF-16BE)
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8) -- BYTE(Y) (72) --> (3_BYTE_UTF-8)
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE) -- BYTE(Y) (72) --> (3_BYTE_UT\
*F-16BE)
*
* Situation after adding edges to vertices at position 4
* (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYT\
*E_ISO-8859-3) -- BYTE(Y) (80) --> (4_BYTE_ISO-8859-3)
* (3_BYT\
*E_UTF-8) -- BYTE(Y) (88) --> (4_BYTE_UTF-8)
* (3_BYT\
*E_UTF-16BE) -- BYTE(Y) (88) --> (4_BYTE_UTF-16BE)
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8) -- BYTE(Y) (72) --> (3_BYTE_UTF-8)
* (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE) -- BYTE(Y) (72) --> (3_BYTE_UT\
*F-16BE)
*
* Encoding as ECI(ISO-8859-2),BYTE(XX),ECI(ISO-8859-3),BYTE(YY) has the smallest size of 80 and is hence chosen. The
* encodation ECI(UTF-8),BYTE(XXYY) is longer with a size of 88.
*/
int inputLength = stringToEncode.length(); int inputLength = stringToEncode.length();
//Array that represents vertices. There is a vertex for every character, encoding and mode. The vertex contains a list // Array that represents vertices. There is a vertex for every character, encoding and mode. The vertex contains
//of all edges that lead to it that have the same encoding and mode. // a list of all edges that lead to it that have the same encoding and mode.
//The lists are created lazily // The lists are created lazily
//The last dimension in the array below encodes the 4 modes KANJI, ALPHANUMERIC, NUMERIC and BYTE via the // The last dimension in the array below encodes the 4 modes KANJI, ALPHANUMERIC, NUMERIC and BYTE via the
//function getCompactedOrdinal(Mode) // function getCompactedOrdinal(Mode)
@SuppressWarnings("unchecked")
ArrayList<ResultList>[][][] vertices = new ArrayList[inputLength + 1][encoders.length][4]; ArrayList<ResultList>[][][] vertices = new ArrayList[inputLength + 1][encoders.length][4];
addEdges(version, vertices, 0, null); addEdges(version, vertices, 0, null);
// if (DEBUG) {
// System.err.println("DEBUG computing solution for " + getVersionSize(version));
// System.err.println("DEBUG Initial situation");
// printEdges(vertices);
// }
for (int i = 1; i <= inputLength; i++) { for (int i = 1; i <= inputLength; i++) {
for (int j = 0; j < encoders.length; j++) { for (int j = 0; j < encoders.length; j++) {
for (int k = 0; k < 4; k++) { for (int k = 0; k < 4; k++) {
ResultList minimalEdge = null; ResultList minimalEdge;
if (vertices[i][j][k] != null) { if (vertices[i][j][k] != null) {
ArrayList<ResultList> edges = vertices[i][j][k]; ArrayList<ResultList> edges = vertices[i][j][k];
if (edges.size() == 1) { //Optimization: if there is only one edge then that's the minimal one if (edges.size() == 1) { //Optimization: if there is only one edge then that's the minimal one
@ -665,30 +523,17 @@ final class MinimalEncoder {
minimalSize = edge.getSize(); minimalSize = edge.getSize();
} }
} }
assert minimalIndex != -1;
minimalEdge = edges.get(minimalIndex); minimalEdge = edges.get(minimalIndex);
edges.clear(); edges.clear();
edges.add(minimalEdge); edges.add(minimalEdge);
} }
if (i < inputLength) { if (i < inputLength) {
assert minimalEdge != null;
// if (DEBUG && minimalEdge != null) {
// System.err.println("DEBUG processing " + vertexToString(i, minimalEdge) +
// ". The minimal edge leading to this vertex is " + minimalEdge + " with a size of "
// + minimalEdge.getSize());
// }
addEdges(version, vertices, i, minimalEdge); addEdges(version, vertices, i, minimalEdge);
} }
} }
} }
} }
// if (DEBUG) {
// System.err.println("DEBUG situation after adding edges to vertices at position " + i);
// printEdges(vertices);
// }
} }
int minimalJ = -1; int minimalJ = -1;
int minimalK = -1; int minimalK = -1;
@ -697,7 +542,6 @@ final class MinimalEncoder {
for (int k = 0; k < 4; k++) { for (int k = 0; k < 4; k++) {
if (vertices[inputLength][j][k] != null) { if (vertices[inputLength][j][k] != null) {
ArrayList<ResultList> edges = vertices[inputLength][j][k]; ArrayList<ResultList> edges = vertices[inputLength][j][k];
assert edges.size() == 1;
ResultList edge = edges.get(0); ResultList edge = edges.get(0);
if (edge.getSize() < minimalSize) { if (edge.getSize() < minimalSize) {
minimalSize = edge.getSize(); minimalSize = edge.getSize();
@ -707,17 +551,10 @@ final class MinimalEncoder {
} }
} }
} }
assert minimalJ != -1; if (minimalJ < 0) {
if (minimalJ >= 0) {
// if (DEBUG) {
// System.err.println("DEBUG the minimal solution for version " + version + " is " + vertices[inputLength]
// [minimalJ][minimalK].get(0));
// }
return vertices[inputLength][minimalJ][minimalK].get(0);
} else {
throw new WriterException("Internal error: failed to encode"); throw new WriterException("Internal error: failed to encode");
} }
return vertices[inputLength][minimalJ][minimalK].get(0);
} }
byte[] getBytesOfCharacter(int position, int charsetEncoderIndex) { byte[] getBytesOfCharacter(int position, int charsetEncoderIndex) {
@ -755,11 +592,10 @@ final class MinimalEncoder {
ResultNode next = getFirst(); ResultNode next = getFirst();
if (next != null) { if (next != null) {
next.declaresMode = true; next.declaresMode = n.mode != next.mode ||
if (n.mode == next.mode && next.mode != Mode.ECI && n.getCharacterLength() + next.getCharacterLength() < next.mode == Mode.ECI ||
getMaximumNumberOfEncodeableCharacters(version, next.mode)) { n.getCharacterLength() + next.getCharacterLength() >=
next.declaresMode = false; getMaximumNumberOfEncodeableCharacters(version, next.mode);
}
} }
super.addFirst(n); super.addFirst(n);
@ -770,38 +606,8 @@ final class MinimalEncoder {
*/ */
int getSize() { int getSize() {
int result = 0; int result = 0;
for (Iterator<ResultNode> it = iterator(); it.hasNext();) { for (ResultNode resultNode : this) {
result += it.next().getSize(); result += resultNode.getSize();
}
return result;
}
/**
* returns the start position
*/
private int getPosition() {
return getFirst() != null ? getFirst().position : 0;
}
/**
* returns the length in characters
*/
int getCharacterLength() {
int result = 0;
for (Iterator<ResultNode> it = iterator(); it.hasNext();) {
result += it.next().getCharacterLength();
}
return result;
}
/**
* returns the length in characters according to the specification (differs from getCharacterLength() in BYTE mode
* for multi byte encoded characters)
*/
int getCharacterCountIndicator() {
int result = 0;
for (Iterator<ResultNode> it = iterator(); it.hasNext();) {
result += it.next().getCharacterCountIndicator();
} }
return result; return result;
} }
@ -851,12 +657,12 @@ final class MinimalEncoder {
upperLimit = 40; upperLimit = 40;
break; break;
} }
//increase version if needed // increase version if needed
while (versionNumber < upperLimit && !Encoder.willFit(getSize(), Version.getVersionForNumber(versionNumber), while (versionNumber < upperLimit && !Encoder.willFit(getSize(), Version.getVersionForNumber(versionNumber),
ecLevel)) { ecLevel)) {
versionNumber++; versionNumber++;
} }
//shrink version if possible // shrink version if possible
while (versionNumber > lowerLimit && Encoder.willFit(getSize(), Version.getVersionForNumber(versionNumber - 1), while (versionNumber > lowerLimit && Encoder.willFit(getSize(), Version.getVersionForNumber(versionNumber - 1),
ecLevel)) { ecLevel)) {
versionNumber--; versionNumber--;
@ -867,8 +673,7 @@ final class MinimalEncoder {
public String toString() { public String toString() {
StringBuilder result = new StringBuilder(); StringBuilder result = new StringBuilder();
ResultNode previous = null; ResultNode previous = null;
for (Iterator<ResultNode> it = iterator(); it.hasNext();) { for (ResultNode current : this) {
ResultNode current = it.next();
if (previous != null) { if (previous != null) {
if (current.declaresMode) { if (current.declaresMode) {
result.append(")"); result.append(")");
@ -893,8 +698,6 @@ final class MinimalEncoder {
private final int length; private final int length;
ResultNode(Mode mode, int position, int charsetEncoderIndex, int length) { ResultNode(Mode mode, int position, int charsetEncoderIndex, int length) {
assert mode != null;
this.mode = mode; this.mode = mode;
this.position = position; this.position = position;
this.charsetEncoderIndex = charsetEncoderIndex; this.charsetEncoderIndex = charsetEncoderIndex;
@ -956,7 +759,7 @@ final class MinimalEncoder {
public String toString() { public String toString() {
StringBuilder result = new StringBuilder(); StringBuilder result = new StringBuilder();
if (declaresMode) { if (declaresMode) {
result.append(mode + "("); result.append(mode).append('(');
} }
if (mode == Mode.ECI) { if (mode == Mode.ECI) {
result.append(encoders[charsetEncoderIndex].charset().displayName()); result.append(encoders[charsetEncoderIndex].charset().displayName());
@ -967,15 +770,15 @@ final class MinimalEncoder {
} }
private String makePrintable(String s) { private String makePrintable(String s) {
String result = ""; StringBuilder result = new StringBuilder();
for (int i = 0; i < s.length(); i++) { for (int i = 0; i < s.length(); i++) {
if (s.charAt(i) < 32 || s.charAt(i) > 126) { if (s.charAt(i) < 32 || s.charAt(i) > 126) {
result += "."; result.append('.');
} else { } else {
result += s.charAt(i); result.append(s.charAt(i));
} }
} }
return result; return result.toString();
} }
} }
} }

View file

@ -175,7 +175,7 @@ final class DecodeWorker implements Callable<Integer> {
} }
rawData.setLength(rawData.length() - 1); // chop off final space rawData.setLength(rawData.length() - 1); // chop off final space
output.write("Raw bits:\n" + rawData.toString() + "\n"); output.write("Raw bits:\n" + rawData + "\n");
} }
ResultPoint[] resultPoints = result.getResultPoints(); ResultPoint[] resultPoints = result.getResultPoints();

View file

@ -442,7 +442,7 @@
<dependency> <dependency>
<groupId>com.puppycrawl.tools</groupId> <groupId>com.puppycrawl.tools</groupId>
<artifactId>checkstyle</artifactId> <artifactId>checkstyle</artifactId>
<version>9.0</version> <version>9.0.1</version>
</dependency> </dependency>
</dependencies> </dependencies>
</plugin> </plugin>

View file

@ -39,7 +39,7 @@
<dependency> <dependency>
<groupId>com.google.guava</groupId> <groupId>com.google.guava</groupId>
<artifactId>guava</artifactId> <artifactId>guava</artifactId>
<version>30.1.1-android</version> <version>31.0.1-android</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>junit</groupId> <groupId>junit</groupId>
@ -73,7 +73,7 @@
</parent> </parent>
<properties> <properties>
<spring.version>5.3.9</spring.version> <spring.version>5.3.10</spring.version>
</properties> </properties>
<build> <build>