fix binary shift cost heuristics (#1044)

* fix binary shift cost heuristics

* corrected some indents
This commit is contained in:
Andreas Niederle 2018-07-23 15:59:35 +02:00 committed by Sean Owen
parent 48bb5fd860
commit e1b127c13a
2 changed files with 55 additions and 7 deletions

View file

@ -137,12 +137,15 @@ final class State {
// Returns true if "this" state is better (or equal) to be in than the "other"
// state under all possible circumstances.
//
// The comparison charges "this" state with the LATCH_TABLE entry for
// this.mode -> other.mode (shifted right by 16) plus an adjustment for the
// binary shift (B/S) bytes each state carries, and checks that the total does
// not exceed other.bitCount.
//
// NOTE(review): this listing appears to interleave the pre-change lines
// (the "mySize" computation and the first return) with the post-change lines
// (the "newModeBitCount" computation and the second return) of the commit;
// read the two variants separately -- TODO confirm against the actual file.
boolean isBetterThanOrEqualTo(State other) {
// Earlier heuristic: this state's bit count plus the latch table term.
int mySize = this.bitCount + (HighLevelEncoder.LATCH_TABLE[this.mode][other.mode] >> 16);
// Earlier heuristic: add a flat 10 bits whenever "other" holds B/S bytes and
// "this" holds either none or more of them than "other".
if (other.binaryShiftByteCount > 0 &&
(this.binaryShiftByteCount == 0 || this.binaryShiftByteCount > other.binaryShiftByteCount)) {
mySize += 10; // Cost of entering Binary Shift mode.
// Revised heuristic: same starting point as above (bit count plus latch table term).
int newModeBitCount = this.bitCount + (HighLevelEncoder.LATCH_TABLE[this.mode][other.mode] >> 16);
if (this.binaryShiftByteCount < other.binaryShiftByteCount) {
// add additional B/S encoding cost of other, if any
// (difference of the two calculateBinaryShiftCost results: other minus this)
newModeBitCount += calculateBinaryShiftCost(other) - calculateBinaryShiftCost(this);
} else if (this.binaryShiftByteCount > other.binaryShiftByteCount && other.binaryShiftByteCount > 0) {
// maximum possible additional cost (we end up exceeding the 31 byte boundary and other state can stay beneath it)
newModeBitCount += 10;
}
// Earlier heuristic's result.
return mySize <= other.bitCount;
// Revised heuristic's result.
return newModeBitCount <= other.bitCount;
}
BitArray toBitArray(byte[] text) {
@ -165,5 +168,18 @@ final class State {
public String toString() {
  // Human-readable summary: mode name, accumulated bit count and the number
  // of binary shift bytes, in that order.
  final Object[] fields = {HighLevelEncoder.MODE_NAMES[mode], bitCount, binaryShiftByteCount};
  return String.format("%s bits=%d bytes=%d", fields);
}
/**
 * Returns the bit overhead charged for the binary shift (B/S) bytes held by
 * the given state, based solely on {@code state.binaryShiftByteCount}.
 *
 * @param state the state whose binary shift byte count is inspected
 * @return 0 when there are no B/S bytes, 10 for 1 to 31 bytes (one B/S),
 *     20 for 32 to 62 bytes (two B/S), and 21 for more than 62 bytes
 *     (B/S with extended length)
 */
private static int calculateBinaryShiftCost(State state) {
  final int byteCount = state.binaryShiftByteCount;
  if (byteCount <= 0) {
    return 0; // nothing in binary shift
  }
  if (byteCount <= 31) {
    return 10; // one B/S
  }
  // 32..62 bytes: two B/S; beyond that: B/S with extended length
  return byteCount <= 62 ? 20 : 21;
}
}

View file

@ -307,7 +307,7 @@ public final class EncoderTest extends Assert {
+ "Ygh6utAIgLl1aBVM4EOTQtMQQYH9M2Z3Dp4qnA/fwWuQ+M8L3V8U=",
823);
}
@Test
public void testHighLevelEncodeBinary() {
// binary short form single byte
@ -361,8 +361,40 @@ public final class EncoderTest extends Assert {
// A lower case letter at both ends will be enough to latch us into LOWER.
testHighLevelEncodeString('a' + sb.substring(0, i) + 'b', expectedLength + 15);
}
sb = new StringBuilder();
for (int i = 0; i < 32; i++) {
sb.append('§'); // § forces binary encoding
}
sb.setCharAt(1, 'A');
// expect B/S(1) A B/S(30)
testHighLevelEncodeString(sb.toString(), 5 + 20 + 31 * 8);
sb = new StringBuilder();
for (int i = 0; i < 31; i++) {
sb.append('§');
}
sb.setCharAt(1, 'A');
// expect B/S(31)
testHighLevelEncodeString(sb.toString(), 10 + 31 * 8);
sb = new StringBuilder();
for (int i = 0; i < 34; i++) {
sb.append('§');
}
sb.setCharAt(1, 'A');
// expect B/S(31) B/S(3)
testHighLevelEncodeString(sb.toString(), 20 + 34 * 8);
sb = new StringBuilder();
for (int i = 0; i < 64; i++) {
sb.append('§');
}
sb.setCharAt(30, 'A');
// expect B/S(64)
testHighLevelEncodeString(sb.toString(), 21 + 64 * 8);
}
@Test
public void testHighLevelEncodePairs() {
// Typical usage