formatting cleanup before trying to make the last failing C++ qr blackbox test pass

git-svn-id: https://zxing.googlecode.com/svn/trunk@1964 59b500cc-1b3d-0410-9834-0bbf25fbcc57
2025-03-05 20:48:51 -08:00 · 2011-10-13 15:20:25 +00:00 · 2011-10-13 15:20:25 +00:00 · beeef242b2
parent 9558d83d71
commit beeef242b2
5 changed files with 329 additions and 321 deletions
--- a/cpp/core/src/zxing/common/BitMatrix.cpp
+++ b/cpp/core/src/zxing/common/BitMatrix.cpp
@ -1,3 +1,4 @@
 // -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*-
 /*
 *  BitMatrix.cpp
 *  zxing
@ -24,34 +25,41 @@
 #include <sstream>
 #include <string>
-namespace zxing {
+using std::numeric_limits;
-using namespace std;
+using std::ostream;
 using std::ostringstream;
-unsigned int logDigits(unsigned digits) {
+using zxing::BitMatrix;
-  unsigned log = 0;
+using zxing::BitArray;
-  unsigned val = 1;
+using zxing::Ref;
-  while (val < digits) {
+
-    log++;
+namespace {
-    val <<= 1;
+  unsigned int logDigits(unsigned digits) {
    unsigned log = 0;
    unsigned val = 1;
    while (val < digits) {
      log++;
      val <<= 1;
    }
    return log;
  }
  return log;
 }
-const unsigned int bitsPerWord = numeric_limits<unsigned int>::digits;
+  const unsigned int bitsPerWord = numeric_limits<unsigned int>::digits;
-const unsigned int logBits = logDigits(bitsPerWord);
+  const unsigned int logBits = logDigits(bitsPerWord);
-const unsigned int bitsMask = (1 << logBits) - 1;
+  const unsigned int bitsMask = (1 << logBits) - 1;
-static size_t wordsForSize(size_t width, size_t height) {
+  size_t wordsForSize(size_t width, size_t height) {
-  size_t bits = width * height;
+    size_t bits = width * height;
-  int arraySize = bits >> logBits;
+    int arraySize = bits >> logBits;
-  if (bits - (arraySize << logBits) != 0) {
+    if (bits - (arraySize << logBits) != 0) {
-    arraySize++;
+      arraySize++;
    }
    return arraySize;
  }
  return arraySize;
 }
 BitMatrix::BitMatrix(size_t dimension) :
-    width_(dimension), height_(dimension), words_(0), bits_(NULL) {
+  width_(dimension), height_(dimension), words_(0), bits_(NULL) {
  words_ = wordsForSize(width_, height_);
  bits_ = new unsigned int[words_];
@ -59,7 +67,7 @@ BitMatrix::BitMatrix(size_t dimension) :
 }
 BitMatrix::BitMatrix(size_t width, size_t height) :
-    width_(width), height_(height), words_(0), bits_(NULL) {
+  width_(width), height_(height), words_(0), bits_(NULL) {
  words_ = wordsForSize(width_, height_);
  bits_ = new unsigned int[words_];
@ -160,19 +168,20 @@ unsigned int* BitMatrix::getBits() const {
  return bits_;
 }
-ostream& operator<<(ostream &out, const BitMatrix &bm) {
+namespace zxing {
-  for (size_t y = 0; y < bm.height_; y++) {
+  ostream& operator<<(ostream &out, const BitMatrix &bm) {
-    for (size_t x = 0; x < bm.width_; x++) {
+    for (size_t y = 0; y < bm.height_; y++) {
-      out << (bm.get(x, y) ? "X " : "  ");
+      for (size_t x = 0; x < bm.width_; x++) {
        out << (bm.get(x, y) ? "X " : "  ");
      }
      out << "\n";
    }
-    out << "\n";
+    return out;
  }
  return out;
 }
-const char *BitMatrix::description() {
+
 const char* BitMatrix::description() {
  ostringstream out;
  out << *this;
  return out.str().c_str();
 }
 }
--- a/cpp/core/src/zxing/common/BitMatrix.h
+++ b/cpp/core/src/zxing/common/BitMatrix.h
@ -1,3 +1,4 @@
 // -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*-
 #ifndef __BIT_MATRIX_H__
 #define __BIT_MATRIX_H__
--- a/cpp/core/src/zxing/qrcode/QRCodeReader.cpp
+++ b/cpp/core/src/zxing/qrcode/QRCodeReader.cpp
@ -1,3 +1,4 @@
 // -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*-
 /*
 *  QRCodeReader.cpp
 *  zxing
--- a/cpp/core/src/zxing/qrcode/QRCodeReader.h
+++ b/cpp/core/src/zxing/qrcode/QRCodeReader.h
@ -1,3 +1,4 @@
 // -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*-
 #ifndef __QR_CODE_READER_H__
 #define __QR_CODE_READER_H__
--- a/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp
+++ b/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp
@ -1,5 +1,5 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*-
- *
+/*
 *  DecodedBitStreamParser.cpp
 *  zxing
 *
@ -35,329 +35,325 @@
 #define ICONV_CONST /**/
 #endif
 using namespace std;
 using namespace zxing;
 using namespace zxing::qrcode;
-namespace zxing {
+const char DecodedBitStreamParser::ALPHANUMERIC_CHARS[] =
-  namespace qrcode {
+{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
  'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  'Y', 'Z', ' ', '$', '%', '*', '+', '-', '.', '/', ':'
 };
-    using namespace std;
+const char *DecodedBitStreamParser::ASCII = "ASCII";
 const char *DecodedBitStreamParser::ISO88591 = "ISO-8859-1";
 const char *DecodedBitStreamParser::UTF8 = "UTF-8";
 const char *DecodedBitStreamParser::SHIFT_JIS = "SHIFT_JIS";
 const char *DecodedBitStreamParser::EUC_JP = "EUC-JP";
-    const char DecodedBitStreamParser::ALPHANUMERIC_CHARS[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
+void DecodedBitStreamParser::append(std::string &result, const unsigned char *bufIn, size_t nIn, const char *src) {
                                                                'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                                                                'Y', 'Z', ' ', '$', '%', '*', '+', '-', '.', '/', ':'
    };
    const char *DecodedBitStreamParser::ASCII = "ASCII";
    const char *DecodedBitStreamParser::ISO88591 = "ISO-8859-1";
    const char *DecodedBitStreamParser::UTF8 = "UTF-8";
    const char *DecodedBitStreamParser::SHIFT_JIS = "SHIFT_JIS";
    const char *DecodedBitStreamParser::EUC_JP = "EUC-JP";
    void DecodedBitStreamParser::append(std::string &result, const unsigned char *bufIn, size_t nIn, const char *src) {
 #ifndef NO_ICONV
-      if (nIn == 0) {
+  if (nIn == 0) {
-        return;
+    return;
-      }
+  }
-      iconv_t cd = iconv_open(UTF8, src);
+  iconv_t cd = iconv_open(UTF8, src);
-      const int maxOut = 4 * nIn + 1;
+  const int maxOut = 4 * nIn + 1;
-      unsigned char* bufOut = new unsigned char[maxOut];
+  unsigned char* bufOut = new unsigned char[maxOut];
-      ICONV_CONST char *fromPtr = (ICONV_CONST char *)bufIn;
+  ICONV_CONST char *fromPtr = (ICONV_CONST char *)bufIn;
-      size_t nFrom = nIn;
+  size_t nFrom = nIn;
-      char *toPtr = (char *)bufOut;
+  char *toPtr = (char *)bufOut;
-      size_t nTo = maxOut;
+  size_t nTo = maxOut;
-      while (nFrom > 0) {
+  while (nFrom > 0) {
-        size_t oneway = iconv(cd, &fromPtr, &nFrom, &toPtr, &nTo);
+    size_t oneway = iconv(cd, &fromPtr, &nFrom, &toPtr, &nTo);
-        if (oneway == (size_t)(-1)) {
+    if (oneway == (size_t)(-1)) {
          iconv_close(cd);
          delete[] bufOut;
          throw ReaderException("error converting characters");
        }
      }
      iconv_close(cd);
      int nResult = maxOut - nTo;
      bufOut[nResult] = '\0';
      result.append((const char *)bufOut);
      delete[] bufOut;
      throw ReaderException("error converting characters");
    }
  }
  iconv_close(cd);
  int nResult = maxOut - nTo;
  bufOut[nResult] = '\0';
  result.append((const char *)bufOut);
  delete[] bufOut;
 #else
-      result.append((const char *)bufIn, nIn);
+  result.append((const char *)bufIn, nIn);
 #endif
 }
 void DecodedBitStreamParser::decodeKanjiSegment(Ref<BitSource> bits, std::string &result, int count) {
  // Each character will require 2 bytes. Read the characters as 2-byte pairs
  // and decode as Shift_JIS afterwards
  size_t nBytes = 2 * count;
  unsigned char* buffer = new unsigned char[nBytes];
  int offset = 0;
  while (count > 0) {
    // Each 13 bits encodes a 2-byte character
    int twoBytes = bits->readBits(13);
    int assembledTwoBytes = ((twoBytes / 0x0C0) << 8) | (twoBytes % 0x0C0);
    if (assembledTwoBytes < 0x01F00) {
      // In the 0x8140 to 0x9FFC range
      assembledTwoBytes += 0x08140;
    } else {
      // In the 0xE040 to 0xEBBF range
      assembledTwoBytes += 0x0C140;
    }
    buffer[offset] = (unsigned char)(assembledTwoBytes >> 8);
    buffer[offset + 1] = (unsigned char)assembledTwoBytes;
    offset += 2;
    count--;
  }
-    void DecodedBitStreamParser::decodeKanjiSegment(Ref<BitSource> bits, std::string &result, int count) {
+  append(result, buffer, nBytes, SHIFT_JIS);
-      // Each character will require 2 bytes. Read the characters as 2-byte pairs
+  delete[] buffer;
-      // and decode as Shift_JIS afterwards
+}
      size_t nBytes = 2 * count;
      unsigned char* buffer = new unsigned char[nBytes];
      int offset = 0;
      while (count > 0) {
        // Each 13 bits encodes a 2-byte character
-        int twoBytes = bits->readBits(13);
+void DecodedBitStreamParser::decodeByteSegment(Ref<BitSource> bits, std::string &result, int count) {
-        int assembledTwoBytes = ((twoBytes / 0x0C0) << 8) | (twoBytes % 0x0C0);
+  int nBytes = count;
-        if (assembledTwoBytes < 0x01F00) {
+  unsigned char* readBytes = new unsigned char[nBytes];
-          // In the 0x8140 to 0x9FFC range
+  if (count << 3 > bits->available()) {
-          assembledTwoBytes += 0x08140;
+    ostringstream s;
-        } else {
+    s << "Count too large: " << count;
-          // In the 0xE040 to 0xEBBF range
+    delete[] readBytes;
-          assembledTwoBytes += 0x0C140;
+    throw ReaderException(s.str().c_str());
-        }
+  }
-        buffer[offset] = (unsigned char)(assembledTwoBytes >> 8);
+  for (int i = 0; i < count; i++) {
-        buffer[offset + 1] = (unsigned char)assembledTwoBytes;
+    readBytes[i] = (unsigned char)bits->readBits(8);
-        offset += 2;
+  }
-        count--;
+  // The spec isn't clear on this mode; see
-      }
+  // section 6.4.5: it does not say which encoding to assume
  // upon decoding. I have seen ISO-8859-1 used as well as
  // Shift_JIS -- without anything like an ECI designator to
  // give a hint.
  const char *encoding = guessEncoding(readBytes, nBytes);
  append(result, readBytes, nBytes, encoding);
  delete[] readBytes;
 }
-      append(result, buffer, nBytes, SHIFT_JIS);
+void DecodedBitStreamParser::decodeNumericSegment(Ref<BitSource> bits, std::string &result, int count) {
-      delete[] buffer;
+  int nBytes = count;
  unsigned char* bytes = new unsigned char[nBytes];
  int i = 0;
  // Read three digits at a time
  while (count >= 3) {
    // Each 10 bits encodes three digits
    if (bits->available() < 10) {
      throw ReaderException("format exception");
    }
-
+    int threeDigitsBits = bits->readBits(10);
-    void DecodedBitStreamParser::decodeByteSegment(Ref<BitSource> bits, std::string &result, int count) {
+    if (threeDigitsBits >= 1000) {
-      int nBytes = count;
+      ostringstream s;
-      unsigned char* readBytes = new unsigned char[nBytes];
+      s << "Illegal value for 3-digit unit: " << threeDigitsBits;
      if (count << 3 > bits->available()) {
        ostringstream s;
        s << "Count too large: " << count;
        delete[] readBytes;
        throw ReaderException(s.str().c_str());
      }
      for (int i = 0; i < count; i++) {
        readBytes[i] = (unsigned char)bits->readBits(8);
      }
      // The spec isn't clear on this mode; see
      // section 6.4.5: it does not say which encoding to assume
      // upon decoding. I have seen ISO-8859-1 used as well as
      // Shift_JIS -- without anything like an ECI designator to
      // give a hint.
      const char *encoding = guessEncoding(readBytes, nBytes);
      append(result, readBytes, nBytes, encoding);
      delete[] readBytes;
    }
    void DecodedBitStreamParser::decodeNumericSegment(Ref<BitSource> bits, std::string &result, int count) {
      int nBytes = count;
      unsigned char* bytes = new unsigned char[nBytes];
      int i = 0;
      // Read three digits at a time
      while (count >= 3) {
        // Each 10 bits encodes three digits
        if (bits->available() < 10) {
          throw ReaderException("format exception");
        }
        int threeDigitsBits = bits->readBits(10);
        if (threeDigitsBits >= 1000) {
          ostringstream s;
          s << "Illegal value for 3-digit unit: " << threeDigitsBits;
          delete[] bytes;
          throw ReaderException(s.str().c_str());
        }
        bytes[i++] = ALPHANUMERIC_CHARS[threeDigitsBits / 100];
        bytes[i++] = ALPHANUMERIC_CHARS[(threeDigitsBits / 10) % 10];
        bytes[i++] = ALPHANUMERIC_CHARS[threeDigitsBits % 10];
        count -= 3;
      }
      if (count == 2) {
        if (bits->available() < 7) {
          throw ReaderException("format exception");
        }
        // Two digits left over to read, encoded in 7 bits
        int twoDigitsBits = bits->readBits(7);
        if (twoDigitsBits >= 100) {
          ostringstream s;
          s << "Illegal value for 2-digit unit: " << twoDigitsBits;
          delete[] bytes;
          throw ReaderException(s.str().c_str());
        }
        bytes[i++] = ALPHANUMERIC_CHARS[twoDigitsBits / 10];
        bytes[i++] = ALPHANUMERIC_CHARS[twoDigitsBits % 10];
      } else if (count == 1) {
        if (bits->available() < 4) {
          throw ReaderException("format exception");
        }
        // One digit left over to read
        int digitBits = bits->readBits(4);
        if (digitBits >= 10) {
          ostringstream s;
          s << "Illegal value for digit unit: " << digitBits;
          delete[] bytes;
          throw ReaderException(s.str().c_str());
        }
        bytes[i++] = ALPHANUMERIC_CHARS[digitBits];
      }
      append(result, bytes, nBytes, ASCII);
      delete[] bytes;
      throw ReaderException(s.str().c_str());
    }
-
+    bytes[i++] = ALPHANUMERIC_CHARS[threeDigitsBits / 100];
-    void DecodedBitStreamParser::decodeAlphanumericSegment(Ref<BitSource> bits, std::string &result, int count) {
+    bytes[i++] = ALPHANUMERIC_CHARS[(threeDigitsBits / 10) % 10];
-      int nBytes = count;
+    bytes[i++] = ALPHANUMERIC_CHARS[threeDigitsBits % 10];
-      unsigned char* bytes = new unsigned char[nBytes];
+    count -= 3;
-      int i = 0;
+  }
-      // Read two characters at a time
+  if (count == 2) {
-      while (count > 1) {
+    if (bits->available() < 7) {
-        int nextTwoCharsBits = bits->readBits(11);
+      throw ReaderException("format exception");
-        bytes[i++] = ALPHANUMERIC_CHARS[nextTwoCharsBits / 45];
+    }
-        bytes[i++] = ALPHANUMERIC_CHARS[nextTwoCharsBits % 45];
+    // Two digits left over to read, encoded in 7 bits
-        count -= 2;
+    int twoDigitsBits = bits->readBits(7);
-      }
+    if (twoDigitsBits >= 100) {
-      if (count == 1) {
+      ostringstream s;
-        bytes[i++] = ALPHANUMERIC_CHARS[bits->readBits(6)];
+      s << "Illegal value for 2-digit unit: " << twoDigitsBits;
      }
      append(result, bytes, nBytes, ASCII);
      delete[] bytes;
      throw ReaderException(s.str().c_str());
    }
    bytes[i++] = ALPHANUMERIC_CHARS[twoDigitsBits / 10];
    bytes[i++] = ALPHANUMERIC_CHARS[twoDigitsBits % 10];
  } else if (count == 1) {
    if (bits->available() < 4) {
      throw ReaderException("format exception");
    }
    // One digit left over to read
    int digitBits = bits->readBits(4);
    if (digitBits >= 10) {
      ostringstream s;
      s << "Illegal value for digit unit: " << digitBits;
      delete[] bytes;
      throw ReaderException(s.str().c_str());
    }
    bytes[i++] = ALPHANUMERIC_CHARS[digitBits];
  }
  append(result, bytes, nBytes, ASCII);
  delete[] bytes;
 }
-    const char *
+void DecodedBitStreamParser::decodeAlphanumericSegment(Ref<BitSource> bits, std::string &result, int count) {
-    DecodedBitStreamParser::guessEncoding(unsigned char *bytes, int length) {
+  int nBytes = count;
-      const bool ASSUME_SHIFT_JIS = false;
+  unsigned char* bytes = new unsigned char[nBytes];
-      char const* const PLATFORM_DEFAULT_ENCODING="UTF-8";
+  int i = 0;
  // Read two characters at a time
  while (count > 1) {
    int nextTwoCharsBits = bits->readBits(11);
    bytes[i++] = ALPHANUMERIC_CHARS[nextTwoCharsBits / 45];
    bytes[i++] = ALPHANUMERIC_CHARS[nextTwoCharsBits % 45];
    count -= 2;
  }
  if (count == 1) {
    bytes[i++] = ALPHANUMERIC_CHARS[bits->readBits(6)];
  }
  append(result, bytes, nBytes, ASCII);
  delete[] bytes;
 }
-      // Does it start with the UTF-8 byte order mark? then guess it's UTF-8
+const char *
-      if (length > 3 && bytes[0] == (unsigned char)0xEF && bytes[1] == (unsigned char)0xBB && bytes[2]
+DecodedBitStreamParser::guessEncoding(unsigned char *bytes, int length) {
-          == (unsigned char)0xBF) {
+  const bool ASSUME_SHIFT_JIS = false;
-        return UTF8;
+  char const* const PLATFORM_DEFAULT_ENCODING="UTF-8";
-      }
+
-      // For now, merely tries to distinguish ISO-8859-1, UTF-8 and Shift_JIS,
+  // Does it start with the UTF-8 byte order mark? then guess it's UTF-8
-      // which should be by far the most common encodings. ISO-8859-1
+  if (length > 3 && bytes[0] == (unsigned char)0xEF && bytes[1] == (unsigned char)0xBB && bytes[2]
-      // should not have bytes in the 0x80 - 0x9F range, while Shift_JIS
+      == (unsigned char)0xBF) {
-      // uses this as a first byte of a two-byte character. If we see this
+    return UTF8;
-      // followed by a valid second byte in Shift_JIS, assume it is Shift_JIS.
+  }
-      // If we see something else in that second byte, we'll make the risky guess
+  // For now, merely tries to distinguish ISO-8859-1, UTF-8 and Shift_JIS,
-      // that it's UTF-8.
+  // which should be by far the most common encodings. ISO-8859-1
-      bool canBeISO88591 = true;
+  // should not have bytes in the 0x80 - 0x9F range, while Shift_JIS
-      bool canBeShiftJIS = true;
+  // uses this as a first byte of a two-byte character. If we see this
-      bool canBeUTF8 = true;
+  // followed by a valid second byte in Shift_JIS, assume it is Shift_JIS.
-      int utf8BytesLeft = 0;
+  // If we see something else in that second byte, we'll make the risky guess
-      int maybeDoubleByteCount = 0;
+  // that it's UTF-8.
-      int maybeSingleByteKatakanaCount = 0;
+  bool canBeISO88591 = true;
-      bool sawLatin1Supplement = false;
+  bool canBeShiftJIS = true;
-      bool sawUTF8Start = false;
+  bool canBeUTF8 = true;
-      bool lastWasPossibleDoubleByteStart = false;
+  int utf8BytesLeft = 0;
-      for (int i = 0;
+  int maybeDoubleByteCount = 0;
-           i < length && (canBeISO88591 || canBeShiftJIS || canBeUTF8);
+  int maybeSingleByteKatakanaCount = 0;
-           i++) {
+  bool sawLatin1Supplement = false;
-        int value = bytes[i] & 0xFF;
+  bool sawUTF8Start = false;
-
+  bool lastWasPossibleDoubleByteStart = false;
-        // UTF-8 stuff
+  for (int i = 0;
-        if (value >= 0x80 && value <= 0xBF) {
+       i < length && (canBeISO88591 || canBeShiftJIS || canBeUTF8);
-          if (utf8BytesLeft > 0) {
+       i++) {
-            utf8BytesLeft--;
+    int value = bytes[i] & 0xFF;
-          }
+
-        } else {
+    // UTF-8 stuff
-          if (utf8BytesLeft > 0) {
+    if (value >= 0x80 && value <= 0xBF) {
-            canBeUTF8 = false;
+      if (utf8BytesLeft > 0) {
-          }
+        utf8BytesLeft--;
          if (value >= 0xC0 && value <= 0xFD) {
            sawUTF8Start = true;
            int valueCopy = value;
            while ((valueCopy & 0x40) != 0) {
              utf8BytesLeft++;
              valueCopy <<= 1;
            }
          }
        }
        // Shift_JIS stuff
        if (value >= 0xA1 && value <= 0xDF) {
          // count the number of characters that might be a Shift_JIS single-byte Katakana character
          if (!lastWasPossibleDoubleByteStart) {
            maybeSingleByteKatakanaCount++;
          }
        }
        if (!lastWasPossibleDoubleByteStart &&
            ((value >= 0xF0 && value <= 0xFF) || value == 0x80 || value == 0xA0)) {
          canBeShiftJIS = false;
        }
        if (((value >= 0x81 && value <= 0x9F) || (value >= 0xE0 && value <= 0xEF))) {
          // These start double-byte characters in Shift_JIS. Let's see if it's followed by a valid
          // second byte.
          if (lastWasPossibleDoubleByteStart) {
            // If we just checked this and the last byte for being a valid double-byte
            // char, don't check starting on this byte. If this and the last byte
            // formed a valid pair, then this shouldn't be checked to see if it starts
            // a double byte pair of course.
            lastWasPossibleDoubleByteStart = false;
          } else {
            // ... otherwise do check to see if this plus the next byte form a valid
            // double byte pair encoding a character.
            lastWasPossibleDoubleByteStart = true;
            if (i >= length - 1) {
              canBeShiftJIS = false;
            } else {
              int nextValue = bytes[i + 1] & 0xFF;
              if (nextValue < 0x40 || nextValue > 0xFC) {
                canBeShiftJIS = false;
              } else {
                maybeDoubleByteCount++;
              }
              // There is some conflicting information out there about which bytes can follow which in
              // double-byte Shift_JIS characters. The rule above seems to be the one that matches practice.
            }
          }
        } else {
          lastWasPossibleDoubleByteStart = false;
        }
      }
    } else {
      if (utf8BytesLeft > 0) {
        canBeUTF8 = false;
      }
-
+      if (value >= 0xC0 && value <= 0xFD) {
-      // Easy -- if assuming Shift_JIS and no evidence it can't be, done
+        sawUTF8Start = true;
-      if (canBeShiftJIS && ASSUME_SHIFT_JIS) {
+        int valueCopy = value;
-        return SHIFT_JIS;
+        while ((valueCopy & 0x40) != 0) {
          utf8BytesLeft++;
          valueCopy <<= 1;
        }
      }
      if (canBeUTF8 && sawUTF8Start) {
        return UTF8;
      }
      // Distinguishing Shift_JIS and ISO-8859-1 can be a little tough. The crude heuristic is:
      // - If we saw
      //   - at least 3 bytes that start a double-byte value (bytes that are rare in ISO-8859-1), or
      //   - over 5% of bytes could be single-byte Katakana (also rare in ISO-8859-1),
      // - and, saw no sequences that are invalid in Shift_JIS, then we conclude Shift_JIS
      if (canBeShiftJIS && (maybeDoubleByteCount >= 3 || 20 * maybeSingleByteKatakanaCount > length)) {
        return SHIFT_JIS;
      }
      // Otherwise, we default to ISO-8859-1 unless we know it can't be
      if (!sawLatin1Supplement && canBeISO88591) {
        return ISO88591;
      }
      // Otherwise, we take a wild guess with platform encoding
      return PLATFORM_DEFAULT_ENCODING;
    }
-    string DecodedBitStreamParser::decode(ArrayRef<unsigned char> bytes, Version *version) {
+    // Shift_JIS stuff
-      string result;
+
-      Ref<BitSource> bits(new BitSource(bytes));
+    if (value >= 0xA1 && value <= 0xDF) {
-      Mode *mode = &Mode::TERMINATOR;
+      // count the number of characters that might be a Shift_JIS single-byte Katakana character
-      do {
+      if (!lastWasPossibleDoubleByteStart) {
-        // While still another segment to read...
+        maybeSingleByteKatakanaCount++;
-        if (bits->available() < 4) {
+      }
-          // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
+    }
-          mode = &Mode::TERMINATOR;
+    if (!lastWasPossibleDoubleByteStart &&
        ((value >= 0xF0 && value <= 0xFF) || value == 0x80 || value == 0xA0)) {
      canBeShiftJIS = false;
    }
    if (((value >= 0x81 && value <= 0x9F) || (value >= 0xE0 && value <= 0xEF))) {
      // These start double-byte characters in Shift_JIS. Let's see if it's followed by a valid
      // second byte.
      if (lastWasPossibleDoubleByteStart) {
        // If we just checked this and the last byte for being a valid double-byte
        // char, don't check starting on this byte. If this and the last byte
        // formed a valid pair, then this shouldn't be checked to see if it starts
        // a double byte pair of course.
        lastWasPossibleDoubleByteStart = false;
      } else {
        // ... otherwise do check to see if this plus the next byte form a valid
        // double byte pair encoding a character.
        lastWasPossibleDoubleByteStart = true;
        if (i >= length - 1) {
          canBeShiftJIS = false;
        } else {
-          mode = &Mode::forBits(bits->readBits(4)); // mode is encoded by 4 bits
+          int nextValue = bytes[i + 1] & 0xFF;
-        }
+          if (nextValue < 0x40 || nextValue > 0xFC) {
-        if (mode != &Mode::TERMINATOR) {
+            canBeShiftJIS = false;
          // How many characters will follow, encoded in this mode?
          int count = bits->readBits(mode->getCharacterCountBits(version));
          if (mode == &Mode::NUMERIC) {
            decodeNumericSegment(bits, result, count);
          } else if (mode == &Mode::ALPHANUMERIC) {
            decodeAlphanumericSegment(bits, result, count);
          } else if (mode == &Mode::BYTE) {
            decodeByteSegment(bits, result, count);
          } else if (mode == &Mode::KANJI) {
            decodeKanjiSegment(bits, result, count);
          } else {
-            throw ReaderException("Unsupported mode indicator");
+            maybeDoubleByteCount++;
          }
          // There is some conflicting information out there about which bytes can follow which in
          // double-byte Shift_JIS characters. The rule above seems to be the one that matches practice.
        }
-      } while (mode != &Mode::TERMINATOR);
+      }
-      return result;
+    } else {
      lastWasPossibleDoubleByteStart = false;
    }
  }
  if (utf8BytesLeft > 0) {
    canBeUTF8 = false;
  }
  // Easy -- if assuming Shift_JIS and no evidence it can't be, done
  if (canBeShiftJIS && ASSUME_SHIFT_JIS) {
    return SHIFT_JIS;
  }
  if (canBeUTF8 && sawUTF8Start) {
    return UTF8;
  }
  // Distinguishing Shift_JIS and ISO-8859-1 can be a little tough. The crude heuristic is:
  // - If we saw
  //   - at least 3 bytes that start a double-byte value (bytes that are rare in ISO-8859-1), or
  //   - over 5% of bytes could be single-byte Katakana (also rare in ISO-8859-1),
  // - and, saw no sequences that are invalid in Shift_JIS, then we conclude Shift_JIS
  if (canBeShiftJIS && (maybeDoubleByteCount >= 3 || 20 * maybeSingleByteKatakanaCount > length)) {
    return SHIFT_JIS;
  }
  // Otherwise, we default to ISO-8859-1 unless we know it can't be
  if (!sawLatin1Supplement && canBeISO88591) {
    return ISO88591;
  }
  // Otherwise, we take a wild guess with platform encoding
  return PLATFORM_DEFAULT_ENCODING;
 }
 string DecodedBitStreamParser::decode(ArrayRef<unsigned char> bytes, Version *version) {
  string result;
  Ref<BitSource> bits(new BitSource(bytes));
  Mode *mode = &Mode::TERMINATOR;
  do {
    // While still another segment to read...
    if (bits->available() < 4) {
      // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
      mode = &Mode::TERMINATOR;
    } else {
      mode = &Mode::forBits(bits->readBits(4)); // mode is encoded by 4 bits
    }
    if (mode != &Mode::TERMINATOR) {
      // How many characters will follow, encoded in this mode?
      int count = bits->readBits(mode->getCharacterCountBits(version));
      if (mode == &Mode::NUMERIC) {
        decodeNumericSegment(bits, result, count);
      } else if (mode == &Mode::ALPHANUMERIC) {
        decodeAlphanumericSegment(bits, result, count);
      } else if (mode == &Mode::BYTE) {
        decodeByteSegment(bits, result, count);
      } else if (mode == &Mode::KANJI) {
        decodeKanjiSegment(bits, result, count);
      } else {
        throw ReaderException("Unsupported mode indicator");
      }
    }
  } while (mode != &Mode::TERMINATOR);
  return result;
 }