From 4af95ab471169d6b2064efeb621d0763ceb08f01 Mon Sep 17 00:00:00 2001
From: srowen
Date: Fri, 23 Dec 2011 15:20:18 +0000
Subject: [PATCH] Use better regex-based test for URI

git-svn-id: https://zxing.googlecode.com/svn/trunk@2098 59b500cc-1b3d-0410-9834-0bbf25fbcc57
---
Review notes (ignored when the patch is applied):
 - isBasicallyValidURI() keeps its null guard, so a null argument still yields false
   instead of a NullPointerException from Pattern.matcher().
 - The start-anchored test uses Matcher.lookingAt() rather than find() plus a
   start() == 0 check; java.util.regex.Matcher is therefore not imported.
 - The method keeps a Javadoc block, rewritten to describe the regex-based check.

 .../zxing/client/result/URIResultParser.java  | 67 +++++++------------
 .../result/ParsedReaderResultTestCase.java    |  7 +-
 .../result/URIParsedResultTestCase.java       |  1 +
 3 files changed, 29 insertions(+), 46 deletions(-)

diff --git a/core/src/com/google/zxing/client/result/URIResultParser.java b/core/src/com/google/zxing/client/result/URIResultParser.java
index b81fec7db..0543f3ac7 100644
--- a/core/src/com/google/zxing/client/result/URIResultParser.java
+++ b/core/src/com/google/zxing/client/result/URIResultParser.java
@@ -18,6 +18,8 @@ package com.google.zxing.client.result;
 
 import com.google.zxing.Result;
 
+import java.util.regex.Pattern;
+
 /**
  * Tries to parse results that are a URI of some kind.
  *
@@ -25,6 +27,17 @@ import com.google.zxing.Result;
  */
 public final class URIResultParser extends ResultParser {
 
+  private static final String PATTERN_END =
+      "(:\\d{1,5})?" + // maybe port
+      "(/|\\?|$)"; // query, path or nothing
+  private static final Pattern URL_WITH_PROTOCOL_PATTERN = Pattern.compile(
+      "[a-zA-Z0-9]{2,}://" + // protocol
+      "[a-zA-Z0-9\\-]{2,}(\\.[a-zA-Z0-9\\-]{2,})*" + // host name elements
+      PATTERN_END);
+  private static final Pattern URL_WITHOUT_PROTOCOL_PATTERN = Pattern.compile(
+      "[a-zA-Z0-9\\-]{2,}(\\.[a-zA-Z0-9\\-]{2,})+" + // host name elements
+      PATTERN_END);
+
   @Override
   public URIParsedResult parse(Result result) {
     String rawText = result.getText();
@@ -33,56 +46,26 @@ public final class URIResultParser extends ResultParser {
       rawText = rawText.substring(4);
     }
     rawText = rawText.trim();
-    if (!isBasicallyValidURI(rawText)) {
-      return null;
-    }
-    return new URIParsedResult(rawText, null);
+    return isBasicallyValidURI(rawText) ? new URIParsedResult(rawText, null) : null;
   }
 
   /**
-   * Determines whether a string is not obviously not a URI. This implements crude checks; this class does not
-   * intend to strictly check URIs as its only function is to represent what is in a barcode, but, it does
-   * need to know when a string is obviously not a URI.
+   * Determines whether a string begins with something shaped like a URI: an optional protocol,
+   * a host name (with at least one dot when no protocol is given), an optional port, and then
+   * a path, a query or the end of the input. Only this leading part of the string is tested.
+   *
+   * @param uri text to test, may be {@code null}
+   * @return {@code true} if {@code uri} starts with something shaped like a URI; {@code false}
+   *  otherwise, including when {@code uri} is {@code null}
    */
   static boolean isBasicallyValidURI(CharSequence uri) {
     if (uri == null) {
       return false;
     }
-    int period = -1;
-    int colon = -1;
-    int length = uri.length();
-    for (int i = length - 1; i >= 0; i--) {
-      char c = uri.charAt(i);
-      if (c <= ' ') { // covers space, newline, and more
-        return false;
-      } else if (c == '.') {
-        period = i;
-      } else if (c == ':') {
-        colon = i;
-      }
-    }
-    // Look for period in a domain but followed by at least a two-char TLD
-    // Forget strings that don't have a valid-looking protocol
-    if (period >= uri.length() - 2 || (period <= 0 && colon <= 0)) {
-      return false;
-    }
-    if (colon >= 0) {
-      if (period < 0 || period > colon) {
-        // colon ends the protocol
-        if (!isSubstringOfAlphaNumeric(uri, 0, colon)) {
-          return false;
-        }
-      } else {
-        // colon starts the port; crudely look for at least two numbers
-        if (colon >= uri.length() - 2) {
-          return false;
-        }
-        if (!isSubstringOfDigits(uri, colon + 1, 2)) {
-          return false;
-        }
-      }
-    }
-    return true;
+    // lookingAt() anchors the match at index 0, so input that does not match there is not
+    // scanned again from later offsets as find() followed by a start() == 0 check would do.
+    return URL_WITH_PROTOCOL_PATTERN.matcher(uri).lookingAt() ||
+        URL_WITHOUT_PROTOCOL_PATTERN.matcher(uri).lookingAt();
   }
 
 }
\ No newline at end of file
diff --git a/core/test/src/com/google/zxing/client/result/ParsedReaderResultTestCase.java b/core/test/src/com/google/zxing/client/result/ParsedReaderResultTestCase.java
index a2201a7e7..3bfe8cf45 100644
--- a/core/test/src/com/google/zxing/client/result/ParsedReaderResultTestCase.java
+++ b/core/test/src/com/google/zxing/client/result/ParsedReaderResultTestCase.java
@@ -226,10 +226,9 @@ public final class ParsedReaderResultTestCase extends Assert {
     // Make sure illegal entries without newlines don't crash
     doTestResult(
         "BEGIN:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
-        "begin:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
-        ParsedResultType.URI);
-    // See above note on why this is URI
-    doTestResult("BEGIN:VEVENT", "begin:VEVENT", ParsedResultType.URI);
+        "BEGIN:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
+        ParsedResultType.TEXT);
+    doTestResult("BEGIN:VEVENT", "BEGIN:VEVENT", ParsedResultType.TEXT);
   }
 
   @Test
diff --git a/core/test/src/com/google/zxing/client/result/URIParsedResultTestCase.java b/core/test/src/com/google/zxing/client/result/URIParsedResultTestCase.java
index 48fad27c3..a38adbfe9 100644
--- a/core/test/src/com/google/zxing/client/result/URIParsedResultTestCase.java
+++ b/core/test/src/com/google/zxing/client/result/URIParsedResultTestCase.java
@@ -55,6 +55,7 @@ public final class URIParsedResultTestCase extends Assert {
     doTestNotUri("google.c");
     doTestNotUri(".com");
     doTestNotUri(":80/");
+    doTestNotUri("ABC,20.3,AB,AD");
   }
 
   @Test