Use better regex-based test for URI

git-svn-id: https://zxing.googlecode.com/svn/trunk@2098 59b500cc-1b3d-0410-9834-0bbf25fbcc57
This commit is contained in:
srowen 2011-12-23 15:20:18 +00:00
parent c77baf34a8
commit 4af95ab471
3 changed files with 24 additions and 50 deletions

View file

@ -18,6 +18,9 @@ package com.google.zxing.client.result;
import com.google.zxing.Result;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Tries to parse results that are a URI of some kind.
*
@ -25,6 +28,17 @@ import com.google.zxing.Result;
*/
public final class URIResultParser extends ResultParser {
/**
* Regex fragment appended to both URL patterns in this class. It accepts an optional port,
* written as a colon followed by one to five digits, and then requires a '/', a '?', or
* the end of the input.
*/
private static final String PATTERN_END =
"(:\\d{1,5})?" + // maybe port
"(/|\\?|$)"; // query, path or nothing
/**
* Matches text that names a protocol explicitly: two or more ASCII letters or digits followed
* by "://", then one or more dot-separated host name elements (each at least two characters
* drawn from ASCII letters, digits and '-'), then {@code PATTERN_END}.
* The expression is not anchored; {@code isBasicallyValidURI} only accepts a match that
* starts at index 0.
*/
private static final Pattern URL_WITH_PROTOCOL_PATTERN = Pattern.compile(
"[a-zA-Z0-9]{2,}://" + // protocol
"[a-zA-Z0-9\\-]{2,}(\\.[a-zA-Z0-9\\-]{2,})*" + // host name elements
PATTERN_END);
/**
* Matches text that has no protocol prefix: at least two dot-separated host name elements
* (each at least two characters drawn from ASCII letters, digits and '-'), then
* {@code PATTERN_END}. Unlike the with-protocol pattern, the '+' quantifier makes at least
* one dot mandatory, so a single bare word does not match.
* The expression is not anchored; {@code isBasicallyValidURI} only accepts a match that
* starts at index 0.
*/
private static final Pattern URL_WITHOUT_PROTOCOL_PATTERN = Pattern.compile(
"[a-zA-Z0-9\\-]{2,}(\\.[a-zA-Z0-9\\-]{2,})+" + // host name elements
PATTERN_END);
@Override
public URIParsedResult parse(Result result) {
String rawText = result.getText();
@ -33,56 +47,16 @@ public final class URIResultParser extends ResultParser {
rawText = rawText.substring(4);
}
rawText = rawText.trim();
if (!isBasicallyValidURI(rawText)) {
return null;
}
return new URIParsedResult(rawText, null);
return isBasicallyValidURI(rawText) ? new URIParsedResult(rawText, null) : null;
}
/**
* Determines whether a string could plausibly be a URI. The checks are deliberately crude: this class does not
* intend to validate URIs strictly, since its only function is to represent what is in a barcode, but it does
* need to know when a string is obviously not a URI.
*/
static boolean isBasicallyValidURI(CharSequence uri) {
if (uri == null) {
return false;
Matcher m = URL_WITH_PROTOCOL_PATTERN.matcher(uri);
if (m.find() && m.start() == 0) { // match at start only
return true;
}
int period = -1;
int colon = -1;
int length = uri.length();
for (int i = length - 1; i >= 0; i--) {
char c = uri.charAt(i);
if (c <= ' ') { // covers space, newline, and more
return false;
} else if (c == '.') {
period = i;
} else if (c == ':') {
colon = i;
}
}
// Look for period in a domain but followed by at least a two-char TLD
// Forget strings that don't have a valid-looking protocol
if (period >= uri.length() - 2 || (period <= 0 && colon <= 0)) {
return false;
}
if (colon >= 0) {
if (period < 0 || period > colon) {
// colon ends the protocol
if (!isSubstringOfAlphaNumeric(uri, 0, colon)) {
return false;
}
} else {
// colon starts the port; crudely look for at least two numbers
if (colon >= uri.length() - 2) {
return false;
}
if (!isSubstringOfDigits(uri, colon + 1, 2)) {
return false;
}
}
}
return true;
m = URL_WITHOUT_PROTOCOL_PATTERN.matcher(uri);
return m.find() && m.start() == 0;
}
}

View file

@ -226,10 +226,9 @@ public final class ParsedReaderResultTestCase extends Assert {
// Make sure illegal entries without newlines don't crash
doTestResult(
"BEGIN:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
"begin:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
ParsedResultType.URI);
// See above note on why this is URI
doTestResult("BEGIN:VEVENT", "begin:VEVENT", ParsedResultType.URI);
"BEGIN:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
ParsedResultType.TEXT);
doTestResult("BEGIN:VEVENT", "BEGIN:VEVENT", ParsedResultType.TEXT);
}
@Test

View file

@ -55,6 +55,7 @@ public final class URIParsedResultTestCase extends Assert {
doTestNotUri("google.c");
doTestNotUri(".com");
doTestNotUri(":80/");
doTestNotUri("ABC,20.3,AB,AD");
}
@Test