mirror of
https://github.com/zxing/zxing.git
synced 2025-01-12 19:57:27 -08:00
Use better regex-based test for URI
git-svn-id: https://zxing.googlecode.com/svn/trunk@2098 59b500cc-1b3d-0410-9834-0bbf25fbcc57
This commit is contained in:
parent
c77baf34a8
commit
4af95ab471
|
@ -18,6 +18,9 @@ package com.google.zxing.client.result;
|
||||||
|
|
||||||
import com.google.zxing.Result;
|
import com.google.zxing.Result;
|
||||||
|
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tries to parse results that are a URI of some kind.
|
* Tries to parse results that are a URI of some kind.
|
||||||
*
|
*
|
||||||
|
@ -25,6 +28,17 @@ import com.google.zxing.Result;
|
||||||
*/
|
*/
|
||||||
public final class URIResultParser extends ResultParser {
|
public final class URIResultParser extends ResultParser {
|
||||||
|
|
||||||
|
private static final String PATTERN_END =
|
||||||
|
"(:\\d{1,5})?" + // maybe port
|
||||||
|
"(/|\\?|$)"; // query, path or nothing
|
||||||
|
private static final Pattern URL_WITH_PROTOCOL_PATTERN = Pattern.compile(
|
||||||
|
"[a-zA-Z0-9]{2,}://" + // protocol
|
||||||
|
"[a-zA-Z0-9\\-]{2,}(\\.[a-zA-Z0-9\\-]{2,})*" + // host name elements
|
||||||
|
PATTERN_END);
|
||||||
|
private static final Pattern URL_WITHOUT_PROTOCOL_PATTERN = Pattern.compile(
|
||||||
|
"[a-zA-Z0-9\\-]{2,}(\\.[a-zA-Z0-9\\-]{2,})+" + // host name elements
|
||||||
|
PATTERN_END);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URIParsedResult parse(Result result) {
|
public URIParsedResult parse(Result result) {
|
||||||
String rawText = result.getText();
|
String rawText = result.getText();
|
||||||
|
@ -33,56 +47,16 @@ public final class URIResultParser extends ResultParser {
|
||||||
rawText = rawText.substring(4);
|
rawText = rawText.substring(4);
|
||||||
}
|
}
|
||||||
rawText = rawText.trim();
|
rawText = rawText.trim();
|
||||||
if (!isBasicallyValidURI(rawText)) {
|
return isBasicallyValidURI(rawText) ? new URIParsedResult(rawText, null) : null;
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return new URIParsedResult(rawText, null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Determines whether a string is not obviously not a URI. This implements crude checks; this class does not
|
|
||||||
* intend to strictly check URIs as its only function is to represent what is in a barcode, but, it does
|
|
||||||
* need to know when a string is obviously not a URI.
|
|
||||||
*/
|
|
||||||
static boolean isBasicallyValidURI(CharSequence uri) {
|
static boolean isBasicallyValidURI(CharSequence uri) {
|
||||||
if (uri == null) {
|
Matcher m = URL_WITH_PROTOCOL_PATTERN.matcher(uri);
|
||||||
return false;
|
if (m.find() && m.start() == 0) { // match at start only
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
int period = -1;
|
m = URL_WITHOUT_PROTOCOL_PATTERN.matcher(uri);
|
||||||
int colon = -1;
|
return m.find() && m.start() == 0;
|
||||||
int length = uri.length();
|
|
||||||
for (int i = length - 1; i >= 0; i--) {
|
|
||||||
char c = uri.charAt(i);
|
|
||||||
if (c <= ' ') { // covers space, newline, and more
|
|
||||||
return false;
|
|
||||||
} else if (c == '.') {
|
|
||||||
period = i;
|
|
||||||
} else if (c == ':') {
|
|
||||||
colon = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Look for period in a domain but followed by at least a two-char TLD
|
|
||||||
// Forget strings that don't have a valid-looking protocol
|
|
||||||
if (period >= uri.length() - 2 || (period <= 0 && colon <= 0)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (colon >= 0) {
|
|
||||||
if (period < 0 || period > colon) {
|
|
||||||
// colon ends the protocol
|
|
||||||
if (!isSubstringOfAlphaNumeric(uri, 0, colon)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// colon starts the port; crudely look for at least two numbers
|
|
||||||
if (colon >= uri.length() - 2) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!isSubstringOfDigits(uri, colon + 1, 2)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -226,10 +226,9 @@ public final class ParsedReaderResultTestCase extends Assert {
|
||||||
// Make sure illegal entries without newlines don't crash
|
// Make sure illegal entries without newlines don't crash
|
||||||
doTestResult(
|
doTestResult(
|
||||||
"BEGIN:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
|
"BEGIN:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
|
||||||
"begin:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
|
"BEGIN:VEVENTSUMMARY:EventDTSTART:20081030T122030ZDTEND:20081030T132030ZEND:VEVENT",
|
||||||
ParsedResultType.URI);
|
ParsedResultType.TEXT);
|
||||||
// See above note on why this is URI
|
doTestResult("BEGIN:VEVENT", "BEGIN:VEVENT", ParsedResultType.TEXT);
|
||||||
doTestResult("BEGIN:VEVENT", "begin:VEVENT", ParsedResultType.URI);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -55,6 +55,7 @@ public final class URIParsedResultTestCase extends Assert {
|
||||||
doTestNotUri("google.c");
|
doTestNotUri("google.c");
|
||||||
doTestNotUri(".com");
|
doTestNotUri(".com");
|
||||||
doTestNotUri(":80/");
|
doTestNotUri(":80/");
|
||||||
|
doTestNotUri("ABC,20.3,AB,AD");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in a new issue