Be more stringent on email validation rules (#1870)
Some checks failed
CodeQL / Analyze (java) (push) Has been cancelled
Test Java 17 / build (push) Has been cancelled
Test Java 21 / build (push) Has been cancelled
Test Java 8 / build (push) Has been cancelled

* be more stringent on email validation rules
add unit tests (UT) to validate various email formats

* fix indentation

* fix failing UT case

* refine the regex for the email's domain:
* only letters are allowed in the last part (the top-level domain)
* no part may start or end with a '-' (a '-' is allowed inside a part)

add UT to cover such cases
This commit is contained in:
François Tiercelin 2024-10-19 02:38:56 +01:00 committed by GitHub
parent 9598db91c7
commit e750aca9e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 32 additions and 4 deletions

View file

@ -29,8 +29,10 @@ import java.util.regex.Pattern;
*/
public final class EmailDoCoMoResultParser extends AbstractDoCoMoResultParser {
// Character whitelist: one or more characters drawn from ASCII letters, digits, '@', '.' and
// the punctuation listed in the class below.
private static final Pattern ATEXT_ALPHANUMERIC = Pattern.compile("[a-zA-Z0-9@.!#$%&'*+\\-/=?^_`{|}~]+");
// Local part (before the '@'): one or more characters, none of which is ':'.
private static final String EMAIL_LOCAL = "[^:]+";
// Domain: one or more dot-terminated labels followed by a final part of at least two letters.
// A label is a run of ASCII letters/digits, optionally followed by groups of "one or more
// hyphens, then letters/digits" -- so a label may be a single character, may contain hyphens
// inside, and can never start or end with a hyphen. Alphanumeric runs and hyphen runs cannot
// overlap, so a given label can only be matched one way.
private static final String EMAIL_DOMAIN = "([0-9a-zA-Z]+(?:-+[0-9a-zA-Z]+)*\\.)+[a-zA-Z]{2,}";
// Whole-string pattern: local part, '@', domain.
private static final Pattern EMAIL = Pattern.compile("^" + EMAIL_LOCAL + "@" + EMAIL_DOMAIN + "$");
@Override
public EmailAddressParsedResult parse(Result result) {
String rawText = getMassagedText(result);
@ -58,7 +60,7 @@ public final class EmailDoCoMoResultParser extends AbstractDoCoMoResultParser {
* in a barcode, not "judge" it.
*/
static boolean isBasicallyValidEmailAddress(String email) {
  // null is never a valid address. Otherwise the entire string must match EMAIL
  // (Matcher.matches() requires a full-input match): a local part, an '@', then a domain.
  // This is the only check; a second return statement after it would be unreachable.
  return email != null && EMAIL.matcher(email).matches();
}
}

View file

@ -28,12 +28,37 @@ import org.junit.Test;
*/
public final class EmailAddressParsedResultTestCase extends Assert {
@Test
public void testEmailAddresses() {
  // Inputs the parser must refuse: null/empty, no '@', a too-short or missing last domain
  // part, illegal characters in the domain, and hyphens at the edge of a domain part.
  String[] rejected = {
    null,
    "",
    "123.365.com",
    "abc.def.com",
    "123@abcd.c",
    "123@abcd",
    "123@ab,cd.com",
    "123@ab#cd.com",
    "123@ab!#cd.com",
    "123@ab_cd.com",
    "123@-abcd.com",
    "123@abcd-.com",
    "123@abcd.c-m",
  };
  for (String candidate : rejected) {
    // The candidate doubles as the failure message so a failing input is identifiable.
    assertFalse(candidate, EmailDoCoMoResultParser.isBasicallyValidEmailAddress(candidate));
  }

  // Inputs the parser must accept: inner hyphens, several domain parts, mixed case and
  // non-ASCII characters in the local part.
  String[] accepted = {
    "123@abcd.com",
    "123@ab-cd.com",
    "abc.456@ab-cd.com",
    "abc.456@ab-cd.BB-EZ-12.com",
    "建設省.456@ab-cd.com",
    "abc.Z456@ab-Cd9Z.co",
    "建設省.aZ456@Ab-cd9Z.co",
  };
  for (String candidate : accepted) {
    assertTrue(candidate, EmailDoCoMoResultParser.isBasicallyValidEmailAddress(candidate));
  }
}
@Test
public void testEmailAddress() {
  // The same recipient is expected whether or not the text carries a "mailto:" prefix;
  // the two trailing arguments are passed as null in both cases.
  final String address = "srowen@example.org";
  doTest(address, address, null, null);
  doTest("mailto:" + address, address, null, null);
}
@Test
public void testTos() {
doTest("mailto:srowen@example.org,bob@example.org",

View file

@ -92,7 +92,8 @@ public final class ParsedReaderResultTestCase extends Assert {
doTestResult("srowen@example.org", "srowen@example.org", ParsedResultType.EMAIL_ADDRESS);
doTestResult("mailto:srowen@example.org", "srowen@example.org", ParsedResultType.EMAIL_ADDRESS);
doTestResult("MAILTO:srowen@example.org", "srowen@example.org", ParsedResultType.EMAIL_ADDRESS);
doTestResult("srowen@example", "srowen@example", ParsedResultType.EMAIL_ADDRESS);
doTestResult("srowen@example.com", "srowen@example.com", ParsedResultType.EMAIL_ADDRESS);
doTestResult("srowen@example", "srowen@example", ParsedResultType.TEXT);
doTestResult("srowen", "srowen", ParsedResultType.TEXT);
doTestResult("Let's meet @ 2", "Let's meet @ 2", ParsedResultType.TEXT);
}