- Updates import email regex to use standard HTML5 regex

- Improves email detection/filtering logic
This commit is contained in:
Vlad
2016-05-25 10:40:29 -04:00
parent 0e24174373
commit da147047ec
2 changed files with 21 additions and 20 deletions

View File

@@ -304,28 +304,28 @@ define(
advancedOptionDelimiter = '',
advancedOptionNewline = '',
advancedOptionComments = false,
// Normalizes a raw imported value that may contain an e-mail address.
// Steps: decode HTML entities, convert to lowercase, trim surrounding
// spaces/punctuation/quotes, drop inner spaces, commas and url-encoded
// sequences, then unwrap the address from '<...>' or 'mailto:' notation.
// The returned string is only cleaned up, not validated; nullish input
// yields an empty string.
detectAndCleanupEmail = function (email) {
  var test,
    // A <textarea> treats assigned markup as plain text: entities are
    // decoded, but no elements are created from untrusted input and literal
    // angle brackets survive for the '<...>' check below.
    decoder = document.createElement('textarea');
  // decode HTML entities (e.g., "&lt;some@email.com&gt;")
  decoder.innerHTML = (email == null) ? '' : String(email);
  email = decoder.value
    .toLowerCase()
    // left/right trim spaces, punctuation (e.g., " 'email@email.com'; ")
    // right trim non-printable characters (e.g., a trailing U+FFFD)
    .replace(/^["';.,\s]+|[^\x20-\x7E]+$|["';.,_\s]+$/g, '')
    // remove spaces (e.g., "email @ email . com")
    // remove urlencoded characters
    .replace(/\s+|%\d+|,+/g, '');
  // detect e-mails that will be otherwise rejected by email regex;
  // the two checks run in sequence so '<mailto:email>' is fully unwrapped
  test = /<(.*?)>/.exec(email);
  if (test) {
    // is the email inside angle brackets (e.g., 'some@email.com <some@email.com>')?
    email = test[1].trim();
  }
  test = /mailto:(?:\s+)?(.*)/.exec(email);
  if (test) {
    // is the email in 'mailto:email' format?
    email = test[1].trim();
  }
  return email;
};