Move data sanitisation to separate file
[MAILPOET-1794]
This commit is contained in:
@@ -5,6 +5,7 @@ import MailPoet from 'mailpoet';
|
|||||||
import Handlebars from 'handlebars';
|
import Handlebars from 'handlebars';
|
||||||
import Papa from 'papaparse';
|
import Papa from 'papaparse';
|
||||||
import Moment from 'moment';
|
import Moment from 'moment';
|
||||||
|
import sanitizeCSVData from './sanitize_csv_data.jsx';
|
||||||
|
|
||||||
jQuery(document).ready(() => {
|
jQuery(document).ready(() => {
|
||||||
if (!jQuery('#mailpoet_subscribers_import').length) {
|
if (!jQuery('#mailpoet_subscribers_import').length) {
|
||||||
@@ -96,138 +97,19 @@ jQuery(document).ready(() => {
|
|||||||
element.closest('table a').addClass(disabled);
|
element.closest('table a').addClass(disabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseCSV(isFile) {
|
function papaParserConfig(isFile) {
|
||||||
let processedSubscribers = [];
|
|
||||||
const parsedEmails = [];
|
|
||||||
const duplicateEmails = [];
|
|
||||||
const invalidEmails = [];
|
|
||||||
let emailColumnPosition = null;
|
|
||||||
let columnCount = null;
|
|
||||||
let isHeaderFound = false;
|
|
||||||
const advancedOptionHeader = true;
|
|
||||||
const advancedOptionDelimiter = '';
|
|
||||||
const advancedOptionNewline = '';
|
|
||||||
const advancedOptionComments = false;
|
|
||||||
// trim spaces, commas, periods,
|
|
||||||
// single/double quotes and convert to lowercase
|
|
||||||
const detectAndCleanupEmail = (emailString) => {
|
|
||||||
let test;
|
|
||||||
// decode HTML entities
|
|
||||||
let email = jQuery('<div />').html(emailString).text();
|
|
||||||
email = email
|
|
||||||
.toLowerCase()
|
|
||||||
// left/right trim spaces, punctuation (e.g., " 'email@email.com'; ")
|
|
||||||
// right trim non-printable characters (e.g., "email@email.com<6F>")
|
|
||||||
.replace(/^["';.,\s]+|[^\x20-\x7E]+$|["';.,_\s]+$/g, '')
|
|
||||||
// remove spaces (e.g., "email @ email . com")
|
|
||||||
// remove urlencoded characters
|
|
||||||
.replace(/\s+|%\d+|,+/g, '');
|
|
||||||
// detect e-mails that will be otherwise rejected by email regex
|
|
||||||
test = /<(.*?)>/.exec(email);
|
|
||||||
if (test) {
|
|
||||||
// is the email inside angle brackets (e.g., 'some@email.com <some@email.com>')?
|
|
||||||
email = test[1].trim();
|
|
||||||
}
|
|
||||||
test = /mailto:(?:\s+)?(.*)/.exec(email);
|
|
||||||
if (test) {
|
|
||||||
// is the email in 'mailto:email' format?
|
|
||||||
email = test[1].trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
// validate email
|
|
||||||
if (!window.mailpoet_email_regex.test(email)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return email;
|
|
||||||
};
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
skipEmptyLines: true,
|
skipEmptyLines: true,
|
||||||
delimiter: advancedOptionDelimiter,
|
|
||||||
newline: advancedOptionNewline,
|
|
||||||
comments: advancedOptionComments,
|
|
||||||
error() {
|
error() {
|
||||||
MailPoet.Notice.hide();
|
MailPoet.Notice.hide();
|
||||||
MailPoet.Notice.error(MailPoet.I18n.t('dataProcessingError'));
|
MailPoet.Notice.error(MailPoet.I18n.t('dataProcessingError'));
|
||||||
},
|
},
|
||||||
complete(CSV) {
|
complete(CSV) {
|
||||||
let email;
|
const sanitizedData = sanitizeCSVData(CSV.data);
|
||||||
let emailAddress;
|
if (sanitizedData) {
|
||||||
let rowData;
|
|
||||||
let rowColumnCount;
|
|
||||||
let errorNotice;
|
|
||||||
Object.keys(CSV.data).forEach((rowCount) => {
|
|
||||||
rowData = CSV.data[rowCount].map(el => el.trim());
|
|
||||||
rowColumnCount = rowData.length;
|
|
||||||
// set the number of row elements based on the first non-empty row
|
|
||||||
if (columnCount === null) {
|
|
||||||
columnCount = rowColumnCount;
|
|
||||||
}
|
|
||||||
// Process the row with the following assumptions:
|
|
||||||
// 1. Each row should contain the same number of elements
|
|
||||||
// 2. There should be at least 1 valid (as per HTML5 e-mail regex)
|
|
||||||
// e-mail address on each row EXCEPT when the header option is set to true
|
|
||||||
// 3. Duplicate addresses are skipped
|
|
||||||
if (rowColumnCount === columnCount) {
|
|
||||||
// determine position of email address inside an array; this is
|
|
||||||
// done once and then email regex is run just on that element for each row
|
|
||||||
if (emailColumnPosition === null) {
|
|
||||||
Object.keys(rowData).forEach((column) => {
|
|
||||||
emailAddress = detectAndCleanupEmail(rowData[column]);
|
|
||||||
if (emailColumnPosition === null
|
|
||||||
&& window.mailpoet_email_regex.test(emailAddress)) {
|
|
||||||
emailColumnPosition = column;
|
|
||||||
// add current e-mail to an object index
|
|
||||||
parsedEmails[emailAddress] = true;
|
|
||||||
rowData[column] = emailAddress;
|
|
||||||
processedSubscribers[emailAddress] = rowData;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
if (emailColumnPosition === null
|
|
||||||
&& advancedOptionHeader
|
|
||||||
&& parseInt(rowCount, 10) === 0) {
|
|
||||||
isHeaderFound = true;
|
|
||||||
processedSubscribers[0] = rowData;
|
|
||||||
}
|
|
||||||
} else if (rowData[emailColumnPosition] !== '') {
|
|
||||||
email = detectAndCleanupEmail(rowData[emailColumnPosition]);
|
|
||||||
if (_.has(parsedEmails, email)) {
|
|
||||||
duplicateEmails.push(email);
|
|
||||||
} else if (!window.mailpoet_email_regex.test(email)) {
|
|
||||||
invalidEmails.push(rowData[emailColumnPosition]);
|
|
||||||
} else {
|
|
||||||
// if we haven't yet processed this e-mail and it passed
|
|
||||||
// the regex test, then process the row
|
|
||||||
parsedEmails[email] = true;
|
|
||||||
rowData[emailColumnPosition] = email;
|
|
||||||
processedSubscribers[email] = rowData;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
// reindex array to avoid non-numeric indices
|
|
||||||
processedSubscribers = _.values(processedSubscribers);
|
|
||||||
// if the header options is set, there should be at least
|
|
||||||
// 2 data rows, otherwise at least 1 data row
|
|
||||||
if (
|
|
||||||
processedSubscribers
|
|
||||||
&& (
|
|
||||||
(isHeaderFound && processedSubscribers.length >= 2)
|
|
||||||
|| (!isHeaderFound && processedSubscribers.length >= 1)
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
// since we assume that the header line is always present, we need
|
// since we assume that the header line is always present, we need
|
||||||
// to detect the header by checking if it contains a valid e-mail address
|
// to detect the header by checking if it contains a valid e-mail address
|
||||||
window.importData.step1 = {
|
window.importData.step1 = sanitizedData;
|
||||||
header: (!window.mailpoet_email_regex.test(
|
|
||||||
processedSubscribers[0][emailColumnPosition]
|
|
||||||
)
|
|
||||||
) ? processedSubscribers.shift() : null,
|
|
||||||
subscribers: processedSubscribers,
|
|
||||||
subscribersCount: processedSubscribers.length,
|
|
||||||
duplicate: duplicateEmails,
|
|
||||||
invalid: invalidEmails,
|
|
||||||
};
|
|
||||||
MailPoet.trackEvent('Subscribers import started', {
|
MailPoet.trackEvent('Subscribers import started', {
|
||||||
source: isFile ? 'file upload' : 'pasted data',
|
source: isFile ? 'file upload' : 'pasted data',
|
||||||
'MailPoet Free version': window.mailpoet_version,
|
'MailPoet Free version': window.mailpoet_version,
|
||||||
@@ -235,7 +117,7 @@ jQuery(document).ready(() => {
|
|||||||
router.navigate('step2', { trigger: true });
|
router.navigate('step2', { trigger: true });
|
||||||
} else {
|
} else {
|
||||||
MailPoet.Modal.loading(false);
|
MailPoet.Modal.loading(false);
|
||||||
errorNotice = MailPoet.I18n.t('noValidRecords');
|
let errorNotice = MailPoet.I18n.t('noValidRecords');
|
||||||
errorNotice = errorNotice.replace('[link]', MailPoet.I18n.t('csvKBLink'));
|
errorNotice = errorNotice.replace('[link]', MailPoet.I18n.t('csvKBLink'));
|
||||||
errorNotice = errorNotice.replace('[/link]', '</a>');
|
errorNotice = errorNotice.replace('[/link]', '</a>');
|
||||||
MailPoet.Notice.error(errorNotice);
|
MailPoet.Notice.error(errorNotice);
|
||||||
@@ -306,7 +188,7 @@ jQuery(document).ready(() => {
|
|||||||
// delay loading indicator for 10ms or else it's just too fast :)
|
// delay loading indicator for 10ms or else it's just too fast :)
|
||||||
MailPoet.Modal.loading(true);
|
MailPoet.Modal.loading(true);
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
Papa.parse(pasteInputElement.val(), parseCSV(false));
|
Papa.parse(pasteInputElement.val(), papaParserConfig(false));
|
||||||
}, 10);
|
}, 10);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -333,7 +215,7 @@ jQuery(document).ready(() => {
|
|||||||
MailPoet.Modal.loading(true);
|
MailPoet.Modal.loading(true);
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
uploadElement.parse({
|
uploadElement.parse({
|
||||||
config: parseCSV(true),
|
config: papaParserConfig(true),
|
||||||
});
|
});
|
||||||
}, 10);
|
}, 10);
|
||||||
}
|
}
|
||||||
|
124
assets/js/src/subscribers/importExport/sanitize_csv_data.jsx
Normal file
124
assets/js/src/subscribers/importExport/sanitize_csv_data.jsx
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
// trim spaces, commas, periods,
|
||||||
|
// single/double quotes and convert to lowercase
|
||||||
|
import jQuery from 'jquery';
|
||||||
|
import _ from 'underscore';
|
||||||
|
|
||||||
|
const detectAndCleanupEmail = (emailString) => {
|
||||||
|
let test;
|
||||||
|
// decode HTML entities
|
||||||
|
let email = jQuery('<div />').html(emailString).text();
|
||||||
|
email = email
|
||||||
|
.toLowerCase()
|
||||||
|
// left/right trim spaces, punctuation (e.g., " 'email@email.com'; ")
|
||||||
|
// right trim non-printable characters (e.g., "email@email.com<6F>")
|
||||||
|
.replace(/^["';.,\s]+|[^\x20-\x7E]+$|["';.,_\s]+$/g, '')
|
||||||
|
// remove spaces (e.g., "email @ email . com")
|
||||||
|
// remove urlencoded characters
|
||||||
|
.replace(/\s+|%\d+|,+/g, '');
|
||||||
|
// detect e-mails that will be otherwise rejected by email regex
|
||||||
|
test = /<(.*?)>/.exec(email);
|
||||||
|
if (test) {
|
||||||
|
// is the email inside angle brackets (e.g., 'some@email.com <some@email.com>')?
|
||||||
|
email = test[1].trim();
|
||||||
|
}
|
||||||
|
test = /mailto:(?:\s+)?(.*)/.exec(email);
|
||||||
|
if (test) {
|
||||||
|
// is the email in 'mailto:email' format?
|
||||||
|
email = test[1].trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate email
|
||||||
|
if (!window.mailpoet_email_regex.test(email)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return email;
|
||||||
|
};
|
||||||
|
|
||||||
|
function sanitizeCSVData(csvData) {
|
||||||
|
let processedSubscribers = [];
|
||||||
|
const parsedEmails = [];
|
||||||
|
const duplicateEmails = [];
|
||||||
|
const invalidEmails = [];
|
||||||
|
let emailColumnPosition = null;
|
||||||
|
let columnCount = null;
|
||||||
|
let isHeaderFound = false;
|
||||||
|
let email;
|
||||||
|
let emailAddress;
|
||||||
|
let rowData;
|
||||||
|
let rowColumnCount;
|
||||||
|
Object.keys(csvData).forEach((rowCount) => {
|
||||||
|
rowData = csvData[rowCount].map(el => el.trim());
|
||||||
|
rowColumnCount = rowData.length;
|
||||||
|
// set the number of row elements based on the first non-empty row
|
||||||
|
if (columnCount === null) {
|
||||||
|
columnCount = rowColumnCount;
|
||||||
|
}
|
||||||
|
// Process the row with the following assumptions:
|
||||||
|
// 1. Each row should contain the same number of elements
|
||||||
|
// 2. There should be at least 1 valid (as per HTML5 e-mail regex)
|
||||||
|
// e-mail address on each row EXCEPT when the header option is set to true
|
||||||
|
// 3. Duplicate addresses are skipped
|
||||||
|
if (rowColumnCount === columnCount) {
|
||||||
|
// determine position of email address inside an array; this is
|
||||||
|
// done once and then email regex is run just on that element for each row
|
||||||
|
if (emailColumnPosition === null) {
|
||||||
|
Object.keys(rowData).forEach((column) => {
|
||||||
|
emailAddress = detectAndCleanupEmail(rowData[column]);
|
||||||
|
if (emailColumnPosition === null
|
||||||
|
&& window.mailpoet_email_regex.test(emailAddress)) {
|
||||||
|
emailColumnPosition = column;
|
||||||
|
// add current e-mail to an object index
|
||||||
|
parsedEmails[emailAddress] = true;
|
||||||
|
rowData[column] = emailAddress;
|
||||||
|
processedSubscribers[emailAddress] = rowData;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (emailColumnPosition === null
|
||||||
|
&& parseInt(rowCount, 10) === 0) {
|
||||||
|
isHeaderFound = true;
|
||||||
|
processedSubscribers[0] = rowData;
|
||||||
|
}
|
||||||
|
} else if (rowData[emailColumnPosition] !== '') {
|
||||||
|
email = detectAndCleanupEmail(rowData[emailColumnPosition]);
|
||||||
|
if (_.has(parsedEmails, email)) {
|
||||||
|
duplicateEmails.push(email);
|
||||||
|
} else if (!window.mailpoet_email_regex.test(email)) {
|
||||||
|
invalidEmails.push(rowData[emailColumnPosition]);
|
||||||
|
} else {
|
||||||
|
// if we haven't yet processed this e-mail and it passed
|
||||||
|
// the regex test, then process the row
|
||||||
|
parsedEmails[email] = true;
|
||||||
|
rowData[emailColumnPosition] = email;
|
||||||
|
processedSubscribers[email] = rowData;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
// reindex array to avoid non-numeric indices
|
||||||
|
processedSubscribers = _.values(processedSubscribers);
|
||||||
|
// if the header options is set, there should be at least
|
||||||
|
// 2 data rows, otherwise at least 1 data row
|
||||||
|
if (
|
||||||
|
processedSubscribers
|
||||||
|
&& (
|
||||||
|
(isHeaderFound && processedSubscribers.length >= 2)
|
||||||
|
|| (!isHeaderFound && processedSubscribers.length >= 1)
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
// since we assume that the header line is always present, we need
|
||||||
|
// to detect the header by checking if it contains a valid e-mail address
|
||||||
|
return {
|
||||||
|
header: (!window.mailpoet_email_regex.test(
|
||||||
|
processedSubscribers[0][emailColumnPosition]
|
||||||
|
)
|
||||||
|
) ? processedSubscribers.shift() : null,
|
||||||
|
subscribers: processedSubscribers,
|
||||||
|
subscribersCount: processedSubscribers.length,
|
||||||
|
duplicate: duplicateEmails,
|
||||||
|
invalid: invalidEmails,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export default sanitizeCSVData;
|
Reference in New Issue
Block a user