Merge pull request #524 from mailpoet/link_processing_fix

Prevents URLs in link titles from being processed when tracking is enabled
This commit is contained in:
Jonathan Labreuille
2016-06-14 15:21:18 +02:00
committed by GitHub
4 changed files with 712 additions and 259 deletions

View File

@@ -1,4 +1,10 @@
{
"repositories": [
{
"type": "vcs",
"url": "https://github.com/mailpoet/html2text"
}
],
"require": {
"php": ">=5.3.3",
"twig/twig": "1.*",
@@ -11,7 +17,7 @@
"phpseclib/phpseclib": "*",
"mtdowling/cron-expression": "^1.1",
"nesbot/carbon": "^1.21",
"soundasleep/html2text": "^0.3.0"
"soundasleep/html2text": "dev-master"
},
"require-dev": {
"codeception/codeception": "*",

897
composer.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -7,8 +7,10 @@ use MailPoet\Util\Security;
class Links {
const DATA_TAG = '[mailpoet_data]';
const HASH_LENGTH = 5;
static function extract($content) {
$extracted_links = array();
// adopted from WP's wp_extract_urls() function & modified to work on hrefs
# match href=' or href="
$regex = '#(?:href.*?=.*?)(["\']?)('
@@ -29,31 +31,60 @@ class Links {
// extract shortcodes with [link:*] format
$shortcodes = new Shortcodes();
$shortcodes = $shortcodes->extract($content, $categories = array('link'));
// extract links
preg_match_all($regex, $content, $links);
return array_merge(
array_unique($links[2]),
$shortcodes
);
$extracted_links = array_map(function($shortcode) {
return array(
'html' => $shortcode,
'link' => $shortcode
);
}, $shortcodes);
// extract urls with href="url" format
preg_match_all($regex, $content, $matched_urls);
// Pair every full href match (group 0) with the URL captured in group 2.
// preg_match_all() fills each group array with one entry per match, so valid
// indexes run from 0 to count - 1. The bound must be exclusive: an inclusive
// bound reads one element past the end and appends a bogus entry whose
// 'html' and 'link' values are undefined.
$matched_urls_count = count($matched_urls[1]);
for($index = 0; $index < $matched_urls_count; $index++) {
  $extracted_links[] = array(
    'html' => $matched_urls[0][$index],
    'link' => $matched_urls[2][$index]
  );
}
return $extracted_links;
}
static function process($content) {
$links = self::extract($content);
$extracted_links = self::extract($content);
$processed_links = array();
foreach($links as $link) {
$hash = Security::generateRandomString(5);
foreach($extracted_links as $extracted_link) {
$hash = Security::generateRandomString(self::HASH_LENGTH);
$processed_links[] = array(
'hash' => $hash,
'url' => $link
'url' => $extracted_link['link']
);
$encoded_link = sprintf(
'%s/?mailpoet&endpoint=track&action=click&data=%s-%s',
home_url(),
self::DATA_TAG,
$hash
$params = array(
'mailpoet' => '',
'endpoint' => 'track',
'action' => 'click',
'data' => self::DATA_TAG . '-' . $hash
);
$tracked_link = add_query_arg($params, home_url());
// first, replace URL in the extracted HTML source with encoded link
$tracked_link_html_source = str_replace(
$extracted_link['link'], $tracked_link,
$extracted_link['html']
);
// second, replace original extracted HTML source with tracked URL source
$content = str_replace(
$extracted_link['html'], $tracked_link_html_source, $content
);
// third, replace text version URL with tracked link: [description](url)
// regex is used to avoid replacing description URLs that are wrapped in round brackets
// i.e., <a href="http://google.com">(http://google.com)</a> => [(http://google.com)](http://tracked_link)
$regex_escaped_tracked_link = preg_quote($tracked_link, '/');
$content = preg_replace(
'/(\[' . $regex_escaped_tracked_link . '\])(\(' . $regex_escaped_tracked_link . '\))/',
'[$1](' . $tracked_link . ')',
$content
);
$link_regex = '/' . preg_quote($link, '/') . '/';
$content = preg_replace($link_regex, $encoded_link, $content);
}
return array(
$content,

View File

@@ -87,7 +87,6 @@ class Renderer {
}
/**
 * Produces the text rendering of a template by passing it through
 * \Html2Text\Html2Text::convert().
 *
 * NOTE(review): the surrounding hunk header reports one line removed from this
 * region; confirm whether the mb_convert_encoding() call below is still part
 * of the method after the change.
 *
 * @param string $template Markup to convert; presumably UTF-8 encoded HTML,
 *                         given the source encoding passed below. TODO confirm.
 * @return mixed Whatever \Html2Text\Html2Text::convert() returns (presumably
 *               the plain-text string; verify against the library).
 */
function renderTextVersion($template) {
// Re-encode the input from UTF-8 to HTML entities before conversion.
$template = mb_convert_encoding($template, 'HTML-ENTITIES', 'UTF-8');
return \Html2Text\Html2Text::convert($template);
}