Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions commons-email2-jakarta/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<!-- JMH core library; test scope only, so it is not exposed to consumers of this module.
     The version is taken from the jmh.version property declared under <properties>. -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>${jmh.version}</version>
<scope>test</scope>
</dependency>
<!-- JMH annotation-processor artifact; kept on the same jmh.version as jmh-core. -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>${jmh.version}</version>
<scope>test</scope>
</dependency>
</dependencies>

<properties>
Expand All @@ -92,6 +104,7 @@
<commons.jacoco.branchRatio>0.18</commons.jacoco.branchRatio>
<commons.jacoco.lineRatio>0.84</commons.jacoco.lineRatio>
<commons.jacoco.complexityRatio>0.76</commons.jacoco.complexityRatio>
<jmh.version>1.37</jmh.version>
</properties>
<build>
<resources>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@

/**
* <p>
* Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images that are contained in "&lt;img src=../&gt;" elements in the
* HTML code. This is done by replacing all img-src-elements with "cid:"-entries and embedding images in the email.
* Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images and scripts that are contained in
* "&lt;img src=../&gt;" and "&lt;script src=../&gt;" elements in the HTML code.
* This is done by replacing all the src-elements with "cid:"-entries and embedding the images/scripts in the email.
* </p>
* <p>
* For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files
Expand All @@ -49,10 +50,10 @@ public class ImageHtmlEmail extends HtmlEmail {
// can be arbitrary text between "IMG" and "SRC" like IDs and other things.

/** Regexp for extracting {@code <img>} tags */
public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg](?=\\s)[^>]*?\\s[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+)([\"'])";

/** Regexp for extracting {@code <script>} tags */
public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt](?=\\s)[^>]*?\\s[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+)([\"'])";

// this pattern looks for the HTML image tag which indicates embedded images,
// the grouping is necessary to allow to replace the element with the CID
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.mail2.jakarta;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.infra.Blackhole;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * JMH micro-benchmark that compares the regular expressions used by {@link ImageHtmlEmail} to locate
 * {@code src} attributes of {@code <img>} and {@code <script>} elements against earlier/alternative
 * variants of the same expressions.
 * <p>
 * Every benchmark method runs the same synthetic HTML document through a pair of pre-compiled matchers.
 * </p>
 * <p>
 * NOTE: the harnesses are static and hold {@link Matcher} instances, which are not safe for use by
 * multiple concurrent threads. Run the benchmarks with a single thread.
 * </p>
 */
public class ImageHtmlEmailBenchmark {

    /** Number of img/script pairs in the test document, and number of {@code find()} calls per matcher per run. */
    private static final int MATCHES_TO_FIND = 50;

    /**
     * The document all matchers run against. Must be declared before the harnesses below, because their
     * constructors read it during static initialization.
     */
    private static final String TEST_HTML = testHtml();

    /**
     * The regular expressions currently shipped in {@link ImageHtmlEmail}.
     */
    public static final TestHarness testHarnessCurrentRegex = new TestHarness(
            ImageHtmlEmail.REGEX_IMG_SRC,
            ImageHtmlEmail.REGEX_SCRIPT_SRC
    );

    /**
     * The original regex pre EMAIL-198
     */
    public static final TestHarness testHarnessOriginalRegex = new TestHarness(
            "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])",
            "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])"
    );

    /**
     * Test an alternative regex with a non-greedy url: note the {@code ([^"']+?)} capture.
     * This should be slower than the current regex test case.
     */
    public static final TestHarness testHarnessCurrentRegexNonGreedyUrl = new TestHarness(
            "(<[Ii][Mm][Gg](?=\\s)[^>]*?\\s[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])",
            "(<[Ss][Cc][Rr][Ii][Pp][Tt](?=\\s)[^>]*?\\s[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])"
    );

    /**
     * Benchmarks the expressions currently defined in {@link ImageHtmlEmail}.
     *
     * @param blackhole JMH sink that consumes the match results.
     */
    @Benchmark
    public void testCaseCurrentRegex(Blackhole blackhole) {
        testHarnessCurrentRegex.runTest(blackhole);
    }

    /**
     * Benchmarks the expressions used before EMAIL-198.
     *
     * @param blackhole JMH sink that consumes the match results.
     */
    @Benchmark
    public void testCaseOriginalRegex(Blackhole blackhole) {
        testHarnessOriginalRegex.runTest(blackhole);
    }

    /**
     * Benchmarks the current expressions with a non-greedy URL capture group.
     *
     * @param blackhole JMH sink that consumes the match results.
     */
    @Benchmark
    public void testCaseCurrentRegexNonGreedyUrl(Blackhole blackhole) {
        testHarnessCurrentRegexNonGreedyUrl.runTest(blackhole);
    }

    /**
     * Holds one img matcher and one script matcher, both bound to the shared test document.
     */
    public static class TestHarness {

        private final Matcher matcherImg;
        private final Matcher matcherScript;

        /**
         * Compiles both expressions and binds them to the test document.
         *
         * @param patternImg    regular expression for {@code <img>} elements.
         * @param patternScript regular expression for {@code <script>} elements.
         */
        public TestHarness(String patternImg, String patternScript) {
            // Use the private static helper here: a constructor must not call an overridable method.
            matcherImg = newMatcher(patternImg);
            matcherScript = newMatcher(patternScript);
        }

        /**
         * Calls {@code find()} {@code MATCHES_TO_FIND} times on each matcher, feeding every result to the
         * blackhole, and then resets both matchers so the next invocation starts from the beginning.
         *
         * @param blackhole JMH sink that consumes the match results.
         */
        public void runTest(Blackhole blackhole) {
            for (int i = 0; i < MATCHES_TO_FIND; i++) {
                blackhole.consume(matcherImg.find());
                blackhole.consume(matcherScript.find());
            }
            matcherImg.reset();
            matcherScript.reset();
        }

        /**
         * Compiles the given expression and returns a matcher over the test document.
         *
         * @param pattern the regular expression to compile.
         * @return a new matcher bound to the test document.
         */
        public Matcher prepareRegex(String pattern) {
            return newMatcher(pattern);
        }

        /** Compiles {@code pattern} and binds it to the shared test document. */
        private static Matcher newMatcher(String pattern) {
            return Pattern.compile(pattern).matcher(TEST_HTML);
        }
    }

    /**
     * Builds the benchmark input: filler text followed by {@code MATCHES_TO_FIND} repetitions of an
     * {@code <img>} tag with a long whitespace run and no {@code src} attribute, a {@code <script>} tag
     * with several attributes and a long {@code src} value, and more filler text.
     *
     * @return the HTML document used by all harnesses.
     */
    private static String testHtml() {
        final String filler = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
        // new char[n] is filled with '\0', so replacing "\0" yields n copies of the replacement.
        final String longUrl = new String(new char[200]).replace("\0", "a");
        final String longSpace = new String(new char[100]).replace("\0", " ");
        final StringBuilder html = new StringBuilder();
        html.append("<html><body><pre>").append(filler);
        for (int i = 0; i < MATCHES_TO_FIND; i++) {
            html.append("<img").append(longSpace).append("xxx=\"no-src-attribute\">");
            html.append("<script some=\"true\" other=\"1\" attributes=\"yes\" src = \"").append(longUrl).append("\">");
            html.append(filler);
        }
        html.append("</pre></body></html>");
        return html.toString();
    }

    /**
     * Delegates to the JMH command-line runner.
     *
     * @param args command-line arguments passed through to JMH.
     * @throws Exception if the JMH runner fails.
     */
    public static void main(String[] args) throws Exception {
        org.openjdk.jmh.Main.main(args);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.commons.mail2.jakarta;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

Expand All @@ -25,11 +26,13 @@
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
Expand All @@ -45,6 +48,9 @@

import jakarta.activation.DataSource;
import jakarta.mail.internet.MimeMessage;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

class ImageHtmlEmailTest extends HtmlEmailTest {

Expand All @@ -70,6 +76,8 @@ public DataSource resolve(final String resourceLocation, final boolean isLenient

private static final URL TEST2_HTML_URL = ImageHtmlEmailTest.class.getResource("/attachments/classpathtest.html");

private static final Pattern imgSrcPattern = Pattern.compile(ImageHtmlEmail.REGEX_IMG_SRC);
private static final Pattern scriptSrcPattern = Pattern.compile(ImageHtmlEmail.REGEX_SCRIPT_SRC);
private MockImageHtmlEmailConcrete email;

private String loadUrlContent(final URL url) throws IOException {
Expand Down Expand Up @@ -196,6 +204,57 @@ void testRegex() {
assertEquals("file1", matcher.group(2));
assertTrue(matcher.find());
assertEquals("file2", matcher.group(2));

// should not match any other tags
matcher = pattern.matcher("<nomatch src=\"s\" />");
assertFalse(matcher.find());

matcher = pattern.matcher("<imgx src=\"file1\">");
assertFalse(matcher.find());

// should not match any other attribute
matcher = pattern.matcher("<img xsrc=\"file1\">");
assertFalse(matcher.find());
}

/**
 * Verifies that {@code scriptSrcPattern} finds exactly the expected {@code src} values, in order,
 * and nothing more.
 *
 * @param inputHtml  the markup to scan.
 * @param srcMatches the {@code src} values expected in capture group 2, in document order.
 */
@ParameterizedTest
@MethodSource
public void testScriptRegex(final String inputHtml, final List<String> srcMatches) {
    final Matcher matcher = scriptSrcPattern.matcher(inputHtml);
    for (final String expectedMatch : srcMatches) {
        // Lazy messages: only built when an assertion fails, and they say which input broke.
        assertTrue(matcher.find(), () -> "Expected a script src match for '" + expectedMatch + "' in: " + inputHtml);
        assertEquals(expectedMatch, matcher.group(2), () -> "Unexpected src value captured from: " + inputHtml);
    }
    // Once every expected value has been consumed, no further match may remain.
    assertFalse(matcher.find(), () -> "Unexpected additional script src match '" + matcher.group(2) + "' in: " + inputHtml);
}

/**
 * Supplies the cases for the parameterized {@code testScriptRegex} test: each entry pairs an HTML
 * snippet with the list of {@code src} values expected to be found in it, in order.
 *
 * @return the stream of (html, expected src values) argument pairs.
 */
private static Stream<Arguments> testScriptRegex() {
    return Stream.of(
        // basic cases: a lone src attribute, and src preceded by other attributes
        Arguments.of(
            "<html><body><script src=\"s\"></script></body></html>",
            Arrays.asList("s")),
        Arguments.of(
            "<html><body><script blocking=\"render\" async src=\"s\"></script></body></html>",
            Arrays.asList("s")),
        // tag and attribute names in upper case
        Arguments.of(
            "<html><body><SCRIPT BLOCKING=\"render\" ASYNC SRC=\"s\"></script></body></html>",
            Arrays.asList("s")),
        // two script elements in one document
        Arguments.of(
            "<html><body><script src=\"s1\"></script><script src=\"s2\"></script></body></html>",
            Arrays.asList("s1", "s2")),
        // line breaks between the tag name and the attribute
        Arguments.of(
            "<html><body><script\n \rsrc=\"s1\"></script><script \nsrc=\"s2\"></script></body></html>",
            Arrays.asList("s1", "s2")),
        // line breaks plus tabs/spaces around the equals sign
        Arguments.of(
            "<html><body><script\n \rsrc = \t \"s1\" ></script><script \nsrc =\t\"s2\" ></script></body></html>",
            Arrays.asList("s1", "s2")),
        // markup taken from a real page
        Arguments.of(
            "<script defer=\"\" nomodule=\"\" nonce=\"\" src=\"/jkao/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js\"></script>",
            Arrays.asList("/jkao/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js")),
        // five script elements
        Arguments.of(
            "<html><body><script src=\"s1\"></script><script src=\"s2\"></script><script src=\"s3\"></script><script src=\"s4\"></script><script src=\"s5\"></script></body></html>",
            Arrays.asList("s1", "s2", "s3", "s4", "s5")),
        // self-closed and unclosed script elements
        Arguments.of(
            "<script src=\"s\" />",
            Arrays.asList("s")),
        Arguments.of(
            "<script src=\"s\">",
            Arrays.asList("s")),
        // other tag names must not match
        Arguments.of(
            "<nomatch src=\"s\" />",
            Arrays.asList()),
        Arguments.of(
            "<scriptx src=\"s\" />",
            Arrays.asList()),
        // other attribute names must not match
        Arguments.of(
            "<script xsrc=\"s\" />",
            Arrays.asList())
    );
}

@Test
Expand Down Expand Up @@ -486,4 +545,40 @@ void testSendHtmlUrl() throws Exception {
validateSend(fakeMailServer, strSubject, email.getHtml(), email.getFromAddress(), email.getToAddresses(), email.getCcAddresses(),
email.getBccAddresses(), true);
}

/**
 * Sends an HTML email whose body references a script via {@code <script src=...>} and checks that the
 * received message contains a "cid:" reference and exactly one attachment.
 *
 * @throws Exception if building, sending, or parsing the message fails.
 */
@Test
public void testEmailWithScript() throws Exception {
    // Turn on the most verbose logging for ImageHtmlEmail while this test runs.
    Logger.getLogger(ImageHtmlEmail.class.getName()).setLevel(Level.FINEST);

    // NOTE(review): presumably prepares the fakeMailServer used below - confirm in the base test class.
    getMailServer();

    final String subject = "Test HTML Send with Script";
    final String htmlBody = "<html><body><script type=\"text/javascript\" src=\"scripts/example-script.js\"/></body></html>";

    // Build the message; the script is resolved through a classpath resolver.
    final DataSourceResolver classPathResolver = new DataSourceClassPathResolver("/", true);
    email = new MockImageHtmlEmailConcrete();
    email.setDataSourceResolver(classPathResolver);
    email.setHostName(strTestMailServer);
    email.setSmtpPort(getMailServerPort());
    email.setFrom(strTestMailFrom);
    email.addTo(strTestMailTo);
    email.setSubject(subject);
    email.setHtmlMsg(htmlBody);

    // Deliver it, then stop the server before inspecting what arrived.
    email.send();
    fakeMailServer.stop();

    assertEquals(1, fakeMailServer.getMessages().size());
    final MimeMessage received = fakeMailServer.getMessages().get(0).getMimeMessage();
    // Keep a copy of the raw message on disk.
    MimeMessageUtils.writeMimeMessage(received, new File("./target/test-emails/testEmailWithScript.eml"));

    final MimeMessageParser parsed = new MimeMessageParser(received).parse();
    assertTrue(parsed.getHtmlContent().contains("\"cid:"));
    assertEquals(1, parsed.getAttachmentList().size());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Returns a fixed greeting.
 *
 * @returns {string} the text "Hello, World!".
 */
function hello() {
    const greeting = "Hello, World!";
    return greeting;
}