Skip to content

Commit 6a01f64

Browse files
committed
Ensure URL encoding issues trigger errors.
Should be a NO-OP unless someone has a Rewrite valve configuration that is inconsistent with the Connector's URI encoding - in which case the new IAE will highlight this. Originated from a report from OSS-Fuzz that found some cases where a round-trip encode/decode have the same result as the input.
1 parent ba9d407 commit 6a01f64

File tree

3 files changed

+43
-3
lines changed

3 files changed

+43
-3
lines changed

java/org/apache/catalina/util/URLEncoder.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.io.IOException;
2121
import java.io.OutputStreamWriter;
2222
import java.nio.charset.Charset;
23+
import java.nio.charset.CodingErrorAction;
2324
import java.util.BitSet;
2425

2526
/**
@@ -146,7 +147,15 @@ public String encode(String path, Charset charset) {
146147
int maxBytesPerChar = 10;
147148
StringBuilder rewrittenPath = new StringBuilder(path.length());
148149
ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
149-
OutputStreamWriter writer = new OutputStreamWriter(buf, charset);
150+
/*
151+
* Most calls to this method use UTF-8 where malformed input and unmappable character issues are not expected to
152+
* happen. The only Tomcat code that currently (January 2026) might call this method with something other than
153+
* UTF-8 is the rewrite valve. In that case, the rewrite rules should be consistent with the configured URI
154+
* encoding on the Connector. Given all of this, the IAE is only expected to be thrown as a result of
155+
* configuration errors.
156+
*/
157+
OutputStreamWriter writer = new OutputStreamWriter(buf, charset.newEncoder()
158+
.onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT));
150159

151160
for (int i = 0; i < path.length(); i++) {
152161
int c = path.charAt(i);
@@ -160,8 +169,7 @@ public String encode(String path, Charset charset) {
160169
writer.write((char) c);
161170
writer.flush();
162171
} catch (IOException ioe) {
163-
buf.reset();
164-
continue;
172+
throw new IllegalArgumentException(ioe);
165173
}
166174
byte[] ba = buf.toByteArray();
167175
for (byte toEncode : ba) {

test/org/apache/catalina/util/TestURLEncoder.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@
1616
*/
1717
package org.apache.catalina.util;
1818

19+
import java.nio.charset.Charset;
1920
import java.nio.charset.StandardCharsets;
2021

2122
import org.junit.Assert;
2223
import org.junit.Test;
2324

25+
import org.apache.tomcat.util.buf.UDecoder;
26+
2427
public class TestURLEncoder {
2528

2629
private static final String SPACE = " ";
@@ -53,4 +56,29 @@ public void testRemoveSafeCharacter() {
5356
xml.removeSafeCharacter('&');
5457
Assert.assertEquals(AMPERSAND_ENCODED, xml.encode(AMPERSAND, StandardCharsets.UTF_8));
5558
}
59+
60+
61+
@Test(expected = IllegalArgumentException.class)
62+
public void testOssFuzz01() {
63+
/*
64+
* Round-trip URL encoding with ASCII only works for valid ASCII characters.
65+
*/
66+
testRoundTrip("\uFFFD", StandardCharsets.US_ASCII);
67+
}
68+
69+
70+
@Test
71+
public void testOssFuzz02() {
72+
testRoundTrip("\uFFFD", StandardCharsets.UTF_8);
73+
}
74+
75+
76+
private void testRoundTrip(String input, Charset charset) {
77+
URLEncoder encoder = new URLEncoder();
78+
79+
String encoded = encoder.encode(input, charset);
80+
String decoded = UDecoder.URLDecode(encoded, charset);
81+
82+
Assert.assertEquals(input, decoded);
83+
}
5684
}

webapps/docs/changelog.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@
130130
Authenticators that provides a per Authenticator override of the SSO
131131
Valve <code>requireReauthentication</code> attribute. (markt)
132132
</add>
133+
<fix>
134+
Ensure URL encoding errors in the Rewrite Valve trigger an exception
135+
rather than silently using a replacement character. (markt)
136+
</fix>
133137
</changelog>
134138
</subsection>
135139
<subsection name="Coyote">

0 commit comments

Comments
 (0)