Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions core/src/main/java/org/owasp/encoder/Encode.java
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,87 @@ public static void forXmlComment(Writer out, String input)
encode(Encoders.XML_COMMENT_ENCODER, out, input);
}

/**
 * Encodes the input for an XML 1.1 document, in either content or
 * attribute position. Behaves like {@link #forXml(String)}, except that
 * control characters which XML 1.1 only permits in escaped form are
 * written as two-digit hexadecimal character references (e.g.,
 * {@code &#x01;}) instead of being replaced. The affected ranges are
 * [#x1-#x8, #xB-#xC, #xE-#x1F, #x7F-#x84, #x86-#x9F]. Tab, line feed,
 * carriage return and NEL (#x85) are passed through unencoded. The null
 * character is still invalid in XML 1.1 and is replaced with a space.
 *
 * @param input the input to encode
 * @return the encoded result
 */
public static String forXml11(String input) {
    final String encoded = encode(Encoders.XML_11_ENCODER, input);
    return encoded;
}

/**
 * Writer-based variant of {@link #forXml11(String)}. Applies the same
 * encoding, but writes the result directly to {@code out} instead of
 * building an intermediate string.
 *
 * @param out where to write encoded output
 * @param input the input string to encode
 * @throws IOException if thrown by writer
 */
public static void forXml11(Writer out, String input) throws IOException {
    encode(Encoders.XML_11_ENCODER, out, input);
}

/**
 * Encodes the input for XML 1.1 element content. Behaves like
 * {@link #forXmlContent(String)}, except that control characters are
 * handled according to XML 1.1: those that may only appear in escaped
 * form are written as hexadecimal character references rather than
 * being replaced.
 *
 * @param input the input to encode
 * @return the encoded result
 */
public static String forXml11Content(String input) {
    final String encoded = encode(Encoders.XML_11_CONTENT_ENCODER, input);
    return encoded;
}

/**
 * Writer-based variant of {@link #forXml11Content(String)}. Applies the
 * same encoding, but writes the result directly to {@code out} instead of
 * building an intermediate string.
 *
 * @param out where to write encoded output
 * @param input the input string to encode
 * @throws IOException if thrown by writer
 */
public static void forXml11Content(Writer out, String input) throws IOException {
    encode(Encoders.XML_11_CONTENT_ENCODER, out, input);
}

/**
 * Encodes the input for an XML 1.1 attribute value. Behaves like
 * {@link #forXmlAttribute(String)}, except that control characters are
 * handled according to XML 1.1: those that may only appear in escaped
 * form are written as hexadecimal character references rather than
 * being replaced.
 *
 * @param input the input to encode
 * @return the encoded result
 */
public static String forXml11Attribute(String input) {
    final String encoded = encode(Encoders.XML_11_ATTRIBUTE_ENCODER, input);
    return encoded;
}

/**
 * Writer-based variant of {@link #forXml11Attribute(String)}. Applies the
 * same encoding, but writes the result directly to {@code out} instead of
 * building an intermediate string.
 *
 * @param out where to write encoded output
 * @param input the input string to encode
 * @throws IOException if thrown by writer
 */
public static void forXml11Attribute(Writer out, String input) throws IOException {
    encode(Encoders.XML_11_ATTRIBUTE_ENCODER, out, input);
}

/**
* Encodes data for an XML CDATA section. On the chance that the input
* contains a terminating {@code "]]>"}, it will be replaced by
Expand Down
27 changes: 27 additions & 0 deletions core/src/main/java/org/owasp/encoder/Encoders.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ public final class Encoders {
* Name of {@linkplain Encode#forXmlComment(String) XML comment} context.
*/
public static final String XML_COMMENT = "xml-comment";
/**
 * Name of {@linkplain Encode#forXml11(String) XML 1.1} general context.
 */
public static final String XML_11 = "xml-1.1";
/**
 * Name of {@linkplain Encode#forXml11Content(String) XML 1.1 content}
 * context.
 */
public static final String XML_11_CONTENT = "xml-1.1-content";
/**
 * Name of {@linkplain Encode#forXml11Attribute(String) XML 1.1 attribute}
 * context.
 */
public static final String XML_11_ATTRIBUTE = "xml-1.1-attribute";
/**
* Name of {@linkplain Encode#forCDATA(String) CDATA} context.
*/
Expand Down Expand Up @@ -160,6 +172,21 @@ public final class Encoders {
*/
static final XMLCommentEncoder XML_COMMENT_ENCODER
= map(XML_COMMENT, new XMLCommentEncoder());
/**
 * Encoder for general XML 1.1 contexts: {@code Mode.ALL} combined with
 * {@code Version.XML_1_1}, mapped under the {@link #XML_11} name.
 */
static final XMLEncoder XML_11_ENCODER
= map(XML_11, new XMLEncoder(XMLEncoder.Mode.ALL, XMLEncoder.Version.XML_1_1));
/**
 * Encoder for XML 1.1 content contexts: {@code Mode.CONTENT} combined with
 * {@code Version.XML_1_1}, mapped under the {@link #XML_11_CONTENT} name.
 */
static final XMLEncoder XML_11_CONTENT_ENCODER
= map(XML_11_CONTENT, new XMLEncoder(XMLEncoder.Mode.CONTENT, XMLEncoder.Version.XML_1_1));
/**
 * Encoder for XML 1.1 attribute contexts: {@code Mode.ATTRIBUTE} combined
 * with {@code Version.XML_1_1}, mapped under the {@link #XML_11_ATTRIBUTE}
 * name.
 */
static final XMLEncoder XML_11_ATTRIBUTE_ENCODER
= map(XML_11_ATTRIBUTE, new XMLEncoder(XMLEncoder.Mode.ATTRIBUTE, XMLEncoder.Version.XML_1_1));
/**
* Encoder for CDATA contexts.
*/
Expand Down
100 changes: 87 additions & 13 deletions core/src/main/java/org/owasp/encoder/XMLEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,27 @@ class XMLEncoder extends Encoder {
* The encoded length of a double-quotation character.
*/
static final int QUOT_LENGTH = 5;
/**
 * The encoded length of a control character reference (e.g.,
 * {@code &#x1f;}): the {@code &#x} prefix, two hexadecimal digits, and the
 * closing {@code ;}.
 */
static final int CONTROL_CHAR_REF_LENGTH = 6;

/**
 * An enum of supported XML versions for the XMLEncoder. The version selects
 * how control characters that cannot appear literally are handled: replaced
 * (XML 1.0) or written as character references (XML 1.1).
 */
enum Version {
/**
 * XML 1.0 - control characters (except tab, lf, cr, and NEL) are replaced with space.
 * Valid chars: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 */
XML_1_0,
/**
 * XML 1.1 - restricted control characters are encoded as hexadecimal
 * character references; tab, lf, cr, and NEL (#x85) pass through unencoded.
 * All chars [#x1-#x10FFFF] are allowed (excluding noncharacters), so the
 * null character is still invalid and is replaced with space.
 * Restricted chars [#x1-#x8, #xB-#xC, #xE-#x1F, #x7F-#x84, #x86-#x9F] must be encoded.
 */
XML_1_1
}

/**
* An enum of supported "modes" of operation for the XMLEncoder.
Expand Down Expand Up @@ -174,27 +195,45 @@ long validMask() {
* implementation.
*/
private final Mode _mode;
/**
 * The XML version for this encoder. Decides whether invalid control
 * characters are replaced (XML 1.0) or written as character references
 * (XML 1.1), and therefore also the per-character worst case reported by
 * {@code maxEncodedLength}.
 */
private final Version _version;

/**
* Default constructor--equivalent to XMLEncoder(Mode.ALL).
* Default constructor--equivalent to XMLEncoder(Mode.ALL, Version.XML_1_0).
*/
XMLEncoder() {
this(Mode.ALL);
this(Mode.ALL, Version.XML_1_0);
}

/**
 * Creates an XMLEncoder for the specified mode constant with XML 1.0
 * control character handling; delegates to the two-argument constructor
 * with {@code Version.XML_1_0}.
 *
 * @param mode the mode of the encoder.
 */
XMLEncoder(Mode mode) {
this(mode, Version.XML_1_0);
}

/**
 * Creates an XMLEncoder that combines the given mode with the given XML
 * version. The mask returned by {@code mode.validMask()} is stored
 * alongside the mode and version.
 *
 * @param mode the mode of the encoder.
 * @param version the XML version for the encoder.
 */
XMLEncoder(Mode mode, Version version) {
    this._validMask = mode.validMask();
    this._mode = mode;
    this._version = version;
}

@Override
public int maxEncodedLength(int n) {
    // Worst-case expansion per input char. In XML 1.0 mode the widest
    // replacement is a reference such as "&amp;" (5 chars); in XML 1.1 mode
    // a control character becomes "&#xHH;" (6 chars).
    final int perChar = (_version == Version.XML_1_1)
        ? CONTROL_CHAR_REF_LENGTH
        : MAX_ENCODED_CHAR_LENGTH;
    return n * perChar;
}

Expand All @@ -213,6 +252,7 @@ public int firstEncodedOffset(String input, int off, int len) {
}
} else if (ch < Character.MIN_HIGH_SURROGATE) {
if (ch <= Unicode.MAX_C1_CTRL_CHAR && ch != Unicode.NEL) {
// C1 control character - needs encoding in XML 1.1 or replacement in XML 1.0
return i;
// } else {
// // valid
Expand Down Expand Up @@ -314,23 +354,57 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
out[j++] = ';';
break;
default:
// invalid character
if (j >= m) {
return overflow(input, i, output, j);
// invalid character for XML 1.0
if (_version == Version.XML_1_1 && ch != 0) {
// In XML 1.1, encode C0 control characters (except null) as character references
if (j + CONTROL_CHAR_REF_LENGTH > m) {
return overflow(input, i, output, j);
}
out[j++] = '&';
out[j++] = '#';
out[j++] = 'x';
int val = ch;
out[j++] = Character.forDigit((val >> 4) & 0xF, 16);
out[j++] = Character.forDigit(val & 0xF, 16);
out[j++] = ';';
} else {
// XML 1.0: replace invalid character with space
// XML 1.1: null is still invalid, replace with space
if (j >= m) {
return overflow(input, i, output, j);
}
out[j++] = INVALID_CHARACTER_REPLACEMENT;
}
out[j++] = INVALID_CHARACTER_REPLACEMENT;
break;
}
}
} else if (ch < Character.MIN_HIGH_SURROGATE) {
if (j >= m) {
return overflow(input, i, output, j);
}
if (ch > Unicode.MAX_C1_CTRL_CHAR || ch == Unicode.NEL) {
if (j >= m) {
return overflow(input, i, output, j);
}
out[j++] = ch;
} else {
// C1 control code
out[j++] = INVALID_CHARACTER_REPLACEMENT;
if (_version == Version.XML_1_1) {
// In XML 1.1, encode C1 control characters (except NEL) as character references
if (j + CONTROL_CHAR_REF_LENGTH > m) {
return overflow(input, i, output, j);
}
out[j++] = '&';
out[j++] = '#';
out[j++] = 'x';
int val = ch;
out[j++] = Character.forDigit((val >> 4) & 0xF, 16);
out[j++] = Character.forDigit(val & 0xF, 16);
out[j++] = ';';
} else {
// XML 1.0: replace invalid character with space
if (j >= m) {
return overflow(input, i, output, j);
}
out[j++] = INVALID_CHARACTER_REPLACEMENT;
}
}
} else if (ch <= Character.MAX_HIGH_SURROGATE) {
if (i + 1 < n) {
Expand Down Expand Up @@ -389,6 +463,6 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean

@Override
public String toString() {
return "XMLEncoder(" + _mode + ")";
return "XMLEncoder(" + _mode + ", " + _version + ")";
}
}
Loading
Loading