Overlong UTF-8 Encoding Attack
Used to bypass WAF filters when testing for XSS and CRLF injection
%C0%8A / %E0%80%8A ⇒ %0A (LF)
%C0%8D / %E0%80%8D ⇒ %0D (CR)
%C0%BE / %E0%80%BE ⇒ %3E (>)
%C0%BC / %E0%80%BC ⇒ %3C (<)
%C0%A2 / %E0%80%A2 ⇒ %22 (")
%C0%A7 / %E0%80%A7 ⇒ %27 (')
/**
 * Builds the overlong (non-shortest-form) 2-byte and 3-byte UTF-8 encodings
 * of a single ASCII character, each rendered as percent-encoded bytes.
 *
 * @param {string} hex - The character's code point as hexadecimal digits,
 *   e.g. "22" for the double quote.
 * @returns {string} The two forms joined by " / ",
 *   e.g. "%C0%A2 / %E0%80%A2".
 * @throws {Error} If `hex` does not parse to a code point in 0x00..0x7F.
 */
function toOverlongUTF8(hex) {
  const codePoint = parseInt(hex, 16);

  // Number.isInteger rejects NaN (unparseable input); the range check also
  // rejects negatives, which would otherwise produce garbage like "%-1".
  if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint >= 0x80) {
    throw new Error("Only works for ASCII characters");
  }

  // Every emitted byte is >= 0x80, so toString(16) always yields two digits.
  const toPercent = (byte) => `%${byte.toString(16).toUpperCase()}`;

  // A 7-bit code point splits into 1 high bit (bit 6) and 6 low bits.
  const highBits = codePoint >> 6;
  const continuation = 0x80 | (codePoint & 0x3F);

  // 2-byte form: 110000xx 10xxxxxx
  const twoByte = [0xC0 | highBits, continuation];
  // 3-byte form: 11100000 100000xx 10xxxxxx — the middle byte must carry the
  // high bit, otherwise characters >= 0x40 decode to the wrong code point.
  const threeByte = [0xE0, 0x80 | highBits, continuation];

  return `${twoByte.map(toPercent).join('')} / ${threeByte.map(toPercent).join('')}`;
}
// Demo: encode the double quote (0x22); prints "%C0%A2 / %E0%80%A2".
const overlongQuote = toOverlongUTF8("22");
console.log(overlongQuote);