Skip to content

Overlong UTF-8 Encoding Attack

Used to bypass WAF filters in XSS and CRLF injection attacks

%C0%8A / %E0%80%8A ⇒ %0A (LF)
%C0%8D / %E0%80%8D ⇒ %0D (CR)
%C0%BE / %E0%80%BE ⇒ %3E (>)
%C0%BC / %E0%80%BC ⇒ %3C (<)
%C0%A2 / %E0%80%A2 ⇒ %22 (")
%C0%A7 / %E0%80%A7 ⇒ %27 (')
/**
 * Build the overlong (non-shortest-form) 2-byte and 3-byte UTF-8 encodings
 * of an ASCII character, percent-encoded for use in URLs.
 *
 * @param {string} hex - Code point of the character as a hex string, e.g. "22" for `"`.
 * @returns {string} The two encodings separated by " / ", e.g. "%C0%A2 / %E0%80%A2".
 * @throws {Error} If `hex` does not parse to a code point in the range 0x00-0x7F.
 */
function toOverlongUTF8(hex){
    const codePoint = parseInt(hex, 16);
    // Reject NaN (unparseable input), negatives and anything outside ASCII.
    if(!Number.isInteger(codePoint) || codePoint < 0 || codePoint >= 0x80){
        throw new Error("Only works for ASCII characters");
    }
    // Every emitted byte is >= 0x80, so it always renders as two hex digits.
    const toPercentHex = (b) => '%' + b.toString(16).toUpperCase();
    const highBits = codePoint >> 6;                  // bit 6 of the code point (0 or 1)
    const lastByte = 0x80 | (codePoint & 0x3F);       // low six bits in a continuation byte
    // 2-byte form: 110xxxxx 10xxxxxx
    const twoByte = [0xC0 | highBits, lastByte];
    // 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx; bit 6 belongs in the middle byte,
    // otherwise code points 0x40-0x7F would decode to the wrong character.
    const threeByte = [0xE0, 0x80 | highBits, lastByte];
    return twoByte.map(toPercentHex).join('') + ' / ' + threeByte.map(toPercentHex).join('');
}

// Example: encode the double quote (0x22); prints "%C0%A2 / %E0%80%A2".
const doubleQuoteOverlong = toOverlongUTF8("22");
console.log(doubleQuoteOverlong);