'use strict'; /** * Checks if a given buffer contains only correct UTF-8. * Ported from https://www.cl.cam.ac.uk/%7Emgk25/ucs/utf8_check.c by * Markus Kuhn. * * @param {Buffer} buf The buffer to check * @return {Boolean} `true` if `buf` contains only correct UTF-8, else `false` * @public */ function isValidUTF8(buf) { const len = buf.length; let i = 0; while (i < len) { if ((buf[i] & 0x80) === 0x00) { // 0xxxxxxx i++; } else if ((buf[i] & 0xe0) === 0xc0) { // 110xxxxx 10xxxxxx if ( i + 1 === len || (buf[i + 1] & 0xc0) !== 0x80 || (buf[i] & 0xfe) === 0xc0 // overlong ) { return false; } i += 2; } else if ((buf[i] & 0xf0) === 0xe0) { // 1110xxxx 10xxxxxx 10xxxxxx if ( i + 2 >= len || (buf[i + 1] & 0xc0) !== 0x80 || (buf[i + 2] & 0xc0) !== 0x80 || buf[i] === 0xe0 && (buf[i + 1] & 0xe0) === 0x80 || // overlong buf[i] === 0xed && (buf[i + 1] & 0xe0) === 0xa0 // surrogate (U+D800 - U+DFFF) ) { return false; } i += 3; } else if ((buf[i] & 0xf8) === 0xf0) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx if ( i + 3 >= len || (buf[i + 1] & 0xc0) !== 0x80 || (buf[i + 2] & 0xc0) !== 0x80 || (buf[i + 3] & 0xc0) !== 0x80 || buf[i] === 0xf0 && (buf[i + 1] & 0xf0) === 0x80 || // overlong buf[i] === 0xf4 && buf[i + 1] > 0x8f || buf[i] > 0xf4 // > U+10FFFF ) { return false; } i += 4; } else { return false; } } return true; } module.exports = isValidUTF8;