scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - resources/src/jquery/jquery.byteLength.js
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / resources / src / jquery / jquery.byteLength.js
1 /**
2  * @class jQuery.plugin.byteLength
3  * @author Jan Paul Posma, 2011
4  * @author Timo Tijhof, 2012
5  * @author David Chan, 2013
6  */
7
/**
 * Calculate how many bytes a string occupies once encoded as UTF-8.
 *
 * JavaScript strings are sequences of UTF-16 code units, so the result is
 * derived per code unit (or per surrogate pair) rather than from `str.length`.
 * An unpaired surrogate is counted as 3 bytes, the size of the U+FFFD
 * replacement character that UTF-8 encoders emit in its place.
 *
 * No type coercion is performed: `str` is used as-is and must provide
 * `String.prototype.replace`.
 *
 * @static
 * @inheritable
 * @param {string} str
 * @return {number} Length of `str` in UTF-8 bytes
 */
jQuery.byteLength = function ( str ) {
	// Technique: swap every non-ASCII code unit (or surrogate pair) for as many
	// ASCII '*' characters as it needs bytes in UTF-8, then read the length of
	// the resulting all-ASCII string. '*' itself is ASCII, so text produced by an
	// earlier pass is never matched again by a later one.
	//
	// https://en.wikipedia.org/wiki/UTF-8#Description
	// Mapping from UTF-16 code units to UTF-8 bytes:
	// > 0000-007F: 1 byte
	// > 0080-07FF: 2 bytes
	// > 0800-D7FF: 3 bytes
	// > D800-DBFF followed by DC00-DFFF: one surrogate pair, 4 bytes in total
	// > D800-DFFF without a partner: not encodable, replaced by U+FFFD, 3 bytes
	// > E000-FFFF: 3 bytes

	return str
		// Well-formed surrogate pairs have to go first, so that only unpaired
		// surrogates are still present when the 3-byte pass runs.
		.replace( /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, '****' )
		.replace( /[\u0080-\u07FF]/g, '**' )
		// Everything that is left at or above U+0800, including lone surrogates.
		.replace( /[\u0800-\uFFFF]/g, '***' )
		.length;
};
36
37 /**
38  * @class jQuery
39  * @mixins jQuery.plugin.byteLength
40  */