]> scripts.mit.edu Git - autoinstalls/wordpress.git/blob - wp-admin/js/word-count.js
WordPress 4.7.2
[autoinstalls/wordpress.git] / wp-admin / js / word-count.js
1 /**
2  * Word or character counting functionality. Count words or characters in a provided text string.
3  *
4  * @summary   Count words or characters in a text.
5  *
6  * @namespace wp.utils
7  * @since     2.6
8  */
9
10 ( function() {
/**
 * Word counting utility.
 *
 * Each instance gets its own `settings` object: a shallow copy of the defaults found on
 * `WordCounter.prototype.settings`, with the provided overrides applied on top. The
 * prototype defaults themselves are never modified by the constructor.
 *
 * @namespace wp.utils.wordcounter
 * @memberof  wp.utils
 *
 * @class
 *
 * @param {Object} settings                                   Optional. Key-value object containing overrides for
 *                                                            settings.
 * @param {RegExp} settings.HTMLRegExp                        Optional. Regular expression to find HTML elements.
 * @param {RegExp} settings.HTMLcommentRegExp                 Optional. Regular expression to find HTML comments.
 * @param {RegExp} settings.spaceRegExp                       Optional. Regular expression to find irregular space
 *                                                            characters.
 * @param {RegExp} settings.HTMLEntityRegExp                  Optional. Regular expression to find HTML entities.
 * @param {RegExp} settings.connectorRegExp                   Optional. Regular expression to find connectors that
 *                                                            split words.
 * @param {RegExp} settings.removeRegExp                      Optional. Regular expression to find unwanted
 *                                                            characters, removed to reduce false-positives.
 * @param {RegExp} settings.astralRegExp                      Optional. Regular expression to find unwanted
 *                                                            characters when searching for non-words.
 * @param {RegExp} settings.wordsRegExp                       Optional. Regular expression to find words by spaces.
 * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which
 *                                                            are non-spaces.
 * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters
 *                                                            including spaces.
 * @param {RegExp} settings.shortcodesRegExp                  Optional. Regular expression to find shortcodes.
 *                                                            Replaced by a generated expression whenever
 *                                                            `l10n.shortcodes` is a non-empty list.
 * @param {Object} settings.l10n                              Optional. Localization object containing specific
 *                                                            configuration for the current localization.
 * @param {String} settings.l10n.type                         Optional. Method of finding words to count.
 * @param {Array}  settings.l10n.shortcodes                   Optional. Array of shortcodes that should be removed
 *                                                            from the text.
 *
 * @return void
 */
function WordCounter( settings ) {
    var hasOwn = Object.prototype.hasOwnProperty,
        defaults = this.settings,
        merged = {},
        escaped = [],
        key,
        shortcodes,
        i;

    /*
     * Copy the defaults first. Writing straight into `this.settings` would modify the
     * object on the prototype, leaking one instance's overrides into all the others.
     */
    if ( defaults ) {
        for ( key in defaults ) {
            if ( hasOwn.call( defaults, key ) ) {
                merged[ key ] = defaults[ key ];
            }
        }
    }

    // Apply provided settings on top of the defaults, own properties only.
    if ( settings ) {
        for ( key in settings ) {
            if ( hasOwn.call( settings, key ) ) {
                merged[ key ] = settings[ key ];
            }
        }
    }

    this.settings = merged;

    shortcodes = merged.l10n && merged.l10n.shortcodes;

    // If there are any localization shortcodes, build the expression that finds them.
    if ( shortcodes && shortcodes.length ) {

        // Escape regular expression metacharacters so names are matched literally.
        for ( i = 0; i < shortcodes.length; i++ ) {
            escaped.push( String( shortcodes[ i ] ).replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ) );
        }

        merged.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + escaped.join( '|' ) + ')[^\\]]*?\\]', 'g' );
    }
}
68
// Default settings, stored on the prototype.
WordCounter.prototype.settings = {

    // Opening and closing HTML tags.
    HTMLRegExp: /<\/?[a-z][^>]*?>/gi,

    // HTML comments, including multi-line ones.
    HTMLcommentRegExp: /<!--[\s\S]*?-->/g,

    // Non-breaking space entities.
    spaceRegExp: /&nbsp;|&#160;/gi,

    // Any HTML entity.
    HTMLEntityRegExp: /&\S+?;/g,

    // Connectors that split words: a double hyphen or \u2014 (em-dash).
    connectorRegExp: /--|\u2014/g,

    // Character class of everything that is stripped from the text before counting words.
    removeRegExp: new RegExp(
        '[' +

            // Basic Latin (extract)
            '\u0021-\u0040\u005B-\u0060\u007B-\u007E' +

            // Latin-1 Supplement (extract)
            '\u0080-\u00BF\u00D7\u00F7' +

            /*
             * One contiguous range covering these blocks:
             * General Punctuation, Superscripts and Subscripts, Currency Symbols,
             * Combining Diacritical Marks for Symbols, Letterlike Symbols, Number Forms,
             * Arrows, Mathematical Operators, Miscellaneous Technical, Control Pictures,
             * Optical Character Recognition, Enclosed Alphanumerics, Box Drawing,
             * Block Elements, Geometric Shapes, Miscellaneous Symbols, Dingbats,
             * Miscellaneous Mathematical Symbols-A, Supplemental Arrows-A, Braille Patterns,
             * Supplemental Arrows-B, Miscellaneous Mathematical Symbols-B,
             * Supplemental Mathematical Operators, Miscellaneous Symbols and Arrows.
             */
            '\u2000-\u2BFF' +

            // Supplemental Punctuation
            '\u2E00-\u2E7F' +
        ']',
        'g'
    ),

    // UTF-16 surrogate pairs, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
    astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,

    // A non-space character followed by a run of whitespace, i.e. the end of a word.
    wordsRegExp: /\S\s+/g,

    // Every non-space character.
    characters_excluding_spacesRegExp: /\S/g,

    /*
     * Every character except these formatting characters:
     * \f (form feed), \n (new line), \r (carriage return), \t (tab), \v (vertical tab),
     * \u00AD (soft hyphen), \u2028 (line separator), \u2029 (paragraph separator).
     */
    characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,

    // Localization data, taken from the global `wordCountL10n` when it is defined.
    l10n: window.wordCountL10n || {}
};
142
/**
 * Counts words, or characters with or without spaces, in a piece of text.
 *
 * HTML tags, HTML comments and (when configured) shortcodes are stripped before counting.
 *
 * @summary  Count the number of elements in a text.
 *
 * @since    2.6
 * @memberof wp.utils.wordcounter
 *
 * @param {String}  text Text to count elements in.
 * @param {String}  type Optional. One of 'words', 'characters_excluding_spaces' or
 *                       'characters_including_spaces'. Falls back to `settings.l10n.type`;
 *                       any other value is treated as 'words'.
 *
 * @return {Number} The number of items counted, 0 for empty text.
 */
WordCounter.prototype.count = function( text, type ) {
    var config = this.settings,
        mode = type || config.l10n.type,
        cleaned,
        matches;

    // Anything other than the two character modes is counted as words.
    if ( mode !== 'characters_excluding_spaces' && mode !== 'characters_including_spaces' ) {
        mode = 'words';
    }

    // Nothing to count.
    if ( ! text ) {
        return 0;
    }

    // The trailing new-line terminates the last word; tags become new-lines, comments vanish.
    cleaned = ( text + '\n' )
        .replace( config.HTMLRegExp, '\n' )
        .replace( config.HTMLcommentRegExp, '' );

    // Shortcodes are only stripped when an expression for them is configured.
    if ( config.shortcodesRegExp ) {
        cleaned = cleaned.replace( config.shortcodesRegExp, '\n' );
    }

    // Non-breaking spaces become regular spaces.
    cleaned = cleaned.replace( config.spaceRegExp, ' ' );

    if ( mode === 'words' ) {

        // Drop entities, turn connectors into spaces, then strip unwanted characters.
        cleaned = cleaned
            .replace( config.HTMLEntityRegExp, '' )
            .replace( config.connectorRegExp, ' ' )
            .replace( config.removeRegExp, '' );
    } else {

        // Entities and surrogate pairs each count as one character: stand in an "a" for them.
        cleaned = cleaned
            .replace( config.HTMLEntityRegExp, 'a' )
            .replace( config.astralRegExp, 'a' );
    }

    // Every match of the expression for the selected mode is one counted item.
    matches = cleaned.match( config[ mode + 'RegExp' ] );

    return matches ? matches.length : 0;
};
215
// Expose the constructor as wp.utils.WordCounter, creating the namespaces when they are missing.
if ( ! window.wp ) {
    window.wp = {};
}

if ( ! window.wp.utils ) {
    window.wp.utils = {};
}

window.wp.utils.WordCounter = WordCounter;
220 } )();