X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/8d3bb1a5dcfdea9857d3c88c3751f09593e34dc8..ef91a7f4f3c6468973e192335a27ec0e0faca0b5:/wp-admin/js/word-count.js diff --git a/wp-admin/js/word-count.js b/wp-admin/js/word-count.js index c0255fd2..89afdcc0 100644 --- a/wp-admin/js/word-count.js +++ b/wp-admin/js/word-count.js @@ -1,10 +1,57 @@ +/** + * Word or character counting functionality. Count words or characters in a provided text string. + * + * @summary Count words or characters in a text. + * + * @namespace wp.utils + * @since 2.6 + */ + ( function() { + /** + * Word counting utility + * + * @namespace wp.utils.wordcounter + * @memberof wp.utils + * + * @class + * + * @param {Object} settings Optional. Key-value object containing overrides for + * settings. + * @param {RegExp} settings.HTMLRegExp Optional. Regular expression to find HTML elements. + * @param {RegExp} settings.HTMLcommentRegExp Optional. Regular expression to find HTML comments. + * @param {RegExp} settings.spaceRegExp Optional. Regular expression to find irregular space + * characters. + * @param {RegExp} settings.HTMLEntityRegExp Optional. Regular expression to find HTML entities. + * @param {RegExp} settings.connectorRegExp Optional. Regular expression to find connectors that + * split words. + * @param {RegExp} settings.removeRegExp Optional. Regular expression to find remove unwanted + * characters to reduce false-positives. + * @param {RegExp} settings.astralRegExp Optional. Regular expression to find unwanted + * characters when searching for non-words. + * @param {RegExp} settings.wordsRegExp Optional. Regular expression to find words by spaces. + * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which + * are non-spaces. + * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters + * including spaces. + * @param {RegExp} settings.shortcodesRegExp Optional. Regular expression to find shortcodes. + * @param {Object} settings.l10n Optional. Localization object containing specific + * configuration for the current localization. + * @param {String} settings.l10n.type Optional. Method of finding words to count. + * @param {Array} settings.l10n.shortcodes Optional. Array of shortcodes that should be removed + * from the text. + * + * @return void + */ function WordCounter( settings ) { var key, shortcodes; + // Apply provided settings to object settings. if ( settings ) { for ( key in settings ) { + + // Only apply valid settings. if ( settings.hasOwnProperty( key ) ) { this.settings[ key ] = settings[ key ]; } @@ -13,91 +60,151 @@ shortcodes = this.settings.l10n.shortcodes; + // If there are any localization shortcodes, add this as type in the settings. if ( shortcodes && shortcodes.length ) { this.settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + shortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' ); } } + // Default settings. WordCounter.prototype.settings = { HTMLRegExp: /<\/?[a-z][^>]*?>/gi, HTMLcommentRegExp: //g, spaceRegExp: / | /gi, HTMLEntityRegExp: /&\S+?;/g, + + // \u2014 = em-dash connectorRegExp: /--|\u2014/g, + + // Characters to be removed from input text. removeRegExp: new RegExp( [ '[', + // Basic Latin (extract) '\u0021-\u0040\u005B-\u0060\u007B-\u007E', + // Latin-1 Supplement (extract) '\u0080-\u00BF\u00D7\u00F7', - // General Punctuation - // Superscripts and Subscripts - // Currency Symbols - // Combining Diacritical Marks for Symbols - // Letterlike Symbols - // Number Forms - // Arrows - // Mathematical Operators - // Miscellaneous Technical - // Control Pictures - // Optical Character Recognition - // Enclosed Alphanumerics - // Box Drawing - // Block Elements - // Geometric Shapes - // Miscellaneous Symbols - // Dingbats - // Miscellaneous Mathematical Symbols-A - // Supplemental Arrows-A - // Braille Patterns - // Supplemental Arrows-B - // Miscellaneous Mathematical Symbols-B - // Supplemental Mathematical Operators - // Miscellaneous Symbols and Arrows + + /* + * The following range consists of: + * General Punctuation + * Superscripts and Subscripts + * Currency Symbols + * Combining Diacritical Marks for Symbols + * Letterlike Symbols + * Number Forms + * Arrows + * Mathematical Operators + * Miscellaneous Technical + * Control Pictures + * Optical Character Recognition + * Enclosed Alphanumerics + * Box Drawing + * Block Elements + * Geometric Shapes + * Miscellaneous Symbols + * Dingbats + * Miscellaneous Mathematical Symbols-A + * Supplemental Arrows-A + * Braille Patterns + * Supplemental Arrows-B + * Miscellaneous Mathematical Symbols-B + * Supplemental Mathematical Operators + * Miscellaneous Symbols and Arrows + */ '\u2000-\u2BFF', + // Supplemental Punctuation '\u2E00-\u2E7F', ']' ].join( '' ), 'g' ), + + // Remove UTF-16 surrogate points, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, wordsRegExp: /\S\s+/g, characters_excluding_spacesRegExp: /\S/g, + + /* + * Match anything that is not a formatting character, excluding: + * \f = form feed + * \n = new line + * \r = carriage return + * \t = tab + * \v = vertical tab + * \u00AD = soft hyphen + * \u2028 = line separator + * \u2029 = paragraph separator + */ characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g, l10n: window.wordCountL10n || {} }; + /** + * Counts the number of words (or other specified type) in the specified text. + * + * @summary Count the number of elements in a text. + * + * @since 2.6 + * @memberof wp.utils.wordcounter + * + * @param {String} text Text to count elements in. + * @param {String} type Optional. Specify type to use. + * + * @return {Number} The number of items counted. + */ WordCounter.prototype.count = function( text, type ) { var count = 0; + // Use default type if none was provided. type = type || this.settings.l10n.type; + // Sanitize type to one of three possibilities: 'words', 'characters_excluding_spaces' or 'characters_including_spaces'. if ( type !== 'characters_excluding_spaces' && type !== 'characters_including_spaces' ) { type = 'words'; } + // If we have any text at all. if ( text ) { text = text + '\n'; + // Replace all HTML with a new-line. text = text.replace( this.settings.HTMLRegExp, '\n' ); + + // Remove all HTML comments. text = text.replace( this.settings.HTMLcommentRegExp, '' ); + // If a shortcode regular expression has been provided use it to remove shortcodes. if ( this.settings.shortcodesRegExp ) { text = text.replace( this.settings.shortcodesRegExp, '\n' ); } + // Normalize non-breaking space to a normal space. text = text.replace( this.settings.spaceRegExp, ' ' ); if ( type === 'words' ) { + + // Remove HTML Entities. text = text.replace( this.settings.HTMLEntityRegExp, '' ); + + // Convert connectors to spaces to count attached text as words. text = text.replace( this.settings.connectorRegExp, ' ' ); + + // Remove unwanted characters. text = text.replace( this.settings.removeRegExp, '' ); } else { + + // Convert HTML Entities to "a". text = text.replace( this.settings.HTMLEntityRegExp, 'a' ); + + // Remove surrogate points. text = text.replace( this.settings.astralRegExp, 'a' ); } + // Match with the selected type regular expression to count the items. text = text.match( this.settings[ type + 'RegExp' ] ); + // If we have any matches, set the count to the number of items found. if ( text ) { count = text.length; } @@ -106,6 +213,7 @@ return count; }; + // Add the WordCounter to the WP Utils. window.wp = window.wp || {}; window.wp.utils = window.wp.utils || {}; window.wp.utils.WordCounter = WordCounter;