<?php
/**
+ * Simplified Chinese
+ *
* @ingroup Language
*/
class LanguageZh_hans extends Language {
* for now just treat each character as a word.
* @todo Fixme: only do this for Han characters...
*/
- function wordSegmentation( $string ) {
+ function segmentByWord( $string ) {
// Returns $string after passing it through insertSpace() with a pattern
// that matches each multi-byte character individually.
// The pattern matches one multi-byte UTF-8 sequence: a lead byte
// (0xC0-0xFF) followed by its continuation bytes (0x80-0xBF). Plain ASCII
// bytes never match, but every non-ASCII character does, not only Han ones.
$reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
// NOTE(review): insertSpace() is defined outside this view; presumably it
// separates each match with spaces so every character becomes its own
// word - confirm against its definition.
$s = self::insertSpace( $string, $reg );
return $s;
}
- function normalizeForSearch( $string ) {
+ function normalizeForSearch( $s ) {
// Normalizes $s for search: delegates to the parent implementation first,
// then trims the result, then segments it with segmentByWord().
// wfProfileIn()/wfProfileOut() bracket the whole body.
wfProfileIn( __METHOD__ );
// Double-width roman characters
// NOTE(review): the explicit convertDoubleWidth() call is removed below, so
// the comment above now sits on the parent call; presumably
// parent::normalizeForSearch() performs that conversion - confirm.
- $s = self::convertDoubleWidth( $string );
- $s = trim( $s );
$s = parent::normalizeForSearch( $s );
+ $s = trim( $s );
// Segmentation runs last, on the already normalized and trimmed text.
+ $s = $this->segmentByWord( $s );
wfProfileOut( __METHOD__ );
return $s;
}
-}
\ No newline at end of file
+}