- substr( $comment1['comment_author'], 0, 255 ) == substr( $comment2['comment_author'], 0, 255 )
- || substr( stripslashes( $comment1['comment_author'] ), 0, 255 ) == substr( $comment2['comment_author'], 0, 255 )
- || substr( $comment1['comment_author'], 0, 255 ) == substr( stripslashes( $comment2['comment_author'] ), 0, 255 )
+ // If the comment author includes multibyte characters right around the 255-byte mark, they
+ // may be stripped when the author is saved in the DB, so a 300+ byte author may turn into
+ // a 253-byte author when it's saved, not exactly 255. The longest possible character is
+ // theoretically 6 bytes, so we'll only look at the first 248 bytes to be safe.
+ substr( $comment1['comment_author'], 0, 248 ) == substr( $comment2['comment_author'], 0, 248 )
+ || substr( stripslashes( $comment1['comment_author'] ), 0, 248 ) == substr( $comment2['comment_author'], 0, 248 )
+ || substr( $comment1['comment_author'], 0, 248 ) == substr( stripslashes( $comment2['comment_author'] ), 0, 248 )
+ // Certain long comment author names will be truncated to nothing, depending on their encoding.
+ || ( ! $comment1['comment_author'] && strlen( $comment2['comment_author'] ) > 248 )
+ || ( ! $comment2['comment_author'] && strlen( $comment1['comment_author'] ) > 248 )