3 * RFC 822 Email address list validation Utility
9 * Copyright (c) 2001-2017, Chuck Hagenbuch & Richard Heyes
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the copyright holder nor the names of its
24 * contributors may be used to endorse or promote products derived from
25 * this software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 * @author Richard Heyes <richard@phpguru.org>
42 * @author Chuck Hagenbuch <chuck@horde.org>
43 * @copyright 2001-2017 Richard Heyes
44 * @license http://opensource.org/licenses/BSD-3-Clause New BSD License
46 * @link http://pear.php.net/package/Mail/
50 * RFC 822 Email address list validation Utility
54 * This class will take an address string, and parse it into its constituent
55 * parts, be that either addresses, groups, or combinations. Nested groups
56 * are not supported. The structure it returns is pretty straight forward,
57 * and is similar to that provided by the imap_rfc822_parse_adrlist(). Use
58 * print_r() to view the structure.
62 * $address_string = 'My Group: "Richard" <richard@localhost> (A comment), ted@example.com (Ted Bloggs), Barney;';
63 * $structure = Mail_RFC822::parseAddressList($address_string, 'example.com', true);
64 * print_r($structure);
66 * @author Richard Heyes <richard@phpguru.org>
67 * @author Chuck Hagenbuch <chuck@horde.org>
/**
 * The address string currently being parsed.
 * @var string $address
 */
public $address = '';

/**
 * Domain appended to addresses that carry no "@domain" part.
 * @var string $default_domain
 */
public $default_domain = 'localhost';

/**
 * Whether groups are returned as nested objects (true) or flattened
 * into a single list of mailboxes (false).
 * @var boolean $nestGroups
 */
public $nestGroups = true;

/**
 * Whether atoms are checked for non-ASCII and special characters.
 * @var boolean $validate
 */
public $validate = true;

/**
 * Raw address/group strings collected while splitting the input.
 * @var array $addresses
 */
public $addresses = array();

/**
 * The parsed result that is built up and finally returned.
 * @var array $structure
 */
public $structure = array();

/**
 * The current error message; null while no error has occurred.
 * @var string $error
 */
public $error = null;

/**
 * Internal counter/pointer recording how many parts the last
 * _splitCheck() call had to join.
 * @var integer $index
 */
public $index = null;

/**
 * Number of groups found in the address list so far.
 * @var integer $num_groups
 */
public $num_groups = 0;

/**
 * Marker used to tell whether code is running inside a Mail_RFC822
 * object (as opposed to a static call).
 * @var boolean $mailRFC822
 */
public $mailRFC822 = true;

/**
 * Maximum number of addresses to split off; processing stops once it
 * is reached. Empty means no limit.
 * @var integer $limit
 */
public $limit = null;
/**
 * Creates a parser, optionally pre-loading the address list and options.
 *
 * Every argument left as null keeps the matching property at its current
 * value. The address list may be given here or later when calling
 * parseAddressList().
 *
 * @param string  $address        The address(es) to validate.
 * @param string  $default_domain Domain used for unqualified addresses.
 * @param boolean $nest_groups    Whether groups are returned nested.
 * @param boolean $validate       Whether atoms are validated. Turn this off to
 *                                split addresses before personal names are encoded.
 * @param integer $limit          Number of addresses after which splitting stops.
 */
public function __construct($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
{
    // Map each property to the constructor argument that may override it.
    $overrides = array(
        'address'        => $address,
        'default_domain' => $default_domain,
        'nestGroups'     => $nest_groups,
        'validate'       => $validate,
        'limit'          => $limit,
    );

    foreach ($overrides as $property => $value) {
        if (isset($value)) {
            $this->$property = $value;
        }
    }
}
/**
 * Parses an address list and returns the resulting structure.
 *
 * The address must be supplied either here or when the object was
 * created. Arguments left as null keep the value already stored on the
 * object.
 *
 * @param string  $address        The address(es) to validate.
 * @param string  $default_domain Default domain/host for unqualified addresses.
 * @param boolean $nest_groups    Whether to return groups nested for easier viewing.
 * @param boolean $validate       Whether to validate atoms. Turn this off to run
 *                                addresses through before encoding personal names.
 * @param integer $limit          Number of addresses after which splitting stops.
 *
 * @return mixed A structured array of addresses, or the value of
 *               PEAR::raiseError() when the list is invalid.
 */
public function parseAddressList($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
{
    // Legacy static-call support: without an object context, build one
    // and delegate to it.
    if (!isset($this) || !isset($this->mailRFC822)) {
        $obj = new Mail_RFC822($address, $default_domain, $nest_groups, $validate, $limit);
        return $obj->parseAddressList();
    }

    if (isset($address)) {
        $this->address = $address;
    }
    if (isset($default_domain)) {
        $this->default_domain = $default_domain;
    }
    if (isset($nest_groups)) {
        $this->nestGroups = $nest_groups;
    }
    if (isset($validate)) {
        $this->validate = $validate;
    }
    if (isset($limit)) {
        $this->limit = $limit;
    }

    // Start every run from a clean state so that a previous failure
    // does not leak into this one.
    $this->structure = array();
    $this->addresses = array();
    $this->error     = null;
    $this->index     = null;

    // Unfold any long lines in $this->address.
    $this->address = preg_replace('/\r?\n/', "\r\n", $this->address);
    $this->address = preg_replace('/\r\n(\t| )+/', ' ', $this->address);

    // Split off one address/group per pass. The loop ends on '' (nothing
    // left) or false (error). Testing plain truthiness here would also
    // stop on a remainder of "0" and silently drop that address.
    do {
        $this->address = $this->_splitAddresses($this->address);
    } while (is_string($this->address) && $this->address !== '');

    if ($this->address === false || isset($this->error)) {
        require_once 'PEAR.php';
        return PEAR::raiseError($this->error);
    }

    // Validate each address individually. If we encounter an invalid
    // address, stop iterating and return an error immediately.
    foreach ($this->addresses as $address) {
        $valid = $this->_validateAddress($address);

        if ($valid === false || isset($this->error)) {
            require_once 'PEAR.php';
            return PEAR::raiseError($this->error);
        }

        if (!$this->nestGroups) {
            $this->structure = array_merge($this->structure, $valid);
        } else {
            $this->structure[] = $valid;
        }
    }

    return $this->structure;
}
/**
 * Splits the first address or group off the front of an address list.
 *
 * The split-off entry is appended to $this->addresses as an array with
 * the keys 'address' and 'group'.
 *
 * @param string $address The addresses to split.
 * @return mixed The remaining, still unsplit part of the list ('' when
 *               nothing is left or the limit was reached), or false on error.
 */
protected function _splitAddresses($address)
{
    // Stop quietly once the configured number of entries was collected.
    if (!empty($this->limit) && count($this->addresses) == $this->limit) {
        return '';
    }

    // _isGroup() may itself record an error, so look at the error state
    // only after it has run.
    $is_group = $this->_isGroup($address);
    if (isset($this->error)) {
        return false;
    }

    // Groups are terminated by ';', plain addresses by ','.
    $split_char = $is_group ? ';' : ',';
    $string     = $this->_splitCheck(explode($split_char, $address), $split_char);

    if ($is_group) {
        // A group needs a colon, and it must sit outside of
        // brackets/quotes; otherwise something is wrong.
        if (strpos($string, ':') === false) {
            $this->error = 'Invalid address: ' . $string;
            return false;
        }

        if (!$this->_splitCheck(explode(':', $string), ':')) {
            return false;
        }

        // Definitely a group at this point, so count it.
        $this->num_groups++;
    }

    // $string now holds the first full address/group: remember it.
    $this->addresses[] = array(
        'address' => trim($string),
        'group'   => $is_group,
    );

    // Drop the stored entry from the front of the list; the +1 accounts
    // for the character that was split on.
    $address = trim(substr($address, strlen($string) + 1));

    // A group may be followed by a comma before the next entry.
    if ($is_group && substr($address, 0, 1) == ',') {
        return trim(substr($address, 1));
    }

    // Whatever is left (possibly the empty string) is the next input.
    return $address;
}
/**
 * Tells whether the address list starts with a group.
 *
 * @param string $address The address to check.
 * @return boolean True when the first entry contains a colon that is not
 *                 escaped and not inside quotes or brackets.
 */
protected function _isGroup($address)
{
    // Isolate the first entry: everything up to the first comma that is
    // not in quotes, angles or escaped.
    $first = $this->_splitCheck(explode(',', $address), ',');

    // Without any colon there cannot be a group.
    $pieces = explode(':', $first);
    if (count($pieces) <= 1) {
        return false;
    }

    // If splitting on ':' stops before the end of the entry, the colon
    // is a real group separator.
    return $this->_splitCheck($pieces, ':') !== $first;
}
/**
 * Re-joins the pieces of an exploded string until the joined text has no
 * unclosed quotes or brackets and does not end in a backslash.
 *
 * On success $this->index is set to the index of the last piece that
 * was consumed.
 *
 * @param array  $parts The exploded string.
 * @param string $char  The char that was exploded on.
 * @return mixed False if the string contains unclosed quotes/brackets,
 *               or the joined string on success.
 */
protected function _splitCheck($parts, $char)
{
    $joined = $parts[0];
    $total  = count($parts);

    for ($pos = 0; $pos < $total; $pos++) {
        $incomplete = $this->_hasUnclosedQuotes($joined)
            || $this->_hasUnclosedBrackets($joined, '<>')
            || $this->_hasUnclosedBrackets($joined, '[]')
            || $this->_hasUnclosedBrackets($joined, '()')
            || substr($joined, -1) == '\\';

        if (!$incomplete) {
            // Balanced: remember how far we got and stop.
            $this->index = $pos;
            break;
        }

        if (!isset($parts[$pos + 1])) {
            // Nothing left to append, so the construct is never closed.
            $this->error = 'Invalid address spec. Unclosed bracket or quotes';
            return false;
        }

        // The split character was inside quotes/brackets: put it back.
        $joined = $joined . $char . $parts[$pos + 1];
    }

    return $joined;
}
/**
 * Checks whether a string contains an unclosed double quote.
 *
 * A quote preceded by an odd number of backslashes is escaped and does
 * not open or close a quoted section.
 *
 * @param string $string The string to check.
 * @return boolean True if there are unclosed quotes inside the string,
 *                 false otherwise.
 */
protected function _hasUnclosedQuotes($string)
{
    $text   = trim($string);
    $length = strlen($text);
    $open   = false;
    $run    = 0; // backslashes seen directly before the current character

    for ($pos = 0; $pos < $length; $pos++) {
        $current = $text[$pos];

        if ($current === '\\') {
            $run++;
            continue;
        }

        // An even number of preceding backslashes leaves the quote unescaped.
        if ($current === '"' && $run % 2 == 0) {
            $open = !$open;
        }

        // Any non-backslash character ends the backslash run.
        $run = 0;
    }

    return $open;
}
/**
 * Checks whether a string contains an unclosed bracket of the given kind.
 *
 * Used for angle, square and round brackets alike. Brackets that are
 * escaped or sit inside quotes are discounted. When there are more
 * closing than opening brackets, $this->error is set and false is returned.
 *
 * @param string $string The string to check.
 * @param string $chars  The opening and closing bracket, e.g. '<>'.
 * @return boolean True if there are unclosed brackets inside the string,
 *                 false otherwise.
 */
protected function _hasUnclosedBrackets($string, $chars)
{
    $opener = $chars[0];
    $closer = $chars[1];

    $opened = substr_count($string, $opener);
    $closed = substr_count($string, $closer);

    // Adjust both counts (passed by reference) for escaped/quoted brackets.
    $this->_hasUnclosedBracketsSub($string, $opened, $opener);
    $this->_hasUnclosedBracketsSub($string, $closed, $closer);

    if ($opened >= $closed) {
        return ($opened > $closed);
    }

    $this->error = 'Invalid address spec. Unmatched quote or bracket (' . $chars . ')';
    return false;
}
/**
 * Helper used only by _hasUnclosedBrackets().
 *
 * Walks the string piece by piece (split on $char) and decrements $num
 * each time the text read so far ends in a backslash or has an unclosed
 * quote.
 *
 * @param string  $string The string to check.
 * @param integer &$num   The number of occurrences; adjusted in place.
 * @param string  $char   The character to count.
 * @return integer The number of occurrences of $char in $string,
 *                 adjusted for backslashes and quotes.
 */
protected function _hasUnclosedBracketsSub($string, &$num, $char)
{
    $prefix = null;

    foreach (explode($char, $string) as $piece) {
        // Grow the prefix so each check sees everything up to this piece.
        $prefix = ($prefix === null) ? $piece : $prefix . $char . $piece;

        if (substr($prefix, -1) == '\\' || $this->_hasUnclosedQuotes($prefix)) {
            $num--;
        }
    }

    return $num;
}
/**
 * Validates one entry produced by _splitAddresses().
 *
 * For a group the group name is validated as a phrase and every member
 * is validated as a mailbox; a plain entry is validated as one mailbox.
 *
 * @param array $address Entry with the keys 'address' (string) and
 *                       'group' (boolean).
 * @return mixed False on failure, or the structured address information
 *               on success: with nested groups a group object or a
 *               single mailbox object, otherwise an array of mailboxes.
 */
protected function _validateAddress($address)
{
    $is_group  = $address['group'] ? true : false;
    $addresses = array();

    if ($is_group) {
        // Get the group part of the name
        $groupname = $this->_splitCheck(explode(':', $address['address']), ':');
        $structure = array();

        // And validate the group part of the name.
        if (!$this->_validatePhrase($groupname)) {
            $this->error = 'Group name did not validate.';
            return false;
        }

        // Don't include groups if we are not nesting
        // them. This avoids returning invalid addresses.
        if ($this->nestGroups) {
            $structure = new stdClass;
            $structure->groupname = $groupname;
        }

        $address['address'] = ltrim(substr($address['address'], strlen($groupname . ':')));

        // Split the group members on commas that are not quoted/bracketed.
        while (strlen($address['address']) > 0) {
            $parts       = explode(',', $address['address']);
            $addresses[] = $this->_splitCheck($parts, ',');
            $address['address'] = trim(substr($address['address'], strlen(end($addresses) . ',')));
        }
    } else {
        // Not a group: the whole entry is a single mailbox.
        $addresses[] = $address['address'];
    }

    // Trim the whitespace from all of the address strings. array_map()
    // returns a new array, so the result has to be assigned.
    $addresses = array_map('trim', $addresses);

    // Validate each mailbox.
    // Format could be one of: name <geezer@domain.com>
    // ... or any other format valid by RFC 822.
    // validateMailbox() replaces each string by a mailbox object.
    for ($i = 0; $i < count($addresses); $i++) {
        if (!$this->validateMailbox($addresses[$i])) {
            if (empty($this->error)) {
                $this->error = 'Validation failed for: ' . $addresses[$i];
            }
            return false;
        }
    }

    if ($this->nestGroups) {
        // Nested format
        if ($is_group) {
            $structure->addresses = $addresses;
        } else {
            $structure = $addresses[0];
        }
    } else {
        // Flat format
        if ($is_group) {
            $structure = array_merge($structure, $addresses);
        } else {
            $structure = $addresses;
        }
    }

    return $structure;
}
/**
 * Validates a phrase, i.e. a sequence of atoms and quoted strings
 * separated by spaces or tabs.
 *
 * @param string $phrase The phrase to check.
 * @return boolean Success or failure.
 */
protected function _validatePhrase($phrase)
{
    // Split on one or more tab or space.
    $tokens = preg_split('/[ \\x09]+/', $phrase, -1, PREG_SPLIT_NO_EMPTY);

    // Re-join tokens that belong to the same quoted string.
    $words = array();
    while (count($tokens) > 0) {
        $words[] = $this->_splitCheck($tokens, ' ');
        // Drop the tokens that the call above consumed.
        $tokens = array_slice($tokens, $this->index + 1);
    }

    foreach ($words as $word) {
        // A leading double quote marks a quoted string; anything else
        // has to be an atom.
        if (substr($word, 0, 1) == '"') {
            $accepted = $this->_validateQuotedString($word);
        } else {
            $accepted = $this->_validateAtom($word);
        }

        if (!$accepted) {
            return false;
        }
    }

    return true;
}
/**
 * Validates an atom, which RFC 822 defines as:
 *     atom = 1*<any CHAR except specials, SPACE and CTLs>
 *
 * When validation ($this->validate) is turned off nothing is checked,
 * so that a list of addresses can be split up before personal names
 * (umlauts, etc.) are encoded.
 *
 * @param string $atom The string to check.
 * @return boolean Success or failure.
 */
protected function _validateAtom($atom)
{
    if (!$this->validate) {
        // Validation has been turned off; assume the atom is okay.
        return true;
    }

    // Non-empty and made of bytes 0x00-0x7E only.
    $ascii_only = preg_match('/^[\\x00-\\x7E]+$/i', $atom);

    // Specials and space are not allowed in an atom.
    $has_special = preg_match('/[][()<>@,;\\:". ]/', $atom);

    // Control characters (ASCII 0-31) are not allowed either.
    $has_control = preg_match('/[\\x00-\\x1F]+/', $atom);

    return $ascii_only && !$has_special && !$has_control;
}
/**
 * Validates a quoted string, which is:
 *     quoted-string = <"> *(qtext/quoted-pair) <">
 *
 * @param string $qstring The string to check, including both quotes.
 * @return boolean Success or failure.
 */
protected function _validateQuotedString($qstring)
{
    // Strip the leading and trailing double quote.
    $inner = substr($qstring, 1, -1);

    // Quoted pairs (backslash plus any character) are always fine.
    $unescaped = preg_replace('/\\\\./', '', $inner);

    // What remains must not contain CR, a backslash or a double quote.
    return !preg_match('/[\x0D\\\\"]/', $unescaped);
}
/**
 * Validates a mailbox, which is:
 *     mailbox = addr-spec          ; simple address
 *             / phrase route-addr  ; name and route-addr
 *
 * RFC 822 comments are collected and removed before validation. On
 * success $mailbox is replaced by an object with the properties
 * personal, comment, mailbox, host and, when a route is present, adl.
 *
 * @param string &$mailbox The string to check; replaced by the mailbox
 *                         object on success.
 * @return boolean Success or failure.
 */
public function validateMailbox(&$mailbox)
{
    // A couple of defaults.
    $phrase   = '';
    $comments = array();

    // Catch any RFC822 comments and store them separately. The mailbox
    // is walked left to right by offset, so only real top-level comments
    // are removed. Searching and replacing the comment text would also
    // hit identical text elsewhere, e.g. inside a quoted display name
    // or inside the comment itself.
    $stripped  = '';
    $remaining = $mailbox;
    while (strlen(trim($remaining)) > 0) {
        $before_comment = $this->_splitCheck(explode('(', $remaining), '(');
        if ($before_comment === false) {
            // Unclosed quotes/brackets; _splitCheck() has set the error.
            return false;
        }
        if ($before_comment === $remaining) {
            // No further comment.
            break;
        }

        // The comment body starts right after the opening bracket.
        $comment = $this->_splitCheck(explode(')', substr($remaining, strlen($before_comment) + 1)), ')');
        if ($comment === false) {
            return false;
        }

        // The body must be followed by the closing bracket.
        $closing = strlen($before_comment) + 1 + strlen($comment);
        if (substr($remaining, $closing, 1) !== ')') {
            $this->error = 'Invalid address spec. Unclosed bracket or quotes';
            return false;
        }

        $comments[] = $comment;
        $stripped  .= $before_comment;
        $remaining  = (string) substr($remaining, $closing + 1);
    }

    $mailbox = trim($stripped . $remaining);

    // Check for name + route-addr
    if (substr($mailbox, -1) == '>' && substr($mailbox, 0, 1) != '<') {
        $parts = explode('<', $mailbox);
        $name  = $this->_splitCheck($parts, '<');

        $phrase     = trim($name);
        $route_addr = trim(substr($mailbox, strlen($name . '<'), -1));

        if ($this->_validatePhrase($phrase) === false || ($route_addr = $this->_validateRouteAddr($route_addr)) === false) {
            return false;
        }
    } else {
        // Only got addr-spec.
        // First snip angle brackets if present.
        if (substr($mailbox, 0, 1) == '<' && substr($mailbox, -1) == '>') {
            $addr_spec = substr($mailbox, 1, -1);
        } else {
            $addr_spec = $mailbox;
        }

        if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) {
            return false;
        }
    }

    // Construct the object that will be returned.
    $mbox = new stdClass();

    // Add the phrase (even if empty) and comments
    $mbox->personal = $phrase;
    $mbox->comment  = $comments;

    if (isset($route_addr)) {
        $mbox->mailbox = $route_addr['local_part'];
        $mbox->host    = $route_addr['domain'];
        // The route is only reported when there is one.
        if ($route_addr['adl'] !== '') {
            $mbox->adl = $route_addr['adl'];
        }
    } else {
        $mbox->mailbox = $addr_spec['local_part'];
        $mbox->host    = $addr_spec['domain'];
    }

    $mailbox = $mbox;
    return true;
}
/**
 * Validates a route-addr, which is:
 *     route-addr = "<" [route] addr-spec ">"
 *
 * The angle brackets have already been removed by the time this
 * function is called.
 *
 * @param string $route_addr The string to check.
 * @return mixed False on failure, or an array with the keys 'adl'
 *               (the route, '' if none), 'local_part' and 'domain'.
 */
protected function _validateRouteAddr($route_addr)
{
    // Look for a colon that ends a route.
    $route = $route_addr;
    if (strpos($route_addr, ':') !== false) {
        $route = $this->_splitCheck(explode(':', $route_addr), ':');
    }

    if ($route === $route_addr) {
        // The colon was in quotes or brackets or, of course, non
        // existent: there is no route, only an addr-spec.
        $adl       = '';
        $spec_text = $route_addr;
    } else {
        // Validate route part.
        $route = $this->_validateRoute($route);
        if ($route === false) {
            return false;
        }

        $adl       = $route;
        $spec_text = substr($route_addr, strlen($route . ':'));
    }

    // Validate addr-spec part.
    $addr_spec = $this->_validateAddrSpec($spec_text);
    if ($addr_spec === false) {
        return false;
    }

    return array_merge(array('adl' => $adl), $addr_spec);
}
/**
 * Validates a route, which is:
 *     route = 1#("@" domain) ":"
 *
 * @param string $route The string to check.
 * @return mixed False on failure, or the validated $route on success.
 */
protected function _validateRoute($route)
{
    // Each comma-separated element is a domain with "@" in front.
    foreach (explode(',', trim($route)) as $hop) {
        $hop = str_replace('@', '', trim($hop));
        if (!$this->_validateDomain($hop)) {
            return false;
        }
    }

    return $route;
}
/**
 * Validates a domain, though this is not quite what you would expect
 * of a strict internet domain:
 *
 *     domain = sub-domain *("." sub-domain)
 *
 * @param string $domain The string to check.
 * @return mixed False on failure, or the validated domain on success.
 */
protected function _validateDomain($domain)
{
    $pending = explode('.', $domain);
    $labels  = array();

    // Re-join pieces whose dot was inside quotes/brackets, e.g. in a
    // domain literal such as [127.0.0.1].
    while (count($pending) > 0) {
        $labels[] = $this->_splitCheck($pending, '.');
        // Drop the pieces that the call above consumed.
        $pending = array_slice($pending, $this->index + 1);
    }

    foreach ($labels as $label) {
        if (!$this->_validateSubdomain(trim($label))) {
            return false;
        }
    }

    // Managed to get here, so return input.
    return $domain;
}
/**
 * Validates a subdomain:
 *     subdomain = domain-ref / domain-literal
 *
 * @param string $subdomain The string to check.
 * @return boolean Success or failure.
 */
protected function _validateSubdomain($subdomain)
{
    // Text in square brackets is a domain literal; anything else has to
    // be an atom.
    if (preg_match('|^\[(.*)]$|', $subdomain, $arr)) {
        $accepted = $this->_validateDliteral($arr[1]);
    } else {
        $accepted = $this->_validateAtom($subdomain);
    }

    return $accepted ? true : false;
}
/**
 * Validates the inside of a domain literal:
 *     domain-literal = "[" *(dtext / quoted-pair) "]"
 *
 * Quoted pairs (a backslash followed by any character) are allowed.
 * Outside of them the text must not contain "[", "]", CR or a backslash,
 * wherever in the string they occur.
 *
 * @param string $dliteral The string to check, without the enclosing
 *                         square brackets.
 * @return boolean Success or failure.
 */
protected function _validateDliteral($dliteral)
{
    // Remove quoted pairs first, then look for forbidden characters;
    // this is the same approach _validateQuotedString() takes.
    $unescaped = preg_replace('/\\\\./', '', $dliteral);

    return !preg_match('/[][\x0D\\\\]/', $unescaped);
}
/**
 * Validates an addr-spec:
 *     addr-spec = local-part "@" domain
 *
 * An address without "@" is completed with $this->default_domain.
 *
 * @param string $addr_spec The string to check.
 * @return mixed False on failure, or an array with the keys
 *               'local_part' and 'domain' on success.
 */
protected function _validateAddrSpec($addr_spec)
{
    $addr_spec = trim($addr_spec);

    if (strpos($addr_spec, '@') === false) {
        // No @ sign so assume the default domain.
        $local = $addr_spec;
        $host  = $this->default_domain;
    } else {
        // Split on the first @ sign that is not quoted or bracketed.
        $local = $this->_splitCheck(explode('@', $addr_spec), '@');
        $host  = substr($addr_spec, strlen($local . '@'));
    }

    $local = $this->_validateLocalPart($local);
    if ($local === false) {
        return false;
    }

    $host = $this->_validateDomain($host);
    if ($host === false) {
        return false;
    }

    // Got here so return successful.
    return array('local_part' => $local, 'domain' => $host);
}
/**
 * Validates the local part of an address:
 *     local-part = word *("." word)
 *
 * @param string $local_part The local part to check.
 * @return mixed False on failure, or the validated local part on success.
 */
protected function _validateLocalPart($local_part)
{
    $parts = explode('.', $local_part);
    $words = array();

    // Split the local_part into words, re-joining pieces whose dot was
    // inside quotes.
    while (count($parts) > 0) {
        $words[] = $this->_splitCheck($parts, '.');
        for ($i = 0; $i < $this->index + 1; $i++) {
            array_shift($parts);
        }
    }

    // Validate each word.
    foreach ($words as $word) {
        // word cannot be empty (#17317)
        if ($word === '') {
            return false;
        }

        // If this word contains an unquoted space, it is invalid. (6.2.4)
        // strpos() returns 0 for a leading space, so compare against
        // false instead of relying on truthiness.
        if (strpos($word, ' ') !== false && $word[0] !== '"') {
            return false;
        }

        if ($this->_validatePhrase(trim($word)) === false) {
            return false;
        }
    }

    // Managed to get here, so return the input.
    return $local_part;
}
/**
 * Returns an approximate count of how many addresses are in the given
 * string.
 *
 * This is APPROXIMATE as it only splits on commas that have no
 * preceding backslash. It can be useful because large numbers of
 * addresses produce *large* structures when used with
 * parseAddressList().
 *
 * @param string $data Addresses to count
 * @return int Approximate count
 */
public function approximateCount($data)
{
    // Split on every comma that is not directly preceded by a backslash.
    $chunks = preg_split('/(?<!\\\\),/', $data);

    return count($chunks);
}
/**
 * Stand-alone check, separate from the rest of the class, for the
 * common internet form of an e-mail address: <user>@<domain>. This can
 * be sufficient for most people.
 *
 * In strict mode the user part may only consist of letters, digits,
 * full stop, underscore, plus and hyphen.
 *
 * @param string  $data   Address to check
 * @param boolean $strict Optional stricter mode
 * @return mixed False if it fails, or an indexed array of
 *               username/domain if it matches
 */
public function isValidInetAddress($data, $strict = false)
{
    if ($strict) {
        $regex = '/^([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i';
    } else {
        $regex = '/^([*+!.&#$|\'\\%\/0-9a-z^_`{}=?~:-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i';
    }

    if (!preg_match($regex, trim($data), $matches)) {
        return false;
    }

    // Group 1 is the user part, group 2 the complete domain.
    return array($matches[1], $matches[2]);
}