: I've been using a Regular expression to test for valid email
: addresses. It looks like:
:
: \w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*
:
: I've now had 2 occasions where it has rejected an email address with
: a "&" character in the local part. I know I should be able to work it
: out myself, but I'd like to ask anyone to suggest the best way to
: update it to allow the "&" character.
The address format from RFC 2822 is pretty hairy and a tall (but doable)
order for a regular expression.
The CPAN has an Email::Address module that constructs a behemoth of a
regular expression from subpatterns as specified in RFC 2822:
http://search.cpan.org/src/CWEST/Email-Address-1.80/lib/Email/Address.pm
You could transliterate that Perl code to C#, but you'd have to be
careful about subtle differences in semantics. A less sophisticated
approach, albeit quicker in terms of development time, is to dump the
regular expression from Perl and include it literally in C# code.
Looking at the code below may cause you to rethink your approach. Are
you trying to validate addresses? If so, you might apply a very
forgiving heuristic and then attempt to send to addresses that pass.
Your message should contain random data that the recipient can turn
around and repeat back to you to verify his address.
You can see where I left room using the abstract test pattern for you
to attempt an alternative that might hope to be maintainable.
I'm afraid she won't win many beauty contests.
Enjoy,
Greg
-------------------------------------------------------------
using System;
using System.Text.RegularExpressions;
using NUnit.Framework;
namespace Application
{
/// <summary>
/// Console entry point: runs one sample address through the checker and
/// prints the boolean result.
/// </summary>
class App
{
    [STAThread]
    static void Main(string[] args)
    {
        // Go through the interface so the concrete checker is easy to swap.
        SyntaxChecker emailChecker = new EmailAddress();
        bool isWellFormed = emailChecker.WellFormed("foo@bar");
        Console.WriteLine(isWellFormed);
    }
}
/// <summary>
/// Contract for objects that decide whether a piece of text is
/// syntactically well formed.
/// </summary>
public interface SyntaxChecker
{
/// <summary>
/// Checks the given text against the implementation's syntax rules.
/// </summary>
/// <param name="input">The text to check.</param>
/// <returns>true when the implementation considers the text well formed.</returns>
bool WellFormed(string input);
}
public class EmailAddress : SyntaxChecker
{
// derivative of work with the following copyright and license:
// Copyright (c) 2004 Casey West. All rights reserved.
// This module is free software; you can redistribute it and/or
// modify it under the same terms as Perl itself.
// see http://search.cpan.org/~cwest/Email-Address-1.80/
#region dumped regular expression
// Pattern text used to build the mailbox regex. The verbatim literal below is
// post-processed before use: every "<DQ>" placeholder is replaced with a
// double-quote character, and then all tabs, spaces, carriage returns and
// line feeds are removed, so the line breaks inside the literal do not
// survive into the final pattern.
// NOTE(review): the literal as it appears here looks damaged -- for example
// "(?-xism" is repeatedly followed by a line break and "?-xism" with no ":("
// in between, and one character class reads "()<" where its siblings read
// "()<>". Presumably some ":(" and ">" sequences were lost when the text was
// posted; verify against a fresh dump from Email::Address before relying on it.
private static string gibberish = @"
(?-xism
?
?-xism
?-xism
?-xism
?-xism
?-xism
?-xism:\
s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^
\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))
|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+
|\s+)*[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+(?-xism
?-xism:\
s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^
\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))
|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+
|\s+)*)|(?-xism
?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?
:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x
0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*<DQ>(?-xism
?-xism:[
^\\<DQ>])|(?-xism:\\(?-xism:[^\x0A\x0D])))+<DQ>(?-xism
?-xi
sm:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xis
m:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\
]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\
s*)+|\s+)*))+)?(?-xism
?-xism
?-xism:\s*\((?:\s*(?-xism
?
-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:
\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[
^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*<(?-xism
?-xi
sm
?-xism
?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^(
)\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(
?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))
|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*(?-xism:[^\x00-\x1F\x7F()<
\[\]:;@\,.<DQ>\s]+(?:\.[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s] +)*)(?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))
|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:
(?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s
*\)\s*))+)*\s*\)\s*)+|\s+)*)|(?-xism
?-xism
?-xism:\s*\((?
:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x
0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xi
sm:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*
<DQ>(?-xism
?-xism:[^\\<DQ>])|(?-xism:\\(?-xism:[^\x0A\x0D]
)))+<DQ>(?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\
]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-x
ism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+
)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*))\@(?-xism
?-xism
?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?
-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^
()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s
*\)\s*)+|\s+)*(?-xism:[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+(
?:\.[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+)*)(?-xism
?-xism:
\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[
^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+)
)|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)
+|\s+)*)|(?-xism
?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism:
(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((
?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\
x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*\[(?:\s*(?-xism
?-x
ism:[^\[\]\\])|(?-xism:\\(?-xism:[^\x0A\x0D])))+)*\s*\](?-xi
sm
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:
\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+
)*\s*\)\s*)+|\s+)*)))>(?-xism
?-xism:\s*\((?:\s*(?-xism
?-
xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\
s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^
\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*))|(?-xism
?-x
ism
?-xism
?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^
()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*
(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D])
)|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*(?-xism:[^\x00-\x1F\x7F()
<>\[\]:;@\,.<DQ>\s]+(?:\.[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s
]+)*)(?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+)
)|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\
s*\)\s*))+)*\s*\)\s*)+|\s+)*)|(?-xism
?-xism
?-xism:\s*\((
?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\
x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-x
ism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)
*<DQ>(?-xism
?-xism:[^\\<DQ>])|(?-xism:\\(?-xism:[^\x0A\x0D
])))+<DQ>(?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\
\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-
xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)
+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*))\@(?-xism
?-xism
?-xism:
(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(
?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[
^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\
s*\)\s*)+|\s+)*(?-xism:[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+
(?:\.[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+)*)(?-xism
?-xism
:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:
[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+
))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*
)+|\s+)*)|(?-xism
?-xism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\(
(?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A
\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*\[(?:\s*(?-xism
?-
xism:[^\[\]\\])|(?-xism:\\(?-xism:[^\x0A\x0D])))+)*\s*\](?-x
ism
?-xism:\s*\((?:\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism
:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
?-xism:
(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))
+)*\s*\)\s*)+|\s+)*))))(?-xism:\s*\((?:\s*(?-xism
?-xism
?
[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:
\s*(?-xism
?-xism
?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0
D]))|)+)*\s*\)\s*))+)*\s*\)\s*)*)"
.Replace("<DQ>", "\"")
.Replace("\t", "")
.Replace(" ", "")
.Replace("\r", "")
.Replace("\n", "");
#endregion
// Built once and shared by every instance. The dumped pattern is wrapped in
// \A(?: ... )\z so that a match has to span the whole input; without the
// anchors IsMatch succeeds for any string that merely contains an address
// somewhere inside it.
private static readonly Regex mailbox =
    new Regex(@"\A(?:" + gibberish + @")\z", RegexOptions.ExplicitCapture);

/// <summary>
/// Reports whether the given text, taken as a whole, matches the mailbox
/// pattern.
/// </summary>
/// <param name="address">Candidate address text; may be null.</param>
/// <returns>
/// true when the entire string matches the pattern; false otherwise,
/// including when <paramref name="address"/> is null.
/// </returns>
public bool WellFormed(string address)
{
    // Regex.IsMatch throws on null; a missing address is simply not well formed.
    if (address == null)
    {
        return false;
    }

    return mailbox.IsMatch(address);
}
}
/// <summary>
/// Checker-independent test cases for a <see cref="SyntaxChecker"/>.
/// Derived fixtures provide the checker under test through
/// <see cref="CreateChecker"/>; a fresh one is obtained before every test.
/// </summary>
[TestFixture]
public abstract class Test
{
    // Checker under test; assigned in SetUp before each test runs.
    protected SyntaxChecker checker = null;

    /// <summary>Creates the checker that the test cases exercise.</summary>
    public abstract SyntaxChecker CreateChecker();

    [SetUp]
    public void SetUp()
    {
        checker = CreateChecker();
    }

    // Bare addr-spec: local part, "@", domain.
    [Test]
    public void SimpleAddress()
    {
        Assert.IsTrue(checker.WellFormed("joe@example.com"));
    }

    // A lone local part with no "@domain" must be rejected.
    [Test]
    public void MustHaveLocalAndDomain()
    {
        Assert.IsFalse(checker.WellFormed("justlocal"));
    }

    // "&" (and spaces) inside a quoted local part.
    [Test]
    public void AmpersandInLocalPart()
    {
        Assert.IsTrue(checker.WellFormed("\"foo & bar\"@example.com"));
    }

    // Display phrase followed by the address in angle brackets.
    [Test]
    public void PhraseAndAngles()
    {
        Assert.IsTrue(checker.WellFormed("Joe <joe@example.com>"));
    }

    // Address followed by a parenthesised comment.
    [Test]
    public void AddressAndComment()
    {
        Assert.IsTrue(checker.WellFormed("joe@example.com (Joe)"));
    }
}
/// <summary>
/// Runs the inherited test cases against the <see cref="EmailAddress"/> checker.
/// </summary>
[TestFixture]
public class TestUsingMachineDump : Test
{
    /// <summary>Supplies a new <see cref="EmailAddress"/> as the checker under test.</summary>
    public override SyntaxChecker CreateChecker()
    {
        SyntaxChecker machineDumpChecker = new EmailAddress();
        return machineDumpChecker;
    }
}
}