Regex performance issue

G

Guest

Hi all,

Sorry for the lengthy post but as I learned I should post
concise-and-complete code.

So the code belows shows that the execution of ValidateAddress consumes a
lot of time. In the test it is called a 100 times but in my real app it
could be called 50000 or more times.

So my question is if it is somehow possible to speed this up and if so how
this can be done.

Thanks a lot in advance,

Bart

------ Code -----

using System;
using System.Text.RegularExpressions;

namespace ValidateAddress_speed_test
{
class Program
{
#region Regular expression strings

private const string dbBoolAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)(DBX)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string dbMemAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)(DBB|DBW|DBD|DBR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";

private const string boolAddress_pattern =
@"^(M|E|A)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string memAddress_pattern =
@"^(EB|EW|ED|AB|AW|AD|MB|MW|MD|MR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";

#endregion

private static void ValidateAddress(string address)
{
if (address == string.Empty)
throw new ArgumentOutOfRangeException("The addres cannot be
an empty string.");


Regex dbBool_Regex = new Regex(dbBoolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbBool_Regex.IsMatch(address))
return;

Regex dbMem_Regex = new Regex(dbMemAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbMem_Regex.IsMatch(address))
return;

Regex boolMem_Regex = new Regex(boolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (boolMem_Regex.IsMatch(address))
return;

Regex Mem_Regex = new Regex(memAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (Mem_Regex.IsMatch(address))
return;


throw new ArgumentOutOfRangeException(string.Format("{0} is not
a valid address.", address));
}


static void Main(string[] args)
{
Console.WriteLine("Test started...");
System.Diagnostics.Stopwatch sw = new
System.Diagnostics.Stopwatch();
sw.Start();
for (int i = 0; i < 100; i++)
{
//ValidateAddress("DB0.DBX0.0");
//ValidateAddress("DB0.DBW0");
//ValidateAddress("M0.0");
ValidateAddress("MB0");
}

sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds.ToString() + " ms");
Console.WriteLine("Press any key to quit");
Console.ReadLine();
}
}
}
 
J

Jesse Houwing

Hello Bart,

In your validateAddress function you're recompiling the same regexes over
and over again. A compiled regex is faster than an uncompiled one, but the
compilation takes time.

To solve this, put your regexes in a private static readonly Regex instance
and reuse that. Like this:

private static readonly Regex dbBoolAddressRegex = new Regex(dbBoolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

then from your validate method, use this instance.

Be sure to read up on thread safety. I'm nnot sure if you'll need to make
sure calls to the regex instances are synchronized. But that is something
you'll probably find in the docs, or which doesn't apply to you anyway.

Jesse
Hi all,

Sorry for the lengthy post but as I learned I should post
concise-and-complete code.

So the code belows shows that the execution of ValidateAddress
consumes a lot of time. In the test it is called a 100 times but in my
real app it could be called 50000 or more times.

So my question is if it is somehow possible to speed this up and if so
how this can be done.

Thanks a lot in advance,

Bart

------ Code -----

using System;
using System.Text.RegularExpressions;
namespace ValidateAddress_speed_test
{
class Program
{
#region Regular expression strings
private const string dbBoolAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-
6][0-5][0-3][0-6])(\.)(DBX)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9
][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string dbMemAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-
6][0-5][0-3][0-6])(\.)(DBB|DBW|DBD|DBR)([0-9]|[1-9][0-9]|[1-9][0-9][0-
9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";
private const string boolAddress_pattern =
@"^(M|E|A)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6]
[0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string memAddress_pattern =
@"^(EB|EW|ED|AB|AW|AD|MB|MW|MD|MR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1
-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";
#endregion

private static void ValidateAddress(string address)
{
if (address == string.Empty)
throw new ArgumentOutOfRangeException("The addres
cannot be
an empty string.");
Regex dbBool_Regex = new Regex(dbBoolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbBool_Regex.IsMatch(address))
return;
Regex dbMem_Regex = new Regex(dbMemAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbMem_Regex.IsMatch(address))
return;
Regex boolMem_Regex = new Regex(boolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (boolMem_Regex.IsMatch(address))
return;
Regex Mem_Regex = new Regex(memAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (Mem_Regex.IsMatch(address))
return;
throw new ArgumentOutOfRangeException(string.Format("{0}
is not
a valid address.", address));
}
static void Main(string[] args)
{
Console.WriteLine("Test started...");
System.Diagnostics.Stopwatch sw = new
System.Diagnostics.Stopwatch();
sw.Start();
for (int i = 0; i < 100; i++)
{
//ValidateAddress("DB0.DBX0.0");
//ValidateAddress("DB0.DBW0");
//ValidateAddress("M0.0");
ValidateAddress("MB0");
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds.ToString() + "
ms");
Console.WriteLine("Press any key to quit");
Console.ReadLine();
}
}
}
 
H

Hans Kesting

bart brought next idea :
Hi all,

Sorry for the lengthy post but as I learned I should post
concise-and-complete code.

So the code belows shows that the execution of ValidateAddress consumes a lot
of time. In the test it is called a 100 times but in my real app it could be
called 50000 or more times.

So my question is if it is somehow possible to speed this up and if so how
this can be done.

Thanks a lot in advance,

Bart

------ Code -----

using System;
using System.Text.RegularExpressions;

namespace ValidateAddress_speed_test
{
class Program
{
#region Regular expression strings

private const string dbBoolAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)(DBX)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string dbMemAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)(DBB|DBW|DBD|DBR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";

private const string boolAddress_pattern =
@"^(M|E|A)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string memAddress_pattern =
@"^(EB|EW|ED|AB|AW|AD|MB|MW|MD|MR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";

#endregion

private static void ValidateAddress(string address)
{
if (address == string.Empty)
throw new ArgumentOutOfRangeException("The addres cannot be
an empty string.");


Regex dbBool_Regex = new Regex(dbBoolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbBool_Regex.IsMatch(address))
return;

Regex dbMem_Regex = new Regex(dbMemAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbMem_Regex.IsMatch(address))
return;

Regex boolMem_Regex = new Regex(boolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (boolMem_Regex.IsMatch(address))
return;

Regex Mem_Regex = new Regex(memAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (Mem_Regex.IsMatch(address))
return;


throw new ArgumentOutOfRangeException(string.Format("{0} is not a
valid address.", address));
}


static void Main(string[] args)
{
Console.WriteLine("Test started...");
System.Diagnostics.Stopwatch sw = new
System.Diagnostics.Stopwatch();
sw.Start();
for (int i = 0; i < 100; i++)
{
//ValidateAddress("DB0.DBX0.0");
//ValidateAddress("DB0.DBW0");
//ValidateAddress("M0.0");
ValidateAddress("MB0");
}

sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds.ToString() + " ms");
Console.WriteLine("Press any key to quit");
Console.ReadLine();
}
}
}

Compiling a regex will cost some time, saving a bit when you use it.
The best way (I think) to use a compiled regex:
Make a static readonly Regex variable with that compiled expression,
then use it multiple times.
This means you get the compile-cost just once and the speed benefit
(which in my experience is not huge but still present) every time.

Hans Kesting
 
M

Marc Gravell

Further to Jesse's point - the Regex class is itself immutable; it is
my /understanding/ that methods like IsMatch etc are thread-safe. MSDN
doesn't make it very clear, though.

Marc
 
G

Guest

To solve this, put your regexes in a private static readonly Regex
instance and reuse that. Like this:

private static readonly Regex dbBoolAddressRegex = new
Regex(dbBoolAddress_pattern, RegexOptions.Compiled |
RegexOptions.IgnoreCase);

Thanks,

This is a huge performance boost :)

A 100000 times takes now about 763 ms

So this is great...

Bart
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top