Hmmm... as expected, there is no clear winner.
Compiled with from the command line using "Visual C# 2008 Compiler version
3.5.21022.8 " with /o.
Run on an AMD Phenom 9500 quad-core 2.2GHz.
Iterations: 5000
Data elements: 5000
Data range: 65536
Tests to run: 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
_HistDict2: 00:00:13.2856168
_HistDict2: 00:00:13.2247140
_HistDict2: 00:00:13.2682774
_HistDict2: 00:00:13.2376446
_HistDict2: 00:00:13.2230366
_HistDict3: 00:00:13.3116609
_HistDict3: 00:00:13.2944652
_HistDict3: 00:00:13.2405608
_HistDict3: 00:00:13.2495046
_HistDict3: 00:00:13.2582542
Validating results...done.
Willy.
Nothing would make me happier than to have another pair of eyes look at
the benchmark and explain my observations (if only to show that I did
something wrong in timing the code).
I don't have handy access to the code right now, but I'll post when I do.
Fair warning: I don't have time to clean it up and it was written in
pursuit of a different question. (Here's the context:
http://groups.google.com/group/micr...5a45ca37ee0/4fc79d93deffc692#4fc79d93deffc692)
Okay Ben...have at it.
I've copied the basic test program below. In order to keep things brief,
I've deleted most of the tests that actually existed in the original
program, leaving just the two relevant ones.
On my computer, using a command line of "TestHistogramBrief.exe 5000 5000
-1 1 1 1 1 1 2 2 2 2 2" to run each test five times, I get the following
output (_HistDict2 uses Contains(), _HistDict3 uses TryGetValue()):
_HistDict2: 00:00:14.9067082
_HistDict2: 00:00:14.1235553
_HistDict2: 00:00:13.6588232
_HistDict2: 00:00:14.0332028
_HistDict2: 00:00:13.6914594
_HistDict3: 00:00:15.3459849
_HistDict3: 00:00:14.7904829
_HistDict3: 00:00:15.2897546
_HistDict3: 00:00:14.9534329
_HistDict3: 00:00:15.4209609
Tossing out the high and low values, averaging the remaining three for
each, I get 13.949406 for the Contains() version, and 15.196391 for the
TryGetValue() version.
The difference is relatively slight (just over 8% faster for Contains()),
and as you can see the actual times vary a bit. But for me, it looks like
a reasonably reliable result, and sometimes an 8% difference is important.
And no, I don't understand the difference. If you (or anyone else) can
shed some light, I'm all ears. Even if all that it turns out to be is
some mistake in my implementation.
Pete
using System;
using System.Collections.Generic;
using System.Text;
using System.Diagnostics;
using System.IO;
namespace TestHistogram
{
class Program
{
static int imaxRange = 65536;
const int ctestMax = 2;
struct HistPair
{
public int value;
public int count;
public HistPair(int v, int c)
{
value = v;
count = c;
}
}
class TestScenario
{
private readonly int _citer;
private readonly int _cvalues;
private readonly int _imaxRange;
private readonly int[] _rgiValues;
private readonly Stopwatch _sw = new Stopwatch();
public TestScenario(int citer, int cvalues, int imaxRange)
{
_citer = citer;
_cvalues = cvalues;
_imaxRange = imaxRange;
Random rnd = new Random(0);
_rgiValues = new int[_cvalues];
for (int ivalue = 0; ivalue < _cvalues; ivalue++)
{
_rgiValues[ivalue] = rnd.Next(_imaxRange);
}
}
public HistPair[] RghpRunTest(int itest)
{
HistPair[] rghpRet = null;
switch (itest)
{
case 1:
rghpRet = _RghpRunHptest(_HistDict2);
break;
case 2:
rghpRet = _RghpRunHptest(_HistDict3);
break;
default:
Console.WriteLine("invalid test selected ("
+ itest.ToString() + ")");
break;
}
return rghpRet;
}
private delegate HistPair[] HptestDelegate(int[] rgi);
private HistPair[] _RghpRunHptest(HptestDelegate hptd)
{
HistPair[] rghpRet = null;
GC.Collect();
GC.WaitForPendingFinalizers();
_sw.Reset();
_sw.Start();
for (int iiter = 0; iiter < _citer; iiter++)
{
rghpRet = hptd(_rgiValues);
}
_sw.Stop();
Console.WriteLine(hptd.Method.Name + ": "
+ _sw.Elapsed.ToString());
return rghpRet;
}
// HistDict1, but using ContainsKey() to check for invalid keys
private HistPair[] _HistDict2(int[] rgiValues)
{
Dictionary<int, int> dict = new Dictionary<int, int>();
foreach (int value in rgiValues)
{
if (dict.ContainsKey(value))
{
dict[value] = dict[value] + 1;
}
else
{
dict[value] = 1;
}
}
int ihp;
HistPair[] rghpRet = new HistPair[dict.Count];
ihp = 0;
foreach (KeyValuePair<int, int> kvp in dict)
{
rghpRet[ihp++] = new HistPair(kvp.Key, kvp.Value);
}
Array.Sort(rghpRet, delegate(HistPair hpA, HistPair hpB)
{
return hpB.count.CompareTo(hpA.count);
});
return rghpRet;
}
// HistDict1, but using TryGetValue to check for invalid keys
and
// retrieve value at the same time
private HistPair[] _HistDict3(int[] rgiValues)
{
Dictionary<int, int> dict = new Dictionary<int, int>();
foreach (int value in rgiValues)
{
int count;
if (dict.TryGetValue(value, out count))
{
dict[value] = count + 1;
}
else
{
dict[value] = 1;
}
}
int ihp;
HistPair[] rghpRet = new HistPair[dict.Count];
ihp = 0;
foreach (KeyValuePair<int, int> kvp in dict)
{
rghpRet[ihp++] = new HistPair(kvp.Key, kvp.Value);
}
Array.Sort(rghpRet, delegate(HistPair hpA, HistPair hpB)
{
return hpB.count.CompareTo(hpA.count);
});
return rghpRet;
}
}
static void Main(string[] args)
{
int citer = 10000;
int cvalues = 10000;
List<int> rgitest = new List<int>();
switch (args.Length)
{
default:
goto case 4;
case 4:
for (int iarg = 3; iarg < args.Length; iarg++)
{
int itest;
if (!int.TryParse(args[iarg], out itest))
{
rgitest = null;
break;
}
rgitest.Add(itest);
}
goto case 3;
case 3:
ArgInt(args[2], ref imaxRange);
goto case 2;
case 2:
ArgInt(args[1], ref cvalues);
goto case 1;
case 1:
ArgInt(args[0], ref citer);
goto case 0;
case 0:
break;
}
Console.Error.WriteLine("Iterations: " + citer.ToString());
Console.Error.WriteLine("Data elements: "
+ cvalues.ToString());
Console.Error.WriteLine("Data range: " + imaxRange.ToString());
Console.Error.Write("Tests to run: ");
if (rgitest != null)
{
bool fFirst = true;
foreach (int itest in rgitest)
{
if (!fFirst)
{
Console.Error.Write(", ");
}
fFirst = false;
Console.Error.Write(itest.ToString());
}
}
else
{
rgitest = new List<int>(ctestMax);
for (int itest = 1; itest <= ctestMax; itest++)
{
rgitest.Add(itest);
}
Console.Error.Write("all tests (1-" + ctestMax.ToString()
+ ")");
}
Console.Error.WriteLine(Environment.NewLine);
TestScenario tests = new TestScenario(citer, cvalues,
imaxRange);
List<HistPair[]> rgrghpResults = new List<HistPair[]>();
foreach (int itest in rgitest)
{
HistPair[] rghpT = tests.RghpRunTest(itest);
if (rghpT != null)
{
rgrghpResults.Add(rghpT);
}
}
ValidateResults(rgrghpResults);
if (Debugger.IsAttached)
{
Console.Error.WriteLine("Press Enter/Return to exit");
Console.ReadLine();
}
}
static void ArgInt(string strArg, ref int iarg)
{
int iT;
if (int.TryParse(strArg, out iT) && iT > 0)
{
iarg = iT;
}
}
static void ValidateResults(List<HistPair[]> rgrghp)
{
Console.Error.WriteLine();
Console.Error.Write("Validating results...");
// First check each array individually for correctness
foreach (HistPair[] rghp in rgrghp)
{
int countPrev = rghp[0].count;
for (int ihp = 1; ihp < rghp.Length; ihp++)
{
if (countPrev < rghp[ihp].count)
{
Console.Error.WriteLine("array order error! index
" + ihp.ToString() +
" has a count (" + rghp[ihp].count.ToString() +
") greater than previous element ("
+ countPrev.ToString() + ")");
break;
}
countPrev = rghp[ihp].count;
}
// Reorder the array in a consistent way for later
comparison
Array.Sort(rghp, delegate(HistPair hpA, HistPair hpB)
{
if (hpA.count == hpB.count)
{
return hpB.value.CompareTo(hpA.value);
}
return hpB.count.CompareTo(hpA.count);
});
}
// Then compare each array to each other
for (int irghp = 0; irghp < rgrghp.Count; irghp++)
{
int irghpNext = (irghp + 1) % rgrghp.Count;
HistPair[] rghp = rgrghp[irghp],
rghpNext = rgrghp[irghpNext];
if (rghp.Length != rghpNext.Length)
{
Console.Error.WriteLine("array length mismatch! array
#" +
irghp.ToString() + " has "
+ rghp.Length.ToString() +
" elements, array #" + irghpNext.ToString() + "
has " +
rghpNext.Length.ToString() + " elements");
continue;
}
for (int ihp = 0; ihp < rghp.Length; ihp++)
{
if (rghp[ihp].value != rghpNext[ihp].value)
{
Console.Error.WriteLine("element value mismatch!
index " +
ihp.ToString() + ", array #"
+ irghp.ToString() +
": " + rghp[ihp].value + ", array #"
+ irghpNext.ToString() +
": " + rghpNext[ihp].value);
break;
}
if (rghp[ihp].count != rghpNext[ihp].count)
{
Console.Error.WriteLine("element count mismatch!
index " +
ihp.ToString() + ", array #"
+ irghp.ToString() +
": " + rghp[ihp].count + ", array #"
+ irghpNext.ToString() +
": " + rghpNext[ihp].count);
break;
}
}
}
Console.Error.WriteLine("done.");
}
}
}