C
Christopher Van Kirk
Hi all,
I have a need to order a collection of byte arrays and I'm looking for
a smart way to improve the performance. One optimization that I am
aware of for comparing byte arrays for equality purposes is to convert
the array of bytes into an array of integers and compare each integer
one at a time. This improves performance by about 4 times, but it only
lets you determine whether the two byte arrays are equal. Sample code
for this optimization is here:
public unsafe static bool Compare(byte[] b1, byte[] b2)
{
if( b1 == null && b2 == null )
return true;
if( b1 == null || b2 == null )
return false;
if ( b1.Length != b2.Length )
return false;
int ints = b1.Length / 4,
bytes = b1.length % 4,
byteStart = ints * 4;
fixed (byte* bp1 = b1)
{
fixed (byte* bp2 = b2)
{
int* ip1 = (int*)bp1;
int* ip2 = (int*)bp2;
for (int i = 0; i < ints; i++)
{
if (ip2 != ip1)
return false;
}
}
}
for( int i = byteStart; i < bytes; i++ )
{
if( b1 != b2 )
{
return false;
}
}
return true;
}
For ordering purposes I need to know which array is greater than the
other, and this adaptation of the above function doesn't seem to work:
public unsafe static int Compare(byte[] b1, byte[] b2)
{
if( b1 == null && b2 == null )
return 0;
if( b1 == null )
return 1;
if( b2 == null )
return -1;
if ( b1.Length < b2.Length )
return -1;
if ( b1.Length < b2.Length )
return 1;
int ints = b1.Length / 4,
bytes = b1.length % 4,
byteStart = ints * 4,
result = 0;
fixed (byte* bp1 = b1)
{
fixed (byte* bp2 = b2)
{
int* ip1 = (int*)bp1;
int* ip2 = (int*)bp2;
for (int i = 0; i < ints; i++)
{
result = ip2 - ip1;
if( result != 0 )
{
break;
}
}
}
}
if( result != 0 )
{
for( int i = byteStart; i < bytes; i++ )
{
result = b2 - b1;
if( result == 0 )
{
break;
}
}
}
return result;
}
The problem here is that the int subtraction isnt equivalent to
subtraction of an array of four bytes. Is there a better way to
achieve the performance potential of integer conversion with an
ordering comparison output?
One thing I've considered is using the equality approach above to find
a four byte series that mismatch and then do up to four individual
byte compares to find out from which byte the difference begins, as
follows:
public unsafe static int Compare(byte[] b1, byte[] b2)
{
// If both are null, they're equal
if (b1 == null && b2 == null)
{
return 0;
}
// If either but not both are null, they're not equal
if (b1 == null)
{
return -1;
}
if (b2 == null)
{
return 1;
}
if (b1.Length < b2.Length)
{
return -1;
}
if (b1.Length > b2.Length)
{
return 1;
}
int count = b1.Length,
words = count / sizeof(int),
bytes = count % sizeof(int);
fixed (byte* pLeft = b1)
{
byte* pLeftCursor = pLeft;
fixed (byte* pRight = b2)
{
byte* pRightCursor = pRight;
for (int i = 0; i < words; i++)
{
if( *((int*)pLeftCursor) !=
*((int*)pRightCursor) )
{
for (int j = 0; j < sizeof(int); j++)
{
if( *pRightCursor != *pLeftCursor )
{
return *pRightCursor -
*pLeftCursor;
}
pLeftCursor += sizeof(byte);
pRightCursor += sizeof(byte);
}
}
pLeftCursor += sizeof(int);
pRightCursor += sizeof(int);
}
if (bytes > 0)
{
for (int i = 0; i < bytes; i++)
{
if( *pRightCursor != *pLeftCursor )
{
return *pRightCursor - *pLeftCursor;;
}
pLeftCursor += sizeof(byte);
pRightCursor += sizeof(byte);
}
}
return result;
}
}
}
This is what I'm using now. I'm hoping theres an even better way that
I'm not thinking of. Anyone have any ideas?
I have a need to order a collection of byte arrays and I'm looking for
a smart way to improve the performance. One optimization that I am
aware of for comparing byte arrays for equality purposes is to convert
the array of bytes into an array of integers and compare each integer
one at a time. This improves performance by about 4 times, but it only
lets you determine whether the two byte arrays are equal. Sample code
for this optimization is here:
public unsafe static bool Compare(byte[] b1, byte[] b2)
{
if( b1 == null && b2 == null )
return true;
if( b1 == null || b2 == null )
return false;
if ( b1.Length != b2.Length )
return false;
int ints = b1.Length / 4,
bytes = b1.length % 4,
byteStart = ints * 4;
fixed (byte* bp1 = b1)
{
fixed (byte* bp2 = b2)
{
int* ip1 = (int*)bp1;
int* ip2 = (int*)bp2;
for (int i = 0; i < ints; i++)
{
if (ip2 != ip1)
return false;
}
}
}
for( int i = byteStart; i < bytes; i++ )
{
if( b1 != b2 )
{
return false;
}
}
return true;
}
For ordering purposes I need to know which array is greater than the
other, and this adaptation of the above function doesn't seem to work:
public unsafe static int Compare(byte[] b1, byte[] b2)
{
if( b1 == null && b2 == null )
return 0;
if( b1 == null )
return 1;
if( b2 == null )
return -1;
if ( b1.Length < b2.Length )
return -1;
if ( b1.Length < b2.Length )
return 1;
int ints = b1.Length / 4,
bytes = b1.length % 4,
byteStart = ints * 4,
result = 0;
fixed (byte* bp1 = b1)
{
fixed (byte* bp2 = b2)
{
int* ip1 = (int*)bp1;
int* ip2 = (int*)bp2;
for (int i = 0; i < ints; i++)
{
result = ip2 - ip1;
if( result != 0 )
{
break;
}
}
}
}
if( result != 0 )
{
for( int i = byteStart; i < bytes; i++ )
{
result = b2 - b1;
if( result == 0 )
{
break;
}
}
}
return result;
}
The problem here is that the int subtraction isnt equivalent to
subtraction of an array of four bytes. Is there a better way to
achieve the performance potential of integer conversion with an
ordering comparison output?
One thing I've considered is using the equality approach above to find
a four byte series that mismatch and then do up to four individual
byte compares to find out from which byte the difference begins, as
follows:
public unsafe static int Compare(byte[] b1, byte[] b2)
{
// If both are null, they're equal
if (b1 == null && b2 == null)
{
return 0;
}
// If either but not both are null, they're not equal
if (b1 == null)
{
return -1;
}
if (b2 == null)
{
return 1;
}
if (b1.Length < b2.Length)
{
return -1;
}
if (b1.Length > b2.Length)
{
return 1;
}
int count = b1.Length,
words = count / sizeof(int),
bytes = count % sizeof(int);
fixed (byte* pLeft = b1)
{
byte* pLeftCursor = pLeft;
fixed (byte* pRight = b2)
{
byte* pRightCursor = pRight;
for (int i = 0; i < words; i++)
{
if( *((int*)pLeftCursor) !=
*((int*)pRightCursor) )
{
for (int j = 0; j < sizeof(int); j++)
{
if( *pRightCursor != *pLeftCursor )
{
return *pRightCursor -
*pLeftCursor;
}
pLeftCursor += sizeof(byte);
pRightCursor += sizeof(byte);
}
}
pLeftCursor += sizeof(int);
pRightCursor += sizeof(int);
}
if (bytes > 0)
{
for (int i = 0; i < bytes; i++)
{
if( *pRightCursor != *pLeftCursor )
{
return *pRightCursor - *pLeftCursor;;
}
pLeftCursor += sizeof(byte);
pRightCursor += sizeof(byte);
}
}
return result;
}
}
}
This is what I'm using now. I'm hoping theres an even better way that
I'm not thinking of. Anyone have any ideas?