A
Alexander Muylaert
Hi
I've made this small routine that performs a case insensitive
String.IndexOf().
It is badly coded, but still it is 30% faster than the "ToUpper" equivalent.
Perhaps someone can give me some more tips. I'm pretty new on C#. It is
only my second day. I think major performance is gained from removing
boundary checks from arrays etc.
Kind regards
Alexander
private void button4_Click(object sender, System.EventArgs e)
{
int D = Environment.TickCount;
int Cnt = 0;
for (int I = 0; I < 100000; I++){
if (textBox1.Text.ToUpper().IndexOf(textBox2.Text.ToUpper()) > 0) Cnt++;
}
D = Environment.TickCount - D;
listBox1.Items.Add("IndexOf: " + D.ToString() + " Hits: " + Cnt.ToString());
}
private char[] MapTable = new char[] {
/* basic ascii set */
'\x00','\x01','\x02','\x03','\x04','\x05','\x06','\x07','\x08','\x09','\x0A'
,'\x0B','\x0C','\x0D','\x0E','\x0F',
'\x10','\x11','\x12','\x13','\x14','\x15','\x16','\x17','\x18','\x19','\x1A'
,'\x1B','\x1C','\x1D','\x1E','\x1F',
' '
,'\x21','\x22','\x23','\x24','\x25','\x26','\x27','\x28','\x29','\x2A','\x2B
','\x2C','\x2D','\x2E','\x2F',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'\x3A','\x3B','\x3C','\x3D','\x3E','\x3F',
'\x40','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O',
'P', 'Q', 'R', 'S' , 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'\x5B','\x5C','\x5D','\x5E','\x5F',
'\x60','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O',
'P', 'Q', 'R', 'S' , 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'\x7B','\x7C','\x7D','\x7E','\x7F',
/*Extended set*/
'C', 'U', 'E' , 'A' , 'A' , 'A' , 'A' , 'C', 'E', 'E', 'E' , 'I', 'I', 'I',
'A', 'A',
'E', '\x91','\x92','O', 'O', 'O', 'U', 'U', '\x98', 'O', 'U',
'\x9B','\x8C','\x8D','\x8E', 'A',
'I', 'O', 'U', 'N', 'N',
'\xA5','\xA6','\xA7','\xA8','\xA9','\xAA','\xAB','\xAC','\xAD','\xAE','\xAF'
,
'\xB0','\xB1','\xB2','\xB2','\xA5','\xB5','\xB6','\xB7','\xA8','\xB9','\xAA'
,'\xBB','\xA3','\xBD','\xBD','\xAF',
'\xC0','\xC1','\xC2','\xC3','\xC4','\xC5','\xC6','\xC7','\xC8','\xC9','\xCA'
,'\xCB','\xCC','\xCD','\xCE','\xCF',
'\xD0','\xD1','\xD2','\xD3','\xD4','\xD5','\xD6','\xD7','\xD8','\xD9','\xDA'
,'\xDB','\xDC','\xDD','\xDE','\xDF',
'\xC0','\xC1','\xC2','\xC3','\xC4','\xC5','\xC6','\xC7','\xC8','\xC9','\xCA'
,'\xCB','\xCC','\xCD','\xCE','\xCF',
'\xD0','\xD1','\xD2','\xD3','\xD4','\xD5','\xD6','\xD7','\xD8','\xD9','\xDA'
,'\xDB','\xDC','\xDD','\xDE','\xDF'};
private int BruteSearch(string SourceString, string SubString){
if ((SubString.Length > 0) && (SubString.Length <= SourceString.Length)) {
char SourceC;
char SubC;
char StartC = SubString[0];
char StartCMapped = (StartC <= '\xFF')? MapTable[StartC] : StartC;
for (int I = 0; I < SourceString.Length - SubString.Length; I++) {
SourceC = SourceString;
if ((SourceC == StartC) ||
(SourceC == StartCMapped) ||
((SourceC <= '\xFF') && (MapTable[SourceC] == StartCMapped))){
bool found = true;
for (int J = 1; J < SubString.Length; J++) {
// we don't have to compare the first char again, we can start at J = 1
SubC = SubString[J];
SourceC = SourceString[I + J];
if (SubC != SourceC) {
if (SubC <= '\xFF') SubC = MapTable[SubC];
if (SourceC <= '\xFF') SourceC = MapTable[SourceC];
if (SourceC != SubC) {
found = false;
break;
}
}
}
if (found) return I;
}
}
}
return -1;
}
I've made this small routine that performs a case insensitive
String.IndexOf().
It is badly coded, but still it is 30% faster than the "ToUpper" equivalent.
Perhaps someone can give me some more tips. I'm pretty new on C#. It is
only my second day. I think major performance is gained from removing
boundary checks from arrays etc.
Kind regards
Alexander
private void button4_Click(object sender, System.EventArgs e)
{
int D = Environment.TickCount;
int Cnt = 0;
for (int I = 0; I < 100000; I++){
if (textBox1.Text.ToUpper().IndexOf(textBox2.Text.ToUpper()) > 0) Cnt++;
}
D = Environment.TickCount - D;
listBox1.Items.Add("IndexOf: " + D.ToString() + " Hits: " + Cnt.ToString());
}
private char[] MapTable = new char[] {
/* basic ascii set */
'\x00','\x01','\x02','\x03','\x04','\x05','\x06','\x07','\x08','\x09','\x0A'
,'\x0B','\x0C','\x0D','\x0E','\x0F',
'\x10','\x11','\x12','\x13','\x14','\x15','\x16','\x17','\x18','\x19','\x1A'
,'\x1B','\x1C','\x1D','\x1E','\x1F',
' '
,'\x21','\x22','\x23','\x24','\x25','\x26','\x27','\x28','\x29','\x2A','\x2B
','\x2C','\x2D','\x2E','\x2F',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'\x3A','\x3B','\x3C','\x3D','\x3E','\x3F',
'\x40','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O',
'P', 'Q', 'R', 'S' , 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'\x5B','\x5C','\x5D','\x5E','\x5F',
'\x60','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O',
'P', 'Q', 'R', 'S' , 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'\x7B','\x7C','\x7D','\x7E','\x7F',
/*Extended set*/
'C', 'U', 'E' , 'A' , 'A' , 'A' , 'A' , 'C', 'E', 'E', 'E' , 'I', 'I', 'I',
'A', 'A',
'E', '\x91','\x92','O', 'O', 'O', 'U', 'U', '\x98', 'O', 'U',
'\x9B','\x8C','\x8D','\x8E', 'A',
'I', 'O', 'U', 'N', 'N',
'\xA5','\xA6','\xA7','\xA8','\xA9','\xAA','\xAB','\xAC','\xAD','\xAE','\xAF'
,
'\xB0','\xB1','\xB2','\xB2','\xA5','\xB5','\xB6','\xB7','\xA8','\xB9','\xAA'
,'\xBB','\xA3','\xBD','\xBD','\xAF',
'\xC0','\xC1','\xC2','\xC3','\xC4','\xC5','\xC6','\xC7','\xC8','\xC9','\xCA'
,'\xCB','\xCC','\xCD','\xCE','\xCF',
'\xD0','\xD1','\xD2','\xD3','\xD4','\xD5','\xD6','\xD7','\xD8','\xD9','\xDA'
,'\xDB','\xDC','\xDD','\xDE','\xDF',
'\xC0','\xC1','\xC2','\xC3','\xC4','\xC5','\xC6','\xC7','\xC8','\xC9','\xCA'
,'\xCB','\xCC','\xCD','\xCE','\xCF',
'\xD0','\xD1','\xD2','\xD3','\xD4','\xD5','\xD6','\xD7','\xD8','\xD9','\xDA'
,'\xDB','\xDC','\xDD','\xDE','\xDF'};
private int BruteSearch(string SourceString, string SubString){
if ((SubString.Length > 0) && (SubString.Length <= SourceString.Length)) {
char SourceC;
char SubC;
char StartC = SubString[0];
char StartCMapped = (StartC <= '\xFF')? MapTable[StartC] : StartC;
for (int I = 0; I < SourceString.Length - SubString.Length; I++) {
SourceC = SourceString;
if ((SourceC == StartC) ||
(SourceC == StartCMapped) ||
((SourceC <= '\xFF') && (MapTable[SourceC] == StartCMapped))){
bool found = true;
for (int J = 1; J < SubString.Length; J++) {
// we don't have to compare the first char again, we can start at J = 1
SubC = SubString[J];
SourceC = SourceString[I + J];
if (SubC != SourceC) {
if (SubC <= '\xFF') SubC = MapTable[SubC];
if (SourceC <= '\xFF') SourceC = MapTable[SourceC];
if (SourceC != SubC) {
found = false;
break;
}
}
}
if (found) return I;
}
}
}
return -1;
}