N
Norman Diamond
Since my application uses the Excel driver to read some files, I used the
same Excel driver to write some CSV files. I also tried using the Text
driver to write CSV files. The results are correct but they take enormous
amounts of CPU time.
Of course I know how to write a CSV file using plain old Shift-JIS character
strings and quotation marks and commas, and probably I'll do that in order
to speed up this operation, but the question still remains.
Why is this so slow? The sample data table had 213 rows (header plus 212
data rows) and 25 columns, all strings (some of them empty strings). The
total file size on disk is 34KB. The computations and database operations
in memory take a few milliseconds, not even noticeable when running under a
debugger. But the call to
dataAdapter.Update(dataTable);
takes 55 SECONDS OF CPU TIME on a Pentium 4 running at 3 GHz. It occupies
100% of one CPU core for 55 seconds.
Actual time to write the file might be a few hundred milliseconds since 34KB
occupies several NTFS structures. Anyway, this thing isn't disk bound, and
it's not CPU bound in my code, it's CPU bound in the Update method.
What is going on here?
#undef UseOdbc // Use OleDb
string fileName = @"C:\test.csv"; // (not really)
string Headers[] = new string[25] { "1", "2", "3", /* ... */ "25" };
int columnCount = 25; // (not really)
FileInfo fileInfo = new System.IO.FileInfo(fileName);
string dirName = fileInfo.DirectoryName;
string tableName = fileInfo.Name;
#if UseOdbc
OdbcConnection connection = new OdbcConnection(
"Provider=MSDASQL;" +
"DRIVER={Microsoft Text Driver (*.txt; *.csv)};DBQ=" + dirName +
";Extended Properties='Text;Extensions=asc,csv,tab,txt;" +
"HDR=Yes;FMT=Delimited'");
connection.Open();
OdbcDataAdapter dataAdapter = new OdbcDataAdapter(
"SELECT * FROM [" + tableName + "]", connection);
OdbcCommand insertCmd = new OdbcCommand();
OdbcType varcharType = OdbcType.VarChar;
#else // OleDb
OleDbConnection connection = new OleDbConnection(
"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + dirName +
";Extended Properties='Text;Extensions=asc,csv,tab,txt;" +
"HDR=Yes;FMT=Delimited'");
connection.Open();
OleDbDataAdapter dataAdapter = new OleDbDataAdapter(
"SELECT * FROM [" + tableName + "]", connection);
OleDbCommand insertCmd = new OleDbCommand();
OleDbType varcharType = OleDbType.VarChar;
#endif // OleDb
DataTable dataTable = new DataTable(tableName);
DataColumnCollection dataColumns = dataTable.Columns;
int columnNum;
string dataColumnName;
DataColumn dataColumn;
StringBuilder insertCmdHead = new StringBuilder(
"INSERT INTO [" + tableName + "] (");
StringBuilder insertCmdTail = new StringBuilder("VALUES (");
StringBuilder fileHeader = new StringBuilder();
for (columnNum = 0; columnNum < columnCount - 1; columnNum++)
{
dataColumnName = Headers[columnNum];
dataColumnName = dataColumnName.Replace('\n', ' ');
dataColumn = dataColumns.Add(dataColumnName);
dataColumn.DataType = typeof(string);
dataColumn.DefaultValue = "";
insertCmdHead.Append("[" + dataColumnName + "], ");
insertCmdTail.Append("?, ");
insertCmd.Parameters.Add(dataColumnName, varcharType, 255,
dataColumnName);
fileHeader.Append("\"" + dataColumnName + "\",");
}
dataColumnName = Headers[columnCount - 1];
dataColumn = dataColumns.Add(dataColumnName);
dataColumn.DataType = typeof(string);
dataColumn.DefaultValue = "";
insertCmdHead.Append("[" + dataColumnName + "]) ");
insertCmdTail.Append("?)");
insertCmd.Parameters.Add(dataColumnName, varcharType, 255,
dataColumnName);
insertCmd.CommandText = insertCmdHead.ToString() +
insertCmdTail.ToString();
insertCmd.Connection = connection;
dataAdapter.InsertCommand = insertCmd;
fileHeader.Append("\"" + dataColumnName + "\"");
for (int rowNum = 0; rowNum < 212; rowNum++) // (not really)
{
DataRow dataRow = dataTable.NewRow();
dataRow[2] = "hi i'm 2"; // (not really)
dataRow[18] = "18"; // (not really)
dataRow[19] = "19"; // (not really)
// (around half the cells default to empty strings)
dataTable.Rows.Add(dataRow);
}
StreamWriter fileWriter = new StreamWriter(fileInfo.Create(),
Encoding.Default);
fileWriter.WriteLine(fileHeader.ToString());
fileWriter.Close();
fileWriter.Dispose();
// UP TO THIS POINT TAKES A FEW MILLISECONDS, OK
//
// 55 SECONDS OF CPU TIME (Pentium 4 3 GHz) TO WRITE 34 KILOBYTES
dataAdapter.Update(dataTable); // 55 SECONDS TO WRITE 34 KILOBYTES
// 55 SECONDS OF CPU TIME (Pentium 4 3 GHz) TO WRITE 34 KILOBYTES
//
// OK AFTER THIS
dataAdapter.Dispose();
dataTable.Dispose();
connection.Close();
same Excel driver to write some CSV files. I also tried using the Text
driver to write CSV files. The results are correct but they take enormous
amounts of CPU time.
Of course I know how to write a CSV file using plain old Shift-JIS character
strings and quotation marks and commas, and probably I'll do that in order
to speed up this operation, but the question still remains.
Why is this so slow? The sample data table had 213 rows (header plus 212
data rows) and 25 columns, all strings (some of them empty strings). The
total file size on disk is 34KB. The computations and database operations
in memory take a few milliseconds, not even noticeable when running under a
debugger. But the call to
dataAdapter.Update(dataTable);
takes 55 SECONDS OF CPU TIME on a Pentium 4 running at 3 GHz. It occupies
100% of one CPU core for 55 seconds.
Actual time to write the file might be a few hundred milliseconds since 34KB
occupies several NTFS structures. Anyway, this thing isn't disk bound, and
it's not CPU bound in my code, it's CPU bound in the Update method.
What is going on here?
#undef UseOdbc // Use OleDb
string fileName = @"C:\test.csv"; // (not really)
string Headers[] = new string[25] { "1", "2", "3", /* ... */ "25" };
int columnCount = 25; // (not really)
FileInfo fileInfo = new System.IO.FileInfo(fileName);
string dirName = fileInfo.DirectoryName;
string tableName = fileInfo.Name;
#if UseOdbc
OdbcConnection connection = new OdbcConnection(
"Provider=MSDASQL;" +
"DRIVER={Microsoft Text Driver (*.txt; *.csv)};DBQ=" + dirName +
";Extended Properties='Text;Extensions=asc,csv,tab,txt;" +
"HDR=Yes;FMT=Delimited'");
connection.Open();
OdbcDataAdapter dataAdapter = new OdbcDataAdapter(
"SELECT * FROM [" + tableName + "]", connection);
OdbcCommand insertCmd = new OdbcCommand();
OdbcType varcharType = OdbcType.VarChar;
#else // OleDb
OleDbConnection connection = new OleDbConnection(
"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + dirName +
";Extended Properties='Text;Extensions=asc,csv,tab,txt;" +
"HDR=Yes;FMT=Delimited'");
connection.Open();
OleDbDataAdapter dataAdapter = new OleDbDataAdapter(
"SELECT * FROM [" + tableName + "]", connection);
OleDbCommand insertCmd = new OleDbCommand();
OleDbType varcharType = OleDbType.VarChar;
#endif // OleDb
DataTable dataTable = new DataTable(tableName);
DataColumnCollection dataColumns = dataTable.Columns;
int columnNum;
string dataColumnName;
DataColumn dataColumn;
StringBuilder insertCmdHead = new StringBuilder(
"INSERT INTO [" + tableName + "] (");
StringBuilder insertCmdTail = new StringBuilder("VALUES (");
StringBuilder fileHeader = new StringBuilder();
for (columnNum = 0; columnNum < columnCount - 1; columnNum++)
{
dataColumnName = Headers[columnNum];
dataColumnName = dataColumnName.Replace('\n', ' ');
dataColumn = dataColumns.Add(dataColumnName);
dataColumn.DataType = typeof(string);
dataColumn.DefaultValue = "";
insertCmdHead.Append("[" + dataColumnName + "], ");
insertCmdTail.Append("?, ");
insertCmd.Parameters.Add(dataColumnName, varcharType, 255,
dataColumnName);
fileHeader.Append("\"" + dataColumnName + "\",");
}
dataColumnName = Headers[columnCount - 1];
dataColumn = dataColumns.Add(dataColumnName);
dataColumn.DataType = typeof(string);
dataColumn.DefaultValue = "";
insertCmdHead.Append("[" + dataColumnName + "]) ");
insertCmdTail.Append("?)");
insertCmd.Parameters.Add(dataColumnName, varcharType, 255,
dataColumnName);
insertCmd.CommandText = insertCmdHead.ToString() +
insertCmdTail.ToString();
insertCmd.Connection = connection;
dataAdapter.InsertCommand = insertCmd;
fileHeader.Append("\"" + dataColumnName + "\"");
for (int rowNum = 0; rowNum < 212; rowNum++) // (not really)
{
DataRow dataRow = dataTable.NewRow();
dataRow[2] = "hi i'm 2"; // (not really)
dataRow[18] = "18"; // (not really)
dataRow[19] = "19"; // (not really)
// (around half the cells default to empty strings)
dataTable.Rows.Add(dataRow);
}
StreamWriter fileWriter = new StreamWriter(fileInfo.Create(),
Encoding.Default);
fileWriter.WriteLine(fileHeader.ToString());
fileWriter.Close();
fileWriter.Dispose();
// UP TO THIS POINT TAKES A FEW MILLISECONDS, OK
//
// 55 SECONDS OF CPU TIME (Pentium 4 3 GHz) TO WRITE 34 KILOBYTES
dataAdapter.Update(dataTable); // 55 SECONDS TO WRITE 34 KILOBYTES
// 55 SECONDS OF CPU TIME (Pentium 4 3 GHz) TO WRITE 34 KILOBYTES
//
// OK AFTER THIS
dataAdapter.Dispose();
dataTable.Dispose();
connection.Close();