Compression in .NET 2.0

O

orekinbck

Hi There

Is there any easy way to use the System.Compression tools in .NET 2.0
to compress an entire directory ?

All my source code is kept in a single directory so I have written a
utility that recursively backs up the directory and compresses each
file as it goes. The utility has no GUI.

I have attached the program here, I would be interested to get any
feedback as I am still learning C#.

TIA
Bill




using System;

using System.Configuration;

using System.Diagnostics;

using System.IO;

using System.IO.Compression;

using System.Globalization;

namespace FileCopier

{

sealed public class FileCopierMain

{

public enum CompressionActions { Nothing, Compress, Decompress };

static string CompressSuffix = ".BWZIP";

static string ToFolder = @"C:\Backups\";

//***************************************************

//TO BACKUP AND COMPRESS, COMMENT THIS IN

//AND SET THE FROM FOLDERS

static CompressionActions CompressionAction =
CompressionActions.Compress;

static string FromFolder = @"C:\Dev\";

//***************************************************

//***************************************************

//TO DECOMPRESS A BACKED UP FOLDER, COMMENT THIS IN

//AND SET THE FROM FOLDER EQUAL TO THE BACKED UP FOLDER

//static CompressionActions CompressionAction =
CompressionActions.Decompress;

//static string FromFolder = @"C:\Backups\Dev_30Jul05_3";

//***************************************************

private FileCopierMain() { }

static void Main()

{

if (FromFolder.Substring(FromFolder.Length - 1, 1) == @"\")

FromFolder = FromFolder.Substring(0, FromFolder.Length - 1);

if (ToFolder.Substring(ToFolder.Length - 1, 1) == @"\")

ToFolder = ToFolder.Substring(0, ToFolder.Length - 1);


//Work out the folder which we are going to copy to. Do not overwrite
old backups

int LastBackSlashPos = FromFolder.LastIndexOf(@"\");

string NewFolderName = FromFolder.Substring(LastBackSlashPos + 1,
FromFolder.Length - LastBackSlashPos - 1);

NewFolderName += "_" + System.DateTime.Now.Date.ToString("ddMMMyy",
CultureInfo.CurrentCulture);

if (Directory.Exists(ToFolder + @"\" + NewFolderName))

{

int newFolderAppend = 1;

while (Directory.Exists(ToFolder + @"\" + NewFolderName + "_" +
newFolderAppend.ToString(CultureInfo.CurrentCulture)))

{

newFolderAppend++;

}

NewFolderName += "_" +
newFolderAppend.ToString(CultureInfo.CurrentCulture);

}

//Do the copy

CopyDirectory(FromFolder, ToFolder + @"\" + NewFolderName, true);

}

public static void CopyDirectory(string sourcePath, string
destinationPath, bool recurse)

{

String[] files;

if (destinationPath[destinationPath.Length - 1] !=
Path.DirectorySeparatorChar)

destinationPath += Path.DirectorySeparatorChar;

if (!Directory.Exists(destinationPath))
Directory.CreateDirectory(destinationPath);

files = Directory.GetFileSystemEntries(sourcePath);

foreach (string element in files)

{

if (recurse)

{

// copy sub directories (recursively)

if (Directory.Exists(element))

CopyDirectory(element, destinationPath + Path.GetFileName(element),
recurse);

// copy files in directory

else

{

if (CompressionAction == CompressionActions.Compress)

BWCompressionUtility.CopyAndCompressFile(element, destinationPath +
Path.GetFileName(element) + CompressSuffix);

else if (CompressionAction == CompressionActions.Decompress)

BWCompressionUtility.CopyAndDecompressFile(element, destinationPath +
Path.GetFileName(element).Replace(CompressSuffix,string.Empty));

else

File.Copy(element, destinationPath + Path.GetFileName(element), true);

}

}

else

{

// only copy files in directory

if (!Directory.Exists(element))

{

if (CompressionAction == CompressionActions.Compress)

BWCompressionUtility.CopyAndCompressFile(element, destinationPath +
Path.GetFileName(element) + CompressSuffix);

else if (CompressionAction == CompressionActions.Decompress)

BWCompressionUtility.CopyAndDecompressFile(element, destinationPath +
Path.GetFileName(element).Replace(CompressSuffix, string.Empty));

else

File.Copy(element, destinationPath + Path.GetFileName(element), true);

}

}

}

}

}

public class BWCompressionUtility

{

//References

//http://www.codeguru.com/columns/DotNet/article.php/c9931

//http://groups.google.com.au/group/m...pression+Folder&rnum=1&hl=en#bb15f1069eb8e175

//http://msdn2.microsoft.com/library/as1ff51s(en-us,vs.80).aspx

/// <summary>

/// Private because all members are static

/// </summary>

private BWCompressionUtility() { }

public static void CopyAndCompressFile(string SourceFullPath, string
DestFullPath)

{

FileStream fs = null;

GZipStream compressedZipStream = null;

// Open and read the contents of the file

fs = new FileStream(SourceFullPath, FileMode.Open, FileAccess.Read,
FileShare.Read);

byte[] buffer = new byte[fs.Length];

int count = fs.Read(buffer, 0, buffer.Length);

if (count != buffer.Length)

throw new Exception("Unable to read data from file");

fs.Close();

// Write buffer to file

fs = new FileStream(DestFullPath, FileMode.Create);

compressedZipStream = new GZipStream(fs, CompressionMode.Compress,
true);

compressedZipStream.Write(buffer, 0, buffer.Length);

compressedZipStream.Close();

fs.Close();

return;

}

public static void CopyAndDecompressFile(string SourceFullPath, string
DestFullPath)

{

FileStream fsREAD = null;

FileStream fsWRITE = null;

GZipStream compressedZipStream = null;

//Compressed File to Read and Associated Stream

fsREAD = System.IO.File.OpenRead(SourceFullPath);

compressedZipStream = new GZipStream(fsREAD,
CompressionMode.Decompress);

//Decompressed File

fsWRITE = new FileStream(DestFullPath, FileMode.Create);

//Read through the stream and write it out

int bytesRead = 0;

int bytesToDecompress = 1000;

byte[] holdingBay = new byte[bytesToDecompress];

while ((bytesRead = compressedZipStream.Read(holdingBay, 0,
holdingBay.Length)) > 0)

fsWRITE.Write(holdingBay, 0, bytesRead);

//Close all streams

compressedZipStream.Close();

fsWRITE.Close();

fsREAD.Close();

}

}

}
 
O

Oliver Sturm

Is there any easy way to use the System.Compression tools in .NET 2.0
to compress an entire directory ?

I think this is not currently possible, or rather you'd have to do it
yourself in large parts. The new classes under System.IO.Compression
provide only the ability to compress stream data, but they don't have
any of the functionality that's needed to handle multi-file management
information - such as the list of files in the archive, the hierarchy of
files and folders that may be included and so on. If you want that kind
of functionality, you'll be better off looking at SharpZipLib.
I have attached the program here, I would be interested to get any
feedback as I am still learning C#.

So, to a learner: nicely done! At a quick glance, one thing met my eye:
you are loading all the content of each file that you compress into a
memory buffer before writing it back out into the compressed stream. You
should try experimenting with having both streams open at the same time
and transferring content from one to the other buffer by buffer - that
way your algorithm will be a lot more efficient with large files.



Oliver Sturm
 
O

orekinbck

Thanks Oliver, I implemented your suggestion and it runs 25% quicker on
compressions now.

I also added a couple of other features:
- It accepts command line parameters
- One of the command line parameters is to ignore BIN and OBJ folders

Here is the latest implementation. Btw - I arbitrarily chose an array
size of 1,000 .. I have not had time to experiment with different array
sizes yet.

Cheers
Bill

using System;
using System.IO;
using System.IO.Compression;
using System.Globalization;

namespace FileCopier
{
sealed public class FileCopierMain
{
public enum CompressionAction { Nothing, Compress, Decompress
};

static bool ignoreBinAndObjFolders;
static string compressSuffix = ".BWZIP";
static string fromFolder;
static string toFolder;
static CompressionAction myCompressionAction =
CompressionAction.Nothing;

private FileCopierMain() { }

static void Main(string[] args)
{
//EXAMPLE COMMAND LINE FOR BACKUP AND COMPRESS:
//FileCopier.exe C:\Dev C:\Backups 1 1
//EXAMPLE COMMAND LINE FOR DECOMPRESS AND RESTORE
//FileCopier.exe C:\Backups\Dev_31Jul05 C:\Backups 2 1

if (args.Length != 4)
throw new ArgumentException("Wrong Number of
Arguments");

fromFolder = args[0].Trim(); //The From Folder
toFolder = args[1].Trim(); //The To Folder
if (args[2].Trim() == "1") //SWITCH: 1 = Compress,
2 = Decompress, Else Leave As Is
myCompressionAction = CompressionAction.Compress;
else if (args[2].Trim() == "2")
myCompressionAction = CompressionAction.Decompress;
if (args[3].Trim() == "1") //SWITCH: 1 = Do not
copy folders called BIN or OBJ, Else Copy Everthing
ignoreBinAndObjFolders = true;
else
ignoreBinAndObjFolders = false;

//Eliminate Last Back slash as it screws up logic
if (fromFolder[fromFolder.Length - 1] ==
Path.DirectorySeparatorChar)
fromFolder = fromFolder.Substring(0, fromFolder.Length
- 1);
if (toFolder[toFolder.Length - 1] ==
Path.DirectorySeparatorChar)
toFolder = toFolder.Substring(0, toFolder.Length - 1);

//Work out the folder which we are going to copy to. Do
not overwrite folders
int LastBackSlashPos =
fromFolder.LastIndexOf(Path.DirectorySeparatorChar);
string NewFolderName =
fromFolder.Substring(LastBackSlashPos + 1, fromFolder.Length -
LastBackSlashPos - 1);
NewFolderName += "_" +
System.DateTime.Now.Date.ToString("ddMMMyy",
CultureInfo.CurrentCulture);
if (Directory.Exists(toFolder + Path.DirectorySeparatorChar
+ NewFolderName))
{
int newFolderAppend = 1;
while (Directory.Exists(toFolder +
Path.DirectorySeparatorChar + NewFolderName + "_" +
newFolderAppend.ToString(CultureInfo.CurrentCulture)))
{
newFolderAppend++;
}
NewFolderName += "_" +
newFolderAppend.ToString(CultureInfo.CurrentCulture);
}

//Do the copy
CopyDirectory(fromFolder, toFolder +
Path.DirectorySeparatorChar + NewFolderName, true);
}

public static void CopyDirectory(string sourcePath, string
destinationPath, bool recurse)
{
String[] files;

//Ensure that last character is a path separator
if (destinationPath[destinationPath.Length - 1] !=
Path.DirectorySeparatorChar)
destinationPath += Path.DirectorySeparatorChar;

//If need to ignore Bin and Obj then do so
if (ignoreBinAndObjFolders)
{
int secondLastSlashPos = destinationPath.Substring(0,
destinationPath.Length - 1).LastIndexOf(Path.DirectorySeparatorChar);
string lastFolderName =
destinationPath.Substring(secondLastSlashPos + 1,
destinationPath.Length - secondLastSlashPos - 2);
if (lastFolderName.ToUpper(CultureInfo.CurrentCulture)
== "BIN" || lastFolderName.ToUpper(CultureInfo.CurrentCulture) ==
"OBJ")
return;
}

//OK, lets do it
if (!Directory.Exists(destinationPath))
Directory.CreateDirectory(destinationPath);
files = Directory.GetFileSystemEntries(sourcePath);
foreach (string element in files)
{
if (recurse)
{
// copy sub directories (recursively)
if (Directory.Exists(element))
CopyDirectory(element, destinationPath +
Path.GetFileName(element), recurse);
// copy files in directory
else
{
if (myCompressionAction ==
CompressionAction.Compress)

BWCompressionUtility.CopyAndCompressFile(element, destinationPath +
Path.GetFileName(element) + compressSuffix);
else if (myCompressionAction ==
CompressionAction.Decompress)

BWCompressionUtility.CopyAndDecompressFile(element, destinationPath +
Path.GetFileName(element).Replace(compressSuffix,string.Empty));
else
File.Copy(element, destinationPath +
Path.GetFileName(element), true);
}
}
else
{
// only copy files in directory
if (!Directory.Exists(element))
{
if (myCompressionAction ==
CompressionAction.Compress)

BWCompressionUtility.CopyAndCompressFile(element, destinationPath +
Path.GetFileName(element) + compressSuffix);
else if (myCompressionAction ==
CompressionAction.Decompress)

BWCompressionUtility.CopyAndDecompressFile(element, destinationPath +
Path.GetFileName(element).Replace(compressSuffix, string.Empty));
else
File.Copy(element, destinationPath +
Path.GetFileName(element), true);
}
}
}
}
}
sealed public class BWCompressionUtility
{

//References
//http://www.codeguru.com/columns/DotNet/article.php/c9931

//http://groups.google.com.au/group/m...pression+Folder&rnum=1&hl=en#bb15f1069eb8e175
//http://msdn2.microsoft.com/library/as1ff51s(en-us,vs.80).aspx

private BWCompressionUtility() { }

public static void CopyAndCompressFile(string sourceFullPath,
string destinationFullPath)
{
FileStream fsREAD = null;
FileStream fsWRITE = null;
GZipStream compressedZipStream = null;

// Open the file streams
fsREAD = new FileStream(sourceFullPath, FileMode.Open,
FileAccess.Read, FileShare.Read);
fsWRITE = new FileStream(destinationFullPath,
FileMode.Create);

//Create Zip Stream
compressedZipStream = new GZipStream(fsWRITE,
CompressionMode.Compress, true);

//Create buffer
int bytesRead = 0;
long TotalBytesRead = 0;
byte[] buffer = new byte[1000];

//Now transfer the data
while ((bytesRead = fsREAD.Read(buffer,0,buffer.Length)) >
0)
{
compressedZipStream.Write(buffer, 0, bytesRead);
TotalBytesRead += bytesRead;
}

if (TotalBytesRead != fsREAD.Length)
throw new IOException("Unable to read data from file");

//Close all streams. Close zip stream first so it flushes
compressedZipStream.Close();
fsWRITE.Close();
fsREAD.Close();
}
public static void CopyAndDecompressFile(string sourceFullPath,
string destinationFullPath)
{
FileStream fsREAD = null;
FileStream fsWRITE = null;
GZipStream compressedZipStream = null;

// Open the file streams
fsREAD = new FileStream(sourceFullPath, FileMode.Open,
FileAccess.Read, FileShare.Read);
//System.IO.File.OpenRead(sourceFullPath);
fsWRITE = new FileStream(destinationFullPath,
FileMode.Create);

//Create Zip Stream
compressedZipStream = new GZipStream(fsREAD,
CompressionMode.Decompress);

//Create buffer
int bytesRead = 0;
byte[] buffer = new byte[1000];

//Now transfer the data
while ((bytesRead = compressedZipStream.Read(buffer, 0,
buffer.Length)) > 0)
fsWRITE.Write(buffer, 0, bytesRead);

//Close all streams. Close zip stream first so it flushes
compressedZipStream.Close();
fsWRITE.Close();
fsREAD.Close();
}
}
}
 
O

Oliver Sturm

Here is the latest implementation. Btw - I arbitrarily chose an array
size of 1,000 .. I have not had time to experiment with different array
sizes yet.

I guess you should see a performance gain for large file sizes if you
have a buffer size that's in some relation to your file system's cluster
size. I think (but I'm not quite sure where I got that from) that a
"default" cluster size on NTFS these days is 4KB, so I'd expect best
results for your algorithm when running with a 4096 bytes buffer, or a
multiple thereof.



Oliver Sturm
 
Top