Very Slow reading excel data into an array (while opened in new xl

M

MikeZz

I have a VBA application that reads in many excel files (sometimes in the
100's), one at a time, scans them for key info, and summarizs the data in a
new workbook. The routine actually doing the reading is the first one:
ReadNewContract below.

After about the 150th file (each usually under 40k), the macro started
coming to a crawl and I noticed in Task Manager that Excel was using more and
more memory (pushing 100MB). Excel was not releasing the files out of VBA
Project.

In a post from Tom Ogilvy, I saw that I could open the file in a new xlApp,
then close it when I was done reading the data.

This got rid of the memory issue but it dramatically increased the time it
took to import the excel data worksheet into an array I could use.

In the original code, I read the excel file into an array using this:
arrMaster(r, c - LeftIndent) = ActiveSheet.Cells(r, c)
I didn't need the xlApp reference because of the way I opened the workbook.

In my revised code, I read the excel file into an array using this:
arrMaster(r, c - LeftIndent) = xlApp.ActiveSheet.Cells(r, c)

For some reason, it is just taking an incredible amount of time just to put
an excel worksheet into an array. In my original code, it happened in a
blink of an eye. With the "improved" code, it takes several seconds.

Can anyone tell what I'm doing wrong?
Is there a better way to read in the excel data?

Thanks!
MikeZz

To make it a cut and paste for testing, I have all basic code attached.
I put all the delcarations at the end so it's easier to find the code in
question.
Search for: '############### QUESTION HERE
to find the area in question.

Thanks!
MikeZz

Sub ReadNewContract(fileNo, arrMaster)

Dim MasterFile
Dim f, c, r
Dim lngCount As Long

Dim Master As Workbook
Dim masterSht As Worksheet
Dim rowsMaster, colsMaster, lastCellMaster
Dim rowMax, rightCol
Dim FoundIndent

Dim matCount, matTotal
Dim ctCellMaster
Dim testRowCountMat
Dim alertStat
Dim tempXLFile

Dim xlApp As New Excel.Application 'ADDED FOR MEMORY

Dim FileString As String 'ADDED FOR MEMORY
xlApp.Application.Visible = True 'ADDED FOR MEMORY

'############################################################################################
'########### READ IN MASTER FIL
'############################################################################################

If fileNo = 1 Or IsEmpty(fileLocExcel) Then
fileLocExcel = Get_File_Info(arrFiles(fileNo, colFileName), "Directory")
End If

FileString = arrFiles(fileNo, colFileName) 'ADDED FOR MEMORY

xlApp.Workbooks.Open (FileString) 'Focus is now on the workbook 'ADDED FOR
MEMORY
'Workbooks.Open (arrFiles(fileNo, colFileName))

Set Master = xlApp.ActiveWorkbook
Set masterSht = xlApp.ActiveSheet
MasterFile = Master.Name

lastCellMaster = LastCellIn(masterSht)
rowsMaster = LastRowIn(masterSht)
arrFiles(fileNo, colFileRows) = rowsMaster
colsMaster = LastColIn(masterSht)

If rowsMaster = Empty Or colsMaster = Empty Then
Exit Sub
End If

ctCellMaster = 0
ReDim arrMaster(0)
ReDim arrMaster(1 To rowsMaster, 0 To colsMaster)
For r = 1 To rowsMaster
LeftIndent = 0
FoundIndent = False
rightCol = 0
For c = 1 To colsMaster

'#####################################################################
'############### QUESTION HERE ###########################
'
' "xlApp.ActiveSheet.Cells(r, c)" seems to run magnitudes slower than using
' ActiveSheet.Cells(r, c) on a regular active sheet
' in original application instance.
' Is there another way?
'#####################################################################
'#####################################################################



If alignLeft = True And FoundIndent = False And
Len(xlApp.ActiveSheet.Cells(r, c)) = 0 Then
LeftIndent = LeftIndent + 1: GoTo nextMc
End If
FoundIndent = True
arrMaster(r, c - LeftIndent) = xlApp.ActiveSheet.Cells(r, c)
If Len(arrMaster(r, c - LeftIndent)) <> 0 Then rightCol = c - LeftIndent
nextMc:
Next c
arrMaster(r, 0) = rightCol
Next r

Master.Close SaveChanges:=False

Set masterSht = Nothing
Set Master = Nothing
xlApp.Quit
Set xlApp = Nothing 'ADDED FOR MEMORY

End Sub

Private Sub Get_File_List()
Dim lngCount
Dim maxcols

Call Initialize_Values
maxcols = colFileMaxx

With Application.FileDialog(msoFileDialogOpen)
.AllowMultiSelect = True
.Show

fileCount = .SelectedItems.Count
ReDim arrFiles(0)
ReDim arrFiles(0 To .SelectedItems.Count, 1 To maxcols)
' Display paths of each file selected

If fileCount = 0 Then End

For lngCount = 1 To fileCount
arrFiles(lngCount, colFileName) = .SelectedItems(lngCount)
Next lngCount

End With

Dim f
For f = 1 To fileCount
Call ReadNewContract(f, arrImport)
Next f

End Sub


Private Sub Initialize_Values()
Dim col0, r

ScanTime = Format(Now, "Medium Time")

dateSummaryFormat = Control.Range("dateSummaryFormat")
Select Case dateSummaryFormat
Case "yyyy-mm-dd"
ScanDate = Format(Date, dateSummaryFormat)
Case "dd-mmm-yy"
ScanDate = Format(Date, dateSummaryFormat)
Case "yyyy-ww-ddd"
ScanDate = Format(Date, dateSummaryFormat)
Case Else
ScanDate = Format(Date, "Medium Time")

End Select

'AutoSaveFile = "x" & ScanDate & "Contracts Scanned " & " " &
Replace(ScanTime, ":", "_") & ".xls"
AutoSaveFile = ScanDate & "Contracts Scanned " & " " & Replace(ScanTime,
":", "_") & ".xls"

fileCount = 0
completeCount = 0
col0 = 1
colFileName = col0: col0 = col0 + 1 'Filename
colFileStat = col0: col0 = col0 + 1 'Scan Status
colFileCust = col0: col0 = col0 + 1
colFileType = col0: col0 = col0 + 1 'Contract Type - Part, Tooling, etc
colFileProd = col0: col0 = col0 + 1 'Contract Part Product Area AB/SB/SW/EL
colFilePro2 = col0: col0 = col0 + 1
colFileProj = col0: col0 = col0 + 1 'Contract ALV Project Number
colFileDesc = col0: col0 = col0 + 1 'Contract Part Description
colFilePNum = col0: col0 = col0 + 1 'Contract Part Number
colFileLNum = col0: col0 = col0 + 1 'Contract Less Finish Part Number

colFileCNum = col0: col0 = col0 + 1 'Contract Number
colFileRevs = col0: col0 = col0 + 1 'Contract Revision
colFileDate = col0: col0 = col0 + 1 'Contract Date
colFileReas = col0: col0 = col0 + 1 'Contract Amendment Reason

colFileESOP = col0: col0 = col0 + 1 'Contract Effective Start Date
colFileEEOP = col0: col0 = col0 + 1 'Contract Effective End Date
colFilePeri = col0: col0 = col0 + 1 'Price Periods
colFilePric = col0: col0 = col0 + 1 'Starting Price
colFileLTAs = col0: col0 = col0 + 1 'LTA %= col0: col0 = col0 + 1 's
colFilePri2 = col0: col0 = col0 + 1 'Ending Price (part contracts)
colFileCurr = col0: col0 = col0 + 1 'Currency Type - First
colFileCurX = col0: col0 = col0 + 1 'Currency Type - Change to

colFilePack = col0: col0 = col0 + 1 'Packaging Type - First
colFilePacX = col0: col0 = col0 + 1 'Packaging Type - Change to

colFileCapa = col0: col0 = col0 + 1 'Starting Price
colFileCap2 = col0: col0 = col0 + 1 'Ending Price (part contracts)
colFileHPDa = col0: col0 = col0 + 1 'Hours Per Day - First
colFileHPDX = col0: col0 = col0 + 1 'Hours Per Day - Change to

colFileDunM = col0: col0 = col0 + 1 'Contract Mfg Dunns Code
colFileDDun = col0: col0 = col0 + 1 'Delivery To Duns - First
colFileDDuX = col0: col0 = col0 + 1 'Delivery To Duns - Change to
colFileSDun = col0: col0 = col0 + 1 'Ship From Duns - First
colFileSDuX = col0: col0 = col0 + 1 'Ship From Duns - Change to

colFileTFre = col0: col0 = col0 + 1 'Terms Start - Freight
colFileTFrX = col0: col0 = col0 + 1 'Terms Ending - Freight
colFileTPay = col0: col0 = col0 + 1 'Terms Start - Payment
colFileTPaX = col0: col0 = col0 + 1 'Terms Ending - Payment
colFileTDel = col0: col0 = col0 + 1 'Terms Start - Delivery
colFileTDeX = col0: col0 = col0 + 1 'Terms Ending - Delivery

colFileBuyr = col0: col0 = col0 + 1 'Contract Buyer Name
colFileHead = col0: col0 = col0 + 1 'Contract Header
colFileDown = col0: col0 = col0 + 1 'Contract Download Date

colFileSNam = col0: col0 = col0 + 1 'Contract Sheet Name
colFileRows = col0: col0 = col0 + 1 'Contract Effective End Date
colFileNam2 = col0: col0 = col0 + 1 'New Smart File Name
colFileKeys = col0: col0 = col0 + 1 'Contract Key - Part English, Part
Mexico, Tooling English etc..

colFileMaxx = col0 + 5

col0 = 0
colKeyDeffName = col0 + 1 'Key Definition: Name
colKeyDeffCust = colKeyDeffName + 1 'Key Definition: OEM
colKeyDeffType = colKeyDeffCust + 1 'Key Definition: Contract Type
colKeyDeffLang = colKeyDeffType + 1 'Key Definition: Language
colKeyDeffIden = colKeyDeffLang + 1 'Key Definition: Unique Identifier
String used to deterime which key to use
colKeyDeffSNum = colKeyDeffIden + 1 'Key Definition: Key Sheet Number
colKeyDeffIMxR = colKeyDeffSNum + 1 'Key Definition:
colKeyDeffIMxC = colKeyDeffIMxR + 1 'Key Definition:
colKeyDeffITyp = colKeyDeffIMxC + 1 'Key Definition:
colKeyDeffSNam = colKeyDeffITyp + 1 'Key Definition: Key Sheet Name
colKeyDeffMaxx = colKeyDeffSNam + 5

col0 = 0
colKeyAnchName = col0 + 1 'Key Anchor: Name
colKeyAnchStri = colKeyAnchName + 1 'Key Anchor: Search String (which
defines location)
colKeyAnchType = colKeyAnchStri + 1 'Key Anchor: Match Type (Full, Partial
Match)
colKeyAnchStar = colKeyAnchType + 1 'Key Anchor: Start Location in File
(Top, Previous Key, Bottom)
colKeyAnchDire = colKeyAnchStar + 1 'Key Anchor: Search Direction from
Start (Down, Up)
colKeyAnchLoca = colKeyAnchDire + 1 'Key Anchor: Row Location (Left,
Right, Any)
colKeyAnchFunc = colKeyAnchLoca + 1 'Key Anchor: Special Function to
perform (such as count repeats)
colKeyAnchRowX = colKeyAnchFunc + 1 'Key Anchor: Row in current file where
this Anchor is found
colKeyAnchColY = colKeyAnchRowX + 1 'Key Anchor: Col in current file where
this Anchor is found
colKeyAnchMaxx = colKeyAnchColY + 5
colKeyAnchStat = colKeyAnchMaxx
colKeyAnchFSta = colKeyAnchStat - 1 'Key Anchor: Status of Anchor Function

col0 = 0
colKeyCodeCode = col0 + 1 'Key Code: Code
colKeyCodeDesc = colKeyCodeCode + 1 'Key Code: Description of search:
colKeyCodeStri = colKeyCodeDesc + 1 'Key Code: Search String (which defines
location)
colKeyCodeStar = colKeyCodeStri + 1 'Key Code: Start Location in File (Top,
Previous Key, Bottom)
colKeyCodeType = colKeyCodeStar + 1 'Key Code: Match Type (Full, Partial
Match)
colKeyCodeDirS = colKeyCodeType + 1 'Key Code: Direction from Start to find
Key Text String
colKeyCodeDirA = colKeyCodeDirS + 1 'Key Code: Direction from Key Test
String to find Answer
colKeyCodeLook = colKeyCodeDirA + 1 'Key Code: Look Location to from Key
String to find answer (next value, last value in row)
colKeyCodeComm = colKeyCodeLook + 1 'Key Code: Command to perform
colKeyCodeFunc = colKeyCodeComm + 1 'Key Code:
colKeyCodePRng = colKeyCodeFunc + 1 'Key Code: Paste Range in contract
summary worksheet
colKeyCodeORig = colKeyCodePRng + 1 'Key Code: Column offset from paste
range in summary sheet to put value.
colKeyCodeODwn = colKeyCodeORig + 1 'Key Code: Row offset from paste range
in summary sheet to put value.
colKeyCodeMaxx = colKeyCodeODwn + 5 'Key Code:
colKeyCodeStat = colKeyCodeMaxx
colKeyCodeAnsw = colKeyCodeStat - 1 'Key Code: Result / Answer Field

alignLeft = Control.Range("alignLeft").Value
CreateNewWB = Control.Range("CreateNewWB").Value
fileSmart = Control.Range("fileSmart")
fileBackup = Control.Range("fileBackup")
fileSort = Control.Range("fileSort")
fileDelete = Control.Range("fileDelete")
fileLocUnscanned = Control.Range("fileLocUnscanned")
fileLocBackup = Control.Range("fileLocBackup")
fileLocPDF = Control.Range("fileLocPDF")
fileLocAuto = Control.Range("fileLocAuto") 'AutoSaveFile
fileLocScanned = Control.Range("fileLocScanned")
fileLocExcel = Empty 'This is determined when opening the first excel
contract.


fileDelPDF = Control.Range("fileDelPDF")
typeConfirm = Control.Range("typeConfirm")
fileKeywordScan = Control.Range("fileKeywordScan")
fileWorkDays = Control.Range("fileWorkDays")
ctCustomers = MasterData.Range("ctCustomers")
ctAmendment_Reason = MasterData.Range("ctCustomers")



KeySearches = MasterData.Range("KeySearches")
KeySearchRows = MasterData.Range("KeySearchRows")
KeySearchCols = MasterData.Range("KeySearchCols")

workbookCreated = False
errCount = 0
Set shtCopy = shtHorz

Set shtSummary = Nothing
Set shtPaste = Nothing
Set wbFinal = Nothing
Set rngCat = MasterData.Range("Category")

ReDim arrFileNameSetup(0)
ReDim arrKeyErr(0) 'Array of Current Key Definition (Title Block)
ReDim arrKeyDeff(0) 'Array of Current Key Definition (Title Block)
ReDim arrKeyCode(0) ' As Variant 'Array of Current Key Code (Programming)
ReDim arrKeyAnch(0) ' As Variant 'Array of Current Key Anchor Points of
Refernce (Title Block)
ReDim arrFiles(0) ' As Variant
ReDim arrImport(0) ' As Variant
ReDim arrPeriods(0)
ReDim arrPerCode(0)
ReDim arrHeadStr(0)
ReDim arrDunsCode(0)
ReDim arrProdCats(0)
ReDim arrNewDuns(0)
ReDim arrNewCats(0)
ReDim arrKeyWords(0)
ReDim arrNotes(0)
ReDim arrCustomers(0)
ReDim arrReasons(0)
CountNewDuns = 0
CountNewCats = 0
End Sub

Private Function Get_File_Info(str, Attrib)
Dim BackSlash

BackSlash = InStrRev(str, "\")

Select Case Attrib
Case "FileName"
Get_File_Info = Mid(str, BackSlash + 1)
Case "Directory"
Get_File_Info = Left(str, BackSlash)
End Select

End Function

Sub testy()
Dim xlApp As Excel.ApplicationExcel.ApplicationExcel.Application 'ADDED
FOR MEMORY
Dim test
'Dim FileString As String 'ADDED FOR MEMORY
'xlApp.Application.Visible = True 'ADDED FOR MEMORY


test = xlApp.ActiveWorkbook.Name


End Sub



Option Explicit
'DECLARATIONS HERE:
Dim fileCount
Dim completeCount
Dim arrFiles() As Variant
Dim fileName
Dim colFileName 'Filename
Dim colFileNam2 'New Filename
Dim colFileCust 'Customer like Saturn or GM
Dim colFileStat 'Contract Status - was it read or was the file structure not
found? Use for copying.
Dim colFileCNum 'Contract Number
Dim colFileRevs 'Contract Revision
Dim colFileDate 'Contract Date
Dim colFileHead 'Contract Header
Dim colFileDunM 'Contract Mfg Dunns Code
Dim colFilePNum 'Contract Part Number
Dim colFileLNum 'Contract Less Finish Part Number
Dim colFileType 'Contract Type - Part, Tooling, etc
Dim colFileKeys 'Contract Key - Part English, Part Mexico, Tooling English
etc..
Dim colFileProd 'Contract Product - AB/SB/SW/EL
Dim colFilePro2 'Contract Detail - RRAB/SB/SW/EL
Dim colFileDown 'Contract Download Date
Dim colFileDesc 'Contract Part Description
Dim colFileReas 'Contract Amendment Reason
Dim colFileBuyr 'Contract Buyer Name
Dim colFileESOP 'Contract Effective Start Date
Dim colFileEEOP 'Contract Effective End Date
Dim colFileSNam 'Contract Effective End Date
Dim colFileProj 'Contract Project Number
Dim colFileRows 'Contract Last Row # in Excel File
Dim colFilePeri 'Price Periods
Dim colFilePric 'Starting Price
Dim colFileLTAs 'LTA %'s
Dim colFilePri2 'Ending Price (part contracts)
Dim colFileCapa 'Starting Capacity
Dim colFileCap2 'Ending Capacity

Dim colFileTFre 'Terms Start - Freight
Dim colFileTFrX 'Terms Ending - Freight
Dim colFileTPay 'Terms Start - Payment
Dim colFileTPaX 'Terms Ending - Payment
Dim colFileTDel 'Terms Start - Delivery
Dim colFileTDeX 'Terms Ending - Delivery

Dim colFileDDun 'Delivery To Duns - First
Dim colFileDDuX 'Delivery To Duns - Change to
Dim colFileSDun 'Ship From Duns - First
Dim colFileSDuX 'Ship From Duns - Change to

Dim colFileHPDa 'Hours Per Day - First
Dim colFileHPDX 'Hours Per Day - Change to
Dim colFilePack 'Packaging Type - First
Dim colFilePacX 'Packaging Type - Change to
Dim colFileCurr 'Currency Type - First
Dim colFileCurX 'Currency Type - Change to


Dim colFileMaxx

Dim arrKeyDeff() As Variant 'Array of Current Key Definition (Title Block)
Dim colKeyDeffName 'Key Definition: Name
Dim colKeyDeffCust 'Key Definition: OEM
Dim colKeyDeffType 'Key Definition: Contract Type
Dim colKeyDeffLang 'Key Definition: Language
Dim colKeyDeffIden 'Key Definition: Unique Identifier String used to
deterime which key to use
Dim colKeyDeffIMxR 'Key Definition: Find String before this row
Dim colKeyDeffIMxC 'Key Definition: Find String before this col
Dim colKeyDeffIMax 'Key Definition: Max Row to search for Key Identifier
Dim colKeyDeffITyp 'Key Definition: Key String Match Type (Exact, Partial,
etc)
Dim colKeyDeffSNum 'Key Definition: Key Sheet Number
Dim colKeyDeffSNam 'Key Definition: Key Sheet Name
Dim colKeyDeffMaxx

Dim arrKeyAnch() As Variant 'Array of Current Key Anchor Points of Refernce
(Title Block)
Dim colKeyAnchName 'Key Anchor: Name
Dim colKeyAnchStri 'Key Anchor: Search String (which defines location)
Dim colKeyAnchType 'Key Anchor: Match Type (Full, Partial Match)
Dim colKeyAnchStar 'Key Anchor: Start Location in File (Top, Previous Key,
Bottom)
Dim colKeyAnchDire 'Key Anchor: Search Direction from Start (Down, Up)
Dim colKeyAnchLoca 'Key Anchor: Row Location (Left, Right, Any)
Dim colKeyAnchFunc 'Key Anchor: Special Function to perform (such as count
repeats)
Dim colKeyAnchRowX 'Key Anchor: Row in current file where this Anchor is
found
Dim colKeyAnchColY 'Key Anchor: Col in current file where this Anchor is
found
Dim colKeyAnchMaxx 'Key Anchor: Col in current file where this Anchor is
found
Dim colKeyAnchFSta 'Key Anchor: Status of Anchor Point
Dim colKeyAnchStat 'Key Anchor: Status of Anchor Point
Const rowKeyAnchMaxx = 20 'Key Anchor: Max possible Key Anchor Points for
all Keys

Dim arrKeyCode() As Variant 'Array of Current Key Code (Programming)
Dim colKeyCodeCode 'Key Code: Code
Dim colKeyCodeDesc 'Key Code: Description of search:
Dim colKeyCodeStri 'Key Code: Search String (which defines location)
Dim colKeyCodeStar 'Key Code: Start Location in File (Top, Previous Key,
Bottom)
Dim colKeyCodeType 'Key Code: Match Type (Full, Partial Match)
Dim colKeyCodeDirS 'Key Code: Direction from Start to find Key Text String
Dim colKeyCodeDirA 'Key Code: Direction from Key Test String to find Answer
Dim colKeyCodeLook 'Key Code: Look Location to from Key String to find
answer (next value, last value in row)
Dim colKeyCodeComm 'Key Code: Command to perform - Such as Loop
Dim colKeyCodeFunc 'Key Code: Function to perform on Value such as Add ALV
loc to Duns Code
Dim colKeyCodePRng 'Key Code: Paste Range in contract summary worksheet
Dim colKeyCodeORig 'Key Code: Column offset from paste range in summary
sheet to put value.
Dim colKeyCodeODwn 'Key Code: Row offset from paste range in summary sheet
to put value.
Dim colKeyCodeAnsw 'Key Code:
Dim colKeyCodeMaxx 'Key Code:
Dim colKeyCodeStat 'Key Code:

Dim arrErrors() As Variant
Const colKeyErrFile = 1 'KeyErr Anchor: Name
Const colKeyErrCode = 2 'KeyErr Anchor: Anchor or Code
Const colKeyErrDesc = 3 'KeyErr Anchor: Name
Const colKeyErrStri = 4 'KeyErr Anchor: Search String (which defines
location)
Const colKeyErrStar = 5 'KeyErr Anchor: Start Location in File (Top,
Previous KeyErr, Bottom)
Const colKeyErrType = 6 'KeyErr Anchor: Search Direction from Start (Down,
Up)
Const colKeyErrDir1 = 7 'KeyErr Anchor: Row Location (Left, Right, Any)
Const colKeyErrDir2 = 8 'KeyErr Anchor: Special Function to perform (such
as count repeats)
Const colKeyErrRowX = 9 'KeyErr Anchor: Special Function to perform (such
as count repeats)
Const colKeyErrColY = 10 'KeyErr Anchor: Special Function to perform (such
as count repeats)
Const colKeyErrRang = 11 'KeyErr Anchor: Special Function to perform (such
as count repeats)
Const colKeyErrStat = 12 'KeyErr Anchor: Special Function to perform (such
as count repeats)
Const colKeyErrMaxC = 20 'KeyErr Anchor: Special Function to perform (such
as count repeats)
Const colKeyErrMaxR = 20000 'KeyErr Anchor: Special Function to perform
(such as count repeats)

Dim errCount

Const rowKeyCodeMaxx = 100 'Key Code: Max possible Key Code Points for all
Keys

Dim LeftIndent
Dim alignLeft
Dim CreateNewWB
Dim shtCopy As Worksheet
Dim shtSummary As Worksheet
Dim shtPaste As Worksheet
Dim Check_For_Periods
Dim period_Count, period_Items
Dim wbFinal As Workbook


Dim KeyCount 'Number of Key Formats
Dim arrImport() As Variant
Dim arrPeriods() As Variant
Dim arrPerCode() As Variant
Dim arrHeadStr() As Variant
Dim arrDunsCode() As Variant
Dim arrProdCats() As Variant
Dim arrNewDuns() As Variant
Dim arrNewCats() As Variant
Dim arrKeyWords() As Variant
Dim arrCustomers() As Variant
Dim ctCustomers
Dim KeySearches, KeySearchRows, KeySearchCols

Dim arrReasons() As Variant
Dim ctAmendment_Reason

Dim arrNotes() As Variant
Dim CountNewDuns
Dim CountNewCats
Dim workbookCreated

Dim arrFileNameSetup() As Variant

Dim fileSmart
Dim fileBackup
Dim fileSort
Dim fileDelete
Dim fileDelPDF
Dim fileLocUnscanned
Dim fileLocBackup
Dim fileLocPDF
Dim fileLocScanned
Dim fileLocAuto
Dim fileLocExcel
Dim fileWorkDays
Dim typeConfirm
Dim fileKeywordScan
Dim rngCat As Range
Dim ScanTime
Dim ScanDate
Dim AutoSaveFile

Dim dateSummaryFormat
 
J

Joel

Your posting got truncated because it was so long. It did not see a close
statement in the code that was posted. Are you closing each file after you
read the data?
 
J

JP

With all due respect, your code is a mess. A few things I noticed:

1) You are declaring variables without typecasting them. For example:

Dim rowsMaster, colsMaster, lastCellMaster
Dim rowMax, rightCol
Dim FoundIndent
Dim matCount, matTotal
Dim ctCellMaster
Dim testRowCountMat
Dim alertStat
Dim tempXLFile

Excel is treating all of these as Variant type which is exponentially
increasing your runtime. Declaring them as variables of a specific
type will go a long way towards micro-optimization.

2) Never use "Dim xlApp As New Excel.Application" because you are
creating a new instance of Excel at the same time you are declaring
the variable. This is never the behavior you want. Are you using this
code in Excel? If so, you can just declare Workbook and Worksheet
variables and skip the reference to the Application object.

3) You can read a worksheet range into an array by simply putting the
Value property of the Range object into the array. For example:

Dim arrData() as Variant
arrData = Range("A1:D5480").Value

And the range will auto-dimension itself to accomodate.


I think perhaps also you should explain what your goal is, because
maybe there is a better way to do what you want, which would lead to
optimization of several orders of magnitude.

--JP
 
M

MikeZz

Joel,
Hope this is a little better to read. Thanks again1

At the very end of the first routine, you will see this code:

Master.Close SaveChanges:=False

Set masterSht = Nothing
Set Master = Nothing
xlApp.Quit
Set xlApp = Nothing 'ADDED FOR MEMORY
 
M

MikeZz

Hi JP,
Thanks for the tip on #1. It's duely noted. I never bothered in most cases
because it never impacted performance for me before in other projects where
I've created arrays up to 80 columns wide with 250,000 rows.

#2: I used "Dim xlApp as New...." because I was having problems with my
original code not releasing memory.
If you search for another of my threads "What is proper way to Open Close
Release xls files in VBA?", you will see my original code. The problem I was
originally having by not creating a new app instance was that the Task
Manager kept showing Excel memory going up. At around my 150th file, it all
started going very slow. The routine took about 45 minutes to complete with
TM showing over 100MB of Excel memory but it completed without memory crashes.

If you know of a better way to open and close several hundred excel files...
reading the data from sheet1 each time into an array, I'm all ears! BTW, the
data array gets recycled each time it opens a new file so I don't have to
maintain all the data in memory... I process the file's data then can release
the original data.

#3: Thanks for showing how to read a range into an array.... I've been
doing it the hard way for tooo long.

My Goal:
Read an excel file (with only one sheet) into an array.
Close the excel file.
Use my existing routines to extract pieces of data I need and put into a
"Summary" workbook.
Repeat this process up to several hundred times.
Not watch my hair turn grey while waiting for it to complete the job.

The files themselves are relatively small... 10 columns of data and maybe
only 500 rows.
Out of that, I only need a small amount of information.

I can process 100 files in under a couple minutes.
It can take 30-45 minutes to do 300 files.

Thanks for your help again,
MikeZz
 
J

Joel

It sounds like you are opening and closing the files properly. I can't fix
memory leaks in excel. I know they exist. I don't know if the files you are
opening and closing are casing the meory leak or the spreadsheet you are
running the macro is growing very large. One thing yu may try is to perform
a save after you open every 10 files on the Master Worksheet. This may
release some memory. the second thing you can try is reading the workbooks
without opening the workbooks. There is a method in doing this. You see the
file struct of microsoft applications (word, access, excel) use the same
structure. When you have large Access Databases that are shared by many
people you want to allow the database to be read without opening. You can
use Access commands to read excel files. Excel and Access use the same TABLE
structure to store and read data. Excel command are more flexible than
access. Excel allows random access of the tables while access yu have to
move to the cell location before you can read the data. The move
instructions say move one row or move one column until you reach the lcoation
you are looking for.
 
J

JP

The proper way, which I'm sure you know, is to declare an object of
the type you want, then destroy it at the end of the macro:

Sub MyMacro()
Dim xl As Application ' Excel.Application
Set xl = Application
' your code here
Set xl = Nothing
End Sub

I think at least part of the problem is your goal statement. Your
actual goal appears to be to summarize data contained in several
hundred workbooks. But your goal is polluted with one particular path
towards it. Rethinking the path to your goal might help write better
code.

1) Is there a better way to accomplish this goal --
For example, can you use the Pivot Table's Multiple Consolidation
Ranges feature to grab the information from each workbook? The ranges
all have to have the same headers. If they aren't like that, can they
be made that way?

2) Is there anything you know about the files/folders that can be used
to optimize the code --
Are the files are kept in the same folder? If not, can they be moved
into the same folder for this purpose?
Are the files static, or are they updated periodically? If so, are all
of them updated, or just some?
Do the ranges you want to copy in each file have the same name and
size? Or is each range in each worksheet a different size?

3) Why does the information need to be read into an array?
Can you simply cut and paste the information into one super-worksheet?

4) Have you stepped through the code to see where it could be
shortened or changed?

5) The code section that starts "colFileName = col0: col0 = col0 + 1
'Filename "
What are you trying to do here? Can the information be handled some
other way?

6) What is the purpose of the code section that starts "For r = 1 To
rowsMaster " - you should try to lessen hits on the worksheet as much
as possible using the array technique I showed you. Then you can loop
through the array, which will be much faster than telling VBA to keep
hitting the worksheet, which (relatively speaking) is much more costly
in terms of speed and efficiency.


I found a routine that might help you. It loops through a folder and
you use it to do something with each file found.

http://www.vbaexpress.com/kb/getarticle.php?kb_id=245

HTH,
JP
 
J

Joel

I believe this is a memory leak

Set xl = Nothing

You aren't releasing any memory. All you are doing is setting the variable
xl to nothing. The memory is still allocated to the program.

Try this

Sub MyMacro()
Dim xl
Set xl = CreateObject("word.application")
xl.Visible = True
Set xl = Nothing

End Sub

The word document stays opened. Close the word document manually. If the
object was invisiable it would still be running. You can prove this by not
making the object visible and creating the object in the code above. Then go
to Task Manager. You will still see Word running. Just because you cna't
see it running doesn't mean that it is NOT running.

Now this

Sub MyMacro()
Dim xl
Set xl = CreateObject("word.application")
xl.Visible = True

xl.Quit

End Sub
 
J

JP

But why are we even creating a new instance of the Excel application
at all? This code is running natively inside Excel (if I assume
correctly).

--JP
 
J

Joel

I ran a simple experiment. I wrote a macro to create 150 xls files and
another macro to read 150 macro files. I recorded the memory usages before
and after the code ran. I didn't see any significant lose of memory. There
was only minor changes. Below are my results and the two macros I wrote. I
don't think the problem is with opening and closing the workbooks. I suspect
the problem is with the Master Worksheet growing in size. I suspect if the
Master Workbook is closed most of the memory will be recovered.

Original

Physical (K)
total = 384536
Available = 59000
Cache = 109068

Kernel (K)
total = 81840
Paged = 75008
NonPaged = 6832

After Writes

Physical (K)
total = 384536
Available = 57428
Cache = 130532

Kernel (K)
total = 84028
Paged = 77004
NonPaged = 7024

After Reads
Physical (K)
total = 384536
Available = 55248
Cache = 134868

Kernel (K)
total = 84996
Paged = 77860
NonPaged = 7136




Sub MakeBooks()

Folder = "C:\temp\test2\"

For i = 1 To 150
With ThisWorkbook
.Sheets("sheet1").Copy
ActiveWorkbook.SaveAs Filename:=Folder & "Test" & i & ".xls"
ActiveWorkbook.Close
End With

Next i


End Sub


Sub ReadBooks()

Folder = "C:\temp\test2\"

FName = Dir(Folder & "*.xls")
RowCount = 1
Do While FName <> ""
Set OldBk = Workbooks.Open(Filename:=Folder & FName)
With ThisWorkbook.Sheets("Sheet1")
OldBk.Sheets("Sheet1").Rows(1).Copy _
Destination:=.Rows(RowCount)
End With
RowCount = RowCount + 1
OldBk.Close
FName = Dir()
Loop

End Sub
 
M

MikeZz

Hi JP, I'll try to answer all your questions.
First, I went down the path of creating new instances because I had a huge
memory problem ( Excel kept going higher and higher bringing it to a crawl ).
I tried creating a new instance because it seemed to resolve the issue. The
problem then became that it took forever just to read in the data... No hard
data but just looking at the clock I'd say it went from "instantly" to
several seconds.

Responding to your questions in a previous post:
First, let me explain the purpose of this app. Our customer sends us
hundreds of contracts each month in pdfs. The information is structured so
poorly it's very difficult to review... my guess is it's done this way
intentionally. If you saw it, you wouldn't believe it.

We use a batch pdf converter to convert the files to excel. I then wrote an
app that can pick bits and pieces of info spread all over a very chaotic the
file and summarize it in an excel table. The file itself is about 95% blank
cells and 99% of the actual data is useless. We only need the 1% that's
randomly spread in between. The end result of my app was really a quantum
leap in productivity. We would have someone manually read and retype a lot
of information from pdf contracts taking many hours a week. Now it's
relatively instant with a dramatic decrease in time with a dramatic increase
in accuracy of contract review.

Basically, I'm trying to fix a problem that in all honesty, could be fixed
by the customer just by reformating their contracts. The thing is that if
they make it easier to read, it would help to highlight errors that are
sometimes intentional... because it gives them more money. Harsh but true.

So anyway, I have all the logic to scan through an array to pluck the few
bits of useful info out of the file. It works and it works extremely well.
I just need to get the data into an array.

1)... Because of the above described randomness, a pivot table wouldn't work
because the files I'm opening are not in a table format.

2) I put the files into the same folder. They are just translations from
pdf so I don't update them. The ranges are all random. Basically I have to
read the entire file into an array... which is usually under 20 columns but
could be a couple thousand rows long.

3) I can't simply copy and paste data... like I said, what I'm doing is
like taking a shotgun blast, and putting 1 out of 100 pellets in a nice
straight line.

4) I've went through it pretty throughly. It's extremely complicated but I
think I've done a good job of getting it to solve a difficult problem.

5) The few routines I pasted are only a fraction of the total code. You
are seeing the entire Declaration which may be quite extensive.
colFileName = col0: col0 = col0 + 1
I've found the above to actually work rather nicely because in development,
I sometimes change the output table and this allows the most flexibility to
add or move data columns around.

6) r = 1 To rowsMaster
Part of my poor naming because I actually pulled this code from another
project.
Master is just the data file and arrMaster is just the temp array containing
the data found in the file called "Master".
I'll try the technique you mentioned as it is probably a lot quicker.
Is there a quick way to call range a1 to the "bottom right" of the file?

A couple other comments from Joel's post I'll also answer here:
I occaisionally auto save the summary file since it does get bigger and this
solved a previous out of memory error I had before, however, a summary file
with 150 contracts summarized in it is still only about 6MB of file size so
it's not much. I regularly deal with excel files pushing 30+MB without
issues.

.... Ok, after all that, I just tried the bulk read a range into an array
like you suggested and that it seemed to help ALOT. Now, if I pursue this
method of creating new xlApps, how do I stop my screen from blinking like
crazy... Basically before creating all the new apps, I could run the macro
and do anything else outside of excel. Now whenever the a file opens to read
(about 2x a second), it moves focus from whatever I'm doing back to excel...
can't even catch up on the news any more!

Thanks again for all your help... I'd be amazed if just a single command
will improve my code that much but hey, I'll take it any way I can get.

MikeZz
 
M

MikeZz

Hi Joel,
I just made a lengthy reply to JP with more info.

I think I have solved part of my issues with memory and speed but do have a
couple follow Q's for you:

Q1:
How can I read an excel file into memory without opening it?
The file is converstion of a pdf so it has no table structure.... no column
headers, just data spread all over with a lot of blank cells.

Q2:
Now the problem opening each data file in it's own xlApp, is that it takes
over my computer. Before, I could start the macro, minimize it and do other
things. Now Focus goes back to excel for each new file and so I can't type
for more than a half second before I jump back to excel.

Any further help would be great!

Thanks,
MikeZz
 
M

MikeZz

Hi JP, I'll try to answer all your questions.
First, I went down the path of creating new instances because I

had a huge memory problem ( Excel kept going higher and higher

bringing it to a crawl ). I tried creating a new instance because

it seemed to resolve the issue. The problem then became that it

took forever just to read in the data... No hard data but just

looking at the clock I'd say it went from "instantly" to several

seconds.

Responding to your questions in a previous post:
First, let me explain the purpose of this app. Our customer sends

us hundreds of contracts each month in pdfs. The information is

structured so poorly it's very difficult to review... my guess is

it's done this way intentionally. If you saw it, you wouldn't

believe it.

We use a batch pdf converter to convert the files to excel. I

then wrote an app that can pick bits and pieces of info spread all

over a very chaotic the file and summarize it in an excel table.

The file itself is about 95% blank cells and 99% of the actual

data is useless. We only need the 1% that's randomly spread in

between. The end result of my app was really a quantum leap in

productivity. We would have someone manually read and retype a

lot of information from pdf contracts taking many hours a week.

Now it's relatively instant with a dramatic decrease in time with

a dramatic increase in accuracy of contract review.

Basically, I'm trying to fix a problem that in all honesty, could

be fixed by the customer just by reformating their contracts. The

thing is that if they make it easier to read, it would help to

highlight errors that are sometimes intentional... because it

gives them more money. Harsh but true.

So anyway, I have all the logic to scan through an array to pluck

the few bits of useful info out of the file. It works and it

works extremely well. I just need to get the data into an array.

1)... Because of the above described randomness, a pivot table

wouldn't work because the files I'm opening are not in a table

format.

2) I put the files into the same folder. They are just

translations from pdf so I don't update them. The ranges are all

random. Basically I have to read the entire file into an array...

which is usually under 20 columns but could be a couple thousand

rows long.

3) I can't simply copy and paste data... like I said, what I'm

doing is like taking a shotgun blast, and putting 1 out of 100

pellets in a nice straight line.

4) I've went through it pretty throughly. It's extremely

complicated but I think I've done a good job of getting it to

solve a difficult problem.

5) The few routines I pasted are only a fraction of the total

code. You are seeing the entire Declaration which may be quite

extensive.
colFileName = col0: col0 = col0 + 1
I've found the above to actually work rather nicely because in

development, I sometimes change the output table and this allows

the most flexibility to add or move data columns around.

6) r = 1 To rowsMaster
Part of my poor naming because I actually pulled this code from

another project.
Master is just the data file and arrMaster is just the temp array

containing the data found in the file called "Master".
I'll try the technique you mentioned as it is probably a lot

quicker.
Is there a quick way to call range a1 to the "bottom right" of the

file?

A couple other comments from Joel's post I'll also answer here:
I occaisionally auto save the summary file since it does get

bigger and this solved a previous out of memory error I had

before, however, a summary file with 150 contracts summarized in

it is still only about 6MB of file size so it's not much. I

regularly deal with excel files pushing 30+MB without issues.

.... Ok, after all that, I just tried the bulk read a range into an

array like you suggested and that it seemed to help ALOT. Now, if

I pursue this method of creating new xlApps, how do I stop my

screen from blinking like crazy... Basically before creating all

the new apps, I could run the macro and do anything else outside

of excel. Now whenever the a file opens to read (about 2x a

second), it moves focus from whatever I'm doing back to excel...

can't even catch up on the news any more!

Thanks again for all your help... I'd be amazed if just a single

command will improve my code that much but hey, I'll take it any

way I can get.
 
M

MikeZz

Hi JP,
I think I posted back to Joel when it should have gone to you.
In case you are not notified of replies to him, please look at my 9/14 posts
further down in the thread to answer all your previous questions.

Thanks again, this thread is a lifesaver.
 
J

Joel

1) To speed up execution of your code and to stop blinking turn off screen
updting at the beginning of the macro and then turn it back on at the end

Application.ScreenUpdating = False

Application.ScreenUpdating = True

2) Excel is sometime bad at comnpacting workbooks to reduce memory size. It
may be best to create a new workbook with the your condensed table and savig
the final results as a new file. This will probably cure youor memory
problem. I suspect that maybe when you are creating the table you maybe
copying formulas rather than just the values which may be the reason for the
memory error.
 
M

MikeZz

Hi Joel,
Below is the code I am currently testing:
I've tried to use the Application.ScreenUpdating = False in the procedure
that calls this one but I still get the blinking (Focusing and Refocusing on
Excel).

I also seem to have another dilema. On my computer at Home, Task Manager
only had shown the single Excel Task when I was all done. At work, it shows
a new task for each file I open and both the qty of Processes and Memory
Usage continues to grow. Is there an options setting somewhere that is
impacting this?

Thanks again for all your help,

Sub ReadNewContract(fileNo, arrMaster)

Dim MasterFile
Dim f, c, r
Dim lngCount As Long

Dim arrTemp() As Variant

Dim Master As Workbook
Dim masterSht As Worksheet
Dim rowsMaster, colsMaster, lastCellMaster
Dim rowMax, rightCol
Dim FoundIndent

Dim matCount, matTotal
Dim ctCellMaster
Dim testRowCountMat
Dim alertStat
Dim tempXLFile

Dim xlApp As New Excel.Application 'ADDED FOR MEMORY

Dim FileString As String 'ADDED FOR MEMORY
xlApp.Application.Visible = True 'ADDED FOR MEMORY

'############################################################################################
'########### READ IN MASTER FIL
'############################################################################################

If fileNo = 1 Or IsEmpty(fileLocExcel) Then
fileLocExcel = Get_File_Info(arrFiles(fileNo, colFileName), "Directory")
End If

FileString = arrFiles(fileNo, colFileName) 'ADDED FOR MEMORY

xlApp.Workbooks.Open (FileString) 'Focus is now on the workbook 'ADDED FOR
MEMORY

Set Master = xlApp.ActiveWorkbook
Set masterSht = xlApp.ActiveSheet
MasterFile = Master.Name

lastCellMaster = LastCellIn(masterSht)
rowsMaster = LastRowIn(masterSht)
arrFiles(fileNo, colFileRows) = rowsMaster
colsMaster = LastColIn(masterSht)

colsMaster = Mid(Cells(1, colsMaster).Address, 2, 1)

If rowsMaster = Empty Or colsMaster = Empty Then
'File was not converted to Excel propertly
arrFiles(fileNo, colFileStat) = "Empty File"
Master.Close SaveChanges:=False
errCount = errCount + 1
arrErrors(errCount, colKeyErrFile) = Get_File_Info(arrFiles(fileNo,
colFileName), "FileName") ' 1 'KeyErr Anchor: Name
arrErrors(errCount, colKeyErrCode) = "Empty Excel File" ' 2 'KeyErr
Anchor: Anchor or Code
arrErrors(errCount, colKeyErrDesc) = "Empty Excel File" ' 3 'KeyErr
Anchor: Name
arrErrors(errCount, colKeyErrStat) = "Empty Excel File" 'KeyErr Anchor:
Special Function to perform (such as count repeats)
Exit Sub
End If

ctCellMaster = 0

arrTemp = masterSht.Range("A1:" & colsMaster & rowsMaster).Value

arrMaster = arrTemp

Master.Close SaveChanges:=False
'Application.DisplayAlerts = alertStat

Set masterSht = Nothing
Set Master = Nothing
xlApp.Quit
Set xlApp = Nothing 'ADDED FOR MEMORY

End Sub
 
J

Joel

I don't think the appl is really savving memory. try again without the appl

Sub ReadNewContract(fileNo, arrMaster)

Dim MasterFile
Dim f, c, r
Dim lngCount As Long

Dim arrTemp() As Variant

Dim Master As Workbook
Dim masterSht As Worksheet
Dim rowsMaster, colsMaster, lastCellMaster
Dim rowMax, rightCol
Dim FoundIndent

Dim matCount, matTotal
Dim ctCellMaster
Dim testRowCountMat
Dim alertStat
Dim tempXLFile

Dim xlApp As New Excel.Application 'ADDED FOR MEMORY

Dim FileString As String 'ADDED FOR MEMORY
xlApp.Application.Visible = True 'ADDED FOR MEMORY

'############################################################################################
'########### READ IN MASTER FIL
'############################################################################################

If fileNo = 1 Or IsEmpty(fileLocExcel) Then
fileLocExcel = Get_File_Info(arrFiles(fileNo, colFileName), "Directory")
End If

FileString = arrFiles(fileNo, colFileName) 'ADDED FOR MEMORY

xlApp.Workbooks.Open (FileString) 'Focus is now on the workbook 'ADDED FOR
MEMORY

Set Master = ActiveWorkbook
Set masterSht = .ActiveSheet
MasterFile = Master.Name

lastCellMaster = LastCellIn(masterSht)
rowsMaster = LastRowIn(masterSht)
arrFiles(fileNo, colFileRows) = rowsMaster
colsMaster = LastColIn(masterSht)

colsMaster = Mid(Cells(1, colsMaster).Address, 2, 1)

If rowsMaster = Empty Or colsMaster = Empty Then
'File was not converted to Excel propertly
arrFiles(fileNo, colFileStat) = "Empty File"
Master.Close SaveChanges:=False
errCount = errCount + 1
' 1 'KeyErr Anchor: Name
arrErrors(errCount, colKeyErrFile) = _
Get_File_Info(arrFiles(fileNo, colFileName), "FileName")
' 2 'KeyErrAnchor: Anchor or Code
arrErrors(errCount, colKeyErrCode) = "Empty Excel File"
' 3 'KeyErr Anchor: Name
arrErrors(errCount, colKeyErrDesc) = "Empty Excel File"
' KeyErr Anchor:Special Function to perform (such as count repeats)
arrErrors(errCount, colKeyErrStat) = "Empty Excel File"
Exit Sub
End If

ctCellMaster = 0

arrTemp = masterSht.Range("A1:" & colsMaster & rowsMaster).Value

arrMaster = arrTemp

Master.Close SaveChanges:=False
'Application.DisplayAlerts = alertStat

End Sub
 
M

MikeZz

Hi Joel,
Just want to make sure I understand you correctly..

You want me to delete these lines:
Set masterSht = Nothing
Set Master = Nothing
xlApp.Quit
Set xlApp = Nothing 'ADDED FOR MEMORY

or get rid of the xlApp reference all together?
Is the purpose to release the extra Excel's in Task Manager or stop the
blinking?

Thanks,
MikeZz
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top