Extract Text From Multiple Word Files

A

andibevan

Hi All,

I have 82 MS word files in the same directory which all contain a tabl
and I need to extract a piece of information from each file.

The information I require from each word document is next to the ro
headings "Primary Effect" and "Secondary Effect" i.e.:-

Column 1 Coumn 2
Primary Effect INFORMATION to EXTRACT 1
Secondary Effect INFORMATION to EXTRACT 2

I think I essentially need something that will cycle through each fil
in the directory, open it, find the information in the cell next t
"Primary Effect" and "Secondary Effect" and copy it into th
spreadsheet against the file name.

Any help with this would be greatfully received.

Thanks

Andy
:confused
 
K

KL

Hi there,

The below code does seem to work, but I couldn't figure out how to make Word
documents invisible, which, I guess, should spead up the macro
significantly.

Regards, KL

--------------Code Start--------------
Private Function RemoveNoise(InValue) As String
With Application.WorksheetFunction
OutValue = UCase(InValue)
OutValue = .Substitute(OutValue, Chr(7), "")
OutValue = .Substitute(OutValue, Chr(9), "")
OutValue = .Substitute(OutValue, Chr(10), "")
OutValue = .Substitute(OutValue, Chr(13), "")
OutValue = .Substitute(OutValue, Chr(31), "")
OutValue = .Substitute(OutValue, Chr(160), "")
OutValue = .Substitute(OutValue, Chr(172), "")
OutValue = .Substitute(OutValue, Chr(182), "")
OutValue = .Substitute(OutValue, Chr(183), "")
OutValue = .Substitute(OutValue, "€", "")
OutValue = .Substitute(OutValue, " ", "")
End With
RemoveNoise = Trim(OutValue)
End Function

Sub ImportWordData()
Dim oAppWD As Object
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() > 0 Then
Application.ScreenUpdating = False
Set oAppWD = CreateObject("Word.Application")
For i = 1 To .FoundFiles.Count
oAppWD.Documents.Open FileName:=.FoundFiles(i)
'oAppWD.Visible = False
FileName = Dir(.FoundFiles(i))
With oAppWD.ActiveDocument.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
oAppWD.Documents.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub

--------------Code End--------------
 
K

KL

A slightly corected code:

--------------Code Start--------------
Private Function RemoveNoise(InValue) As String
With Application.WorksheetFunction
OutValue = UCase(InValue)
OutValue = .Substitute(OutValue, Chr(7), "")
OutValue = .Substitute(OutValue, Chr(9), "")
OutValue = .Substitute(OutValue, Chr(10), "")
OutValue = .Substitute(OutValue, Chr(13), "")
OutValue = .Substitute(OutValue, Chr(31), "")
OutValue = .Substitute(OutValue, Chr(160), "")
OutValue = .Substitute(OutValue, Chr(172), "")
OutValue = .Substitute(OutValue, Chr(182), "")
OutValue = .Substitute(OutValue, Chr(183), "")
End With
RemoveNoise = Trim(OutValue)
End Function

Sub ImportWordData()
Dim oAppWD As Object
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() > 0 Then
Application.ScreenUpdating = False
Set oAppWD = CreateObject("Word.Application")
For i = 1 To .FoundFiles.Count
oAppWD.Documents.Open FileName:=.FoundFiles(i)
'oAppWD.Visible = False
FileName = Dir(.FoundFiles(i))
With oAppWD.ActiveDocument.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
oAppWD.Documents.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub
--------------Code End--------------
 
K

KL

This macro in conjunction with the function I posted earlier does figure out
the visibility issue for Word application, but the processing speed seems to
be the same. For this macro to work you need to create a reference to
Microsoft Word Objects Library (in VBA Editor go TOOLS>REFERENCES... and
check Microsoft Word 9.0 [or whatever version is applicable] Objects
Library). Also, I forgot to mention that the macro serches for files in the
same folder where the excel file is located.

--------------Code Start--------------
Sub ImportWordData()
Dim oAppWD As Object
Dim wdDoc As Word.Document
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() > 0 Then
Application.ScreenUpdating = False
Set oAppWD = New Word.Application
oAppWD.Visible = False
For i = 1 To .FoundFiles.Count
Set wdDoc = oAppWD.Documents.Open(FileName:=.FoundFiles(i))
FileName = Dir(.FoundFiles(i))
With wdDoc.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
wdDoc.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub
--------------Code End--------------
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top