Hi Roger,
The core of the macro you need is given at
http://word.mvps.org/FAQs/MacrosVBA/DeleteParaRnge.htm. I've put in code
before that to get each word in a separate paragraph, without punctuation,
and sorted.
Sub SortNoDups()
Dim AmountMoved As Long
Dim myRange As Range
Set myRange = ActiveDocument.Range
myRange.Style = ActiveDocument.Styles("Normal")
' remove punctuation
With myRange.Find
.ClearFormatting
.Format = False
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.Text = "[!A-Za-z0-9 ^13]"
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
End With
' remove graphics
Set myRange = ActiveDocument.Range
With myRange.Find
.ClearFormatting
.Format = False
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = False
.Text = "^g"
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
End With
' change all spaces to paragraph marks
Set myRange = ActiveDocument.Range
With myRange.Find
.ClearFormatting
.Format = False
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = False
.Text = " "
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
End With
' sort paragraphs (single words)
Set myRange = ActiveDocument.Range
myRange.Sort
'start with first paragraph and extend range down to second
Set myRange = ActiveDocument.Paragraphs(1).Range
AmountMoved = myRange.MoveEnd(unit:=wdParagraph, Count:=1)
'loop until there are no more paragraphs to check
Do While AmountMoved > 0
'if two paragraphs are identical (case-insensitive), delete
'second one and add the one after that to myRange so it can
'be checked
If LCase(myRange.Paragraphs(1).Range.Text) = _
LCase(myRange.Paragraphs(2).Range.Text) Then
myRange.Paragraphs(2).Range.Delete
AmountMoved = myRange.MoveEnd(unit:=wdParagraph, Count:=1)
Else
'if two paragraphs aren't identical, add the one after
'that to my range, so it can be checked, and drop the first one,
'since it is no longer of interest.
AmountMoved = myRange.MoveEnd(unit:=wdParagraph, Count:=1)
myRange.MoveStart unit:=wdParagraph, Count:=1
End If
Loop
End Sub
A couple of caveats: (1) The "remove graphics" bit will remove only inline
graphics, not floating ones. If that's a problem for you, I can fix it. (2)
I've seen Word fail to sort large documents, but I don't know what the
maximum usable size is. Try it...