L
lists
I received some excellent responses regarding what I'm trying to
accomplish in the way of removing duplicates from a spreadsheet. I
have, however, found that this takes an extremely long time to finish.
I'm half way tempted to write a C program to do this as I have more
than 10,000 rows to work with per table.
One chunk for removing duplicates is found:
http://www.cpearson.com/excel/duplicat.htm
Can this be sped up?
I'm thinking that flagging all and then removing might speed things
up, but I'm not sure about this scenario.
Another chunk which actually appears to be quicker than the above,
which I modified to remove both duplicates and originals (originally
provided by Patrick Molloy) is:
Sub RemoveDupesAndOriginals()
Remove_Dupes 3
End Sub
Sub Remove_Dupes(testcol As Long)
Dim Col As Long
Dim lastrow As Long
Dim thisrow As Long
Dim lastrow2 As Long
Dim thisrow2 As Long
On Error GoTo EndMacro
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
' get the last column, then add the row numbers
Col = Range("A1").End(xlToRight).Column + 1
' get the last row
lastrow = Range("A1").End(xlDown).Row
lastrow2 = lastrow
' add a column fro the original row order
With Range(Cells(1, Col), Cells(lastrow, Col))
.Formula = "=Row()"
.Value = .Value
End With
' sort the table by the test column
With Range(Cells(1, 1), Cells(lastrow, Col))
.Sort Cells(1, testcol)
' remove duplicate
For thisrow = lastrow To 2 Step -1
If Cells(thisrow, testcol).Value = Cells(thisrow - 1,
testcol).Value Then
Cells(thisrow - 1, testcol + 2).Value = 1
Cells(thisrow, testcol + 2).Value = 1
Rows(thisrow).Delete
End If
Next
'Delete the originals which had duplicates
For thisrow2 = lastrow2 To 2 Step -1
If Cells(thisrow2, testcol + 2).Value = 1 Then
Rows(thisrow2).Delete
End If
Next
If Cells(1, testcol + 2).Value = 1 Then
Rows(1).Delete
End If
'restore whats left to the original order
.Sort Cells(1, Col)
End With
EndMacro:
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
End Sub
accomplish in the way of removing duplicates from a spreadsheet. I
have, however, found that this takes an extremely long time to finish.
I'm half way tempted to write a C program to do this as I have more
than 10,000 rows to work with per table.
One chunk for removing duplicates is found:
http://www.cpearson.com/excel/duplicat.htm
Can this be sped up?
I'm thinking that flagging all and then removing might speed things
up, but I'm not sure about this scenario.
Another chunk which actually appears to be quicker than the above,
which I modified to remove both duplicates and originals (originally
provided by Patrick Molloy) is:
Sub RemoveDupesAndOriginals()
Remove_Dupes 3
End Sub
Sub Remove_Dupes(testcol As Long)
Dim Col As Long
Dim lastrow As Long
Dim thisrow As Long
Dim lastrow2 As Long
Dim thisrow2 As Long
On Error GoTo EndMacro
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
' get the last column, then add the row numbers
Col = Range("A1").End(xlToRight).Column + 1
' get the last row
lastrow = Range("A1").End(xlDown).Row
lastrow2 = lastrow
' add a column fro the original row order
With Range(Cells(1, Col), Cells(lastrow, Col))
.Formula = "=Row()"
.Value = .Value
End With
' sort the table by the test column
With Range(Cells(1, 1), Cells(lastrow, Col))
.Sort Cells(1, testcol)
' remove duplicate
For thisrow = lastrow To 2 Step -1
If Cells(thisrow, testcol).Value = Cells(thisrow - 1,
testcol).Value Then
Cells(thisrow - 1, testcol + 2).Value = 1
Cells(thisrow, testcol + 2).Value = 1
Rows(thisrow).Delete
End If
Next
'Delete the originals which had duplicates
For thisrow2 = lastrow2 To 2 Step -1
If Cells(thisrow2, testcol + 2).Value = 1 Then
Rows(thisrow2).Delete
End If
Next
If Cells(1, testcol + 2).Value = 1 Then
Rows(1).Delete
End If
'restore whats left to the original order
.Sort Cells(1, Col)
End With
EndMacro:
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
End Sub