extraction VB code

D

david shapiro

I've put together this code to extract and id code data from an excel
workbook, but have come across quite a few bugs. I've tried to put
comments in the VB code describing what's going on at each stage. I
would appreciate it if someone could clear up the errors and get it
running. Thanks.

Dave Shapiro

Option Explicit
Sub extraction_codingmacro()

Dim wks As Worksheet
Dim SumWks As Worksheet
Dim myCell As Range
Dim oRow As Long
Dim myRng As Range

Set SumWks = Worksheets.Add
SumWks.Range("a1").Resize(1, 7).Value _
= Array("country", "source", "indicator", "data type",
"subgroup", "year", "value")

oRow = 1
For Each wks In ActiveWorkbook.Worksheets
If wks.Name = SumWks.Name Or wks.Name = "criteria file" Or _
wks.Name = "reference" Then
'do nothing
Else
wks.Select
Call preparefile
With wks
Set myRng = .Range("d8:aa" & _
.Cells(.Rows.Count, "A").End(xlUp).Row)
End With
With SumWks
For Each myCell In myRng.Cells
If myCell.Interior.ColorIndex = 3 Then

'the next two lines are supposed to filter out all
the rows with the words
' "GSD" in the B column of the row and rows with
the
words "AAA" in the D column
' of the row. But this doesn`t seem to work.
Could
you adjust this?

'If myCell.Cells(myCell.Row, "B").Text <> "GSD"
Then
'If InStr(1, myCell.Cells(myCell.Row,
"D").Text,"AAA", vbTextCompare) = 0
'Then
oRow = oRow + 1
.Cells(oRow, "A").Value _
= wks.Cells(myCell.Row, "A").Value
.Cells(oRow, "B").Value _
= wks.Cells(myCell.Row, "B").Value
.Cells(oRow, "C").Value _
= wks.Cells(myCell.Row, "C").Value
.Cells(oRow, "D").Value _
= wks.Cells(myCell.Row, "D").Value
.Cells(oRow, "E").Value _
= wks.Cells(myCell.Row, "E").Value
.Cells(oRow, "F").Value _
= wks.Cells(7, myCell.Column).Value
.Cells(oRow, "G").Value _
= myCell.Value
End If
'End If
'End If
Next myCell
End With
End If
Next wks

Call addmeasurementcolumn
Call noduplicaterows
Call extractall
Call codedata

End Sub

Sub preparefile()
'this procedure prepares the country worksheet. But due to the irregular
number of rows from
'the top of the page. The country name is sometimes on the 1st row, the
second row or the third row. It can differ from worksheet to worksheet.
Tthe table structure,
template and headings are always the same though. The name of the country,
for example, is always in the cell to the right of the cell containing the
words "country". This works for one country here. How could it be
adjusted to work for all countries?

Cells.Select 'this is just a copy-paste special value as the
original
Selection.Copy 'sheets are pivot tables and need to be made into
values
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
Columns("A:A").Select
Application.CutCopyMode = False
Selection.Insert Shift:=xlToRight
Selection.Insert Shift:=xlToRight 'two columns are created
Range("D1").Select
Selection.Copy
Range("A1").Select
ActiveSheet.Paste 'the name of the country is pasted here.
Range("D2").Select
Application.CutCopyMode = False
Selection.Copy
Range("B1").Select
ActiveSheet.Paste 'the name of the indicator is pasted here
Range("E1").Select
Application.CutCopyMode = False
ActiveCell.FormulaR1C1 = "x"
Columns("A:E").Select
Selection.SpecialCells(xlCellTypeBlanks).Select
Selection.FormulaR1C1 = "=R[-1]C" 'all blanks are filled in
with
right categories
Cells.Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
Application.CutCopyMode = False
Call cleanworksheet

End Sub

Sub cleanworksheet()
'For some reason, an error comes up here

Dim c As Range

For Each c In ActiveSheet.UsedRange
c = WorksheetFunction.Clean(c)
Next
'
End Sub
Sub addmeasurementcolumn()
'this procedure does the deletion of the "value" column and addition of the
"measurement" column
'It doesn't seem to be working. Also I don't think the word "number" is
copying for all
'the rows in the dataset. The number of rows varies from sheet to sheet.

ActiveSheet.Cells.Select
Cells.Find(What:="value", After:=ActiveCell, LookIn:=xlFormulas,
LookAt
_
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext,
MatchCase:= _
False).Activate
Columns("F:F").Select
Selection.Clear
Range("F1").Select
ActiveCell.FormulaR1C1 = "measurement"
Range("F2").Select
ActiveCell.FormulaR1C1 = "number"
Range("F2").Select
Selection.AutoFill Destination:=Range("F2:F19"), Type:=xlFillDefault
Range("F2:F19").Select
Cells.Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
End Sub

Sub noduplicaterows()

ActiveSheet.Cells.Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select
Range("A1:E19").AdvancedFilter Action:=xlFilterCopy,
CopyToRange:=Range(
_
"A21"), Unique:=True
Range("A1:A20").Select
Range("A20").Activate
Selection.EntireRow.Delete
Cells.Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
Sheets("Sheet1").Select
Sheets("Sheet1").Name = "criteria file"
Range("A1").Select
End Sub


Sub extractall()
' this procedure uses the "criteria file" created above to extract the full
set of data from
' the "source data" file

Sheets("source data").Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select
Range("A1:I53263").AdvancedFilter Action:=xlFilterInPlace,
CriteriaRange:= _
Sheets("criteria file").Range("A1:F7"), Unique:=False
ActiveWindow.ScrollColumn = 5
ActiveWindow.ScrollColumn = 1
Selection.Copy
Worksheets.Add
Sheets("sheet2").Name = "final data"
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
Cells.Select
Application.CutCopyMode = False
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False

End Sub

Sub codedata()
Dim rng As Range, rng2 As Range
Dim rng1 As Range, cell As Range
Dim sStr As String, sStr1 As String
'this procedure codes all the data rows in the file "final data". I have
used a combination
'cacatenation and vlookup technique to do the coding in mass. But
actually,
would it be possible
'to redo this section so that it cacatenates, vlookups and codes one row at
a time? For each
row, I`d also
'like to be able to check one row at a time whether there is an id code for
this row or
'not. And to put the rows which are id coded on one sheet and those for
which there is no id
'code on a separate sheet.

'prepare final data worksheet for coding
Sheets("final data").Select
Set rng = Cells(ActiveCell.Row, "IV").End(xlToLeft)
Set rng1 = Range(ActiveCell, Cells(Rows.Count,
ActiveCell.Column).End(xlUp))
ActiveCell.EntireColumn.Insert
Set rng2 = Range(rng1(1), rng)
Debug.Print rng2.Address
For Each cell In rng2
sStr1 = LCase(Cells(1, cell.Column))
If sStr1 = "indicator" Or sStr1 = "subgroup" Or sStr1 = "classification"
_
Or sStr1 = "gender" Or sStr1 = "measurement" Then
sStr = sStr & cell.Address(0, 0) & "&"
End If
Next
If Len(Trim(sStr)) = 0 Then
rng1.Offset(0, -1).EntireColumn.Delete
Exit Sub
End If
sStr = "=" & Left(sStr, Len(sStr) - 1)
rng1.Offset(0, -1).Formula = sStr

Columns("A:A").Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, SkipBlanks:= _
False, Transpose:=False
Application.CutCopyMode = False
Selection.Insert Shift:=xlToRight

'code from reference worksheet
Sheets("reference").Select
Range("B1").Select
Set rng = Cells(ActiveCell.Row, "IV").End(xlToLeft)
Set rng1 = Range(ActiveCell, Cells(Rows.Count,
ActiveCell.Column).End(xlUp))
ActiveCell.EntireColumn.Insert
Set rng2 = Range(rng1(1), rng)
Debug.Print rng2.Address
For Each cell In rng2
sStr1 = LCase(Cells(1, cell.Column))
If sStr1 = "indicator" Or sStr1 = "subgroup" Or sStr1 = "classification"
_
Or sStr1 = "gender" Or sStr1 = "measurement" Then
sStr = sStr & cell.Address(0, 0) & "&"
End If
Next
If Len(Trim(sStr)) = 0 Then
rng1.Offset(0, -1).EntireColumn.Delete
Exit Sub
End If
sStr = "=" & Left(sStr, Len(sStr) - 1)
rng1.Offset(0, -1).Formula = sStr

Columns("B:B").Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, SkipBlanks:= _
False, Transpose:=False

Sheets("final data").Select
Application.CutCopyMode = False
ActiveCell.FormulaR1C1 = "=VLOOKUP(RC[1],reference!RC:R[368]C[1],2,FALSE)"
Range("A1").Select
Selection.AutoFill Destination:=Range("A1:A13"), Type:=xlFillDefault
Range("A1:A13").Select
Columns("A:A").Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, SkipBlanks:= _
False, Transpose:=False
Sheets("reference").Select
ActiveWindow.ScrollRow = 1
Application.CutCopyMode = False
Selection.Delete Shift:=xlToLeft
Range("A1").Select

End Sub
 
B

Bob Kilmer

See bottom of message.

david shapiro said:
I've put together this code to extract and id code data from an excel
workbook, but have come across quite a few bugs. I've tried to put
comments in the VB code describing what's going on at each stage. I
would appreciate it if someone could clear up the errors and get it
running. Thanks.

Dave Shapiro

Option Explicit

<snip>

If myCell.Parent.Cells(myCell.Row, "B").Text <> "GSD" And _
InStr(1, myCell.Parent.Cells(myCell.Row, "D").Text, "AAA", _
vbTextCompare) = 0 Then

End If

ought to work here.
 
B

Bob Kilmer

You comments didn't ask about this particular sub, but this example
illustrates some tips that you could use to simplify your code and make it
easier to debug overall.

Sub noduplicaterows()

'' After selecting all the cells on the worksheet...
'' ActiveSheet.Cells.Select
''
'' these steps are meaningless.
'' Range(Selection, Selection.End(xlToRight)).Select
'' Range(Selection, Selection.End(xlDown)).Select
''
'' and there was no need to have selected anyway since you
'' are working on the range you specify: Range("A1:E19")
Range("A1:E19").AdvancedFilter Action:=xlFilterCopy, _
CopyToRange:=Range("A21"), Unique:=True

'' BTW, Don't Select like this...
'' Range("A1:A20").Select
'' Range("A20").Activate
'' Selection.EntireRow.Delete
''
'' Just delete the rows of the range like this:
Range("A1:A20").EntireRow.Delete

'' This...
'' Cells.Select
'' Selection.Copy
''
'' becomes this
Cells.Copy

Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, _
SkipBlanks:=False, Transpose:=False

'' This...
'' Sheets("Sheet1").Select
'' Sheets("Sheet1").Name = "criteria file"
'' Range("A1").Select
''
'' becomes this:
Sheets("Sheet1").Name = "criteria file"
Sheets("Sheet1").Range("A1").Select 'if you must
End Sub


david shapiro said:
I've put together this code to extract and id code data from an excel
workbook, but have come across quite a few bugs. I've tried to put
comments in the VB code describing what's going on at each stage. I
would appreciate it if someone could clear up the errors and get it
running. Thanks.

Dave Shapiro

Option Explicit
Sub extraction_codingmacro()

Dim wks As Worksheet
Dim SumWks As Worksheet
Dim myCell As Range
Dim oRow As Long
Dim myRng As Range

Set SumWks = Worksheets.Add
SumWks.Range("a1").Resize(1, 7).Value _
= Array("country", "source", "indicator", "data type",
"subgroup", "year", "value")

oRow = 1
For Each wks In ActiveWorkbook.Worksheets
If wks.Name = SumWks.Name Or wks.Name = "criteria file" Or _
wks.Name = "reference" Then
'do nothing
Else
wks.Select
Call preparefile
With wks
Set myRng = .Range("d8:aa" & _
.Cells(.Rows.Count, "A").End(xlUp).Row)
End With
With SumWks
For Each myCell In myRng.Cells
If myCell.Interior.ColorIndex = 3 Then

'the next two lines are supposed to filter out all
the rows with the words
' "GSD" in the B column of the row and rows with
the
words "AAA" in the D column
' of the row. But this doesn`t seem to work.
Could
you adjust this?

'If myCell.Cells(myCell.Row, "B").Text <> "GSD"
Then
'If InStr(1, myCell.Cells(myCell.Row,
"D").Text,"AAA", vbTextCompare) = 0
'Then
oRow = oRow + 1
.Cells(oRow, "A").Value _
= wks.Cells(myCell.Row, "A").Value
.Cells(oRow, "B").Value _
= wks.Cells(myCell.Row, "B").Value
.Cells(oRow, "C").Value _
= wks.Cells(myCell.Row, "C").Value
.Cells(oRow, "D").Value _
= wks.Cells(myCell.Row, "D").Value
.Cells(oRow, "E").Value _
= wks.Cells(myCell.Row, "E").Value
.Cells(oRow, "F").Value _
= wks.Cells(7, myCell.Column).Value
.Cells(oRow, "G").Value _
= myCell.Value
End If
'End If
'End If
Next myCell
End With
End If
Next wks

Call addmeasurementcolumn
Call noduplicaterows
Call extractall
Call codedata

End Sub

Sub preparefile()
'this procedure prepares the country worksheet. But due to the irregular
number of rows from
'the top of the page. The country name is sometimes on the 1st row, the
second row or the third row. It can differ from worksheet to worksheet.
Tthe table structure,
template and headings are always the same though. The name of the country,
for example, is always in the cell to the right of the cell containing the
words "country". This works for one country here. How could it be
adjusted to work for all countries?

Cells.Select 'this is just a copy-paste special value as the
original
Selection.Copy 'sheets are pivot tables and need to be made into
values
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
Columns("A:A").Select
Application.CutCopyMode = False
Selection.Insert Shift:=xlToRight
Selection.Insert Shift:=xlToRight 'two columns are created
Range("D1").Select
Selection.Copy
Range("A1").Select
ActiveSheet.Paste 'the name of the country is pasted here.
Range("D2").Select
Application.CutCopyMode = False
Selection.Copy
Range("B1").Select
ActiveSheet.Paste 'the name of the indicator is pasted here
Range("E1").Select
Application.CutCopyMode = False
ActiveCell.FormulaR1C1 = "x"
Columns("A:E").Select
Selection.SpecialCells(xlCellTypeBlanks).Select
Selection.FormulaR1C1 = "=R[-1]C" 'all blanks are filled in
with
right categories
Cells.Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
Application.CutCopyMode = False
Call cleanworksheet

End Sub

Sub cleanworksheet()
'For some reason, an error comes up here

Dim c As Range

For Each c In ActiveSheet.UsedRange
c = WorksheetFunction.Clean(c)
Next
'
End Sub
Sub addmeasurementcolumn()
'this procedure does the deletion of the "value" column and addition of the
"measurement" column
'It doesn't seem to be working. Also I don't think the word "number" is
copying for all
'the rows in the dataset. The number of rows varies from sheet to sheet.

ActiveSheet.Cells.Select
Cells.Find(What:="value", After:=ActiveCell, LookIn:=xlFormulas,
LookAt
_
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext,
MatchCase:= _
False).Activate
Columns("F:F").Select
Selection.Clear
Range("F1").Select
ActiveCell.FormulaR1C1 = "measurement"
Range("F2").Select
ActiveCell.FormulaR1C1 = "number"
Range("F2").Select
Selection.AutoFill Destination:=Range("F2:F19"), Type:=xlFillDefault
Range("F2:F19").Select
Cells.Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
End Sub

Sub noduplicaterows()

ActiveSheet.Cells.Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select
Range("A1:E19").AdvancedFilter Action:=xlFilterCopy,
CopyToRange:=Range(
_
"A21"), Unique:=True
Range("A1:A20").Select
Range("A20").Activate
Selection.EntireRow.Delete
Cells.Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
Sheets("Sheet1").Select
Sheets("Sheet1").Name = "criteria file"
Range("A1").Select
End Sub


Sub extractall()
' this procedure uses the "criteria file" created above to extract the full
set of data from
' the "source data" file

Sheets("source data").Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select
Range("A1:I53263").AdvancedFilter Action:=xlFilterInPlace,
CriteriaRange:= _
Sheets("criteria file").Range("A1:F7"), Unique:=False
ActiveWindow.ScrollColumn = 5
ActiveWindow.ScrollColumn = 1
Selection.Copy
Worksheets.Add
Sheets("sheet2").Name = "final data"
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False
Cells.Select
Application.CutCopyMode = False
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:=
_
False, Transpose:=False

End Sub

Sub codedata()
Dim rng As Range, rng2 As Range
Dim rng1 As Range, cell As Range
Dim sStr As String, sStr1 As String
'this procedure codes all the data rows in the file "final data". I have
used a combination
'cacatenation and vlookup technique to do the coding in mass. But
actually,
would it be possible
'to redo this section so that it cacatenates, vlookups and codes one row at
a time? For each
row, I`d also
'like to be able to check one row at a time whether there is an id code for
this row or
'not. And to put the rows which are id coded on one sheet and those for
which there is no id
'code on a separate sheet.

'prepare final data worksheet for coding
Sheets("final data").Select
Set rng = Cells(ActiveCell.Row, "IV").End(xlToLeft)
Set rng1 = Range(ActiveCell, Cells(Rows.Count,
ActiveCell.Column).End(xlUp))
ActiveCell.EntireColumn.Insert
Set rng2 = Range(rng1(1), rng)
Debug.Print rng2.Address
For Each cell In rng2
sStr1 = LCase(Cells(1, cell.Column))
If sStr1 = "indicator" Or sStr1 = "subgroup" Or sStr1 = "classification"
_
Or sStr1 = "gender" Or sStr1 = "measurement" Then
sStr = sStr & cell.Address(0, 0) & "&"
End If
Next
If Len(Trim(sStr)) = 0 Then
rng1.Offset(0, -1).EntireColumn.Delete
Exit Sub
End If
sStr = "=" & Left(sStr, Len(sStr) - 1)
rng1.Offset(0, -1).Formula = sStr

Columns("A:A").Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, SkipBlanks:= _
False, Transpose:=False
Application.CutCopyMode = False
Selection.Insert Shift:=xlToRight

'code from reference worksheet
Sheets("reference").Select
Range("B1").Select
Set rng = Cells(ActiveCell.Row, "IV").End(xlToLeft)
Set rng1 = Range(ActiveCell, Cells(Rows.Count,
ActiveCell.Column).End(xlUp))
ActiveCell.EntireColumn.Insert
Set rng2 = Range(rng1(1), rng)
Debug.Print rng2.Address
For Each cell In rng2
sStr1 = LCase(Cells(1, cell.Column))
If sStr1 = "indicator" Or sStr1 = "subgroup" Or sStr1 = "classification"
_
Or sStr1 = "gender" Or sStr1 = "measurement" Then
sStr = sStr & cell.Address(0, 0) & "&"
End If
Next
If Len(Trim(sStr)) = 0 Then
rng1.Offset(0, -1).EntireColumn.Delete
Exit Sub
End If
sStr = "=" & Left(sStr, Len(sStr) - 1)
rng1.Offset(0, -1).Formula = sStr

Columns("B:B").Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, SkipBlanks:= _
False, Transpose:=False

Sheets("final data").Select
Application.CutCopyMode = False
ActiveCell.FormulaR1C1 = "=VLOOKUP(RC[1],reference!RC:R[368]C[1],2,FALSE)"
Range("A1").Select
Selection.AutoFill Destination:=Range("A1:A13"), Type:=xlFillDefault
Range("A1:A13").Select
Columns("A:A").Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, SkipBlanks:= _
False, Transpose:=False
Sheets("reference").Select
ActiveWindow.ScrollRow = 1
Application.CutCopyMode = False
Selection.Delete Shift:=xlToLeft
Range("A1").Select

End Sub
 
B

Bob Kilmer

I am posting pieces because perhaps a little is better than none. I probably
won't get thru it all, but the first thing I'd recommend to you is to
simplify the code, throw out stuff that is pointless, improve the formatting
and then improve the functioning of what you have left. Here is the
annotated Sub "addmeasurementcolumn."

Sub addmeasurementcolumn()
'this procedure does the deletion of the "value" column and
'addition of the "measurement" column It doesnt seem to be
'working. Also I dont think the word "number" is copying for
'all the rows in the dataset. The number of rows varies from
'sheet to sheet.

'' Pointless:
'' ActiveSheet.Cells.Select
''
'' because the next lines says you are working on the cells of
'' the active sheet. Your code need not have selected the cells first.
Cells.Find(What:="value", After:=ActiveCell, _
LookIn:=xlFormulas, LookAt:=xlPart, SearchOrder:=xlByRows, _
SearchDirection:=xlNext, MatchCase:=False).Activate
Columns("F:F").Clear

'' This....
'' Range("F1").Select
'' ActiveCell.FormulaR1C1 = "measurement"
'' becomes:
Range("F1").FormulaR1C1 = "measurement"
'' etc.
Range("F2").FormulaR1C1 = "number"
Range("F2").AutoFill Destination:=Range("F2:F19"), Type:=xlFillDefault

'' This bit of code is suspect.
'' It says you are selecting Range("F2:F19"), but then copying
'' every cell in the sheet, and pasting it into Range("F2:F19").
Range("F2:F19").Select
Cells.Copy 'copying all cells in the workbook here
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, _
SkipBlanks:=False, Transpose:=False

'' If you want to copy and paste to Range("F2:F19"), just use
'' Range("F2:F19").Copy
'' Range("F2:F19").PasteSpecial Paste:=xlValues, Operation:=xlNone, _
'' SkipBlanks:=False, Transpose:=False
'' after all, a Selection is just a range that you can better specify
explicitly.
''
'' To copy the active sheet's cells and paste 'em back as values use:
'' Cells.Copy
'' Range("A1").PasteSpecial Paste:=xlValues, Operation:=xlNone, _
'' SkipBlanks:=False, Transpose:=False

End Sub


david shapiro said:
I've put together this code to extract and id code data from an excel
workbook, but have come across quite a few bugs. I've tried to put

<balance of message deleted for brevity. See original post.>
 
B

Bob Kilmer

My comments are preceded by ''. Paring down this and other procedures will
make it easier to see what changes need to be made to improve
functionality.

Sub preparefile()
'this procedure prepares the country worksheet. But due to the
'irregular number of rows from the top of the page. The country
'name is sometimes on the 1st row, the second row or the third
'row. It can differ from worksheet to worksheet. The table
'structure, template and headings are always the same though.
'The name of the country, for example, is always in the cell to
'the right of the cell containing the words "country". This
'works for one country here. How could it be adjusted to work
'for all countries?

'this is just a copy-paste special value as the original
'sheets are pivot tables and need to be made into Values

Cells.Copy 'copying all cells in active sheet

'' I'd specify the target range rather than use Selection,
'' using Range("A1").PasteSpecial Paste:=... or
'' Cells(1).PasteSpecial Paste:=... to designate the
'' top left cell of the active sheet.
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, _
SkipBlanks:=False, Transpose:=False

Application.CutCopyMode = False

'two columns are created
'' Columns("A:A").Select
'' Selection.Insert Shift:=xlToRight
'' Selection.Insert Shift:=xlToRight
''
'' Better:
Columns("A:B").Insert Shift:=xlToRight

'the name of the country is pasted here.
'' Range("D1").Copy
'' Range("A1").Select
'' ActiveSheet.Paste
''
'' Better:
Range("D1").Copy Range("A1")
'' says "copy contents of cell D1 to cell A1"

Application.CutCopyMode = False

'the name of the indicator is pasted here
'' Range("D2").Select
'' Selection.Copy
'' Range("B1").Select
'' ActiveSheet.Paste
''
'' Better:
Range("D2").Copy Range("B1")

Application.CutCopyMode = False

'' This...
'' Range("E1").Select
'' ActiveCell.FormulaR1C1 = "x"
'' becomes:
Range("E1").FormulaR1C1 = "x"

'all blanks are filled in with Right categories
'' This...
'' Columns("A:E").SpecialCells(xlCellTypeBlanks).Select
'' Selection.FormulaR1C1 = "=R[-1]C"
'' becomes
Columns("A:E").SpecialCells(xlCellTypeBlanks).FormulaR1C1 = "=R[-1]C"

Cells.Copy
'' Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, _
Cells(1).PasteSpecial Paste:=xlValues, Operation:=xlNone, _
SkipBlanks:=False, Transpose:=False

Application.CutCopyMode = False

Call cleanworksheet

End Sub


david shapiro said:
I've put together this code to extract and id code data from an excel
workbook, but have come across quite a few bugs. I've tried to put
comments in the VB code describing what's going on at each stage. I
would appreciate it if someone could clear up the errors and get it
running. Thanks.

Dave Shapiro

<balance of message deleted for brevity. See original post.>
 
B

Bob Kilmer

Sub cleanworksheet()
'For some reason, an error comes up here
'' What error?
On Error GoTo ErrHandler

Dim c As Range
For Each c In ActiveSheet.UsedRange.Cells
c.Value = WorksheetFunction.Clean(c.Text)
Next

Exit Sub
ErrHandler:
Debug.Print "Error #: " & Err.Number & vbNewLine; "Description: " &
Err.Description

End Sub

david shapiro said:
I've put together this code to extract and id code data from an excel
workbook, but have come across quite a few bugs. I've tried to put
comments in the VB code describing what's going on at each stage. I
would appreciate it if someone could clear up the errors and get it
running. Thanks.

Dave Shapiro
<balance of message deleted for brevity. See original post.>
 
M

Myrna Larson

What's the error number and description, and on what line does it occur? Have
you stepped through the code with F8?
 
B

Bob Kilmer

More commentary in the code. See double-apostrophe comments, as before.

Sub extractall()
' this procedure uses the "criteria file" created above to
' extract the full set of data from the "source data" file

'' The call that precedes this is noduplicaterows,
'' so the Selection is this - where noduplicaterows leaves off - I believe.
''
'' Sheets("Sheet1").Select
'' Sheets("Sheet1").Name = "criteria file"
'' Range("A1").Select
''
'' This routine would be so much more readable if this it used
'' Sheets("criteria file").Range("A1") or whatever is appropriate
'' instead of Selection.

Sheets("source data").Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select

Range("A1:I53263").AdvancedFilter Action:=xlFilterInPlace, _
CriteriaRange:=Sheets("criteria file").Range("A1:F7"), _
Unique:=False
ActiveWindow.ScrollColumn = 5 ''necessary?
ActiveWindow.ScrollColumn = 1 ''necessary?

'' Not sure of the effect of the filter on the selection.
'' If nothing, then the selection is all the contiguous data,
'' starting at cell A1.
Selection.Copy

'' It looks to me that you know that Sheet2 happens to be the
'' added sheet, but this is not necessarily the case in an
'' arbitrary workbook.
''
'' Worksheets.Add
'' Sheets("sheet2").Name = "final data"
'' Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, _
'' SkipBlanks:=False, Transpose:=False

'' Better:
'' Get a specific reference to the added sheet.
Dim wks As Worksheet
Set wks = Worksheets.Add
'name it
wks.Name = "final data"
'paste data in it
wks.Cells(1).PasteSpecial Paste:=xlValues, Operation:=xlNone, _
SkipBlanks:=False, Transpose:=False

'' This section looks like it is copying and pasting the same data
'' that was just added since Cells always refers to the active sheet.
'' If so, it can be omitted.
''
'' Cells.Select
'' Application.CutCopyMode = False
'' Selection.Copy
'' Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, _
'' SkipBlanks:=False, Transpose:=False

Set wkb = Nothing 'done with this object

End Sub

Sub codedata()
'this procedure codes all the data rows in the file "final
'data ". I have used a combination cacatenation and vlookup"
'technique to do the coding in mass. But actually, would it be
'possible to redo this section so that it cacatenates, vlookups
'and codes one Row at a time? For each row, I`d also like to be
'able to check one row at a time whether there is an id code
'for this row or not. And to put the rows which are id coded
'on one sheet and those for which there is no id code on a
'separate sheet.

Dim rng As Range, rng2 As Range
Dim rng1 As Range, cell As Range
Dim sStr As String, sStr1 As String

'prepare final data worksheet for coding
'' rather than...
'' Sheets("final data").Select
'' Cells.Select
'' Application.CutCopyMode = False
'' Selection.Copy
'' Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone, _
'' SkipBlanks:=False, Transpose:=False

'' I'd prefer...
Sheets("final data").Cells.Copy
Sheets("final data").Cells(1).PasteSpecial _
Paste:=xlValues, _
Operation:=xlNone, _
SkipBlanks:=False, _
Transpose:=False
'' less ambiguous.

'' Isn't AcitiveCell A1 of Sheets("final data")?
''
Set rng = Cells(ActiveCell.Row, "IV").End(xlToLeft)
Set rng1 = Range(ActiveCell, Cells(Rows.Count, _
ActiveCell.Column).End(xlUp))

ActiveCell.EntireColumn.Insert
Set rng2 = Range(rng1(1), rng)

Debug.Print rng2.Address

- - -
David,
I am losing it here. W/o the actual workbook, it is hard for me to be sure
what cell is active or what the selection is, et al. But, no thanks. I don't
want the actual workbook. Perhaps my comments have been helpful. If you
tighten up this code, perhaps it will become clearer to you or others how
best to optimize it.

Regards,
Bob

david shapiro said:
I've put together this code to extract and id code data from an excel
workbook, but have come across quite a few bugs. I've tried to put
comments in the VB code describing what's going on at each stage. I
would appreciate it if someone could clear up the errors and get it
running. Thanks.
<balance of message deleted for brevity. See original post.>
 
B

Bob Kilmer

This is a suggested substitution for Dave Shapiro to employ to record the
error number and description or an error when it occurs and to hint at error
handling. As written, it will also "swallow" the error, quitely writing it
out, but not interupting exectution, so it needs be modified to re-raise an
error or only used while debugging if that is a problem.

Of course stepping thu the code when it breaks and making a note of the
exact error message when it comes up are wise and ultimately simpler
alternatives.

Bob

Myrna Larson said:
What's the error number and description, and on what line does it occur? Have
you stepped through the code with F8?
 
D

david shapiro

Bob,

Thanks for the suggestions on the code in the macro. I am in the
process of trying them out and tightening the code.

That last part in the macro - sub codedata - this is what it's doing:

It takes the dataset in the worksheet "final data" which has several
columns which have various headings (this worksheet had been created in
the last sub). A new column is added as the first column and given the
heading "indicator id". The objective is to find the indicator id code
there for the data in the row, to loop through and do this row by row
until the end of the dataset.

The indicator ID code can be found in the "reference" worksheet. The
correct indicator id code in the "reference" file is the one for which
the data row in the worksheets "final data" and "reference" shares the
same contents in the columns headed by:
indicator, subgroup, gender and measurement.

I have thought one possible way might be to cacatenate the indicator,
subgroup, gender and measurement columns in both the "source data" and
"reference" worksheets, compare them using a vlookup to find the correct
indicator id code in the "reference" worksheet, and then put that
indicator id code in the created blank column (1st column) in the "final
data" worksheet. And to loop through so it does this for all the rows
one at a time. it would be good too if all the rows for which an
indicator id code could not be found in the "reference" worksheet are
put in a separate newly created worksheet page. How do you suggest is
the best way to do this?

Regards,
Dave
 
B

Bob Kilmer

Dave,
Is it the indicator ID code that you want in the long run, or the data in
the source that the indicator ID code represents? Is the indicator ID code
an end in itself, or a means to an end?

Putting aside how to achieve the goal, what is the goal of Sub codedata? Are
you saying that the goal is to separate the data in "source data" into data
that matches "reference" and data that does not?

Putting aside how you would implement it in code, how would you describe the
process in human terms? In other words, what instructions would you give to
a human helper so they could do manually what you want to accomplish?

Am I understanding correctly that "source data," "reference" and "final
data" have essentially the same format?

Bob
 
D

david shapiro

Bob,

The goal of sub codedata is to take the "final data" worksheet, add a
column and to put in this column the correct id code for every row in
the dataset.

You can disregard the "source data" worksheet as it no longer applies
for sub codedata. An example of the format for the "final data"
worksheet is data in columns running across with these headings:
country year source datatype subgroup gender measurement value
Canada 1980 ILO census age 15-19 men number 104.4

The goal is to add the id code to the above for every row in the dataset
such as:

id code country year source datatype subgroup gender measurement value
11771 Canada 1980 ILO census age 15-19 men number 104.4

The "reference" worksheet is data in columns with similar headings and
info:
id no. source datatype subgroup gender measurement

When the source, datatype, subgroup, gender and measurement match
exactly for the row in "final data" and in "reference", the "reference"
sheet provides the id no. for this indicator.

This is the id code I'd like to put in the first column of "final data"
for every row of data.

It would be good if the final product of the macro is the following:

1) the "final data" worksheet with the following addition to the data:
an indicator id column has been inserted and all the rows now have the
correct indicator id number. The vlookup/cacatenation technique is just
one way I thought the coding could be done, but if there is another more
effective way to do the coding, feel free to apply that.

2) a new created worksheet called "id codes missing" which contains a
list of all the rows of data for which an id code could not be found.
(The "final data" worksheet at the end should only contain those rows
for which an id code could be found, and all those for which the code
could not be found are in this sheet).

Hope I've described it clearly. Just let me know if you need more
clarification.

Dave
 
D

david shapiro

Bob and Myrna (I`ve added the show error option),

I`ve reworked the code with the suggestions you made, and some of the
errors seem to have cleared up. The program now seems to run ok until
sub addmeasurementcolumn where there is "compile error: syntax error" at
this line

Selection.AutoFill Destination:=Range("F2:???), Type:=xlFillDefault

The ??? are supposed to be to the last row in the dataset.

I wasn`t sure how to define the range for this, as I noticed in the
previous version the autofill was stopping midway and not going on to
the last of the rows in the dataset. There are also some other areas in
the next two subs where I`ve put "???" being not sure how to approach
this. How can these be fixed?

Here is the new code I`ve reworked up to sub codedata incorporating your
suggestions. Bob, I also sent you earlier a description for the last
sub, sub codedata. Thanks.

Dave

Option Explicit
Sub extraction_codingmacro()

Dim wks As Worksheet
Dim SumWks As Worksheet
Dim myCell As Range
Dim oRow As Long
Dim myRng As Range

Set SumWks = Worksheets.Add
SumWks.Range("a1").Resize(1, 7).Value _
= Array("country", "source", "indicator", "data type",
"subgroup", "year", "value")

oRow = 1
For Each wks In ActiveWorkbook.Worksheets
If wks.Name = SumWks.Name Or wks.Name = "criteria file" Or _
wks.Name = "reference" Then
'do nothing
Else
wks.Select
Call preparefile
With wks
Set myRng = .Range("d8:aa" & _
.Cells(.Rows.Count, "A").End(xlUp).Row)
End With
With SumWks
For Each myCell In myRng.Cells
If myCell.Interior.ColorIndex = 3 Then

'the next two lines are supposed to filter out
all the rows with the words
' "GSD" in the B column of the row and rows with
the words "AAA" in the D column
' of the row. But this doesn`t seem to work.
Could you adjust this?

If myCell.Parent.Cells(myCell.Row, "C").Text <>
"GSD" And _
InStr(1, myCell.Parent.Cells(myCell.Row,
"E").Text, "AAA", _
vbTextCompare) = 0 Then
oRow = oRow + 1
.Cells(oRow, "A").Value _
= wks.Cells(myCell.Row, "A").Value
.Cells(oRow, "B").Value _
= wks.Cells(myCell.Row, "B").Value
.Cells(oRow, "C").Value _
= wks.Cells(myCell.Row, "C").Value
.Cells(oRow, "D").Value _
= wks.Cells(myCell.Row, "D").Value
.Cells(oRow, "E").Value _
= wks.Cells(myCell.Row, "E").Value
.Cells(oRow, "F").Value _
= wks.Cells(7, myCell.Column).Value
.Cells(oRow, "G").Value _
= myCell.Value
End If
End If
Next myCell
End With
End If
Next wks

Call addmeasurementcolumn
Call noduplicaterows
Call extractall
Call codedata

End Sub

Sub preparefile()
'this procedure prepares the file, but due to the irregular spacing of
rows for
'different countries(structure of tables and headings are always the
same though),
'this only works here for Austria. How could it be adjusted to work for
all countries?

Cells.Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:= _
False, Transpose:=False
Columns("A:A").Select
Application.CutCopyMode = False
Selection.Insert Shift:=xlToRight
Selection.Insert Shift:=xlToRight
Range("D1").Select
Selection.Copy
Range("A1").Select
ActiveSheet.Paste
Range("D2").Select
Application.CutCopyMode = False
Selection.Copy
Range("B1").Select
ActiveSheet.Paste
Range("E1").Select
Application.CutCopyMode = False
ActiveCell.FormulaR1C1 = "x"
Columns("A:E").Select
Selection.SpecialCells(xlCellTypeBlanks).Select
Selection.FormulaR1C1 = "=R[-1]C"
Cells.Select
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:= _
False, Transpose:=False
Application.CutCopyMode = False
Call cleanworksheet

End Sub

Sub cleanworksheet()

On Error GoTo errhandler

Dim c As Range

For Each c In ActiveSheet.UsedRange
c = WorksheetFunction.Clean(c)
Next

Exit Sub
errhandler:
Debug.Print "error #: " & Err.Number & vbNewLine; "description: " &
Err.Description

End Sub
Sub addmeasurementcolumn()
'the deletion of the "value" column and addition of the "measurement"
column
'doesn't seem to be working. Also I don't think the word "number" is
copying for all
' the rows in the dataset

Cells.Find(What:="value", After:=ActiveCell, LookIn:=xlFormulas,
LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext,
MatchCase:= _
False).Activate
Columns("F:F").Select
Selection.Clear
Range("F1").FormulaR1C1 = "measurement"
Range("F2").FormulaR1C1 = "number"
Range("F2").Select
Selection.AutoFill Destination:=Range("F2:???), Type:=xlFillDefault
Range("F2:???).Copy
Range ("F2:???).PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:= _
False, Transpose:=False
End Sub

Sub noduplicaterows()

range("A1").CurrentRegion.select.advancedfilter
Action:=xlfiltercopy,_
copytorange:=range(???), Unique:=true
Sheets("sheet1").Name = "criteria file"
Sheets("criteria file").Range(A1).Select

End Sub


Sub extractall()
' this procedure uses the "criteria file" created above to extract the
full set of data from
' the "source data" file

Sheets("source data").Select
Set rng = ActiveSheet.Range(A1).CurrentRegion
.AdvancedFilter Action:=xlFilterInPlace, CriteriaRange:= _
Sheets("criteria file").Range("A1:F7"), Unique:=False
Selection.Copy
Worksheets.Add
Sheets("sheet2").Name = "final data"
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:= _
False, Transpose:=False
Cells.Select
Application.CutCopyMode = False
Selection.Copy
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:= _
False, Transpose:=False

End Sub
 
D

david shapiro

Bob,

I've cleaned up a number of errors, and the program seems to be working
now up to sub extractall. At sub extractall, I wasn't sure how to
define the range so that it automatically takes the whole range for the
dataset for the first worksheet and the criteria. How can it be changed
from the hardcoding it has now? (The code for this sub is attached
below. (I know I need to tighten it a bit too.))

Afterwards, it's just the sub codedata. I sent you a description on
this earlier today. Let me know if you need more description. Thanks.

Dave

sub extractall()

Sheets("source data").Select
Set rng = ActiveSheet.Range(A1).CurrentRegion
AdvancedFilter Action:=xlFilterInPlace, CriteriaRange:= _
Sheets("criteria").Range("A1:G5"), Unique:=False
Sheets.add
Sheets("source data").Select
Selection.SpecialCells(xlCellTypeVisible).Select
Selection.Copy
Sheets("Sheet1").Select
Selection.PasteSpecial Paste:=xlValues, Operation:=xlNone,
SkipBlanks:= _
False, Transpose:=False
Range("A1").Select
End Sub
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top