Stephany,
Thanks for the code. I tried your sample, it doesn't seem to work. I'm
assuming _s variable is the string to be parsed and need not
necessarily have the fieldnames like Lastname etc, right?
How does the regex engine know which 26 characters to take when extracting
City, rather than just taking the first 26 characters? Please explain, and
excuse my ignorance; I have never used regular expressions.
Imports System.Text
Imports System.IO
Imports System.Text.RegularExpressions
Module Module1
    ' Reads a fixed-width text file, splits it into records with a regular
    ' expression made of named groups of fixed length, trims each field into
    ' a local variable, prints the index of every record found, and finally
    ' prints the number of seconds the extraction loop took.
    '
    ' One record is 441 characters wide (the sum of the group lengths below).
    ' NOTE(review): "." does not match a line feed, so a record can never span
    ' a line break; this presumably relies on the file holding one record per
    ' line - confirm against the real input.
    Sub Main()
        ' Using guarantees the reader is closed even if ReadToEnd throws.
        Dim _s As String
        Using aStreamReader As TextReader = New StreamReader("C:\SAMPLE FILE.txt")
            _s = aStreamReader.ReadToEnd()
        End Using

        ' Each named group consumes exactly the stated number of characters,
        ' starting where the previous group stopped, so the field order here
        ' must mirror the column order of the file.
        Dim _exp As String = "((?<ActionCode>.{1})" & _
                             "(?<CarrierID>.{25})" & _
                             "(?<LastName>.{60})" & _
                             "(?<FirstName>.{30})" & _
                             "(?<MiddleName>.{15})" & _
                             "(?<Addr1>.{60})" & _
                             "(?<Addr2>.{60})" & _
                             "(?<City>.{30})" & _
                             "(?<State>.{2})" & _
                             "(?<Zip>.{10})" & _
                             "(?<BenefitOption>.{60})" & _
                             "(?<EmployerGroup>.{15})" & _
                             "(?<OptionEffDate>.{8})" & _
                             "(?<HPEffDate>.{8})" & _
                             "(?<TermDate>.{8})" & _
                             "(?<Sex>.{1})" & _
                             "(?<DOB>.{8})" & _
                             "(?<SSN>.{9})" & _
                             "(?<Phone>.{12})" & _
                             "(?<EmployerGroupAnivDate>.{8})" & _
                             "(?<HeadOfHouse>.{9})" & _
                             "(?<PrimaryStatus>.{1})" & _
                             "(?<MaritalStatus>.{1}))"

        Dim r As Regex = New Regex(_exp)
        Dim g As MatchCollection = r.Matches(_s)
        Dim m As Match

        Dim _sInActionCode As String
        Dim _sInCarrierID As String
        Dim _sInLastName As String
        Dim _sInFirstName As String
        Dim _sInMiddleName As String
        Dim _sInAddr1 As String
        Dim _sInAddr2 As String
        Dim _sInCity As String
        Dim _sInState As String
        Dim _sInZip As String
        Dim _sInBenefitOption As String
        Dim _sInEmployerGroup As String
        Dim _sInOptionEffDate As String
        Dim _sInHPEffDate As String
        Dim _sInTermDate As String
        Dim _sInSex As String
        Dim _sInDOB As String
        Dim _sInSSN As String
        Dim _sInPhone As String
        Dim _sInEmployerGroupAnivDate As String
        Dim _sInHeadOfHouse As String
        Dim _sInPrimaryStatus As String
        Dim _sInMaritalStatus As String

        ' Start of the timed section (wall-clock time).
        Dim d As DateTime = DateTime.Now

        For i As Int32 = 0 To g.Count - 1
            m = g.Item(i)

            _sInActionCode = m.Groups("ActionCode").ToString.Trim
            _sInCarrierID = m.Groups("CarrierID").ToString.Trim
            _sInLastName = m.Groups("LastName").ToString.Trim
            _sInFirstName = m.Groups("FirstName").ToString.Trim
            _sInMiddleName = m.Groups("MiddleName").ToString.Trim
            _sInAddr1 = m.Groups("Addr1").ToString.Trim
            _sInAddr2 = m.Groups("Addr2").ToString.Trim
            _sInCity = m.Groups("City").ToString.Trim
            _sInState = m.Groups("State").ToString.Trim
            _sInZip = m.Groups("Zip").ToString.Trim
            _sInBenefitOption = m.Groups("BenefitOption").ToString.Trim
            _sInEmployerGroup = m.Groups("EmployerGroup").ToString.Trim
            _sInOptionEffDate = m.Groups("OptionEffDate").ToString.Trim
            ' The next three fields previously read the group of the field
            ' before them (HPEffDate <- OptionEffDate, TermDate <- HPEffDate,
            ' Sex <- TermDate); each now reads its own group.
            _sInHPEffDate = m.Groups("HPEffDate").ToString.Trim
            _sInTermDate = m.Groups("TermDate").ToString.Trim
            _sInSex = m.Groups("Sex").ToString.Trim
            _sInDOB = m.Groups("DOB").ToString.Trim
            _sInSSN = m.Groups("SSN").ToString.Trim
            _sInPhone = m.Groups("Phone").ToString.Trim
            _sInEmployerGroupAnivDate = m.Groups("EmployerGroupAnivDate").ToString.Trim()
            _sInHeadOfHouse = m.Groups("HeadOfHouse").ToString.Trim
            _sInPrimaryStatus = m.Groups("PrimaryStatus").ToString.Trim
            _sInMaritalStatus = m.Groups("MaritalStatus").ToString.Trim

            Console.WriteLine(i)

            ' Uncomment to dump every field of every record (much slower,
            ' because console output dominates the run time).
            'Console.WriteLine()
            'Console.WriteLine("_sInActionCode = " & _sInActionCode)
            'Console.WriteLine("_sInCarrierID = " & _sInCarrierID)
            'Console.WriteLine("_sInLastName = " & _sInLastName)
            'Console.WriteLine("_sInFirstName = " & _sInFirstName)
            'Console.WriteLine("_sInMiddleName = " & _sInMiddleName)
            'Console.WriteLine("_sInAddr1 = " & _sInAddr1)
            'Console.WriteLine("_sInAddr2 = " & _sInAddr2)
            'Console.WriteLine("_sInCity = " & _sInCity)
            'Console.WriteLine("_sInState = " & _sInState)
            'Console.WriteLine("_sInZip = " & _sInZip)
            'Console.WriteLine("_sInBenefitOption = " & _sInBenefitOption)
            'Console.WriteLine("_sInEmployerGroup = " & _sInEmployerGroup)
            'Console.WriteLine("_sInOptionEffDate = " & _sInOptionEffDate)
            'Console.WriteLine("_sInHPEffDate = " & _sInHPEffDate)
            'Console.WriteLine("_sInTermDate = " & _sInTermDate)
            'Console.WriteLine("_sInSex = " & _sInSex)
            'Console.WriteLine("_sInDOB = " & _sInDOB)
            'Console.WriteLine("_sInSSN = " & _sInSSN)
            'Console.WriteLine("_sInPhone = " & _sInPhone)
            'Console.WriteLine("_sInEmployerGroupAnivDate = " & _sInEmployerGroupAnivDate)
            'Console.WriteLine("_sInHeadOfHouse = " & _sInHeadOfHouse)
            'Console.WriteLine("_sInPrimaryStatus = " & _sInPrimaryStatus)
            'Console.WriteLine("_sInMaritalStatus = " & _sInMaritalStatus)
        Next

        ' Elapsed seconds for the extraction loop.
        Dim dt2 As Double = DateTime.Now.Subtract(d).TotalSeconds
        Console.WriteLine(dt2)

        ' Keep the console window open until Enter is pressed.
        Console.ReadLine()
    End Sub
End Module
Sorry the code is a little messy, but it works: it parsed 8064 records in 2
seconds flat. If you simulate some other work by outputting everything to a
console window, it takes 34 seconds.
To answer your question: Regex keeps a position marker *similar* to the one
used when reading a file, where the position is advanced by the amount just
read (for comparison's sake). Each group starts where the previous one
stopped, so just telling it how much to read for each field is good enough.
Thank you, Stephany, for writing that all out.
A note about your sample file: I hope some fields were left blank, and that
things like HeadOfHouse are numbers; otherwise this isn't working.
Sample:
_sInActionCode =
_sInCarrierID = 00000050101
_sInLastName = SMITH
_sInFirstName = VICKI
_sInMiddleName =
_sInAddr1 = C/O SUE EDDY - MISD BENEFITS
_sInAddr2 = 405 EAST DAVIS
_sInCity = MESQUITE
_sInState = TX
_sInZip = 75149
_sInBenefitOption = 001
_sInEmployerGroup = 2002MISD
_sInOptionEffDate = 20050301
_sInHPEffDate = 20050301
_sInTermDate = 20040401
_sInSex = 20050331
_sInDOB = 19510125
_sInSSN = 000010009
_sInPhone =
_sInEmployerGroupAnivDate =
_sInHeadOfHouse = 464088770
_sInPrimaryStatus = P
_sInMaritalStatus = I
Let me know,
MP