: As the others suggest you can use either (or both) String.Substring & Regex
: to parse the file.
:
: Seeing as you are going to Xml, I would consider creating a custom XmlReader
: object similar to the GedcomReader in the last link below. Then use XSLT to
: transform it into the format expected. This allows easy changing of the
: target format by simply replacing the XSLT used...
:
: The following articles discuss how to create a custom XmlReader object:
:
http://msdn.microsoft.com/msdnmag/issues/01/09/xml/default.aspx
:
http://msdn.microsoft.com/msdnmag/issues/04/05/XMLFiles/
I might have use for this technique for $work, so as an exercise, I
wrote an XmlReader for the input format the OP described. It's
certainly not a complete (or even halfway polished) implementation, but
I hope someone will find value in it.
I welcome any comments.
using System;
using System.Collections;
using System.Globalization;
using System.IO;
using System.Text.RegularExpressions;
using System.Xml;
namespace FunkyReader
{
public class FunkyReader : XmlReader
{
private NameTable nametable = new NameTable();
private ReadState state;
private Codes codes;
private ArrayList dfs;
private int node;
/// <summary>
/// Creates a reader over the given input records and parses them immediately.
/// </summary>
/// <param name="lines">The input records, one per array element.</param>
/// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
public FunkyReader(string[] lines)
{
    // Fail with a descriptive exception rather than a NullReferenceException
    // raised from inside the parse loop.
    if (lines == null)
        throw new ArgumentNullException("lines");

    codes = new Codes();
    state = ReadState.Initial;
    ParseLines(lines);
}
/// <summary>
/// Gets the flat node list that was built while parsing the input.
/// </summary>
public ArrayList Linearization
{
    get
    {
        return this.dfs;
    }
}
#region XmlReader methods
/// <summary>
/// Attribute counting is not implemented; always reports -1.
/// </summary>
public override int AttributeCount
{
    get
    {
        return -1;
    }
}
/// <summary>
/// Gets the base URI of the current node; this reader always reports an empty string.
/// </summary>
public override string BaseURI
{
    get
    {
        return "";
    }
}
/// <summary>
/// Does nothing.
/// </summary>
public override void Close()
{
}
/// <summary>
/// Depth tracking is not implemented; always reports -1.
/// </summary>
public override int Depth
{
    get
    {
        return -1;
    }
}
/// <summary>
/// Gets whether the reader has moved past the last node of the linearized list.
/// </summary>
/// <remarks>
/// False until <c>Read</c> has been called at least once; afterwards true once the
/// current index is at or beyond the end of the node list.
/// </remarks>
public override bool EOF
{
    get
    {
        // Before the first Read() the index is 0 but nothing has been consumed yet,
        // so an untouched reader is never at EOF.
        if (state == ReadState.Initial)
            return false;
        return node >= dfs.Count;
    }
}
/// <summary>
/// Attribute lookup by index is not implemented; always returns null.
/// </summary>
public override string GetAttribute(int i)
{
    return null;
}

/// <summary>
/// Attribute lookup by name is not implemented; always returns null.
/// </summary>
public override string GetAttribute(string name)
{
    return null;
}

/// <summary>
/// Attribute lookup by name and namespace is not implemented; always returns null.
/// </summary>
public override string GetAttribute(string name, string namespaceURI)
{
    return null;
}
/// <summary>
/// Gets whether the current node can carry a value.
/// </summary>
/// <remarks>
/// True for attribute and text nodes; false for start/end nodes and when the
/// reader is positioned past the end of the node list.
/// </remarks>
public override bool HasValue
{
    get
    {
        if (node >= dfs.Count)
            return false;

        Node.Type kind = ((Node) dfs[node]).NodeType;
        return kind == Node.Type.Attribute || kind == Node.Type.Text;
    }
}
/// <summary>
/// Always false.
/// </summary>
public override bool IsDefault
{
    get
    {
        return false;
    }
}
/// <summary>
/// Gets whether the current node is a start node that is immediately followed by an end node.
/// </summary>
/// <remarks>
/// Returns false, instead of throwing, when the reader is on the last node or past
/// the end of the node list.
/// </remarks>
public override bool IsEmptyElement
{
    get
    {
        // Both the current node and its successor must exist to be compared.
        if (node + 1 >= dfs.Count)
            return false;

        Node current = (Node) dfs[node];
        Node following = (Node) dfs[node + 1];
        return current.NodeType == Node.Type.Start
            && following.NodeType == Node.Type.End;
    }
}
/// <summary>
/// Gets the name of the current start, end or attribute node.
/// </summary>
/// <remarks>
/// Returns an empty string for text nodes and when the reader is positioned past
/// the end of the node list (matching the bounds guard in <c>NodeType</c>).
/// </remarks>
public override string LocalName
{
    get
    {
        // Guard the index the same way NodeType does, so reading the name at
        // end-of-input does not throw.
        if (node >= dfs.Count)
            return "";

        Node current = (Node) dfs[node];
        switch (current.NodeType)
        {
            case Node.Type.Start:
            case Node.Type.End:
            case Node.Type.Attribute:
                return current.Name;
            default:
                return "";
        }
    }
}
/// <summary>
/// Namespace lookup is not implemented; always returns null.
/// </summary>
public override string LookupNamespace(string prefix)
{
    return null;
}
/// <summary>
/// Moving to an attribute by index is not implemented; does nothing.
/// </summary>
public override void MoveToAttribute(int i)
{
}

/// <summary>
/// Moving to an attribute by name is not implemented; always returns false.
/// </summary>
public override bool MoveToAttribute(string name)
{
    return false;
}

/// <summary>
/// Moving to an attribute by name and namespace is not implemented; always returns false.
/// </summary>
public override bool MoveToAttribute(string name, string ns)
{
    return false;
}
/// <summary>
/// Not implemented; the position is left unchanged and false is returned.
/// </summary>
public override bool MoveToElement()
{
    return false;
}
/// <summary>
/// Not implemented; the position is left unchanged and false is returned.
/// </summary>
public override bool MoveToFirstAttribute()
{
    return false;
}
/// <summary>
/// Advances to the next node if, and only if, that node is an attribute.
/// </summary>
/// <returns>
/// True when the reader moved onto an attribute node; false when the next node is
/// not an attribute or there is no next node.
/// </returns>
public override bool MoveToNextAttribute()
{
    // Nothing follows the last node; report "no attribute" rather than
    // indexing past the end of the list.
    if (node + 1 >= dfs.Count)
        return false;

    Node following = (Node) dfs[node + 1];
    if (following.NodeType != Node.Type.Attribute)
        return false;

    ++node;
    return true;
}
/// <summary>
/// Gets the name of the current node; identical to <c>LocalName</c>.
/// </summary>
public override string Name
{
    get
    {
        return this.LocalName;
    }
}
/// <summary>
/// Gets the namespace URI of the current node; this reader always reports an empty string.
/// </summary>
public override string NamespaceURI
{
    get
    {
        return "";
    }
}
/// <summary>
/// Gets the name table instance owned by this reader.
/// </summary>
public override XmlNameTable NameTable
{
    get
    {
        return this.nametable;
    }
}
/// <summary>
/// Gets the XML node type corresponding to the current linearized node.
/// </summary>
/// <remarks>
/// Returns <c>XmlNodeType.None</c> before the first call to <c>Read</c> and once
/// the reader is positioned past the end of the node list.
/// </remarks>
public override XmlNodeType NodeType
{
    get
    {
        // Until Read() has been called the reader is not positioned on any node,
        // even though the index field already holds 0.
        if (state == ReadState.Initial || node >= dfs.Count)
            return XmlNodeType.None;

        Node current = (Node) dfs[node];
        switch (current.NodeType)
        {
            case Node.Type.Attribute:
                return XmlNodeType.Attribute;
            case Node.Type.Start:
                return XmlNodeType.Element;
            case Node.Type.End:
                return XmlNodeType.EndElement;
            case Node.Type.Text:
                return XmlNodeType.Text;
            default:
                return XmlNodeType.None;
        }
    }
}
/// <summary>
/// Namespace prefixes are not used by this reader; always returns null.
/// </summary>
public override string Prefix
{
    get
    {
        return null;
    }
}
/// <summary>
/// Gets the attribute quote character; always a double quote.
/// </summary>
public override char QuoteChar
{
    get
    {
        return '"';
    }
}
/// <summary>
/// Advances the reader to the next node of the linearized list.
/// </summary>
/// <returns>
/// True when the reader is positioned on a node; false once the list is exhausted.
/// </returns>
/// <remarks>
/// The first call positions the reader on the first node. When the end of the list
/// is reached the internal state is set to <c>ReadState.EndOfFile</c> and the index
/// is pinned at the list length, so further calls return false without moving.
/// </remarks>
public override bool Read()
{
    switch (state)
    {
        case ReadState.Initial:
            state = ReadState.Interactive;
            node = 0;
            break;
        case ReadState.Interactive:
            node++;
            break;
        default:
            // Already finished: do not keep incrementing the index.
            return false;
    }

    if (node < dfs.Count)
        return true;

    // Ran off the end: remember that, and keep the index at a stable value.
    node = dfs.Count;
    state = ReadState.EndOfFile;
    return false;
}
/// <summary>
/// When positioned on an attribute node, steps forward onto the node that follows it.
/// </summary>
/// <returns>
/// True when the reader was on an attribute node and has been advanced; false
/// otherwise, including when the reader is past the end of the node list.
/// </returns>
public override bool ReadAttributeValue()
{
    // Guard the index the same way NodeType does so a call at end-of-input
    // reports false instead of throwing.
    if (node >= dfs.Count)
        return false;

    Node current = (Node) dfs[node];
    if (current.NodeType != Node.Type.Attribute)
        return false;

    ++node;
    return true;
}
/// <summary>
/// Gets the current state of the reader.
/// </summary>
/// <remarks>
/// Reports the tracked state, except that an interactive reader whose index has
/// moved to or beyond the end of the node list is reported as
/// <c>ReadState.EndOfFile</c>.
/// </remarks>
public override ReadState ReadState
{
    get
    {
        if (state == ReadState.Interactive && node >= dfs.Count)
            return ReadState.EndOfFile;
        return state;
    }
}
/// <summary>
/// Does nothing.
/// </summary>
public override void ResolveEntity()
{
}
/// <summary>
/// Attribute access by index is not implemented; always returns null.
/// </summary>
public override string this[int i]
{
    get
    {
        return null;
    }
}

/// <summary>
/// Attribute access by name and namespace is not implemented; always returns null.
/// </summary>
public override string this[string name, string namespaceURI]
{
    get
    {
        return null;
    }
}

/// <summary>
/// Attribute access by name is not implemented; always returns null.
/// </summary>
public override string this[string name]
{
    get
    {
        return null;
    }
}
/// <summary>
/// Gets the text value of the current node.
/// </summary>
/// <remarks>
/// Text nodes return their own text. Attribute nodes return the text of the node
/// that immediately follows them when that node is a text node (the Linearize
/// methods emit each attribute's value that way). Start and end nodes, and a
/// reader positioned past the end of the list, yield an empty string.
/// </remarks>
public override string Value
{
    get
    {
        // Avoid indexing past the end of the list at end-of-input.
        if (node >= dfs.Count)
            return "";

        Node current = (Node) dfs[node];
        switch (current.NodeType)
        {
            case Node.Type.Text:
                return current.Value;

            case Node.Type.Attribute:
                // The attribute node itself only stores the attribute name; its
                // value lives in the text node placed right after it.
                if (node + 1 < dfs.Count)
                {
                    Node valueNode = (Node) dfs[node + 1];
                    if (valueNode.NodeType == Node.Type.Text)
                        return valueNode.Value;
                }
                return "";

            default:
                // Start/End nodes store only a name, which is not a value.
                return "";
        }
    }
}
/// <summary>
/// Language scope is not tracked; always returns null.
/// </summary>
public override string XmlLang
{
    get
    {
        return null;
    }
}
/// <summary>
/// Whitespace scope is not tracked; always reports <c>XmlSpace.None</c>.
/// </summary>
public override XmlSpace XmlSpace
{
    get
    {
        return XmlSpace.None;
    }
}
#endregion
#region parse input
/// <summary>
/// Parses the input records and builds the flat node list the reader walks.
/// </summary>
/// <remarks>
/// A record is a 7-digit code, a 4-digit year and one or more 2-digit months.
/// Every month of every matching record is registered via <c>AddMonth</c>; lines
/// that do not match contribute nothing. Afterwards all collected codes are
/// linearized between a "codes" start node and a "codes" end node.
/// </remarks>
/// <param name="lines">The input records, one per array element.</param>
private void ParseLines(string[] lines)
{
    Regex recordPattern = new Regex(
        @"^" +
        @"(?<code>\d{7})" +
        @"(?<year>\d{4})" +
        @"(?<months>\d\d)+" +
        @"$");

    for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
    {
        Match hit = recordPattern.Match(lines[lineIndex]);
        if (!hit.Success)
            continue;   // a failed match has no month captures, so nothing to add

        string code = hit.Groups["code"].Value;
        string year = hit.Groups["year"].Value;

        // The repeated "months" group keeps one capture per two-digit month.
        CaptureCollection monthCaptures = hit.Groups["months"].Captures;
        for (int k = 0; k < monthCaptures.Count; k++)
            AddMonth(code, year, monthCaptures[k].Value);
    }

    dfs = new ArrayList();
    dfs.Add(new Node("codes", Node.Type.Start));
    foreach (Code entry in codes)
    {
        entry.Linearize(dfs);
    }
    dfs.Add(new Node("codes", Node.Type.End));
}
/// <summary>
/// Records one month under the given code and year.
/// </summary>
/// <param name="code">The code identifier from the input record.</param>
/// <param name="year">The year from the input record.</param>
/// <param name="mm">The two-digit month text from the input record.</param>
private void AddMonth(string code, string year, string mm)
{
    // Both indexers create the entry on first use, so the lookups never return null.
    Code codeEntry = codes[code];
    Year yearEntry = codeEntry[year];
    yearEntry.Add(new Month(code, year, mm));
}
#endregion
#region Node class
/// <summary>
/// One entry of the flattened document: a kind plus a single string that serves
/// as both the entry's name and its value.
/// </summary>
public class Node
{
    /// <summary>The kinds of entry that can appear in the flattened list.</summary>
    public enum Type { Start, Attribute, Text, End };

    private string label;
    private Type kind;

    /// <summary>
    /// Creates an entry of the given kind.
    /// </summary>
    /// <param name="name">The string stored for this entry.</param>
    /// <param name="type">The kind of entry.</param>
    public Node(string name, Type type)
    {
        label = name;
        kind = type;
    }

    /// <summary>Gets the stored string.</summary>
    public string Name
    {
        get
        {
            return label;
        }
    }

    /// <summary>Gets the stored string; the same string that <c>Name</c> returns.</summary>
    public string Value
    {
        get
        {
            return label;
        }
    }

    /// <summary>Gets the kind of this entry.</summary>
    public Type NodeType
    {
        get
        {
            return kind;
        }
    }
}
#endregion
#region various element representations
/// <summary>
/// Collection of <c>Code</c> entries, kept in the order they were first requested.
/// </summary>
class Codes
{
    private ArrayList entries = new ArrayList();

    /// <summary>
    /// Gets the entry whose ID equals <paramref name="code"/>, creating and
    /// appending a new one when none exists yet.
    /// </summary>
    public Code this[string code]
    {
        get
        {
            foreach (Code existing in entries)
            {
                if (existing.ID == code)
                    return existing;
            }

            Code created = new Code(code);
            entries.Add(created);
            return created;
        }
    }

    /// <summary>
    /// Returns an enumerator over the stored entries.
    /// </summary>
    public IEnumerator GetEnumerator()
    {
        return entries.GetEnumerator();
    }
}
/// <summary>
/// A single code together with the years recorded for it.
/// </summary>
class Code
{
    string id;
    private ArrayList yearList = new ArrayList();

    /// <summary>
    /// Creates a code entry with the given identifier.
    /// </summary>
    public Code(string name)
    {
        id = name;
    }

    /// <summary>
    /// Gets the year whose ID equals <paramref name="year"/>, creating and
    /// appending a new one when none exists yet.
    /// </summary>
    public Year this[string year]
    {
        get
        {
            foreach (Year existing in yearList)
            {
                if (existing.ID == year)
                    return existing;
            }

            Year created = new Year(year);
            yearList.Add(created);
            return created;
        }
    }

    /// <summary>Gets the identifier of this code.</summary>
    public string ID
    {
        get
        {
            return id;
        }
    }

    /// <summary>
    /// Appends this code to <paramref name="record"/>: a "code" start node, an "id"
    /// attribute node followed by a text node holding the identifier, every year
    /// in insertion order, and finally a "code" end node.
    /// </summary>
    public void Linearize(ArrayList record)
    {
        record.Add(new Node("code", Node.Type.Start));
        record.Add(new Node("id", Node.Type.Attribute));
        record.Add(new Node(ID, Node.Type.Text));

        for (int i = 0; i < yearList.Count; i++)
        {
            Year current = (Year) yearList[i];
            current.Linearize(record);
        }

        record.Add(new Node("code", Node.Type.End));
    }
}
/// <summary>
/// A single year together with the months recorded for it.
/// </summary>
class Year
{
    string yrid;
    ArrayList monthList = new ArrayList();

    /// <summary>
    /// Creates a year entry with the given identifier.
    /// </summary>
    public Year(string year)
    {
        yrid = year;
    }

    /// <summary>Gets the identifier of this year.</summary>
    public string ID
    {
        get
        {
            return yrid;
        }
    }

    /// <summary>
    /// Appends a month to this year.
    /// </summary>
    public void Add(Month m)
    {
        monthList.Add(m);
    }

    /// <summary>
    /// Appends this year to <paramref name="record"/>: a "year" start node, a "yrid"
    /// attribute node followed by a text node holding the identifier, every month
    /// in insertion order, and finally a "year" end node.
    /// </summary>
    public void Linearize(ArrayList record)
    {
        record.Add(new Node("year", Node.Type.Start));
        record.Add(new Node("yrid", Node.Type.Attribute));
        record.Add(new Node(yrid, Node.Type.Text));

        for (int i = 0; i < monthList.Count; i++)
        {
            Month current = (Month) monthList[i];
            current.Linearize(record);
        }

        record.Add(new Node("year", Node.Type.End));
    }
}
/// <summary>
/// A single month of a code/year pair, together with the name of the file it links to.
/// </summary>
class Month
{
    private int month; // 1-12, enforced by the constructor
    private string link;

    /// <summary>
    /// Creates a month entry.
    /// </summary>
    /// <param name="code">The code identifier; used as the start of the link name.</param>
    /// <param name="year">The year text; used in the link name.</param>
    /// <param name="mm">The month as text; must parse to a number from 1 to 12.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="mm"/> parses to a number outside 1-12.
    /// </exception>
    public Month(string code, string year, string mm)
    {
        int parsed = int.Parse(mm, CultureInfo.InvariantCulture);

        // Reject bad months here with a clear message; otherwise the failure would
        // only surface later as an index error when the short name is looked up.
        if (parsed < 1 || parsed > 12)
            throw new ArgumentOutOfRangeException("mm", mm, "Month must be between 01 and 12.");

        this.month = parsed;
        this.link = code + "_" + mm + year + ".xml";
    }

    /// <summary>Gets the month number (1-12).</summary>
    public int ID
    {
        get { return month; }
    }

    /// <summary>Gets the link file name, built as code + "_" + month text + year + ".xml".</summary>
    public string Link
    {
        get { return link; }
    }

    /// <summary>Gets the three-letter invariant-culture abbreviation of the month name.</summary>
    public string MonthShortName
    {
        get
        {
            return DateTimeFormatInfo.InvariantInfo.AbbreviatedMonthNames[month - 1];
        }
    }

    /// <summary>
    /// Appends this month to <paramref name="record"/>: an "m" start node, an "id"
    /// attribute node and its text node (the month number), a "link" attribute node
    /// and its text node (the link name), a text node holding the short month name,
    /// and finally an "m" end node.
    /// </summary>
    public void Linearize(ArrayList record)
    {
        record.Add(new Node("m", Node.Type.Start));
        record.Add(new Node("id", Node.Type.Attribute));
        record.Add(new Node(month.ToString(CultureInfo.InvariantCulture), Node.Type.Text));
        record.Add(new Node("link", Node.Type.Attribute));
        record.Add(new Node(link, Node.Type.Text));
        record.Add(new Node(MonthShortName, Node.Type.Text));
        record.Add(new Node("m", Node.Type.End));
    }
}
#endregion
}
}
Enjoy,
Greg