Saturday, May 9, 2009

Excel Converter 2003 XML to OpenXML


Vendors like RIMM Black Berry can render OpenXML correctly in its native. The following are code to read Excel 2003 XML spread sheet and construct OpenXML:

using System;
using System.Collections.Generic;
using System.Text;
using ICSharpCode.SharpZipLib.Zip;
using System.IO;
using System.Xml;
using System.Data;
using System.Collections;
using System.Xml.XPath;

namespace JQD.OpenXMLConverter
{
public class ExcelConverter
{
#region Properties and Fields

string _FilePathExcelXML;
string _FilePathExcelOpenXMLTemplate;
string _FilePathExcelOpenXML;
string _DirectoryOpenXMLZipFiles;
Dictionary<string, int> _SharedStrings = new Dictionary<string, int>();
int _NonUniqueStringsCount = 0;
public string FilePathExcelXML
{
get { return _FilePathExcelXML; }
set { _FilePathExcelXML = value; }
}
public string FilePathExcelOpenXMLTemplate
{
get { return _FilePathExcelOpenXMLTemplate; }
set { _FilePathExcelOpenXMLTemplate = value; }
}
public string FilePathExcelOpenXML
{
get { return _FilePathExcelOpenXML; }
set { _FilePathExcelOpenXML = value; }
}

public string DirectoryOpenXMLZipFiles
{
get { return _DirectoryOpenXMLZipFiles; }
set { _DirectoryOpenXMLZipFiles = value; }
}

#endregion

#region Methods

public void ConvertToOpenXml()
{
StringReader strR;
using (StreamReader sr = new StreamReader(FilePathExcelXML))
{
strR = new StringReader(sr.ReadToEnd().Replace("x:", "")); // due to XML exception
}

using (XmlTextReader r = new XmlTextReader(strR))
{
int sheetNumber = 1;
for (r.MoveToContent(); r.Read(); )
{
if (r.Name == "Worksheet" && r.IsStartElement())
{
MapWorksheetToOpenXmlSheet(r.ReadOuterXml(), sheetNumber);
sheetNumber++;
}
}
}

WriteSharedString();
}

private void WriteSharedString()
{
using (XmlTextWriter writer = new XmlTextWriter(DirectoryOpenXMLZipFiles + @"\xl\sharedStrings.xml", Encoding.UTF8))
{
writer.WriteStartDocument(true);

writer.WriteStartElement("sst");
writer.WriteAttributeString("xmlns", "http://schemas.openxmlformats.org/spreadsheetml/2006/main");
writer.WriteAttributeString("count", _NonUniqueStringsCount.ToString());
writer.WriteAttributeString("uniqueCount", _SharedStrings.Count.ToString());

foreach (string str in _SharedStrings.Keys )
{
writer.WriteStartElement("si");
writer.WriteElementString("t", str);
writer.WriteEndElement();
}

writer.WriteEndElement();
}
}

private void MapWorksheetToOpenXmlSheet(string xmlString,int sheetNumber)
{
List<List<string>> RowOfCellList = new List<List<string>>();
StringReader strR = new StringReader(xmlString);
using (XmlTextReader r = new XmlTextReader(strR))
{
for (r.MoveToContent(); r.Read(); )
{
string s1;

if (r.Name == "Row" ) // very strange has to work this way.
{
while ((s1 = r.ReadOuterXml()).StartsWith("<Row"))
{
RowOfCellList.Add(GenerateCellList(s1));
}
}
}
}
AddToSharedString(RowOfCellList);
WriteWorksheet(DirectoryOpenXMLZipFiles + @"\xl\worksheets\sheet"+sheetNumber.ToString()+".xml", RowOfCellList);
}

private void AddToSharedString(List<List<string>> RowOfCellList)
{

for (int row = 0; row < RowOfCellList.Count; row++)
{
for (int column = 0; column < RowOfCellList[row].Count; column++)
{
_NonUniqueStringsCount++;
if (!_SharedStrings.ContainsKey(RowOfCellList[row][column]))
_SharedStrings.Add(RowOfCellList[row][column], _SharedStrings.Count); // 0 based
}
}
}

private List GenerateCellList(string cellString)
{
List<string> CellList = new List<string>();
StringReader strR = new StringReader(cellString);
using (XmlTextReader r = new XmlTextReader(strR))
{
int prevCellIndex = 0;
for (r.MoveToContent(); r.Read(); )
{
if (r.Name == "Cell" && r.IsStartElement())
{
int currCellIndex=Convert.ToInt32( r.GetAttribute("ss:Index"));
for (int i = 0; i < currCellIndex - prevCellIndex - 1; i++) CellList.Add(string.Empty); // could have skipped empty cell by missing a cell element
prevCellIndex=currCellIndex;
CellList.Add(ExtractData(r.ReadOuterXml()));
}
}
}
return CellList;
}

private string ExtractData(string xmlData)
{
// "<Cell ss:Index=\"1\" ss:StyleID=\"BoldFace\" xmlns:ss=\"urn:schemas-microsoft-com:office:spreadsheet\" xmlns=\"urn:schemas-microsoft-com:office:spreadsheet\">
// <Data ss:Type=\"String\">Break</Data></Cell>"
StringReader strR = new StringReader(xmlData);
string ret;
using (XmlTextReader r = new XmlTextReader(strR))
{
ret = r.ReadElementString();
}
strR.Close();
return ret;
}

public void WriteWorksheet(string sheetXmlPath, List<List<string>> RowCellList)
{
using (XmlTextWriter writer = new XmlTextWriter(sheetXmlPath, Encoding.UTF8))
{
int colCountMax = 0;
foreach (List<string> cL in RowCellList) if (cL.Count > colCountMax) colCountMax = cL.Count;

writer.WriteStartDocument(true);
writer.WriteStartElement("worksheet");
writer.WriteAttributeString("xmlns", "http://schemas.openxmlformats.org/spreadsheetml/2006/main");
writer.WriteAttributeString("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");

writer.WriteStartElement("dimension");
string lastCell;
if (colCountMax == 0) lastCell = RowColumnToPosition(0, 0); //A1;
else
lastCell = RowColumnToPosition(RowCellList.Count - 1, colCountMax-1);
writer.WriteAttributeString("ref", "A1:" + lastCell);
writer.WriteEndElement();

writer.WriteStartElement("sheetViews");
writer.WriteStartElement("sheetView");
writer.WriteAttributeString("tabSelected", "1");
writer.WriteAttributeString("workbookViewId", "0");
writer.WriteEndElement();
writer.WriteEndElement();

writer.WriteStartElement("sheetFormatPr");
writer.WriteAttributeString("defaultRowHeight", "15");
writer.WriteEndElement();
//writer.WriteStartElement("cols");
//for (int i = 0; i < colCountMax; i++)
//{
// writer.WriteStartElement("col");
// //writer.WriteAttributeString("min", "11");
// //writer.WriteAttributeString("max", "20");
// //writer.WriteAttributeString("width", "11.20");
// //writer.WriteAttributeString("bestFit", "1");
// //writer.WriteAttributeString("customWidth", "1");
// writer.WriteEndElement();
//}
//writer.WriteEndElement();

writer.WriteStartElement("sheetData");
WritesheetData(writer, RowCellList);
writer.WriteEndElement();

writer.WriteStartElement("pageMargins");
writer.WriteAttributeString("left", "0.7");
writer.WriteAttributeString("right", "0.7");
writer.WriteAttributeString("top", "0.75");
writer.WriteAttributeString("bottom", "0.75");
writer.WriteAttributeString("header", "0.3");
writer.WriteAttributeString("footer", "0.3");
writer.WriteEndElement();

writer.WriteEndElement();
}
}


public void WritesheetData(XmlTextWriter writer, List<List<string>> RowOfCellList)
{

for (int row = 0; row < RowOfCellList.Count; row++)
{
writer.WriteStartElement("row");
writer.WriteAttributeString("r", (row+1).ToString());
writer.WriteAttributeString("spans", "1:" + RowOfCellList[row].Count.ToString());

for (int column = 0; column < RowOfCellList[row].Count ; column++)
{
writer.WriteStartElement("c");
writer.WriteAttributeString("r", RowColumnToPosition(row, column));

string key,val;
writer.WriteAttributeString("t", "s");
key = RowOfCellList[row][column];
val = _SharedStrings[key].ToString();

writer.WriteElementString("v", val.ToString());
writer.WriteEndElement();
}

writer.WriteEndElement();
}
}

public void CreateBaseOpenXMLFilesFromTemplate()
{
// clean up
DirectoryInfo di = new DirectoryInfo(_DirectoryOpenXMLZipFiles);
foreach (FileInfo fi in di.GetFiles()) fi.Delete();
foreach (DirectoryInfo sdi in di.GetDirectories()) sdi.Delete(true);

// extract Excel Template into the directory
FastZip fz = new FastZip();
fz.ExtractZip(_FilePathExcelOpenXMLTemplate,_DirectoryOpenXMLZipFiles,null);
}

public void CreateOpenXMLExcelFile()
{
FastZip fz = new FastZip();
fz.CreateZip(_FilePathExcelOpenXML, _DirectoryOpenXMLZipFiles, true, null);
}

#endregion

#region Helper methods

string ColumnIndexToName(int columnIndex)
{
char second = (char)(((int)'A') + columnIndex % 26);

columnIndex /= 26;

if (columnIndex == 0)
return second.ToString();
else
return ((char)(((int)'A') - 1 + columnIndex)).ToString() + second.ToString();
}

string RowIndexToName(int rowIndex)
{
return (rowIndex + 1).ToString();
}

string RowColumnToPosition(int row, int column)
{
return ColumnIndexToName(column) + RowIndexToName(row);
}

#endregion
}
}


No comments: