I have a SSIS package where I have to select some values from an excel sheet and insert them into a SQL Server database table, i am doing it through Execute sql task.
These are the steps:
- Select all records from mapping table, cell location is dynamic so keeping it in sql table (around 3000 cells are there – we have to pick value from selective fields in Excel and not all)
Example:
-
Iterate through
Foreach
on each record -
Create a query with cell name and Sheet name
Example:
Select * from [GenDet$F3:F3]
-
Execute the query to get that cell’s value from Excel sheet
Example:
- Insert the values into the sql database table
It is working – but the problem is the time which it is taking. For 3000 fields, this whole process is taking 50 minutes to process one Excel file. I have to do this in less than a minute.
Please let me know the best approach to achieve this.
Thank you!
As i mentioned in the comments, I think that writing a c# script that read the data from excel cells and group them into a list or DataTable then perform a Bulk insert once will be more performant
C# application/script task
Needed assemblies
First you have to import Excel Interop assembly:
using Microsoft.Office.Interop.Excel;
using System.Data.SqlClient;
Convert column header letter to index
Now you should define the following function that convert the excel column alphabet to index:
private int ParseColHeaderToIndex(string colAdress)
{
int[] digits = new int[colAdress.Length];
for (int i = 0; i < colAdress.Length; i++)
{
digits[i] = Convert.ToInt32(colAdress[i]) - 64;
}
int mul = 1;
int res = 0;
for (int pos = digits.Length - 1; pos >= 0; pos--)
{
res += digits[pos] * mul;
mul *= 26;
}
return res;
}
SQL bulk insert function
The following function is to perform bulk insert operation into SQL
public void InsertToSQLUsingSQLBulk(System.Data.DataTable dt, string connectionstring, string Tablename)
{
try
{
using (var bulkCopy = new SqlBulkCopy(connectionstring, SqlBulkCopyOptions.KeepIdentity))
{
foreach (DataColumn col in dt.Columns)
{
bulkCopy.ColumnMappings.Add(col.ColumnName, col.ColumnName);
}
bulkCopy.BulkCopyTimeout = 600;
bulkCopy.DestinationTableName = Tablename;
bulkCopy.WriteToServer(dt);
}
}
catch (Exception ex)
{
throw ex;
}
}
Reading from excel into destination DataTable
The following function takes the excel path and the ranges DataTable as parameter and return a DataTable having the destination structure (Id, AttributeKey, AttributeValue)
public System.Data.DataTable ReadFromExcel(System.Data.DataTable dtRanges,string strPath)
{
string num = "0123456789";
//Declare result datatable
System.Data.DataTable destination = new System.Data.DataTable();
destination.Columns.Add("Id");
destination.Columns.Add("AttributeKey");
destination.Columns.Add("AttributeValue");
//Decalre Interop Objects
Microsoft.Office.Interop.Excel.Application m_XlApp;
m_XlApp = new Microsoft.Office.Interop.Excel.Application();
m_XlApp.Visible = false;
m_XlApp.DisplayAlerts = false;
Workbook xlWbs = null;
xlWbs = m_XlApp.Workbooks.Open(strPath, Type.Missing, Type.Missing,
Type.Missing, "'", Type.Missing, Type.Missing,
Type.Missing, Type.Missing, Type.Missing,
Type.Missing, Type.Missing, Type.Missing,
Type.Missing, Type.Missing);
xlWbs.DoNotPromptForConvert = true;
xlWbs.CheckCompatibility = false;
xlWbs.Application.DisplayAlerts = false;
//Loop over worksheets
foreach (Worksheet xlWks in xlWbs.Worksheets) {
string Name = xlWks.Name;
//Assing rows relevant to the current sheet
foreach (DataRow drRow in dtRanges.AsEnumerable().Where(x => x["Sheet_Name"].ToString() == Name))
{
string sheet = drRow["Sheet_Name"].ToString();
string range = drRow["Location_Value"].ToString();
string field = drRow["Field_Name"].ToString();
string id = drRow["Id"].ToString();
string rangeAlpha = range.Split(':')[0];
int rowidx = 0;
int colidx = 0;
foreach (char chr in num) {
rangeAlpha = rangeAlpha.Replace(chr, 'public System.Data.DataTable ReadFromExcel(System.Data.DataTable dtRanges,string strPath) { string num = "0123456789"; //Declare result datatable System.Data.DataTable destination = new System.Data.DataTable(); destination.Columns.Add("Id"); destination.Columns.Add("AttributeKey"); destination.Columns.Add("AttributeValue"); //Decalre Interop Objects Microsoft.Office.Interop.Excel.Application m_XlApp; m_XlApp = new Microsoft.Office.Interop.Excel.Application(); m_XlApp.Visible = false; m_XlApp.DisplayAlerts = false; Workbook xlWbs = null; xlWbs = m_XlApp.Workbooks.Open(strPath, Type.Missing, Type.Missing, Type.Missing, "'", Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing); xlWbs.DoNotPromptForConvert = true; xlWbs.CheckCompatibility = false; xlWbs.Application.DisplayAlerts = false; //Loop over worksheets foreach (Worksheet xlWks in xlWbs.Worksheets) { string Name = xlWks.Name; //Assing rows relevant to the current sheet foreach (DataRow drRow in dtRanges.AsEnumerable().Where(x => x["Sheet_Name"].ToString() == Name)) { string sheet = drRow["Sheet_Name"].ToString(); string range = drRow["Location_Value"].ToString(); string field = drRow["Field_Name"].ToString(); string id = drRow["Id"].ToString(); string rangeAlpha = range.Split(':')[0]; int rowidx = 0; int colidx = 0; foreach (char chr in num) { rangeAlpha = rangeAlpha.Replace(chr, '\0'); } rowidx = Int32.Parse(range.Split(':')[0].Replace(rangeAlpha, "")); colidx = ParseColHeaderToIndex(rangeAlpha); DataRow dr = destination.NewRow(); if (xlWks.Cells[rowidx, colidx] != null && (xlWks.Cells[rowidx, colidx] as Range).Value2 != null) { dr["AttributeValue"] = (string)(xlWks.Cells[rowidx, colidx] as Range).Value2; } else { dr["AttributeValue"] = ""; } dr["AttributeKey"] = drRow["Field_Name"].ToString(); dr["Id"] = drRow["Id"].ToString(); destination.Rows.Add(dr); } } xlWbs.Close(false, Type.Missing, Type.Missing); m_XlApp.Quit(); System.Runtime.InteropServices.Marshal.ReleaseComObject(xlWbs); System.Runtime.InteropServices.Marshal.ReleaseComObject(m_XlApp); return destination; }
');
}
rowidx = Int32.Parse(range.Split(':')[0].Replace(rangeAlpha, ""));
colidx = ParseColHeaderToIndex(rangeAlpha);
DataRow dr = destination.NewRow();
if (xlWks.Cells[rowidx, colidx] != null && (xlWks.Cells[rowidx, colidx] as Range).Value2 != null)
{
dr["AttributeValue"] = (string)(xlWks.Cells[rowidx, colidx] as Range).Value2;
}
else
{
dr["AttributeValue"] = "";
}
dr["AttributeKey"] = drRow["Field_Name"].ToString();
dr["Id"] = drRow["Id"].ToString();
destination.Rows.Add(dr);
}
}
xlWbs.Close(false, Type.Missing, Type.Missing);
m_XlApp.Quit();
System.Runtime.InteropServices.Marshal.ReleaseComObject(xlWbs);
System.Runtime.InteropServices.Marshal.ReleaseComObject(m_XlApp);
return destination;
}
Main program
public void Main(){
//Initialize ranges table
System.Data.DataTable ranges = new System.Data.DataTable();
ranges.Columns.Add("Id");
ranges.Columns.Add("Field_Name");
ranges.Columns.Add("Location_Value");
ranges.Columns.Add("Sheet_Name");
//Add rows or read them from database using SQLDataAdapter
//note that the destination table must exists in the database with identical columns of datatable
System.Data.DataTable destination = ReadFromExcel(ranges, "C:\1.xlsx", "dbo.destination");
InsertToSQLUsingSQLBulk(destination, "Pass SQL Server destination connection string here");
}
Update 1 – Improve performance
You can improve the method performance by putting all worksheet contents inside an two dimension array, then loop over array instead of looping inside the Excel worksheet.
Excel.Range targetCells = xlWks.UsedRange;
object[,] allValues = (object[,])targetCells.Cells.Value;
...
if (targetCells.Cells[rowidx, colidx] != null)
{
dr["AttributeValue"] = (string)(targetCells.Cells[rowidx, colidx] as Range).Value2;
}
else
{
dr["AttributeValue"] = "";
}
Reference
Answer:
How about considering building one select statement to run that gets all the records at once.
Based on your image, something like this:
select
(Select [Field1] from [GenDet$I3:I3]) as Field1
,(Select [Field2] from [GenDet$I4:I4]) as Field2
...
That was horizontal and column based.
or you can go vertical with
(Select [FieldName],[Field1] as Value from [GenDet$I3:I3]) as Field1
union all
(Select [Field2],* from [GenDet$I4:I4]) as Field2
...
I know there are 3000 or so but you can build this with a string concatenation query rather simply.
Just a thought.
This would decrease execution time as the spreadsheet is not opening and closing on every iteration.