Sample source code for HTML table:
<tr style="background-color: rgb(255, 255, 255);">
<td style="line-height: 16px; font-family: verdana, helvetica, arial, sans-serif; font-size: 12px;">
1<br />
<br />
</td>
<td style="line-height: 16px; font-family: verdana, helvetica, arial, sans-serif; font-size: 12px">
Alfreds Futterkiste</td>
<td style="line-height: 16px; font-family: verdana, helvetica, arial, sans-serif; font-size: 12px">
Maria Anders</td>
<td style="line-height: 16px; font-family: verdana, helvetica, arial, sans-serif; font-size: 12px">
Obere Str. 57</td>
<td style="line-height: 16px; font-family: verdana, helvetica, arial, sans-serif; font-size: 12px">
Berlin</td>
<td style="line-height: 16px; font-family: verdana, helvetica, arial, sans-serif; font-size: 12px">
12209</td>
<td style="line-height: 16px; font-family: verdana, helvetica, arial, sans-serif; font-size: 12px">
Germany</td>
</tr>
Here we need to make sure that how we can get content from simple HTML table. So only possibility is we have to download HTML string and extract <table> content. <table> tag contains <tr> and <td> tags but it may have style properties. So first we have to avoid these style properties and after we can get required content from table. Please follow the steps I’ve written here.
First we have to upload HTML page to server folder. Then we can download HTML string from that HTML page.
Designer source code:
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default"
EnableEventValidation="true" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
<title></title>
</head>
<body>
<form id="form1" runat="server">
<div>
<table>
<tr>
<td>
<asp:FileUpload ID="FileUpload1" runat="server" />
</td>
<td>
<asp:Button ID="Button1" runat="server" Text="submit" OnClick="button1_Click" />
</td>
</tr>
<tr>
<td colspan="2">
<br />
<asp:GridView ID="gvCustomers" runat="server">
</asp:GridView>
</td>
</tr>
</table>
</div>
</form>
</body>
</html>
C# code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.Adapters;
using System.Collections;
using System.Text.RegularExpressions;
using System.Data;
using System.Data.SqlClient;
using System.Net;
using System.IO;
public partial class _Default : System.Web.UI.Page
{
//please refer connection string from web config.
SqlConnection conn = new SqlConnection("Data Source=Jitendra;Initial Catalog=Northwind;Integrated Security=True");
protected void Page_Load(object sender, EventArgs e)
{
if (!IsPostBack)
{
BindGrid();
}
}
const string msgFormat = "table[{0}], tr[{1}], td[{2}], a: {3}, b: {4}";
const string table_pattern = "<table.*?>(.*?)</table>";
const string tr_pattern = "<tr.*?>(.*?)</tr>";
const string td_pattern = "<td.*?>(.*?)</td>";
const string a_pattern = "<a href=\"(.*?)\"></a>";
const string b_pattern = "<b>(.*?)</b>";
private static List<string> GetContents(string input, string pattern)
{
MatchCollection matches = Regex.Matches(input, pattern, RegexOptions.Singleline);
List<string> contents = new List<string>();
foreach (Match match in matches)
contents.Add(match.Value);
return contents;
}
protected void button1_Click(object sender, EventArgs e)
{
WebClient wc = new WebClient();
string uploadfile = FileUpload1.FileName;
string filepath = Server.MapPath("~/Document/" + uploadfile);
FileUpload1.SaveAs(filepath);
// here you could get virtual path of the html file
//for testing you could change port number as per your localhost
string url = "http://localhost:7331/Html-DB/Document/" + uploadfile;
string fileContent = wc.DownloadString(url);
List<string> tableContents = GetContents(fileContent, table_pattern);
int tableIndex = 0;
List<string> list = new List<string>();
foreach (string tableContent in tableContents)
{
List<string> trContents = GetContents(tableContent, tr_pattern);
int trIndex = 0;
foreach (string trContent in trContents)
{
List<string> tdContents = GetContents(trContent, td_pattern);
string[] columns = new string[tdContents.Count];
foreach (string tdContent in tdContents)
{
string result = Regex.Replace(tdContent, @"<[^>]*>", string.Empty);
list.Add(result);
}
if (list.Count > 0)
{
string sql = "INSERT INTO Customers_new(CustomerID, CompanyName, ContactName, Address, City, PostalCode, Country)";
sql += "VALUES('" + list[0].Trim() + "','" + list[1].Trim() + "','" + list[2].Trim() + "','" + list[3].Trim() + "','" + list[4].Trim() + "','" + list[5].Trim() + "','" + list[6].Trim() + "')";
conn.Open();
SqlCommand cmd = new SqlCommand(sql, conn);
cmd.ExecuteNonQuery();
conn.Close();
}
list.Clear();
trIndex++;
}
tableIndex++;
}
BindGrid();
}
private void BindGrid()
{
DataSet ds = new DataSet();
conn.Open();
string cmdstr = "Select CustomerID,CompanyName,ContactName,Address,City,PostalCode,Country from Customers_new";
SqlCommand cmd = new SqlCommand(cmdstr, conn);
SqlDataAdapter adp = new SqlDataAdapter(cmd);
adp.Fill(ds);
gvCustomers.DataSource = ds;
gvCustomers.DataBind();
}
}
No comments:
Post a Comment