/// <summary>
/// Downloads an HTML page and converts one of its tables, looked up by element id,
/// into a <see cref="DataTable"/> whose columns come from the table's header row.
/// </summary>
public class HtmlToDatatable
{
    /// <summary>
    /// Downloads the page at <paramref name="url"/> and converts the element whose id is
    /// <paramref name="tablename"/> into a <see cref="DataTable"/>.
    /// </summary>
    /// <param name="url">Address of the HTML page to download.</param>
    /// <param name="tablename">Value passed to <c>GetElementById</c> to locate the table.</param>
    /// <returns>A table with one column per header cell and one row per non-header <c>TR</c>.</returns>
    /// <exception cref="ArgumentException">An argument is null, empty or whitespace.</exception>
    /// <exception cref="InvalidOperationException">
    /// The download returned no text, no document was produced, or the element was not found.
    /// </exception>
    public static DataTable GetHtmlDataTable(string url, string tablename)
    {
        // Fail before any network access when the arguments cannot possibly work.
        if (string.IsNullOrWhiteSpace(url))
        {
            throw new ArgumentException("Debe indicarse una URL.", "url");
        }

        if (string.IsNullOrWhiteSpace(tablename))
        {
            throw new ArgumentException("Debe indicarse el identificador de la tabla.", "tablename");
        }

        return GetDataTable(url, tablename);
    }

    /// <summary>Downloads the resource at <paramref name="url"/> as a string.</summary>
    private static string ReadHtml(string url)
    {
        // WebClient is IDisposable; release it as soon as the download finishes.
        using (System.Net.WebClient client = new System.Net.WebClient())
        {
            return client.DownloadString(url);
        }
    }

    /// <summary>
    /// Loads <paramref name="htmlText"/> into <paramref name="browser"/> and returns its document
    /// once the control reports <see cref="WebBrowserReadyState.Complete"/>.
    /// </summary>
    /// <remarks>
    /// The caller owns <paramref name="browser"/> and must keep it alive while using the
    /// returned document.
    /// NOTE(review): this loop pumps messages until loading completes and has no timeout;
    /// WebBrowser is documented as requiring an STA thread - confirm against callers.
    /// </remarks>
    private static HtmlDocument LoadHtmlDocument(WebBrowser browser, string htmlText)
    {
        browser.DocumentText = htmlText;
        do
        {
            Application.DoEvents();
        } while (browser.ReadyState != WebBrowserReadyState.Complete);

        return browser.Document;
    }

    /// <summary>Returns the element of <paramref name="doc"/> whose id is <paramref name="tablename"/>.</summary>
    private static HtmlElement GetHtmlTable(HtmlDocument doc, string tablename)
    {
        return doc.GetElementById(tablename);
    }

    /// <summary>
    /// Downloads the page, loads it into a temporary <see cref="WebBrowser"/>, and builds the
    /// <see cref="DataTable"/> while that browser is still alive.
    /// </summary>
    private static DataTable GetDataTable(string url, string tablename)
    {
        string htmlText = ReadHtml(url);
        if (string.IsNullOrWhiteSpace(htmlText))
        {
            throw new InvalidOperationException("No se ha leido texto html.");
        }

        // All DOM reads happen inside this using block so the document is never
        // accessed after the control that owns it has been disposed.
        using (WebBrowser browser = new WebBrowser())
        {
            HtmlDocument doc = LoadHtmlDocument(browser, htmlText);
            if (doc == null)
            {
                throw new InvalidOperationException("No se ha obtenido el documento HTML.");
            }

            HtmlElement table = GetHtmlTable(doc, tablename);
            if (table == null)
            {
                throw new InvalidOperationException("No se ha obtenido la tabla indicada.");
            }

            DataTable dt = new DataTable();
            bool firstRowIsHeader = CreateColumns(table, dt);
            CreateRows(table, dt, firstRowIsHeader);
            return dt;
        }
    }

    /// <summary>
    /// Adds one <see cref="DataRow"/> per <c>TR</c> of <paramref name="table"/>, skipping header rows.
    /// </summary>
    /// <param name="table">Table element whose rows are read.</param>
    /// <param name="dt">Destination table; its columns must already exist.</param>
    /// <param name="firstRowIsHeader">
    /// True when the first <c>TR</c> was used to build the columns and must not be repeated as data.
    /// </param>
    private static void CreateRows(HtmlElement table, DataTable dt, bool firstRowIsHeader)
    {
        List<HtmlElement> rows = table.GetByTagName("TR").ToList();
        for (int rowIndex = 0; rowIndex < rows.Count; rowIndex++)
        {
            HtmlElement fila = rows[rowIndex];
            if (firstRowIsHeader && rowIndex == 0)
            {
                continue;
            }

            if (IsInsideTableHead(fila))
            {
                continue;
            }

            List<HtmlElement> cells = GetCells(fila);
            DataRow row = dt.NewRow();

            // Rows may have fewer cells than there are columns (or more); copy only the
            // overlap. Columns without a matching cell keep the new row's default value.
            int cellCount = Math.Min(dt.Columns.Count, cells.Count);
            for (int counter = 0; counter < cellCount; counter++)
            {
                row[counter] = cells[counter].InnerText;
            }

            dt.Rows.Add(row);
        }
    }

    /// <summary>
    /// Creates the columns of <paramref name="dt"/> from the header row of <paramref name="table"/>:
    /// the first <c>TR</c> inside a <c>THEAD</c> when there is one, otherwise the table's first <c>TR</c>.
    /// </summary>
    /// <returns>
    /// True when the table's first <c>TR</c> (not a <c>THEAD</c> row) supplied the headers,
    /// so the caller can skip it when reading data rows.
    /// </returns>
    private static bool CreateColumns(HtmlElement table, DataTable dt)
    {
        HtmlElement header = table.GetByTagName("THEAD").FirstOrDefault();
        HtmlElement headerRow = null;
        if (header != null)
        {
            headerRow = header.GetByTagName("TR").FirstOrDefault();
        }

        bool firstRowIsHeader = headerRow == null;
        if (firstRowIsHeader)
        {
            headerRow = table.GetByTagName("TR").FirstOrDefault();
        }

        // A table without any row yields a DataTable with no columns instead of a crash.
        if (headerRow != null)
        {
            CreateHeaders(dt, headerRow);
        }

        return firstRowIsHeader;
    }

    /// <summary>
    /// Adds one column to <paramref name="dt"/> per direct <c>TH</c>/<c>TD</c> cell of
    /// <paramref name="headerElement"/>, named after the cell's inner text.
    /// </summary>
    private static void CreateHeaders(DataTable dt, HtmlElement headerElement)
    {
        // Only the row's own cells become columns. Enumerating every descendant would
        // also create columns for elements nested inside the cells.
        foreach (HtmlElement cell in GetCells(headerElement))
        {
            string name = cell.InnerText;
            if (string.IsNullOrEmpty(name))
            {
                // Let the DataTable assign its default column name.
                dt.Columns.Add();
                continue;
            }

            // Repeated header texts would make Columns.Add throw; append a numeric
            // suffix until the name is free.
            string candidate = name;
            int suffix = 2;
            while (dt.Columns.Contains(candidate))
            {
                candidate = name + "_" + suffix;
                suffix++;
            }

            dt.Columns.Add(candidate);
        }
    }

    /// <summary>Returns the direct child elements of <paramref name="row"/> that are <c>TD</c> or <c>TH</c>.</summary>
    private static List<HtmlElement> GetCells(HtmlElement row)
    {
        List<HtmlElement> cells = new List<HtmlElement>();
        foreach (HtmlElement child in row.Children)
        {
            if (HasTagName(child, "TD") || HasTagName(child, "TH"))
            {
                cells.Add(child);
            }
        }

        return cells;
    }

    /// <summary>Tells whether the parent of <paramref name="row"/> is a <c>THEAD</c> element.</summary>
    private static bool IsInsideTableHead(HtmlElement row)
    {
        HtmlElement parent = row.Parent;
        return parent != null && HasTagName(parent, "THEAD");
    }

    /// <summary>Compares the tag name of <paramref name="element"/> with <paramref name="tagName"/>, ignoring case.</summary>
    private static bool HasTagName(HtmlElement element, string tagName)
    {
        return string.Equals(element.TagName, tagName, StringComparison.OrdinalIgnoreCase);
    }
}