using System;
using HtmlAgilityPack;
using System.Threading.Tasks;
using System.Net;
/// <summary>
/// Downloads an HTML page, fetches the resources referenced by its
/// <c>link</c>, <c>script</c> and <c>img</c> elements, and rewrites those
/// references to the URLs returned by the local save routine.
/// </summary>
public class ResourceGrabber
{
    /// <summary>
    /// Downloads the page at <paramref name="templateUrl"/> and returns its HTML
    /// with every successfully saved resource reference rewritten.
    /// </summary>
    /// <param name="templateUrl">Absolute URL of the page to process.</param>
    /// <returns>The inner HTML of the (possibly rewritten) document.</returns>
    /// <remarks>
    /// Failures while fetching or saving an individual resource are traced and
    /// skipped; the corresponding reference is left unchanged. Failures while
    /// parsing <paramref name="templateUrl"/> or downloading the page itself
    /// propagate to the caller.
    /// </remarks>
    public string Grab(string templateUrl)
    {
        Uri baseUri = new Uri(templateUrl);

        string strHtml;
        // WebClient is IDisposable; release it as soon as the page is fetched.
        using (WebClient client = new WebClient())
        {
            strHtml = client.DownloadString(templateUrl);
        }

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(strHtml);

        // The three passes run in parallel and each rewrites attributes on a
        // different set of elements of the same document.
        // NOTE(review): HtmlDocument is shared across the tasks without
        // synchronization - confirm this is safe for the HtmlAgilityPack
        // version in use.
        Task cssTask = Task.Factory.StartNew(() => GrabCss(baseUri, doc));
        Task jsTask = Task.Factory.StartNew(() => GrabJs(baseUri, doc));
        Task imgTask = Task.Factory.StartNew(() => GrabImg(baseUri, doc));
        Task.WaitAll(cssTask, jsTask, imgTask);

        return doc.DocumentNode.InnerHtml;
    }

    /// <summary>Saves every <c>img</c> source under <c>Template/img/</c>.</summary>
    private void GrabImg(Uri baseUri, HtmlDocument doc)
    {
        LocalizeResources(baseUri, doc, "//img", "src", @"Template/img/");
    }

    /// <summary>Saves every external <c>script</c> source under <c>Template/js/</c>.</summary>
    private void GrabJs(Uri baseUri, HtmlDocument doc)
    {
        LocalizeResources(baseUri, doc, "//script", "src", @"Template/js/");
    }

    /// <summary>Saves every <c>link</c> target under <c>Template/css/</c>.</summary>
    /// <remarks>
    /// NOTE(review): the query matches all <c>link</c> elements, not only
    /// stylesheets - confirm whether it should be restricted to rel="stylesheet".
    /// </remarks>
    private void GrabCss(Uri baseUri, HtmlDocument doc)
    {
        LocalizeResources(baseUri, doc, "//link", "href", @"Template/css/");
    }

    /// <summary>
    /// Processes every node matched by <paramref name="xpath"/>: downloads the
    /// resource named by <paramref name="attributeName"/> and rewrites the
    /// attribute on success. Never throws; failures are traced.
    /// </summary>
    /// <param name="baseUri">Page URL used to resolve relative references.</param>
    /// <param name="doc">Document whose nodes are inspected and rewritten.</param>
    /// <param name="xpath">XPath query selecting the candidate elements.</param>
    /// <param name="attributeName">Attribute holding the resource URL.</param>
    /// <param name="targetFolder">Path prefix prepended to the resource's file name.</param>
    private void LocalizeResources(Uri baseUri, HtmlDocument doc, string xpath, string attributeName, string targetFolder)
    {
        try
        {
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(xpath);
            if (nodes == null)
            {
                // SelectNodes yields null, not an empty collection, when nothing matches.
                return;
            }

            foreach (HtmlNode node in nodes)
            {
                LocalizeNode(baseUri, node, attributeName, targetFolder);
            }
        }
        catch (Exception ex)
        {
            // Safety net: this runs inside a task, and an escaping exception
            // would surface from Task.WaitAll in Grab.
            System.Diagnostics.Trace.TraceWarning("Resource pass '{0}' failed: {1}", xpath, ex.Message);
        }
    }

    /// <summary>
    /// Downloads the resource referenced by one node and points the node at the
    /// saved copy. A failure affects only this node.
    /// </summary>
    private void LocalizeNode(Uri baseUri, HtmlNode node, string attributeName, string targetFolder)
    {
        try
        {
            HtmlAttribute attribute = node.Attributes[attributeName];
            if (attribute == null || String.IsNullOrEmpty(attribute.Value))
            {
                // Nothing to download (e.g. an inline script); move on to the next node.
                return;
            }

            Uri uri = ResolveUrl(baseUri, attribute.Value);
            if (uri == null)
            {
                return;
            }

            string fileName = System.IO.Path.GetFileName(uri.LocalPath);
            if (String.IsNullOrEmpty(fileName))
            {
                // The URL path ends in '/', so there is no file name to save under.
                return;
            }

            // Always download from the resolved absolute URL; the raw attribute
            // value may be relative to the page.
            string retUrl = SaveAsFile(uri.AbsoluteUri, targetFolder + fileName);
            if (!String.IsNullOrEmpty(retUrl))
            {
                node.SetAttributeValue(attributeName, retUrl);
            }
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceWarning("Skipping <{0}> resource: {1}", node.Name, ex.Message);
        }
    }

    /// <summary>
    /// Downloads <paramref name="fileUrl"/> and hands the bytes to the save routine.
    /// </summary>
    /// <param name="fileUrl">Absolute URL of the resource to download.</param>
    /// <param name="filePath">Relative path the resource should be saved under.</param>
    /// <returns>
    /// The URL returned by the save routine, or <see cref="string.Empty"/> when
    /// the download or the save fails.
    /// </returns>
    private string SaveAsFile(string fileUrl, string filePath)
    {
        try
        {
            using (WebClient client = new WebClient())
            {
                byte[] data = client.DownloadData(fileUrl);
                var stream = new System.IO.MemoryStream(data);
                //Implement function to save file stream to local/server and return respective url
                // NOTE(review): SaveFilesToLocalAndReturnUrl is not defined in this
                // class; it has to be supplied before the class compiles.
                return SaveFilesToLocalAndReturnUrl(stream, filePath);
            }
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceWarning("Could not save '{0}': {1}", fileUrl, ex.Message);
            return string.Empty;
        }
    }

    /// <summary>
    /// Resolves a possibly relative URL against the page URL.
    /// </summary>
    /// <returns>The absolute URI, or <c>null</c> when <paramref name="url"/> cannot be resolved.</returns>
    private Uri ResolveUrl(Uri baseUri, string url)
    {
        Uri resolved;
        // TryCreate reports malformed input through its return value instead of
        // throwing, which is what the callers' null checks expect.
        return Uri.TryCreate(baseUri, url, out resolved) ? resolved : null;
    }
}
using HtmlAgilityPack;
using System.Threading.Tasks;
using System.Net;
/// <summary>
/// Downloads an HTML page, fetches the resources referenced by its
/// <c>link</c>, <c>script</c> and <c>img</c> elements, and rewrites those
/// references to the URLs returned by the local save routine.
/// </summary>
public class ResourceGrabber
{
    /// <summary>
    /// Downloads the page at <paramref name="templateUrl"/> and returns its HTML
    /// with every successfully saved resource reference rewritten.
    /// </summary>
    /// <param name="templateUrl">Absolute URL of the page to process.</param>
    /// <returns>The inner HTML of the (possibly rewritten) document.</returns>
    /// <remarks>
    /// Failures while fetching or saving an individual resource are traced and
    /// skipped; the corresponding reference is left unchanged. Failures while
    /// parsing <paramref name="templateUrl"/> or downloading the page itself
    /// propagate to the caller.
    /// </remarks>
    public string Grab(string templateUrl)
    {
        Uri baseUri = new Uri(templateUrl);

        string strHtml;
        // WebClient is IDisposable; release it as soon as the page is fetched.
        using (WebClient client = new WebClient())
        {
            strHtml = client.DownloadString(templateUrl);
        }

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(strHtml);

        // The three passes run in parallel and each rewrites attributes on a
        // different set of elements of the same document.
        // NOTE(review): HtmlDocument is shared across the tasks without
        // synchronization - confirm this is safe for the HtmlAgilityPack
        // version in use.
        Task cssTask = Task.Factory.StartNew(() => GrabCss(baseUri, doc));
        Task jsTask = Task.Factory.StartNew(() => GrabJs(baseUri, doc));
        Task imgTask = Task.Factory.StartNew(() => GrabImg(baseUri, doc));
        Task.WaitAll(cssTask, jsTask, imgTask);

        return doc.DocumentNode.InnerHtml;
    }

    /// <summary>Saves every <c>img</c> source under <c>Template/img/</c>.</summary>
    private void GrabImg(Uri baseUri, HtmlDocument doc)
    {
        LocalizeResources(baseUri, doc, "//img", "src", @"Template/img/");
    }

    /// <summary>Saves every external <c>script</c> source under <c>Template/js/</c>.</summary>
    private void GrabJs(Uri baseUri, HtmlDocument doc)
    {
        LocalizeResources(baseUri, doc, "//script", "src", @"Template/js/");
    }

    /// <summary>Saves every <c>link</c> target under <c>Template/css/</c>.</summary>
    /// <remarks>
    /// NOTE(review): the query matches all <c>link</c> elements, not only
    /// stylesheets - confirm whether it should be restricted to rel="stylesheet".
    /// </remarks>
    private void GrabCss(Uri baseUri, HtmlDocument doc)
    {
        LocalizeResources(baseUri, doc, "//link", "href", @"Template/css/");
    }

    /// <summary>
    /// Processes every node matched by <paramref name="xpath"/>: downloads the
    /// resource named by <paramref name="attributeName"/> and rewrites the
    /// attribute on success. Never throws; failures are traced.
    /// </summary>
    /// <param name="baseUri">Page URL used to resolve relative references.</param>
    /// <param name="doc">Document whose nodes are inspected and rewritten.</param>
    /// <param name="xpath">XPath query selecting the candidate elements.</param>
    /// <param name="attributeName">Attribute holding the resource URL.</param>
    /// <param name="targetFolder">Path prefix prepended to the resource's file name.</param>
    private void LocalizeResources(Uri baseUri, HtmlDocument doc, string xpath, string attributeName, string targetFolder)
    {
        try
        {
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(xpath);
            if (nodes == null)
            {
                // SelectNodes yields null, not an empty collection, when nothing matches.
                return;
            }

            foreach (HtmlNode node in nodes)
            {
                LocalizeNode(baseUri, node, attributeName, targetFolder);
            }
        }
        catch (Exception ex)
        {
            // Safety net: this runs inside a task, and an escaping exception
            // would surface from Task.WaitAll in Grab.
            System.Diagnostics.Trace.TraceWarning("Resource pass '{0}' failed: {1}", xpath, ex.Message);
        }
    }

    /// <summary>
    /// Downloads the resource referenced by one node and points the node at the
    /// saved copy. A failure affects only this node.
    /// </summary>
    private void LocalizeNode(Uri baseUri, HtmlNode node, string attributeName, string targetFolder)
    {
        try
        {
            HtmlAttribute attribute = node.Attributes[attributeName];
            if (attribute == null || String.IsNullOrEmpty(attribute.Value))
            {
                // Nothing to download (e.g. an inline script); move on to the next node.
                return;
            }

            Uri uri = ResolveUrl(baseUri, attribute.Value);
            if (uri == null)
            {
                return;
            }

            string fileName = System.IO.Path.GetFileName(uri.LocalPath);
            if (String.IsNullOrEmpty(fileName))
            {
                // The URL path ends in '/', so there is no file name to save under.
                return;
            }

            // Always download from the resolved absolute URL; the raw attribute
            // value may be relative to the page.
            string retUrl = SaveAsFile(uri.AbsoluteUri, targetFolder + fileName);
            if (!String.IsNullOrEmpty(retUrl))
            {
                node.SetAttributeValue(attributeName, retUrl);
            }
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceWarning("Skipping <{0}> resource: {1}", node.Name, ex.Message);
        }
    }

    /// <summary>
    /// Downloads <paramref name="fileUrl"/> and hands the bytes to the save routine.
    /// </summary>
    /// <param name="fileUrl">Absolute URL of the resource to download.</param>
    /// <param name="filePath">Relative path the resource should be saved under.</param>
    /// <returns>
    /// The URL returned by the save routine, or <see cref="string.Empty"/> when
    /// the download or the save fails.
    /// </returns>
    private string SaveAsFile(string fileUrl, string filePath)
    {
        try
        {
            using (WebClient client = new WebClient())
            {
                byte[] data = client.DownloadData(fileUrl);
                var stream = new System.IO.MemoryStream(data);
                //Implement function to save file stream to local/server and return respective url
                // NOTE(review): SaveFilesToLocalAndReturnUrl is not defined in this
                // class; it has to be supplied before the class compiles.
                return SaveFilesToLocalAndReturnUrl(stream, filePath);
            }
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceWarning("Could not save '{0}': {1}", fileUrl, ex.Message);
            return string.Empty;
        }
    }

    /// <summary>
    /// Resolves a possibly relative URL against the page URL.
    /// </summary>
    /// <returns>The absolute URI, or <c>null</c> when <paramref name="url"/> cannot be resolved.</returns>
    private Uri ResolveUrl(Uri baseUri, string url)
    {
        Uri resolved;
        // TryCreate reports malformed input through its return value instead of
        // throwing, which is what the callers' null checks expect.
        return Uri.TryCreate(baseUri, url, out resolved) ? resolved : null;
    }
}
No comments:
Post a Comment