舉例代碼:
// Downloads every page of a paged listing (a.aspx?page=N) and saves each one as a
// static file named "N.htm", rewriting the pager links so they point at the saved files.
// The total number of pages is not known up front: it starts at 1 and is filled in
// from the pager links found in the first downloaded page.
int pageCount = 1;

// Matches pager links such as "a.aspx?page=3"; group 1 captures the page number.
// \d+ (rather than \d*) requires at least one digit, so a bare "a.aspx?page=" is
// neither parsed as a page number nor rewritten to ".htm".
// Built once, outside the loop, because the pattern never changes between pages.
Regex reg = new Regex(@"a\.aspx\?page=(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Saved pages are named 1.htm, 2.htm, ... up to pageCount.htm.
for (int i = 1; i <= pageCount; i++)
{
    // NOTE(review): WebRequest.Create needs an absolute URI; prefix this with the
    // site's scheme and host (e.g. "http://host/a.aspx?page=") before running.
    string url = "/a.aspx?page=" + i;

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

    string html;
    // Dispose the response (and with it the underlying connection) after every page;
    // otherwise each iteration holds a connection open until the GC finalizes it.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream))
    {
        html = reader.ReadToEnd();
    }

    // While processing the first page, read the page count from its pager links.
    if (i == 1)
    {
        MatchCollection mc = reg.Matches(html);
        if (mc.Count > 0)
        {
            // The last pager link on the page is taken as the highest page number.
            // Parse into a temporary: int.TryParse writes 0 to its out argument on
            // failure (e.g. overflow), which would otherwise clobber pageCount.
            int parsed;
            if (int.TryParse(mc[mc.Count - 1].Groups[1].Value, out parsed) && parsed >= 1)
            {
                pageCount = parsed;
            }
        }
    }

    // Rewrite links of the form "a.aspx?page=N" to "N.htm" so the saved pages
    // link to each other instead of to the live site.
    html = reg.Replace(html, "$1.htm");

    // Save the rewritten HTML as a static file named after the page number.
    using (StreamWriter sw = new StreamWriter(HttpContext.Current.Server.MapPath(i + ".htm")))
    {
        sw.Write(html);
    }
}