Tag: 奥运 爬虫代理 动态IP 爬虫采集 数据分析 数据采集 c# 爬虫程序
东京奥运会正在如火如荼地进行中,中国健儿目前获取的金牌数已经超过里约奥运会的金牌总数,同时大家也不再聚焦于取得奖牌的项目,苏炳添创造的历史、女篮取得不错的成绩、女排的遗憾都值得大家的关心。那么接下来还有哪些精彩赛事呢,通过爬虫程序抓取奥运网站分析之后可以更快地了解实时信息如下:
1、8月6日 (1)女子20公里竞走决赛(2)乒乓球男团决赛(3)空手道女子61公斤级决赛(4)女子标枪决赛(5)男子4x100接力决赛
2、8月7日 (1)女子500米双人划艇决赛(2)跳水男子十米台决赛(3)空手道女子+61公斤级决赛(4)拳击女子69公斤级决赛
还需要获取更多的热点赛事信息,请运行下面的程序试试:

// Downloads the Olympics home page through an authenticated HTTP proxy and
// reads the response body into a string.
// Requires: using System.IO; using System.Net; using System.Text;

// Target page to fetch.
string targetUrl = "https://olympics.com/zh/";

// Proxy server (product site: www.16yun.cn).
string proxyHost = "http://t.16yun.cn";
string proxyPort = "31111";

// Proxy authentication credentials (placeholders; replace with real values).
string proxyUser = "username";
string proxyPass = "password";

// Configure the proxy as "host:port"; the second argument (true) bypasses
// the proxy for local addresses.
WebProxy proxy = new WebProxy(string.Format("{0}:{1}", proxyHost, proxyPort), true);

// Do not send the "Expect: 100-continue" header on requests.
ServicePointManager.Expect100Continue = false;

// A direct cast fails immediately with InvalidCastException if the URL scheme
// does not yield an HttpWebRequest, instead of a later NullReferenceException.
var request = (HttpWebRequest)WebRequest.Create(targetUrl);
request.AllowAutoRedirect = true;
request.KeepAlive = true;
request.Method = "GET";
request.Proxy = proxy;

//request.Proxy.Credentials = CredentialCache.DefaultCredentials;
request.Proxy.Credentials = new System.Net.NetworkCredential(proxyUser, proxyPass);

// Optional: set a Proxy-Tunnel header with a random value.
// NOTE(review): presumably this selects the proxy tunnel / exit IP; confirm
// against the proxy provider's documentation.
// Random ran = new Random();
// int tunnel = ran.Next(1, 10000);
// request.Headers.Add("Proxy-Tunnel", tunnel.ToString());

// Optional request tuning, disabled by default.
//request.Timeout = 20000;
//request.ServicePoint.ConnectionLimit = 512;
//request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36";
//request.Headers.Add("Cache-Control", "max-age=0");
//request.Headers.Add("DNT", "1");

// Optional: send the proxy credentials explicitly as a Basic Proxy-Authorization header.
//String encoded = System.Convert.ToBase64String(System.Text.Encoding.GetEncoding("ISO-8859-1").GetBytes(proxyUser + ":" + proxyPass));
//request.Headers.Add("Proxy-Authorization", "Basic " + encoded);

// Both the response and the reader are disposed when the block exits.
using (var response = (HttpWebResponse)request.GetResponse())
using (var sr = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
{
    // htmlStr holds the full response body decoded as UTF-8; parse or print it here.
    string htmlStr = sr.ReadToEnd();
}