点击回首页
我的浏览记录 | | 帮助?
当前位置:
首页>> 行业软件>> 获取网页图片>> 源文件浏览
[免费版 Free] WinForm,下载次数:35 次 | 关键字: 获取网页图片

源码截图

源码目录树

;
当前路径:WindowsFormsApplication1/Form1.cs
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;

namespace WindowsFormsApplication1
{
    /// <summary>
    /// Main window of the image scraper. Downloads the page whose address is typed into
    /// <c>txturl</c>, shows its title in <c>txttitle</c>, logs progress to <c>txtimg</c>
    /// and saves every image referenced by an <c>&lt;img&gt;</c> tag to disk.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Folder that receives the files written by <see cref="Get_img"/>. It is wiped at the
        /// start of every run. The field hides <c>System.IO.Path</c> inside this class, which
        /// is why that type is always written fully qualified below.
        /// </summary>
        private static readonly string Path = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "img");

        /// <summary>Host whose images are additionally saved, under their original file name, into <see cref="Path"/>.</summary>
        private const string TargetHost = "www.xxx.com";

        /// <summary>Folder that receives the numbered files written by <see cref="UrlToImage"/>.</summary>
        private const string ImageSaveDirectory = "D:\\HtmlImage";

        /// <summary>Timeout, in milliseconds, used when requesting the page itself.</summary>
        private const int PageTimeoutMs = 6000;

        /// <summary>Matches an <c>img</c> tag and captures the value of its <c>src</c> attribute as <c>imgUrl</c>.</summary>
        private static readonly Regex ImgTagRegex = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

        /// <summary>Captures the text between the first <c>title</c> tag pair, whatever its case and even when it spans lines.</summary>
        private static readonly Regex TitleRegex = new Regex(@"<title\b[^>]*>(?<title>.*?)</title>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Starts a scrape of the address in <c>txturl</c> on a background thread so the
        /// window stays responsive while pages and images are downloaded.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string url = txturl.Text.Trim();
            if (string.IsNullOrEmpty(url))
            {
                MessageBox.Show("请输入URl");
                return;
            }
            txtimg.AppendText("开始抓取中:\r\n");
            Thread th = new Thread(() => ShuaQu(url)) { IsBackground = true };
            th.Start();
        }

        /// <summary>
        /// Background-thread entry point of a scrape.
        /// </summary>
        /// <param name="url">Absolute address of the page to scrape.</param>
        private void ShuaQu(string url)
        {
            try
            {
                Crawl(url);
            }
            catch (Exception ex)
            {
                // This method is the top of a background thread: anything that escapes from
                // here would take the whole process down, so the failure is reported instead.
                AppendLog("抓取失败: " + ex.Message + "\r\n");
            }
        }

        /// <summary>
        /// Fetches the page, shows its title, downloads every image it references and only
        /// then reports completion.
        /// </summary>
        /// <param name="url">Absolute address of the page to scrape.</param>
        private void Crawl(string url)
        {
            // Every run starts from an empty output folder.
            if (Directory.Exists(Path))
            {
                Directory.Delete(Path, true);
            }
            Directory.CreateDirectory(Path);

            string html = GetHttpResponse(url, PageTimeoutMs);

            // The request above already parsed this address, so it is a valid absolute URI.
            Uri pageUri = new Uri(url);

            string[] rawUrls = GetHtmlImageUrlList(html);
            AppendLog("已经获取到数据!" + rawUrls.Length + "\r\n");

            ShowTitle(html);

            // Resolve each src once. Entries that cannot be downloaded stay null so that the
            // numbered file names below still follow the position of the tag in the page.
            Uri[] imageUris = new Uri[rawUrls.Length];
            for (int i = 0; i < rawUrls.Length; i++)
            {
                imageUris[i] = ResolveImageUri(pageUri, rawUrls[i]);
            }

            List<Thread> workers = new List<Thread>();
            foreach (Uri candidate in imageUris)
            {
                if (candidate == null || !string.Equals(candidate.Host, TargetHost, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Declared inside the loop body so every thread captures its own copy; before
                // C# 5 a captured foreach variable is shared by all iterations.
                string hostAddress = candidate.AbsoluteUri;
                Thread worker = new Thread(() => RunDownload(hostAddress, () => Get_img(hostAddress))) { IsBackground = true };
                worker.Start();
                workers.Add(worker);
                AppendLog(hostAddress + "\r\n");
            }

            for (int i = 0; i < imageUris.Length; i++)
            {
                if (imageUris[i] == null)
                {
                    AppendLog("跳过无法下载的图片地址: " + rawUrls[i] + "\r\n");
                    continue;
                }

                string address = imageUris[i].AbsoluteUri;
                int index = i;
                RunDownload(address, () => UrlToImage(address, index));
            }

            // Wait for the per-host downloads so the message below is true when it appears.
            foreach (Thread pending in workers)
            {
                pending.Join();
            }
            AppendLog("全部抓取完成!\r\n");
        }

        /// <summary>
        /// Extracts the page title from <paramref name="html"/>, removes double quotes from it
        /// and shows it in <c>txttitle</c>. The box is cleared when the page has no title.
        /// </summary>
        /// <param name="html">Markup of the downloaded page.</param>
        private void ShowTitle(string html)
        {
            Match match = TitleRegex.Match(html);
            string title = match.Success ? match.Groups["title"].Value.Trim() : string.Empty;
            title = title.Replace("\"", string.Empty);
            RunOnUi(txttitle, () => txttitle.Text = title);
        }

        /// <summary>
        /// Turns the raw value of a <c>src</c> attribute into an absolute http/https address,
        /// resolving relative and protocol-relative values against the page address.
        /// </summary>
        /// <param name="pageUri">Absolute address of the page the value was found in.</param>
        /// <param name="rawUrl">Value of the <c>src</c> attribute, exactly as found in the markup.</param>
        /// <returns>The absolute address, or <c>null</c> when the value is empty, cannot be parsed or is not http/https.</returns>
        private static Uri ResolveImageUri(Uri pageUri, string rawUrl)
        {
            if (string.IsNullOrEmpty(rawUrl))
            {
                return null;
            }

            Uri resolved;
            if (!Uri.TryCreate(pageUri, rawUrl, out resolved))
            {
                return null;
            }

            // Inline "data:" images, "javascript:" links and the like cannot be downloaded.
            bool isWeb = resolved.Scheme == Uri.UriSchemeHttp || resolved.Scheme == Uri.UriSchemeHttps;
            return isWeb ? resolved : null;
        }

        /// <summary>
        /// Runs one image download and logs a failure instead of letting it propagate.
        /// </summary>
        /// <param name="address">Address being downloaded; only used in the failure message.</param>
        /// <param name="download">The download to perform.</param>
        private void RunDownload(string address, Action download)
        {
            try
            {
                download();
            }
            catch (Exception ex)
            {
                // One unreachable or unwritable image must not abort the remaining downloads,
                // and on a worker thread an escaping exception would end the process.
                AppendLog("下载失败: " + address + " (" + ex.Message + ")\r\n");
            }
        }

        /// <summary>Appends <paramref name="text"/> to the log box from any thread.</summary>
        /// <param name="text">Text to append, including its line break.</param>
        private void AppendLog(string text)
        {
            RunOnUi(txtimg, () => txtimg.AppendText(text));
        }

        /// <summary>
        /// Runs <paramref name="action"/> on the thread that owns <paramref name="control"/>
        /// and waits for it to finish. Does nothing once the control is gone.
        /// </summary>
        /// <param name="control">Control whose owning thread must run the action.</param>
        /// <param name="action">Work that touches the control.</param>
        private static void RunOnUi(Control control, Action action)
        {
            if (control.IsDisposed || !control.IsHandleCreated)
            {
                return;
            }

            try
            {
                control.Invoke(action);
            }
            catch (InvalidOperationException)
            {
                // The window was closed between the check above and the call (this also covers
                // ObjectDisposedException); there is nothing left to update.
            }
        }

        /// <summary>
        /// Downloads one image into <see cref="Path"/>, keeping the file name found in the
        /// address (the query string is not part of the name).
        /// </summary>
        /// <param name="imgpath">Absolute address of the image.</param>
        public void Get_img(string imgpath)
        {
            string withoutQuery = imgpath.Split('?')[0];
            string name = System.IO.Path.GetFileName(withoutQuery);
            using (WebClient client = new WebClient())
            {
                client.DownloadFile(imgpath, System.IO.Path.Combine(Path, name));
            }
        }

        /// <summary>
        /// Requests <paramref name="url"/> with HTTP GET and returns the response body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Absolute http/https address to request.</param>
        /// <param name="Timeout">Request timeout in milliseconds.</param>
        /// <returns>The response body as text.</returns>
        public static string GetHttpResponse(string url, int Timeout)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.ContentType = "text/html;charset=UTF-8";
            request.UserAgent = null;
            request.Timeout = Timeout;

            // Disposing the response returns its connection to the pool; the using blocks also
            // release the stream and reader when reading fails part-way.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding("utf-8")))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Collects the <c>src</c> value of every <c>img</c> tag in a piece of HTML.
        /// </summary>
        /// <param name="sHtmlText">HTML markup to search.</param>
        /// <returns>The <c>src</c> values in document order; empty when there is no markup or no image.</returns>
        private string[] GetHtmlImageUrlList(string sHtmlText)
        {
            if (string.IsNullOrEmpty(sHtmlText))
            {
                return new string[0];
            }

            MatchCollection matches = ImgTagRegex.Matches(sHtmlText);
            string[] sUrlList = new string[matches.Count];
            for (int i = 0; i < sUrlList.Length; i++)
            {
                sUrlList[i] = matches[i].Groups["imgUrl"].Value;
            }
            return sUrlList;
        }

        #region Download image to file
        /// <summary>
        /// Downloads one image and stores it as <c>&lt;num&gt;.jpg</c> in
        /// <see cref="ImageSaveDirectory"/>, replacing any file of that name.
        /// </summary>
        /// <param name="url">Absolute address of the image.</param>
        /// <param name="num">Number used as the file name.</param>
        public void UrlToImage(string url, int num)
        {
            // CreateDirectory does nothing when the folder already exists.
            Directory.CreateDirectory(ImageSaveDirectory);
            string target = System.IO.Path.Combine(ImageSaveDirectory, num.ToString() + ".jpg");

            using (WebClient client = new WebClient())
            {
                // Download to memory first so a failed transfer leaves no partial file behind.
                byte[] data = client.DownloadData(url);
                File.WriteAllBytes(target, data);
            }
        }
        #endregion
    }
}
关于我们 | 顾问团队 | 发展历程 | 联系我们 | 源码上传
联系电话(Tel):4008-010-151(免长途)
地址:北京市海淀区大恒科技大厦五层 邮编:100080
Floor 5th,Daheng Building,Zhongguancun,Beijing,China,100080
51Aspx.com 版权所有 CopyRight © 2006-2022. 京ICP备09089570号 | 京公网安备11010702000869号