点击回首页
我的浏览记录 | | 帮助?
当前位置:

源码截图

源码目录树

// Current path: InsApp/InfoSearch/CatchHouse.cs
using System;
using System.Collections.Generic;
using System.Collections;
using System.Text;
using InsApp.log4;

//该源码下载自www.51aspx.com(51aspx.com)
//5_1_a_s_p_x.c_o_m

namespace InsApp.InfoSearch
{
    //房产信息采集
    /// <summary>
    /// Collects real-estate listings from one web site: walks the site's list pages,
    /// follows every listing link found on them, extracts the listing fields with the
    /// configured patterns and inserts one HouseInfo row per accepted listing.
    /// </summary>
    public class CatchHouse:Getfile
    {
        /// <summary>Number of list pages requested per run (page indexes 0 through 9).</summary>
        private const int ListPageCount = 10;

        /// <summary>Listing type value that marks a for-sale listing (area is then mandatory).</summary>
        private const string SaleType = "sale";

        // Handed to LogUtil so that log entries identify this class.
        private readonly Type type = typeof(CatchHouse);

        // 2007-3-16: with an Access database the path could not be resolved once worker
        // threads were running, so it is passed into the class before the threads start.
        private string _MdbPath;

        /// <summary>
        /// Progress text of the collection run, intended for display on the server side.
        /// NOTE(review): this is a static that is appended to without synchronisation and is
        /// overwritten by the final summary line; confirm that is acceptable when several
        /// collectors run at the same time.
        /// </summary>
        public static string Showit;

        // Collection settings supplied through the constructor.
        // Txt_Url and Txt_DemoUrl are stored but not read anywhere in this class.
        private string Txt_Show, Txt_Url, Txt_EveryPage, Txt_Import, Txt_GetUrl, Txt_DemoUrl, Txt_Address, Txt_ZjPrice, Txt_PjPrice,
            Txt_mianji, Txt_Hx, Txt_City, Txt_BuildYear, Txt_Quan, Txt_Cx, Txt_LC, Txt_Alc, Txt_Shinei, Txt_memo,
            Txt_LrPhone, Txt_Lr, Txt_CityID, Txt_ProID, Txt_Type;

        #region MdbPath
        /// <summary>Database path that is passed to GetSqlCmd_bool together with each INSERT.</summary>
        public string MdbPath
        {
            get { return _MdbPath; }
            set { _MdbPath = value; }
        }
        #endregion

        #region Constructor (25 parameters) 2007-3-16
        /// <summary>
        /// Creates a collector from the 25 settings of one collection group.
        /// </summary>
        /// <param name="Web_Show">Site name shown in the progress text.</param>
        /// <param name="Web_Url">Stored only; not used by this class.</param>
        /// <param name="Web_EveryPage">Passed to Get_PageEncode and Format_Url (presumably the site address; confirm against Getfile).</param>
        /// <param name="Web_Import">List-page URL prefix; the page index is appended to it.</param>
        /// <param name="Web_GetUrl">Pattern used to pull the listing links out of a list page.</param>
        /// <param name="Web_DemoUrl">Stored only; not used by this class.</param>
        /// <param name="Web_Address">Pattern for the address (mandatory field).</param>
        /// <param name="Web_ZjPrice">Pattern for the total price.</param>
        /// <param name="Web_PjPrice">Pattern for the average price.</param>
        /// <param name="Web_mianji">Pattern for the area (mandatory for sale listings).</param>
        /// <param name="Web_Hx">Pattern for the layout (mandatory field).</param>
        /// <param name="Web_City">Pattern for the district; stored in H_Area_ID.</param>
        /// <param name="Web_BuildYear">Pattern for the build year.</param>
        /// <param name="Web_Quan">Pattern for the property rights.</param>
        /// <param name="Web_Cx">Pattern for the orientation.</param>
        /// <param name="Web_LC">Pattern for the floor.</param>
        /// <param name="Web_Alc">Pattern for the total number of floors.</param>
        /// <param name="Web_Shinei">Pattern for the interior description.</param>
        /// <param name="Web_memo">Pattern for the remarks.</param>
        /// <param name="Web_LrPhone">Pattern for the contact phone (mandatory field).</param>
        /// <param name="Web_Lr">Pattern for the contact person.</param>
        /// <param name="Web_CityID">City id written to H_CityID.</param>
        /// <param name="Web_ProID">Province id written to H_ProID.</param>
        /// <param name="Web_Type">Listing type written to H_Type; "sale" enables the area/price rules.</param>
        /// <param name="SysInfo">Database path; assigned to <see cref="MdbPath"/>.</param>
        public CatchHouse(string Web_Show, string Web_Url, string Web_EveryPage, string Web_Import,
        string Web_GetUrl, string Web_DemoUrl, string Web_Address, string Web_ZjPrice, string Web_PjPrice,
        string Web_mianji, string Web_Hx, string Web_City, string Web_BuildYear, string Web_Quan, string Web_Cx,
        string Web_LC, string Web_Alc, string Web_Shinei, string Web_memo, string Web_LrPhone,
        string Web_Lr, string Web_CityID, string Web_ProID,string Web_Type ,string SysInfo)
        {
            Txt_Show = Web_Show;
            Txt_Url = Web_Url;
            Txt_EveryPage = Web_EveryPage;
            Txt_Import = Web_Import;
            Txt_GetUrl = Web_GetUrl;
            Txt_DemoUrl = Web_DemoUrl;
            Txt_Address = Web_Address;
            Txt_ZjPrice = Web_ZjPrice;
            Txt_PjPrice = Web_PjPrice;
            Txt_mianji = Web_mianji;
            Txt_Hx = Web_Hx;
            Txt_City = Web_City;
            Txt_BuildYear = Web_BuildYear;
            Txt_Quan = Web_Quan;
            Txt_Cx = Web_Cx;
            Txt_LC = Web_LC;
            Txt_Alc = Web_Alc;
            Txt_Shinei = Web_Shinei;
            Txt_memo = Web_memo;
            Txt_LrPhone = Web_LrPhone;
            Txt_Lr = Web_Lr;
            Txt_CityID = Web_CityID;
            Txt_ProID = Web_ProID;
            Txt_Type = Web_Type;
            MdbPath = SysInfo;
        }
        #endregion

        #region CatchHouse_Thread()  collection entry point, meant to run on a worker thread (test version 2007-3-16)
        /// <summary>
        /// Runs one collection pass:
        /// 1. read each list page (Txt_Import + page index),
        /// 2. extract the listing links with Txt_GetUrl,
        /// 3. read every listing page and extract its fields,
        /// 4. insert accepted listings into HouseInfo and report progress through <see cref="Showit"/>.
        /// A failure on a single listing is logged and does not stop the run.
        /// </summary>
        public void CatchHouse_Thread()
        {
            int CAll = 0;                                       // number of listings stored so far
            this.Web_Method = "get";                            // page request mode
            this.IsReadContent = "yes";                         // read the page content
            InsApp.InfoSearch.Getword MyWord = new Getword();

            Get_WebCode = Get_PageEncode(Txt_EveryPage);        // page encoding used for subsequent reads

            for (int page = 0; page < ListPageCount; page++)
            {
                Get_Url = Txt_Import + page.ToString();

                // The list page contains the URLs of the listing detail pages.
                string listHtml = this.ReadPage();

                string[] links = MyWord.Get_url_Array(Txt_GetUrl, listHtml);
                if (links == null)
                {
                    continue;
                }

                Format_Url(ref links, Txt_EveryPage);
                if (links == null)
                {
                    // Format_Url receives the array by reference and may replace it.
                    continue;
                }

                for (int i = 0; i < links.Length; i++)
                {
                    try
                    {
                        CollectListing(MyWord, links[i], ref CAll);
                    }
                    catch (Exception ex)
                    {
                        // ToString() keeps the exception type and stack trace in the log.
                        LogUtil.FATAL(type, ex.ToString());
                    }
                }
            }

            // NOTE(review): this replaces (does not append to) the progress text built above.
            Showit = "共计" + CAll + "条数据,采集结束。" + DateTime.Now;
        }

        /// <summary>
        /// Reads one listing page, extracts its fields and inserts it into HouseInfo.
        /// Listings without address, layout or phone (and sale listings without area or
        /// without any usable price) are skipped silently.
        /// </summary>
        /// <param name="parser">Helper used for URL encoding and pattern extraction.</param>
        /// <param name="link">Listing URL taken from the list page.</param>
        /// <param name="collected">Running count of stored listings; incremented on success.</param>
        private void CollectListing(Getword parser, string link, ref int collected)
        {
            this.Get_Url = parser.Check_ChineseCode(link);      // encode Chinese characters in the URL
            string html = this.ReadPage();

            string address = ExtractField(parser, Txt_Address, html);
            if (string.IsNullOrEmpty(address))
            {
                return;
            }

            bool isSale = Txt_Type == SaleType;

            // Sale listings must carry an area; rental listings may omit it.
            string area = ExtractTextField(parser, Txt_mianji, html);
            if (isSale && string.IsNullOrEmpty(area))
            {
                return;
            }

            string layout = ExtractTextField(parser, Txt_Hx, html);
            if (string.IsNullOrEmpty(layout))
            {
                return;
            }

            string phone = ExtractTextField(parser, Txt_LrPhone, html);
            if (string.IsNullOrEmpty(phone))
            {
                return;
            }

            string totalPrice = ExtractField(parser, Txt_ZjPrice, html);
            if (string.IsNullOrEmpty(totalPrice))
            {
                totalPrice = "0";
            }

            string averagePrice = ExtractField(parser, Txt_PjPrice, html);
            if (string.IsNullOrEmpty(averagePrice))
            {
                averagePrice = "0";
            }

            if (isSale)
            {
                try
                {
                    // Derive whichever of total/average price is missing from the area.
                    GetTrueNumber(ref area, ref totalPrice, ref averagePrice);
                }
                catch (Exception)
                {
                    // Unparseable numbers or no price at all: the listing is deliberately skipped.
                    return;
                }
            }

            string orientation = ExtractField(parser, Txt_Cx, html);
            string district = ExtractField(parser, Txt_City, html);
            string buildYear = ExtractField(parser, Txt_BuildYear, html);
            string rights = ExtractField(parser, Txt_Quan, html);
            string floor = ExtractField(parser, Txt_LC, html);
            string totalFloors = ExtractField(parser, Txt_Alc, html);
            string interior = ExtractField(parser, Txt_Shinei, html);
            string memo = ExtractField(parser, Txt_memo, html);
            string contact = ExtractField(parser, Txt_Lr, html);

            // "HH" (24-hour clock) keeps ids created in the morning and the afternoon distinct;
            // "hh" would repeat the same prefix every twelve hours.
            string autoId = DateTime.Now.ToString("yyyyMMddHHmmss") + InsApp.word.CreateCode.Rand_Number_AZ_Code(12);

            // Values in the same order as the column list below.
            string[] values = new string[]
            {
                SqlQuote(Get_Url),
                SqlQuote(autoId),
                SqlQuote("King Catch"),
                SqlQuote(Txt_Type),
                SqlQuote(address),
                SqlQuote(averagePrice),
                SqlQuote(totalPrice),
                SqlQuote(layout),
                SqlQuote(area),
                SqlQuote(Txt_CityID),
                SqlQuote(Txt_ProID),
                SqlQuote(district),
                SqlQuote(buildYear),
                SqlQuote(rights),
                SqlQuote(orientation),
                SqlQuote(floor),
                SqlQuote(totalFloors),
                SqlQuote(interior),
                SqlQuote(memo),
                SqlQuote(contact),
                SqlQuote(phone),
                SqlQuote(DateTime.Now.ToString())
            };

            string insertSql = " Insert into  HouseInfo (" +
                "H_Url,H_AutoId,H_UserID,H_Type,H_Address,H_PjPrice,H_ZjPrice,H_Huxing, H_mianji,H_CityID,H_ProID,H_Area_ID, " +
                "H_BuildYear,H_Quan,H_Cx,H_Lc,H_Alc,H_Shinei,H_memo,H_Lr, H_LrPhone,H_AutoTime)" +
                " values (" + string.Join(",", values) + ")";

            if (GetSqlCmd_bool(insertSql, MdbPath) == true)
            {
                collected++;
                Showit += collected + "、网站:" + Txt_Show + "    信息:<a href='" + this.Get_Url + "' target='_blank'>" + address + " </a>      成功<br/>";
            }
        }

        /// <summary>
        /// Applies <paramref name="pattern"/> to the page with Getword.CheckReg and passes the
        /// match through Filtrate (presumably a clean-up filter defined in the base class).
        /// </summary>
        private string ExtractField(Getword parser, string pattern, string html)
        {
            return Filtrate(parser.CheckReg(pattern, html));
        }

        /// <summary>
        /// Same as <see cref="ExtractField"/>, but the match additionally goes through
        /// Getword.NoHTML before Filtrate.
        /// </summary>
        private string ExtractTextField(Getword parser, string pattern, string html)
        {
            return Filtrate(parser.NoHTML(parser.CheckReg(pattern, html)));
        }

        /// <summary>
        /// Wraps a value in single quotes for use as a SQL string literal, doubling embedded
        /// single quotes so scraped text cannot terminate the literal. A null value becomes ''.
        /// NOTE(review): GetSqlCmd_bool only accepts a SQL string, so real parameterization is
        /// not possible from here. Confirm Filtrate does not already escape quotes; if it does,
        /// this would double-escape.
        /// </summary>
        private static string SqlQuote(string value)
        {
            if (value == null)
            {
                return "''";
            }
            return "'" + value.Replace("'", "''") + "'";
        }
        #endregion

        #region GetTrueNumber  derives a missing total or average price from the area
        /// <summary>
        /// When the area is positive and exactly one of total price / average price is known,
        /// computes the other one. The average is total * 10000 / area, so the total price is
        /// assumed to be expressed in units of 10,000 and the inverse divides by 10000.
        /// Nothing is changed when the area is not positive or when both prices are present.
        /// </summary>
        /// <param name="Mianji">Area as text; parsed with Convert.ToDouble.</param>
        /// <param name="ZjPrice">Total price as text; replaced when it is 0 and the average is known.</param>
        /// <param name="PjPrice">Average price as text; replaced when it is 0 and the total is known.</param>
        /// <exception cref="FormatException">A value is not a valid number.</exception>
        /// <exception cref="InvalidOperationException">The area is positive but both prices are 0.</exception>
        void GetTrueNumber(ref string Mianji, ref string ZjPrice, ref string PjPrice)
        {
            double area = Convert.ToDouble(Mianji);
            double total = Convert.ToDouble(ZjPrice);
            double average = Convert.ToDouble(PjPrice);

            // Written as a negated "> 0" so that NaN is treated like a missing area.
            if (!(area > 0))
            {
                return;
            }

            if (total > 0 && average == 0)
            {
                PjPrice = Convert.ToString(Math.Round(total * 10000 / area));
            }
            else if (total == 0 && average > 0)
            {
                // Inverse of the formula above; the original omitted the division by 10000,
                // which produced totals 10,000 times larger than scraped ones.
                ZjPrice = Convert.ToString(Math.Round(average * area / 10000, 2));
            }
            else if (total == 0 && average == 0)
            {
                throw new InvalidOperationException("Neither total price nor average price is available for the listing.");
            }
            // Both prices present (or negative input): leave the values untouched.
        }
        #endregion
    }
    }

关于我们 | 顾问团队 | 发展历程 | 联系我们 | 源码上传
联系电话(Tel):4008-010-151(免长途) 企业QQ:4000410510
地址:北京市海淀区中关村鼎好大厦A座二层 邮编:100080
Room A-801,Dinghao Building,Zhongguancun,Beijing,China,100080
51Aspx.com 版权所有 CopyRight © 2006-2015. 京ICP备09089570号 | 京公网安备11010702000869号
在线客服
分享该页面
关闭侧边栏