1. <ul id="0c1fb"></ul>

      <noscript id="0c1fb"><video id="0c1fb"></video></noscript>
      <noscript id="0c1fb"><listing id="0c1fb"><thead id="0c1fb"></thead></listing></noscript>

      99热在线精品一区二区三区_国产伦精品一区二区三区女破破_亚洲一区二区三区无码_精品国产欧美日韩另类一区

      RELATED CONSULTING
      相關咨詢
      選擇下列產品馬上在線溝通
      服務時間:8:30-17:00
      你可能遇到了下面的問題
      關閉右側工具欄

      新聞中心

      這里有您想知道的互聯網營銷解決方案
      php多線程爬蟲類
      1. 代碼:
        /**
         * Concurrent HTTP crawler built on PHP's curl_multi API.
         *
         * One curl easy handle is created per URL passed to the constructor. The
         * fluent configuration methods apply the same setting to every handle, and
         * run() performs all transfers in parallel.
         *
         * Public properties:
         *  1. calltrigger  callback that (re)starts crawling; called as ($urls, $depth)
         *  2. calltodo     business callback called with each fetched page body,
         *                  e.g. to post-process the content and store it in a database
         *  3. timeout      connect timeout in seconds, default 5
         *  4. depth        maximum number of HTTP redirects to follow, default 3
         *  5. name         form field name used by upload(), default "file"
         *  6. cookie       local cookie file template; the handle key is inserted
         *                  before the extension (cookie.txt -> cookie_0.txt)
         *
         * Public methods:
         *  1.  ssl       switch to https                 bool: true = https
         *  2.  auth      enable authentication           user, pass
         *  3.  login     simulate a login and store the received cookies
         *  4.  cookie    send the cookies stored by login()
         *  5.  header    add request headers             data: array of header lines
         *  6.  proxy     route requests through a proxy  url, port
         *  7.  agent     set the User-Agent              browse: UA string
         *  8.  get       prepare a GET request           data: query parameters
         *  9.  post      prepare a POST request          data: post fields
         *  10. json      prepare a JSON POST request     data: value to encode
         *  11. upload    prepare a multipart upload      files: path or array of paths
         *  12. download  store responses in files        dir: target directory, e.g. a/b
         *  13. run       execute the transfers           depth: remaining crawl depth
         */
        class crawl{
            public $calltrigger = 'trigger';  #  callback that (re)starts the crawler: ($urls, $depth)
            public $calltodo = 'todo';  #  business callback, receives the page body
            public $timeout = 5;  #  connect timeout in seconds
            public $depth = 3;  #  maximum number of redirects to follow
            public $name = 'file';  #  form field name for uploads
            public $cookie = 'cookie.txt';  #  cookie file template (becomes cookie_<key>.txt)

            # Per-URL components, all indexed by the key of the URL in $urls.
            private $schemes = array();
            private $hosts = array();   #  authority part: [user[:pass]@]host[:port]
            private $paths = array();
            private $querys = array();
            private $options = array(); #  curl options collected until run()
            private $chs = array();     #  curl easy handles
            private $fps = array();     #  file pointers opened by download()
            private $handle;            #  curl multi handle, null once released
            private $urls = array();

            /*
             @desc: split a URL into its components, always returning every key
             @param url raw URL; a missing scheme defaults to http
             @return array with scheme, user, pass, host, port, path and query
             */
            private function parse($url){
                $url = trim((string)$url);
                if (strpos($url, '//') === 0) {
                    $url = 'http:' . $url;
                } elseif (!preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
                    // Without a scheme parse_url() would report the host as a path.
                    $url = 'http://' . $url;
                }
                $info = parse_url($url);
                if (!is_array($info)) {
                    $info = array();
                }
                // Array union keeps parsed values and fills in the missing keys.
                return $info + array(
                    'scheme' => 'http',
                    'user' => '',
                    'pass' => '',
                    'host' => '',
                    'port' => '',
                    'path' => '',
                    'query' => '',
                );
            }
            /*
             @desc: build the authority part ([user[:pass]@]host[:port]) of a URL
             @param info component array as returned by parse()
             @return authority string
             */
            private function authority($info){
                $authority = '';
                if ($info['user'] !== '') {
                    $authority .= $info['user'];
                    if ($info['pass'] !== '') {
                        $authority .= ':' . $info['pass'];
                    }
                    $authority .= '@';
                }
                $authority .= $info['host'];
                if ($info['port'] !== '') {
                    $authority .= ':' . $info['port'];
                }
                return $authority;
            }
            /*
             @desc: assemble the request URL of one handle
             @param k handle key
             @param extra additional, already encoded query string
             @return request URL
             */
            private function buildurl($k, $extra = ''){
                // Join the original query and the extra data with "&", skipping empty parts.
                $query = implode('&', array_filter(array($this->querys[$k], (string)$extra), 'strlen'));
                $url = $this->schemes[$k] . '://' . $this->hosts[$k] . $this->paths[$k];
                return $query === '' ? $url : $url . '?' . $query;
            }
            /*
             @desc: extract the href targets of the anchor tags in a page
             @param content page content
             @return list of unique raw href values
             */
            private function geturl($content){
                // Only <a ...> tags; the match never crosses the closing ">" of the tag.
                $preg = '/<a\s[^>]*?href\s*=\s*[\'"]?([^>\'"\s]*)/i';
                $urls = array();
                if (preg_match_all($preg, (string)$content, $res)) {
                    $urls = array_map('html_entity_decode', $res[1]);
                }
                return array_values(array_unique($urls));
            }
            /*
             @desc: turn a link found on a page into an absolute URL
             @param k key of the handle whose page contained the link
             @param link raw href value
             @return absolute URL, or null when the link cannot be crawled
             */
            private function resolve($k, $link){
                $link = trim($link);
                if ($link === '' || $link[0] === '#') {
                    return null;
                }
                if (preg_match('#^https?://#i', $link)) {
                    return $link;
                }
                if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $link)) {
                    // mailto:, javascript:, tel: and similar are not crawlable.
                    return null;
                }
                if (strpos($link, '//') === 0) {
                    return $this->schemes[$k] . ':' . $link;
                }
                $root = $this->schemes[$k] . '://' . $this->hosts[$k];
                if ($link[0] === '/') {
                    return $root . $link;
                }
                if ($link[0] === '?') {
                    return $root . $this->paths[$k] . $link;
                }
                // Relative link: resolve against the directory of the page, not the page itself.
                $slash = strrpos($this->paths[$k], '/');
                $dir = $slash === false ? '/' : substr($this->paths[$k], 0, $slash + 1);
                return $root . $dir . $link;
            }
            /*
             @desc: derive the cookie file name of one handle from the cookie template
             @param k handle key
             @return cookie file path, e.g. cookie_0.txt
             */
            private function cookiefile($k){
                $cookie = $this->cookie;
                $dot = strrpos($cookie, '.');
                $slash = strrpos($cookie, '/');
                if ($dot === false || ($slash !== false && $dot < $slash)) {
                    // No extension in the file name itself.
                    return $cookie . '_' . $k;
                }
                return substr($cookie, 0, $dot) . '_' . $k . substr($cookie, $dot);
            }
            /*
             @desc: pass fetched content to the business callback
             @param content argument handed to the callback
             */
            private function todo($content){
                if (is_callable($this->calltodo)) {
                    call_user_func($this->calltodo, $content);
                }
            }
            /*
             @desc: hand newly discovered URLs to the crawl trigger callback
             @param urls URLs to process
             @param depth remaining crawl depth
             */
            private function trigger($urls,$depth){
                if (is_callable($this->calltrigger)) {
                    call_user_func($this->calltrigger, $urls, $depth);
                }
            }
            /*
             @desc: set the request URL for a GET request on every handle
             @param data encoded query string to append
             */
            private function setget($data){
                foreach($this->chs as $k=>$v){
                    $this->options[$k][CURLOPT_URL] = $this->buildurl($k, $data);
                }
                return $this;
            }
            /*
             @desc: set URL and body for a POST request on every handle
             @param data post fields (array or string)
             */
            private function setpost($data){
                foreach($this->chs as $k=>$v){
                    $this->options[$k][CURLOPT_URL] = $this->buildurl($k);
                    $this->options[$k][CURLOPT_POST] = 1;
                    $this->options[$k][CURLOPT_POSTFIELDS] = $data;
                }
                return $this;
            }
            /*
             @desc: apply the collected options to the curl handles
             */
            private function setopt(){
                foreach($this->options as $k=>$v){
                    if (!isset($v[CURLOPT_URL])) {
                        // Neither get() nor post() was called: plain GET of the original URL.
                        $v[CURLOPT_URL] = $this->buildurl($k);
                    }
                    // Read here, not in the constructor, so that changes to the
                    // public properties made after construction take effect.
                    $v[CURLOPT_CONNECTTIMEOUT] = $this->timeout;
                    $v[CURLOPT_MAXREDIRS] = $this->depth;
                    curl_setopt_array($this->chs[$k], $v);
                }
                return $this;
            }
            /*
             @desc: close every remaining curl handle, file pointer and the multi handle
             */
            private function release(){
                foreach($this->chs as $ch){
                    curl_multi_remove_handle($this->handle, $ch);
                    curl_close($ch);
                }
                foreach($this->fps as $fp){
                    if (is_resource($fp)) {
                        fclose($fp);
                    }
                }
                $this->chs = array();
                $this->fps = array();
                if ($this->handle !== null) {
                    curl_multi_close($this->handle);
                    $this->handle = null;
                }
            }
            /*
             @desc: constructor, creates one curl handle per URL
             @param urls array of URLs (a single URL string is accepted too)
             */
            public function __construct($urls){
                $this->urls = (array)$urls;
                $this->handle = curl_multi_init();
                foreach($this->urls as $k=>$v){
                    $info = $this->parse($v);
                    $this->schemes[$k] = $info['scheme'];
                    $this->hosts[$k] = $this->authority($info);
                    $this->paths[$k] = $info['path'];
                    $this->querys[$k] = $info['query'];
                    $this->chs[$k] = curl_init();
                    // RETURNTRANSFER stays first so that a later CURLOPT_FILE
                    // (see download()) is applied after it.
                    $this->options[$k] = array(
                        CURLOPT_RETURNTRANSFER => 1,
                        CURLOPT_FOLLOWLOCATION => 1,
                        CURLINFO_HEADER_OUT => true,
                        CURLOPT_ENCODING => 'gzip',
                    );
                    curl_multi_add_handle($this->handle, $this->chs[$k]);
                }
            }
            /*
             @desc: release curl resources when the crawler goes away
             */
            public function __destruct(){
                $this->release();
            }
            /*
             @desc: switch the requests to https
             @param bool true: https request, false: leave unchanged
             */
            public function ssl($bool = false){
                if($bool){
                    foreach($this->chs as $k=>$v){
                        $this->schemes[$k] = 'https';
                        if (isset($this->options[$k][CURLOPT_URL])) {
                            // get()/post() was called first: upgrade the URL already built.
                            $this->options[$k][CURLOPT_URL] = preg_replace(
                                '#^http://#i',
                                'https://',
                                $this->options[$k][CURLOPT_URL]
                            );
                        }
                        // The value 1 is no longer accepted by PHP/libcurl; 2 is the supported setting.
                        $this->options[$k][CURLOPT_SSL_VERIFYHOST] = 2;
                        // NOTE(review): peer verification is disabled as in the original
                        // code; certificates are therefore not validated.
                        $this->options[$k][CURLOPT_SSL_VERIFYPEER] = false;
                    }
                }
                return $this;
            }
            /*
             @desc: set the user name and password used for authentication
             @param user user name
             @param pass password
             */
            public function auth($user,$pass){
                foreach($this->chs as $k=>$v){
                    $this->options[$k][CURLOPT_USERPWD] = $user.':'.$pass;
                }
                return $this;
            }
            /*
             @desc: simulate a login; received cookies are written to the cookie file
             */
            public function login(){
                foreach($this->chs as $k=>$v){
                    $this->options[$k][CURLOPT_COOKIEJAR] = $this->cookiefile($k);
                    $this->options[$k][CURLOPT_RETURNTRANSFER] = 0;
                }
                return $this;
            }
            /*
             @desc: send the cookies stored by login()
             */
            public function cookie(){
                foreach($this->chs as $k=>$v){
                    $this->options[$k][CURLOPT_COOKIEFILE] = $this->cookiefile($k);
                }
                return $this;
            }
            /*
             @desc: add request headers
             @param data array of header lines
             */
            public function header($data){
                foreach($this->chs as $k=>$v){
                    $current = isset($this->options[$k][CURLOPT_HTTPHEADER])
                        ? $this->options[$k][CURLOPT_HTTPHEADER]
                        : array();
                    $this->options[$k][CURLOPT_HTTPHEADER] = array_merge($current, (array)$data);
                }
                return $this;
            }
            /*
             @desc: route the requests through a proxy server
             @param url proxy server URL or host
             @param port proxy server port
             */
            public function proxy($url,$port){
                $info = $this->parse($url);
                // Only scheme and host are used; a path has no meaning for a proxy.
                $purl = $info['scheme'].'://'.$info['host'].':'.$port;
                foreach($this->chs as $k=>$v){
                    $this->options[$k][CURLOPT_PROXY] = $purl;
                }
                return $this;
            }
            /*
             @desc: set the User-Agent header
             @param browse User-Agent string
             */
            public function agent($browse = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'){
                foreach($this->chs as $k=>$v){
                    $this->options[$k][CURLOPT_USERAGENT] = $browse;
                }
                return $this;
            }
            /*
             @desc: prepare a GET request
             @param data query parameters (array, or an already encoded string)
             */
            public function get($data = array()){
                $query = is_array($data) ? http_build_query($data) : ltrim((string)$data, '?&');
                $this->setget($query);
                return $this;
            }
            /*
             @desc: prepare a POST request
             @param data post fields
             */
            public function post($data = array()){
                $this->setpost($data);
                return $this;
            }
            /*
             @desc: prepare a POST request with a JSON body
             @param data value to encode as JSON
             */
            public function json($data = array()){
                $data = json_encode($data);
                $header = array(
                        'Content-Type: application/json',
                        'Content-Length:' . strlen($data)
                    );
                $this->header($header);
                $this->setpost($data);
                return $this;
            }
            /*
             @desc: prepare a multipart form upload
             @param files file path, or array of file paths
             */
            public function upload($files){
                $data = array();
                $name = $this->name;
                if(is_array($files)){
                    foreach($files as $k=>$v){
                        $data["{$name}[{$k}]"]=new \CURLFile($v);
                    }
                }else{
                    $data["{$name}"]=new \CURLFile($files);
                }
                $this->setpost($data);
                return $this;
            }
            /*
             @desc: store each response in a file named after the last URL path segment
             @param dir target directory (created when missing); default: current directory
             */
            public function download($dir = ''){
                $dir = (string)$dir;
                if($dir !== '' && !is_dir($dir)){
                    mkdir($dir,0755,true);
                }
                $prefix = $dir === '' ? './' : rtrim($dir, '/') . '/';
                foreach($this->paths as $k=>$v){
                    $name = basename($v);
                    if ($name === '') {
                        // URL without a file name (site root or trailing slash).
                        $name = 'index.html';
                    }
                    $fp = fopen($prefix.$name, 'w');
                    if ($fp === false) {
                        throw new \RuntimeException('Cannot open download target: '.$prefix.$name);
                    }
                    $this->fps[$k] = $fp;
                    $this->options[$k][CURLOPT_FILE] = $fp;
                }
                $this->setget('');
                return $this;
            }
            /*
             @desc: execute all transfers, then process every page and follow its links
             @param depth remaining crawl depth, default 2; nothing is fetched at 0
             */
            public function run($depth = 2){
                if ($this->handle === null) {
                    return;
                }
                if ($depth <= 0) {
                    $this->release();
                    return;
                }
                $depth--;
                $this->setopt();
                $handle = $this->handle;
                $active = null;
                do {
                    $mrc = curl_multi_exec($handle, $active);
                } while ($mrc == CURLM_CALL_MULTI_PERFORM);
                while ($active && $mrc == CURLM_OK) {
                    if (curl_multi_select($handle) == -1) {
                        // select() failed: back off briefly instead of spinning.
                        usleep(100);
                    }
                    do {
                        $mrc = curl_multi_exec($handle, $active);
                    } while ($mrc == CURLM_CALL_MULTI_PERFORM);
                }
                try {
                    foreach ($this->chs as $k => $ch) {
                        $failed = curl_error($ch) != "";
                        $content = $failed ? null : curl_multi_getcontent($ch);
                        // Free the transfer before the callbacks run, so nested
                        // crawls do not pile up open handles and download files
                        // are complete on disk.
                        curl_multi_remove_handle($handle, $ch);
                        curl_close($ch);
                        unset($this->chs[$k]);
                        if (isset($this->fps[$k])) {
                            if (is_resource($this->fps[$k])) {
                                fclose($this->fps[$k]);
                            }
                            unset($this->fps[$k]);
                        }
                        if ($failed) {
                            continue;
                        }
                        $this->todo($content);
                        // Collect the links of this page only.
                        $returl = array();
                        foreach ($this->geturl($content) as $u) {
                            $real = $this->resolve($k, $u);
                            if ($real !== null) {
                                $returl[] = $real;
                            }
                        }
                        if (!empty($returl)) {
                            $this->trigger(array_values(array_unique($returl)), $depth);
                        }
                    }
                } finally {
                    // Also reached when a callback throws.
                    $this->release();
                }
            }
        }
      2. 測試:
        /**
         * Business callback used by the demo: prints one "ok" line per fetched page.
         *
         * @param mixed $content Body of the fetched page (not used here).
         */
        function todo($content){
            echo 'ok', PHP_EOL;
        }

        /**
         * Crawl trigger used by the demo: fetches the given URLs with GET requests.
         * The crawler calls it again for the links it finds, with a reduced depth.
         *
         * @param array $urls  URLs to crawl.
         * @param int   $depth Remaining crawl depth.
         */
        function trigger($urls = array(),$depth = 2){
            $crawler = new crawl($urls);
            $crawler->get();
            $crawler->run($depth);
        }

        // Seed URLs for the demo run.
        $urls = array('www.baidu.com', 'www.taobao.com');
        trigger($urls);
      3. 輸出:
        ok
        ok
        ok
        ok
        ok
        ok
        ok
        ok
        ok
        ok
        ok
        ok
        ok
        ok

      網頁題目:php多線程爬蟲類
      當前路徑:http://ef60e0e.cn/article/gojios.html
      99热在线精品一区二区三区_国产伦精品一区二区三区女破破_亚洲一区二区三区无码_精品国产欧美日韩另类一区
      1. <ul id="0c1fb"></ul>

        <noscript id="0c1fb"><video id="0c1fb"></video></noscript>
        <noscript id="0c1fb"><listing id="0c1fb"><thead id="0c1fb"></thead></listing></noscript>

        广东省| 永昌县| 湟源县| 宽甸| 天全县| 卓尼县| 泗水县| 林州市| 拉萨市| 肇州县| 通道| 尼勒克县| 桦南县| 石首市| 根河市| 德令哈市| 双牌县| 庆安县| 新蔡县| 扶绥县| 呼图壁县| 都安| 张家口市| 麦盖提县| 五寨县| 邮箱| 旬邑县| 新邵县| 陆河县| 睢宁县| 三台县| 精河县| 青神县| 周至县| 辽源市| 乌拉特中旗| 日照市| 景谷| 金沙县| 永平县| 七台河市|