·您现在的位置: 云翼网络 >> 文章中心 >> 网站建设 >> 网站建设开发 >> php网站开发 >> curl采集 根据关键词 获取雅虎竞价排名

curl采集 根据关键词 获取雅虎竞价排名

作者:佚名      php网站开发编辑:admin      更新时间:2022-07-23
curl采集 根据关键词 获取雅虎竞价排名

之前写过curl批处理采集数据,这里贴上完整版本,代码很简单,废话不说,上代码,新手欢迎指教!!!

代码只实现到获取链接为止;至于排名,返回数组的键(从 0 开始)就是链接在搜索结果中的先后顺序,键加 1 即为排名。

  1 <?php  2 /**  3  * Based on yahoo access to data  4  *  5  * @author chujiu <[email protected]>  6  * @copyright 2014.04.26 By chujiu  7  * @version 0.2.1 2014.04.26  8  */  9  10 class DataCollectionRank { 11  12     const   PAGE = 10; 13     public  $path = ''; 14     public  $main = 91; 15      16     // 添加curl句柄 返回资源 17     PRivate function _gather_data($keyWord) { 18         if(empty($keyword)) { 19             return ''; 20         } 21         $chs = array(); // 句柄 22         $mh = curl_multi_init(); 23         for( $i=1; $i<=$this->main; $i+=self::PAGE ) { 24             $url = 'http://search.yahoo.co.jp/search?p='.urlencode($keyword).'&tid=top_ga1_sa&ei=UTF-8&aq=-1&oq='.urlencode($keyword).'&pstart=1&fr=top_ga1_sa&b='.$i; 25             $ch = curl_init(); 26             //设置选项 27             curl_setopt_array($ch, array( 28                 CURLOPT_URL => $url, 29                 CURLOPT_HEADER => false, 30                 CURLOPT_SSL_VERIFYPEER => false, 31                 CURLOPT_RETURNTRANSFER => true, 32                 CURLOPT_TIMEOUT => 30, 33                 CURLOPT_AUTOREFERER => true 34                 ) 35             ); 36             curl_multi_add_handle($mh, $ch); // 添加批处理句柄 37             $chs['handle'][$i]['ch'] = $ch; 38             $chs['handle'][$i]['url'] = $url; 39         } 40         $chs['mh'] = $mh; 41         return $chs; 42     } 43      44     // 处理CURL请求 45     public function exec_curl_get_data($keyword, $path) { 46         $error = ''; 47         $this->path = $path; 48         $chs = $this->_gather_data($keyword); 49         if(empty($chs)) return '';  50          // 执行批处理句柄 51         $active = null; 52         do { 53            $mrc = curl_multi_exec($chs['mh'],$active); 54            //$info = curl_multi_info_read($chs['mh']); 55         } while ($active > 0); 56         // 获取数据 57         $responses = array(); 58         foreach($chs['handle'] as $k=>$ch){  59             if(curl_error($ch['ch'])){ 60                 
$error .= "\n".'error提示:'.curl_error($ch['ch']).'-------URL:'.$ch['url'].'--------时间:'.date('Y-d-m H:i:s',time())."\n"; 61             } else { 62                 $responses[$k]['data'] = curl_multi_getcontent( $ch['ch'] ); 63             } 64              65             //curl_multi_info_read($mh); 66             // close current handler  67             curl_multi_remove_handle($chs['mh'], $ch['ch']);  68             curl_close($ch['ch']); 69         } 70         //关闭curl 批处理 71         curl_multi_close($chs['mh']); 72         $str = ''; 73         if($error != '') { 74             $this->_writeFile('get_rank_log.txt', $error, 'ab+'); 75         } 76         foreach ($responses as $val) { 77             if(!empty($val['data'])) { 78                 $str.= $this->_get_keyword_link_preg($val['data']); 79             } 80         } 81         $str = substr($str, 0 ,-1); 82         $contents = explode('|', $str); 83         return $contents; 84     } 85  86     // 过滤数据 获取链接 87     private function _get_keyword_link_preg ($str) { 88         $res = ''; 89         if(empty($str)) { 90             return ''; 91         } 92         $arr = explode('<div id="web">', $str); 93         $arr1 = explode('<div id="posS" class="spns">', $arr[1]); 94         $arr2 = preg_replace('#<div id=\"pg\">[\s\S]+#', '', $arr1[0]); 95         $arr3 = preg_replace('#<div id=\"rel\">[\s\S]+#', '', $arr2); 96         $arr4 = preg_replace('#<em>[\s\S]+?</em>#', '', $arr3); 97         if(preg_match_all('#href=\"(.*?)\">#',$arr4,$arr5) !== false) { 98             foreach($arr5[1] as $val) { 99                 $res.= urldecode($val).'|';100             }101         }102         return $res;103     }104 105     // 写入文件106     public function _writeFile($fileName, $data, $method="rb+", $iflock=1, $check=1, $chmod=1){107         $check && @strpos($this->path.'/'.$fileName, '..')!==false && exit('403 Forbidden!');108         @touch($this->path.'/'.$fileName);109         $handle = 
@fopen($this->path.'/'.$fileName, $method);110         if($iflock) {111             @flock($handle,LOCK_EX);112         }113         $fw = @fwrite($handle,$data);114         if($method == "rb+") ftruncate($handle, strlen($data));115         fclose($handle);116         $chmod && @chmod($this->path.'/'.$fileName,0777);117     }118 }119 ?>

 1 function array_unique_fb($array){ 2     $temp = array(); 3     $data = array(); 4     foreach ($array as $value){ 5         $value = join(",",$value); //降维,也可以用implode,将一维数组转换为用逗号连接的字符串 6         $temp[] = $value; 7     } 8         $temp = array_flip(array_flip($temp));    //去掉重复的字符串,也就是重复的一维数组 9     foreach ($temp as $k => $value){10         $temp[$k] = explode(",",$value);   //再将拆开的数组重新组装11     }12     foreach ($temp as $key => $value) {13         $data[$key]['keyword'] = $value[0];14         $data[$key]['domain'] = $value[1];15     }16     return $data;17 }