php 抓取淘宝商品详情
转载声明:
本文为摘录自“csdn博客”,版权归原作者所有。
温馨提示:
为了更好的体验,请点击原文链接进行浏览
摘录时间:
2020-06-07 18:19:16
爬虫规则经常变化,2019年8月的规则如下,百度了一大堆全是坑
商品链接:
主要用的参数是id
下面分析详情页面的数据来源
找到数据源咯,接下来看数据是怎么拿出来的。
当然是查看源码啦
拿到两个地址啦啦啦啦
还是带个随机参数
// Fetch the item page; a random "ra" query parameter is appended to every request.
$url = "https://item.taobao.com/item.htm?id=600321893997&tbpm=3&ra=".mt_rand(100000,999999);
$data = controller("common")->http_curl($url);
$data = mb_convert_encoding($data, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');

// The page source carries two description addresses, each terminated by a
// single quote. Dots are escaped so they only match a literal ".".
$found1 = preg_match("/dscnew\.taobao\.com.+?'/i", $data, $matches1);
$found2 = preg_match("/descnew\.taobao\.com.+?'/i", $data, $matches2);

// Drop the trailing quote captured by the pattern; fall back to an empty
// string when the page did not contain the address.
$matches1_ = $found1 ? substr($matches1[0], 0, -1) : '';
$matches2_ = $found2 ? substr($matches2[0], 0, -1) : '';

$data1 = '';
$matches3 = array();
if ($matches2_ !== '') {
    // The address has no scheme; it is passed to cURL exactly as matched.
    $data1 = controller("common")->http_curl($matches2_);
    $data1 = mb_convert_encoding($data1, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');
    // Strip the first 10 and last 3 bytes of the response
    // (presumably the JavaScript wrapper around the HTML -- confirm against a live response).
    echo substr($data1,10,-3)."<br>";
    // Collect every src="..." attribute value found in the description.
    preg_match_all("/src=\"(.+?)\"/i", $data1, $matches3);
}
/**
 * Perform an HTTP request with cURL and return the response body.
 *
 * The request is sent with a fixed desktop Chrome user agent and a
 * "www.baidu.com" referer. Redirects are not followed.
 *
 * @param string       $url  Address to request.
 * @param string       $type 'post' sends a POST request with $arr as the body;
 *                           any other value leaves cURL's default method.
 * @param array|string $arr  POST payload. An array is encoded as
 *                           key=urlencode(value) pairs joined with "&";
 *                           a string is sent unchanged.
 * @return string|false The response body, or the cURL error message when
 *                      cURL reports an error.
 */
public function http_curl($url, $type = 'get', $arr = '') {
    // Only arrays need encoding. A string payload is passed through as-is
    // instead of being fed to foreach (which would warn and discard it).
    if (is_array($arr) && $arr) {
        $pairs = array();
        foreach ($arr as $k => $v) {
            $pairs[] = "$k=" . urlencode($v);
        }
        $arr = implode('&', $pairs);
    }

    $ch = curl_init();
    $user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36";
    curl_setopt($ch, CURLOPT_URL, $url); // target address
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_REFERER, "www.baidu.com");
    // NOTE(review): certificate and host verification are switched off, so
    // HTTPS responses are not authenticated. Kept to preserve existing behavior.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    if ($type == 'post') {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $arr);
    }

    $output = curl_exec($ch);

    // Read the error once; a non-empty message replaces the response.
    $error = curl_error($ch);
    if ($error) {
        return $error;
    }
    return $output;
}