www.gusucode.com > iWebshop开源PHP商城系统 v2.8源码程序 > iwebshop/plugins/collect/jd_collect.php
<?php
include(dirname(__FILE__).'/collect.php');

/**
 * @brief JD.com goods list collector. Intended for ordinary (leaf) category
 *        lists that cannot be subdivided any further.
 *
 * NOTE(review): this class reads $this->listPageHtml / $this->showPageHtml and
 * calls $this->readShowPage(); none of them are defined here, so they are
 * presumably provided by the parent `collect` class — confirm against collect.php.
 *
 * @author nswe
 * @date 2013/12/25 13:52:19
 */
class jd_collect extends collect
{
    // Attributes parsed by the most recent pickGoodsAttributeFromShow() call
    private $cacheAttrData = array();

    // Stream context options used for outgoing HTTP reads (method and timeout)
    public $httpContext = array(
        'http' => array(
            'method'  => "GET",
            'timeout' => 350,
        )
    );

    /**
     * @brief Constructor: builds the stream context from $httpContext.
     */
    public function __construct()
    {
        // NOTE(review): $context is not declared in this class; presumably the parent declares it — confirm.
        $this->context = stream_context_create($this->httpContext);
    }

    /**
     * @brief Check whether a url is a JD list url
     * @param string $url url to check
     * @return bool true when the url contains the JD list prefix
     */
    public function checkListUrl($url)
    {
        return strpos($url,'http://list.jd.com/list.html?cat=') !== false;
    }

    /**
     * @brief Check whether a url is a JD goods detail url
     * @param string $url url to check
     * @return bool true when the url contains the JD item host
     */
    public function checkShowUrl($url)
    {
        return strpos($url,'http://item.jd.com') !== false;
    }

    /**
     * @brief Pick the category path from the list page
     * @return array category names ordered by level
     * @throws Exception when the list page has no category markup
     */
    public function pickCatFromList()
    {
        $catExp = '|<strong><a href=.+?</a></span>|';
        preg_match($catExp,$this->listPageHtml,$match);

        if(!isset($match[0]))
        {
            throw new Exception('页面缺少商品分类');
        }
        return explode(' > ',trim(strip_tags($match[0])));
    }

    /**
     * @brief Pick the filterable attributes from the list page
     * @return array attribute name => comma separated attribute values
     */
    public function pickAttributeFromList()
    {
        $keyExp   = '@<div class="a-key">(.+?)</div>@';
        $valueExp = '@<ul class="f-list">(.+?)</ul>@';
        preg_match_all($keyExp,$this->listPageHtml,$matchKey);
        preg_match_all($valueExp,$this->listPageHtml,$matchValue);

        // preg_match_all always fills index 1 (with an empty array when nothing
        // matched), so emptiness - not isset - decides whether to retry with the
        // patterns for markup whose quotes are backslash-escaped.
        if(empty($matchKey[1]) && empty($matchValue[1]))
        {
            $keyExp   = '@<div class=\\\\"a-key\\\\">(.+?)</div>@';
            $valueExp = '@<ul class=\\\\"f-list\\\\">(.+?)</ul>@';
            preg_match_all($keyExp,$this->listPageHtml,$matchKey);
            preg_match_all($valueExp,$this->listPageHtml,$matchValue);
        }

        $keys   = empty($matchKey[1])   ? array() : $matchKey[1];
        $values = empty($matchValue[1]) ? array() : $matchValue[1];

        // Drop the entries that are not real attributes
        array_shift($keys);   // remove brand
        array_shift($keys);   // remove price
        array_shift($values); // remove price

        $attrData = array();
        foreach($keys as $index => $name)
        {
            // A key without a matching value list keeps an empty value
            $rawValue = isset($values[$index]) ? $values[$index] : '';

            // Put a comma after every item so the stripped text stays separated
            $attrData[trim($name,':')] = trim(strip_tags(strtr($rawValue,array('</li>' => '</li>,'))),',');
        }
        return $attrData;
    }

    /**
     * @brief Pick the goods detail links from the list page
     * @return array goods detail urls (empty when none were found)
     */
    public function pickGoodsLinkFromList()
    {
        $linkExp = '@<div class="p-img"><a target="_blank" href="(.+?)"@';
        preg_match_all($linkExp,$this->listPageHtml,$match);

        // Index 1 is always set by preg_match_all; retry with the escaped-quote
        // pattern only when the plain pattern found nothing.
        if(empty($match[1]))
        {
            $linkExp = '@<div class=\\\\"p-img\\\\"><a target=\\\\"_blank\\\\" href=\\\\"(.+?)\\\\"@';
            preg_match_all($linkExp,$this->listPageHtml,$match);
        }
        return empty($match[1]) ? array() : $match[1];
    }

    /**
     * @brief Get the goods name from the detail page
     * @return string goods name
     * @throws Exception when the detail page has no <h1> title
     */
    public function pickGoodsNameFromShow()
    {
        $exp = '@<h1>.+?</h1>@';
        preg_match($exp,$this->showPageHtml,$match);

        if(!isset($match[0]))
        {
            throw new Exception('没有找到商品名称');
        }
        return strip_tags($match[0]);
    }

    /**
     * @brief Get goods prices from the price API
     * @param string $idString comma separated goods ids, e.g. J_970602
     * @return mixed decoded API response with the "J_" prefix removed from the
     *               payload; an empty array when the request failed
     */
    public function getGoodsPriceFromAPI($idString)
    {
        $apiUrl = 'http://p.3.cn/prices/mgets?skuIds='.trim($idString,',');
        $result = file_get_contents($apiUrl,false,$this->context);

        // file_get_contents returns false on failure; do not decode a failed read
        if($result === false)
        {
            return array();
        }

        $result = strtr($result,array('J_' => ''));
        return JSON::decode($result);
    }

    /**
     * @brief Get the goods attributes from the detail page and cache them
     * @return array attribute name => attribute value
     * @throws Exception when the detail page has no attribute list
     */
    public function pickGoodsAttributeFromShow()
    {
        $exp = '@<ul class="detail-list">(.+?)</ul>@s';
        preg_match($exp,$this->showPageHtml,$match);

        if(!isset($match[1]))
        {
            throw new Exception('没有找到商品属性');
        }

        // Every <li> becomes a comma so each item starts a new segment
        $plainText = trim(strip_tags(strtr($match[1],array('<li>' => '</li>,'))));

        $attrArray = array();
        foreach(explode(',',$plainText) as $segment)
        {
            // Split on the first colon only so values containing colons
            // (e.g. a time such as 13:52:19) are kept whole.
            $pair = explode(':',$segment,2);

            // Segments without a colon (such as the leading empty one) carry no attribute
            if(!isset($pair[1]))
            {
                continue;
            }
            $attrArray[trim($pair[0])] = trim($pair[1]);
        }
        return $this->cacheAttrData = $attrArray;
    }

    /**
     * @brief Get the goods images from the detail page
     * @return array goods image urls
     * @throws Exception when the detail page has no image entries
     */
    public function pickGoodsImageFromShow()
    {
        $exp = '@data-url="(.+?)"@';
        preg_match_all($exp,$this->showPageHtml,$match);

        // preg_match_all always sets index 1, so an empty list means "not found"
        if(empty($match[1]) || !is_array($match[1]))
        {
            throw new Exception('没有找到商品图片');
        }

        $jdImageServerPre = 'http://img13.360buyimg.com/n0/';
        $images = array();
        foreach($match[1] as $key => $val)
        {
            $images[$key] = $jdImageServerPre.$val;
        }
        return $images;
    }

    /**
     * @brief Get the goods specifications from the detail page
     * @return array array(spec name => comma separated spec values)
     */
    public function pickGoodsSpecFromShow()
    {
        $exp = '@<li id="choose-(?:version|color)".*?>.*?</li>@s';
        preg_match_all($exp,$this->showPageHtml,$match);

        $result = array();
        if(!empty($match[0]))
        {
            foreach($match[0] as $val)
            {
                // Put a comma after every option link so the stripped text stays separated
                $val  = trim(strip_tags(strtr($val,array('</a>' => '</a>,'))),',');

                // Split on the first colon only so option text containing colons is kept whole
                $temp = explode(':',$val,2);
                if(isset($temp[1]))
                {
                    $result[$temp[0]] = $temp[1];
                }
            }
        }
        return $result;
    }

    /**
     * @brief Get the goods description from the detail page
     * @return string description html with data-lazyload rewritten to src
     * @throws Exception when the detail page has no description block
     */
    public function pickGoodsContentFromShow()
    {
        $exp = '@<div class="detail-content">.*<!--product-detail end-->@s';
        preg_match($exp,$this->showPageHtml,$match);

        if(!isset($match[0]))
        {
            throw new Exception('没有找到商品详情');
        }
        return strtr($match[0],array('data-lazyload' => 'src'));
    }

    /**
     * @brief Get the goods weight
     * @return string|int numeric part of the gross weight, 0 when unavailable
     */
    public function pickGoodsWeightFromShow()
    {
        preg_match('@[\d\.]+@',$this->getGrossWeightText(),$matchAttr);
        return isset($matchAttr[0]) ? $matchAttr[0] : 0;
    }

    /**
     * @brief Get the goods unit of measure
     * @return string text following the numeric gross weight, '千克' when unavailable
     */
    public function pickGoodsUnitFromShow()
    {
        preg_match('@[\d\.]+(.*)$@',$this->getGrossWeightText(),$matchAttr);

        // A weight without any unit suffix falls back to the default unit
        $unit = isset($matchAttr[1]) ? trim($matchAttr[1]) : '';
        return $unit !== '' ? $unit : '千克';
    }

    /**
     * @brief Read the raw gross weight attribute, parsing the detail page first
     *        when nothing is cached yet
     * @return string raw gross weight text, '' when the attribute is missing
     */
    private function getGrossWeightText()
    {
        if(!$this->cacheAttrData)
        {
            $this->pickGoodsAttributeFromShow();
        }
        return isset($this->cacheAttrData['商品毛重']) ? $this->cacheAttrData['商品毛重'] : '';
    }

    /**
     * @brief Start collecting goods
     * @param int $num maximum number of goods to collect; 0 or less means no limit
     * @return array array('cat' => category path,'attr' => list attributes,'item' => array(
     *      'goods_no' => goods number,'up_time' => shelf time,'weight' => weight,'unit' => unit of measure,
     *      'name' => goods name,'price' => goods price,'img' => array(image urls),
     *      'content' => goods description,'spec' => goods specifications,'attr' => goods attributes
     * ))
     * @throws Exception when a required part of a page cannot be found
     */
    public function collect($num = 20)
    {
        $result = array(
            'cat'  => $this->pickCatFromList(),
            'attr' => $this->pickAttributeFromList(),
            'item' => array()
        );

        $goodsUrl = $this->pickGoodsLinkFromList();
        foreach($goodsUrl as $key => $val)
        {
            if($num > 0 && $key >= $num)
            {
                break;
            }

            // The first run of digits in the link is used as the goods id;
            // a link without one cannot be priced, so skip it.
            if(!preg_match('@\d+@',$val,$match))
            {
                continue;
            }

            $this->readShowPage($val);
            $priceObj = $this->getGoodsPriceFromAPI('J_'.$match[0]);
            $attrData = $this->pickGoodsAttributeFromShow();

            $result['item'][] = array(
                'goods_no' => isset($attrData['商品编号']) ? $attrData['商品编号'] : null,
                'up_time'  => isset($attrData['上架时间']) ? $attrData['上架时间'] : null,
                'weight'   => $this->pickGoodsWeightFromShow(),
                'unit'     => $this->pickGoodsUnitFromShow(),
                'name'     => $this->pickGoodsNameFromShow(),
                'price'    => isset($priceObj[0]['p']) ? $priceObj[0]['p'] : null,
                'img'      => $this->pickGoodsImageFromShow(),
                'content'  => $this->pickGoodsContentFromShow(),
                'spec'     => $this->pickGoodsSpecFromShow(),
                'attr'     => $attrData
            );
        }
        return $result;
    }
}