php提取html中指定div下a标签的text和href问题
已解决,有点凌乱,速度就行。
<?php
// Fetch the site home page, extract every link matching the anchor pattern
// below, and print the links as JSON grouped by the third "/"-separated
// segment of each href.
header('Content-Type: application/json; charset=utf-8');

$url = 'http://www.hkxy.edu.cn/';
$base = rtrim($url, '/');

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0); // do not include response headers in the result
curl_setopt($ch, CURLOPT_NOBODY, 0); // 0 = keep the response body
// NOTE(review): peer verification is disabled; this has no effect for the
// plain-HTTP URL above but is unsafe if the URL is ever switched to HTTPS.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36');
$response = curl_exec($ch);

// Check whether an error occurred; stop here instead of parsing a failed fetch.
if (curl_errno($ch) || $response === false) {
    $error = curl_error($ch);
    curl_close($ch);
    echo JSON(array('error' => 'Curl error: ' . $error));
    exit(1);
}
curl_close($ch);

// The patterns below are written against UTF-8 text; the conversion assumes
// the page is served as GBK, as the original script did.
$response = iconv('gbk', 'utf-8', $response);
if ($response === false) {
    echo JSON(array('error' => 'Unable to convert the response from GBK to UTF-8'));
    exit(1);
}

// NOTE(review): the original call replaced ' ' with ''. Removing every plain
// space would make the space-containing anchor pattern below unmatchable, so
// presumably the intent was to strip non-breaking spaces - confirm.
$response = str_replace(array('&nbsp;', "\xC2\xA0"), '', $response);

// The original also matched '<div class="column4">...</div>' first, but that
// result was overwritten immediately and never used, so it is dropped here.
$pa = '%<a class="" href="(.*?)" title="(.*?)" target="_blank">(.*?)</a>%sim';
preg_match_all($pa, $response, $arr);

$result = array();
foreach ($arr[1] as $i => $href) {
    $segments = explode('/', $href);
    // Hrefs with fewer than three segments are grouped under an empty key
    // instead of raising an undefined-offset notice.
    $group = isset($segments[2]) ? $segments[2] : '';
    $result[$group][$i] = array(
        'src'   => $base . $href,
        'title' => $arr[2][$i],
    );
}

echo JSON($result);
/**
 * Apply a callback to every non-array value of a nested array, in place,
 * and optionally to every string key as well.
 *
 * Renamed keys are appended to the end of their array, so key order may
 * change when keys are rewritten.
 *
 * @param array    $array              Array to transform; modified by reference.
 * @param callable $function           Callback that receives one value and returns its replacement.
 * @param bool     $apply_to_keys_also When true, string keys are passed through the callback too.
 * @return void
 */
function arrayRecursive(&$array, $function, $apply_to_keys_also = false)
{
    // Tracks the current nesting depth across recursive calls.
    static $recursive_counter = 0;
    if (++$recursive_counter > 1000) {
        die('possible deep recursion attack');
    }

    // Iterate over a snapshot of the keys so that keys appended by the
    // renaming step below are never visited (and transformed) a second time.
    foreach (array_keys($array) as $key) {
        if (is_array($array[$key])) {
            arrayRecursive($array[$key], $function, $apply_to_keys_also);
        } else {
            $array[$key] = $function($array[$key]);
        }

        if ($apply_to_keys_also && is_string($key)) {
            $new_key = $function($key);
            // Strict comparison: a loose != treats numeric-looking strings
            // such as ' 1' and '+1' as equal and would skip the rename.
            if ($new_key !== $key) {
                $array[$new_key] = $array[$key];
                unset($array[$key]);
            }
        }
    }

    $recursive_counter--;
}
/**
 * Convert an array to a JSON string while keeping non-ASCII characters
 * (e.g. Chinese) and forward slashes unescaped.
 *
 * When JSON_UNESCAPED_UNICODE is available, json_encode() does this directly,
 * so quotes, backslashes and control characters in the data stay properly
 * escaped. The urlencode/urldecode round trip is kept only as a fallback for
 * runtimes without that flag, or for input json_encode() rejects. That
 * fallback decodes the escaped output again, so it can emit invalid JSON for
 * data containing '"' or '\'.
 *
 * @param array $array Array to convert.
 * @return string The resulting JSON string.
 */
function JSON($array)
{
    if (defined('JSON_UNESCAPED_UNICODE')) {
        $json = json_encode($array, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
        if ($json !== false) {
            return $json;
        }
    }

    // Legacy path: percent-encode all keys and values so json_encode() sees
    // only ASCII, then decode the whole document.
    arrayRecursive($array, 'urlencode', true);
    return urldecode(json_encode($array));
}
如图所示:
我想提取 .offer_box_wide1 下所有 a 元素的 text 和 href,应该怎么做?求教。
蛋疼的孩子
9 years, 5 months ago
Answers
可以参考 PHP Simple HTML DOM Parser:
http://simplehtmldom.sourceforge.net/
它可以像 jQuery 选择器一样灵活地操作 HTML。
免贵姓喵名霸天
answered 9 years, 5 months ago