Scraping Links With PHP
By Justin Laing2008-01-06
Your Completed Link Scraper
function storeLink($url,$gathered_from) {
$query = "INSERT INTO links (url, gathered_from) VALUES ('$url', '$gathered_from')";
mysql_query($query) or die('Error, insert query failed');
}
$target_url = "http://www.merchantos.com/";
$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';
// make the cURL request to $target_url
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_URL,$target_url);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$html= curl_exec($ch);
if (!$html) {
echo "
cURL error number:" .curl_errno($ch);
echo "
cURL error:" . curl_error($ch);
exit;
}
// parse the html into a DOMDocument
$dom = new DOMDocument();
@$dom->loadHTML($html);
// grab all the on the page
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("/html/body//a");
for ($i = 0; $i < $hrefs->length; $i++) {
$href = $hrefs->item($i);
$url = $href->getAttribute('href');
storeLink($url,$target_url);
echo "
Link stored: $url";
}
Tutorial pages:
|
Originally posted on Makebeta
|
|||||||||
You might also want to check these out:
|
Leave a Comment on "Scraping Links With PHP"
You must be logged in to post a comment.
Link to This Tutorial Page!

