I downloaded and tried vcd-db a couple of days ago, and I think it's great fun to be able to have this kind of personal movie database. It seems that the IMDB fetch is a bit broken in the latest release (0991). The forum only seemed to contain a few earlier fixes to some of the fetch strings, so I set out to make my own fix for scraping the data from an IMDB page. I've seen a few good posts from other users requesting an update since the IMDB facelift, so instead of spamming the forum I'll put this in a new thread with a subject that is easy for people to find.
Now, I'm really not that great at PHP coding, so there are probably better ways to write these changes... but hey, it works, even if there might be a little overhead.
I'll just post the contents of my VCDFetch_imdb.php file with all the changes. Just copy and paste it over your own file if you have not done any custom work on it (the file is located in /classes/fetch).
Here goes:
- Code: Select all
<?php
/**
* VCD-db - a web based VCD/DVD Catalog system
* Copyright (C) 2003-2007 Konni - konni.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* @author Hákon Birgsson <konni@konni.com>
* @package Kernel
* @subpackage WebFetch
* @version $Id$
*/
?>
<?php
/**
 * IMDB implementation of the VCD-db web fetch classes.
 *
 * Declares the regular expressions used to scrape an IMDB title page and
 * converts the collected matches into an imdbObj.
 */
class VCDFetch_imdb extends VCDFetch {

    /**
     * Scrape patterns keyed by entry name; the keys are the entry names that
     * processResults() switches on.
     *
     * The patterns carry no delimiters; presumably the parent class wraps
     * them before matching - confirm in VCDFetch.
     */
    protected $regexArray = array(
        'title' => '<h1 class="header">([^\<]*)<span>',
        // The outer parentheses are group 1, so the year itself is group 2 (see processResults()).
        'year' => '(<a href="/year/([0-9]{4})/">([0-9]{4})</a>)',
        'poster' => '<a href="/media/rm[0-9]+/tt[0-9]+"><img src="([^<]*)"[^"]*height="([0-9]{2,3})"[^"]*width="([0-9]{2,3})"[^"]*alt="([^<]*)"[^"]*title="([^<]*)" /></a>',
        'director' => 'Director[^"]*<a href="/name/nm[0-9]+/">([^\<]*)</a>',
        'genre' => '<a href="/genre/[a-zA-Z\\-]*">([a-zA-Z\\-]*)</a>',
        // Two single-digit groups (e.g. "7" and "5"); processResults() joins them and divides by 10.
        'rating' => '<span class="rating-rating">([0-9]).([0-9])<span>/10</span></span>',
        'cast' => NULL, // The cast is populated in the fetchDeeper() function
        'runtime' => '([0-9]+) min',
        'aka' => '<h4 class="inline">Also Known As:</h4> ([^\<]*)<span class="see-more inline"><a href="releaseinfo#akas">See more</a></span>[^"]*</div>',
        'country' => 'Country:[^"]*<a href="/country/[^"]*">([^\<]*)</a>',
        'plot' => '<h2>Storyline</h2>[^"]*<p>([^<]*)<em class="nobr">'
    );

    /**
     * Entry names whose results hold several matches; processResults() reads
     * 'genre' and 'country' as lists of match arrays.
     *
     * NOTE(review): 'akas' has no counterpart in $regexArray, which uses the
     * key 'aka'. Left untouched because changing it would alter how the
     * parent class matches the 'aka' entry - confirm the intent.
     */
    protected $multiArray = array(
        'genre', 'cast', 'akas', 'country'
    );

    private $servername = 'akas.imdb.com';
    // '[$]' is the placeholder that fetchDeeper() replaces with the item ID.
    private $searchpath = '/find?s=tt&q=[$]';
    private $itempath = '/title/tt[$]/';

    /**
     * Registers the site name and the fetch URLs with the parent class and
     * switches the fetcher to Snoopy.
     */
    public function __construct() {
        $this->setSiteName("imdb");
        $this->setFetchUrls($this->servername, $this->searchpath, $this->itempath);
        $this->useSnoopy();
    }

    /**
     * Searches IMDB for a title; delegates entirely to the parent class.
     *
     * @param string $title The title to search for
     * @return mixed Whatever VCDFetch::search() returns
     */
    public function search($title) {
        return parent::search($title);
    }

    /**
     * Builds the selection list for the search results.
     *
     * The arguments 1, 3 and 4 are capture-group numbers of $regx: the numeric
     * IMDB ID, the link text and the year (optionally followed by "/I", "/II", ...).
     * NOTE(review): the meaning the parent assigns to each position is not
     * visible here - confirm in VCDFetch::generateSimpleSearchResults().
     *
     * @return mixed Whatever VCDFetch::generateSearchSelection() returns
     */
    public function showSearchResults() {
        $this->setMaxSearchResults(50);
        $regx = '<a href=\"\/title\/tt([0-9]+)\/([^\<]*)\">([^\<]*)</a>[^(]*\(([0-9]{4}(/I+)?)\)';
        $results = parent::generateSimpleSearchResults($regx, 1, 3, 4);
        return parent::generateSearchSelection($results);
    }

    /**
     * Converts the collected worker array into an imdbObj and stores it in
     * $this->fetchedObj.
     *
     * Each worker entry is a pair: element 0 is the entry name, element 1 the
     * matched data. Sets an error message and returns early when there is
     * nothing to process.
     */
    protected function processResults() {
        if (!is_array($this->workerArray) || count($this->workerArray) == 0) {
            $this->setErrorMsg("No results to process.");
            return;
        }

        $obj = new imdbObj();
        $obj->setIMDB($this->getItemID());

        foreach ($this->workerArray as $data) {
            $entry = $data[0];
            $arrData = $data[1];

            switch ($entry) {
                case 'title':
                    $obj->setTitle($arrData[1]);
                    break;

                case 'year':
                    // Group 2 holds the four-digit year.
                    $obj->setYear($arrData[2]);
                    break;

                case 'poster':
                    $obj->setImage($arrData[1]);
                    break;

                case 'director':
                    $obj->setDirector($arrData[1]);
                    break;

                case 'genre':
                    $genres = array();
                    foreach ($arrData as $item) {
                        $genres[] = $item[1];
                    }
                    $obj->setGenre($genres);
                    break;

                case 'rating':
                    // "7" and "5" become "75", then 7.5.
                    $rating = $arrData[1].$arrData[2];
                    $obj->setRating($rating / 10);
                    break;

                case 'cast':
                    // The cast list has been populated in the fetchDeeper function
                    if (is_array($arrData)) {
                        $obj->setCast($arrData);
                    }
                    break;

                case 'runtime':
                    $obj->setRuntime($arrData[1]);
                    break;

                case 'aka':
                    $obj->setAltTitle($arrData[1]);
                    break;

                case 'plot':
                    // The plot is a regex match array when it came from the
                    // main pattern, or a plain string when fetchDeeper() added it.
                    $plot = null;
                    if (is_array($arrData)) {
                        $plot = trim($arrData[1]);
                    } elseif (is_string($arrData)) {
                        $plot = trim($arrData);
                    }
                    // Only set the plot when one was extracted, so that an
                    // undefined value is never handed to the setter.
                    if ($plot !== null) {
                        $obj->setPlot($plot);
                    }
                    break;

                case 'country':
                    if (count($arrData) > 0) {
                        $countries = array();
                        foreach ($arrData as $item) {
                            $countries[] = $item[1];
                        }
                        $obj->setCountry($countries);
                    }
                    break;

                default:
                    break;
            }
        }

        $this->fetchedObj = $obj;
    }

    /**
     * Performs the extra scraping needed for entries that a single pattern
     * from $regexArray cannot provide, and appends any result to
     * $this->workerArray as array($entry, $data).
     *
     * @param string $entry The entry name ('cast', 'poster', 'akas' or 'plot')
     */
    protected function fetchDeeper($entry) {
        switch ($entry) {
            case 'cast':
                // Character given as plain text inside the <div>.
                $regx = '/<a href="\/name\/nm[0-9]+\/">([^<]*)<\/a>[^"]*<\/td>[^"]*<td class="ellipsis">[^"]*<\/td>[^"]*<td class="character">[^"]*<div>[\r\t\n\f\x0B\0 ]*([^<\r\t\n\f\x0B\0]*)[ \r\t\n\f\x0B\0 ]*<\/div>[^"]*<\/td>/';
                // Character given as a link to a character page.
                $regy = '/<a href="\/name\/nm[0-9]+\/">([^<]*)<\/a>[^"]*<\/td>[^"]*<td class="ellipsis">[^"]*...[^"]*<\/td>[^"]*<td class="character">[^"]*<div>[^"]*<a href="\/character\/ch[0-9]+\/">([^<]*)<\/a>/';

                // Linked characters come first, then the plain-text ones.
                $castList = array_merge(
                    $this->collectCastPairs($regy),
                    $this->collectCastPairs($regx)
                );
                array_push($this->workerArray, array($entry, $castList));
                break;

            case 'poster':
                // NOTE(review): the match is not added to the worker array, so
                // this branch has no visible effect beyond the getItem() call.
                $regx = '<a href="/media/rm[0-9]+/tt[0-9]+"><img src="([^<]*)"[^"]*height="([0-9]{2,3})"[^"]*width="([0-9]{2,3})"[^"]*alt="([^<]*)"[^"]*title="([^<]*)" /></a>';
                $this->getItem($regx);
                break;

            case 'akas':
                // NOTE(review): processResults() has no 'akas' case (only
                // 'aka'), so the titles collected here are not consumed in this class.
                $ret = array();
                $contents = $this->getContents();

                // preg_* replaces eregi(), which was removed in PHP 7. The 's'
                // flag keeps '.' matching newlines as it does in POSIX regex.
                if (preg_match('#Also Known As:</b><br>(.*)<b class="ch"><a href="/mpaa">MPAA</a>#is', $contents, $section)) {
                    if (preg_match_all('#<br>([^<]*)#i', $section[0], $titles)) {
                        foreach ($titles[1] as $title) {
                            $title = trim($title);
                            if ($title !== '') {
                                $ret[] = $title;
                            }
                        }
                    }
                }
                array_push($this->workerArray, array($entry, $ret));
                break;

            case 'plot':
                // Keep the current buffer so it can be put back once the plot
                // summary page has been examined.
                $itemBuffer = $this->getContents();

                // Generate urls
                $itemUrl = str_replace('[$]', $this->getItemID(), $this->itempath);
                $plotUrl = $itemUrl.'plotsummary';
                $referer = "http://".$this->servername.$itemUrl;

                $plotText = null;
                $isPlot = $this->fetchPage($this->servername, $plotUrl, $referer);
                if ($isPlot) {
                    $regxPlot = '<p class="plotpar">([^\<]*)<i>';
                    if ($this->getItem($regxPlot) == self::ITEM_OK) {
                        $plotArr = $this->getFetchedItem();
                        $plotText = trim($plotArr[1]);
                    }
                }

                // Always restore the saved buffer, not only on the fallback
                // path, so later matching does not run against the plot page.
                $this->setContents($itemBuffer);

                if ($isPlot && $plotText === null) {
                    // Plot not found, use the Tagline from the item page instead
                    $regExTagline = '<p>[^"]*<p>([^<]*)</p>[^"]*</p>[^"]*<div class="txt-block">[^"]*<h4 class="inline">';
                    if ($this->getItem($regExTagline) == self::ITEM_OK) {
                        $plotArr = $this->getFetchedItem();
                        $plotText = $plotArr[1];
                    }
                }

                if ($plotText !== null) {
                    array_push($this->workerArray, array($entry, $plotText));
                }
                break;

            default:
                break;
        }
    }

    /**
     * Runs a cast pattern against the current page contents and returns one
     * "actor .... role" string per match.
     *
     * @param string $regex Delimited PCRE pattern; group 1 is the actor, group 2 the role
     * @return array The formatted pairs, empty when nothing matched
     */
    private function collectCastPairs($regex) {
        $pairs = array();
        if (!preg_match_all($regex, $this->getContents(), $matches)) {
            return $pairs;
        }

        $actors = $matches[1];
        $roles = $matches[2];
        $total = count($actors);
        for ($i = 0; $i < $total; $i++) {
            $pairs[] = $actors[$i].' .... ' . strip_tags($roles[$i]);
        }
        return $pairs;
    }
}
?>
