false, CURLOPT_CONNECTTIMEOUT => 15, CURLOPT_TIMEOUT => 15, CURLOPT_FOLLOWLOCATION => true, CURLOPT_MAXREDIRS => 3, CURLOPT_RETURNTRANSFER => 1, CURLOPT_FAILONERROR => true, CURLOPT_ENCODING => 'gzip,deflate', ); /** * Error strings */ private static $parseError = 'Parse error: Invalid RSS/XML document (%s). Please check RSS by http://feedvalidator.org/.'; private static $downloadError = 'Download error: %s'; private static $fileOpenErorr = 'File error: Unable to open file %s'; /** * If true, HTML will be stripped from HTML is suspected tags * @var boolean */ private $stripHtml = true; /** * Array of tags where HTML is suspected */ private $htmlSuspected = array('title', 'description'); private $channelTags = array('title', 'link', 'description', 'lastBuildDate'); private $itemTags = array('title', 'link', 'description', 'pubDate', 'guid'); /** * Maximum number of items to be returned */ private $itemsLimit = 0; /** * @var string Date time format for date related RSS items */ private $dateFormat = null; /** * XML name spaces */ private static $xmlns = array( // @todo: asi se nikde nepouziva - smazat 'atom' => 'http://www.w3.org/2005/Atom', 'content' => 'http://purl.org/rss/1.0/modules/content/', 'wfw' => 'http://wellformedweb.org/CommentAPI/', 'dc' => 'http://purl.org/dc/elements/1.1/', 'sy' => 'http://purl.org/rss/1.0/modules/syndication/', 'slash' => 'http://purl.org/rss/1.0/modules/slash/', ); /** * XML name spaces used in used tags */ private $usedXmlns = array(); public $lastError = null; /** * Register all xml namespaces used in channel tags or item tags. */ private function registerNameSpaces(&$xpath) { foreach ($this->usedXmlns as $ns) { $xpath->registerNamespace($ns, ''); } } /** * Recalc array of all xml namespaces used in channel tags or item tags. */ private function calcNamespaces() { // Get all XML name spaces used in required tags $tags = implode(',', array_merge($this->channelTags, $this->itemTags)); preg_match_all("'([a-zA-Z0-9]+):'si", $tags, $namespaces); $this->usedXmlns = $namespaces[1]; } /** * Set required channel tags * @param array $tags (default is array('title', 'link', 'description') */ public function setChannelTags($tags) { $this->channelTags = $tags; $this->calcNamespaces(); return $this; } /** * Set required item tags * @param array $tags Default is array('title', 'link', 'description', 'pubDate', 'guid') */ public function setItemTags($tags) { $this->itemTags = $tags; $this->calcNamespaces(); return $this; } /** * Set date/time format for date related items (lastBuildDate, pubDate). * To disable date format conversion set date format to null (default value). * @param string $dateFormat Date/time format compatible with PHP function date(). */ public function setDateFormat($dateFormat) { $this->dateFormat = $dateFormat; return $this; } /** * Fotmat the input variable into $dateFormat * * If input value is not valid date, change input value to null. * If $dateFormat is specified, do nothing. * @param string $dateString */ private function formatDate(&$dateString) { if ($this->dateFormat) { $timeStamp = strtotime($dateString); $dateString = ($timeStamp) ? date($this->dateFormat, $timeStamp) : null; } } /** * Get URL content using CURL * @param $url String * @return String Returns XML string on success, FALSE on failure. */ private function loadUrl($url) // @todo: prejmenovat na downloadUrl() { if (!function_exists('curl_init')) { throw new Exception('CURL is not installed!'); } $ch = curl_init(); curl_setopt_array($ch, $this->curlOptions); curl_setopt($ch, CURLOPT_URL, $url); if (!($content = curl_exec($ch))) { $this->lastError = sprintf(self::$downloadError, curl_error($ch)); } curl_close($ch); return $content; } /** * Parse xmlData and return parsed result * */ private function parse(&$xmlData) { $result = array(); $doc = new DomDocument(); // Try to load XML or return parse error if (false == @$doc->loadXml($xmlData)) { // Try to fix XML by Tidy and try to load one more time $xmlData = tidy_repair_string($xmlData, array('output-xml' => true, 'input-xml' => true), 'utf8'); if (false == @$doc->loadXml($xmlData)) { $this->lastError = self::$parseError; return false; } } unset($xmlData); $xpath = new DOMXPath($doc); // Register xml name spaces $this->registerNamespaces($xpath); // Parse channel tags foreach($this->channelTags as &$tag) { $result[$tag] = $xpath->evaluate("string(channel/$tag)"); if ($tag == 'lastBuildDate') { $this->formatDate($result[$tag]); } } // Parse items // @todo: Zjistit, co bude rychlejsi //$items = $doc->getElementsByTagName('item'); $items = $xpath->evaluate('/rss/channel/item'); foreach($items as $item) { $tmpItem = array(); // Parse item tags foreach ($this->itemTags as &$tag) { switch ($tag) { case 'pubDate': $tmpItem[$tag] = $xpath->evaluate('string(pubDate)', $item); $this->formatDate($tmpItem[$tag]); break; case 'category': // Category is multivalue tag $tmpItem[$tag] = array(); foreach ($xpath->query('category', $item) as $node) { $tmpItem[$tag][] = trim($node->nodeValue); } break; case 'title': case 'description': $tmpItem[$tag] = $xpath->evaluate("string($tag)", $item); if ($this->stripHtml) { $tmpItem[$tag] = strip_tags($tmpItem[$tag]); $tmpItem[$tag] = html_entity_decode($tmpItem[$tag], ENT_QUOTES, 'UTF-8'); } $tmpItem[$tag] = trim($tmpItem[$tag]); break; default: $tmpItem[$tag] = trim($xpath->evaluate("string($tag)", $item)); break; } } $result['items'][] = $tmpItem; // If limit number of items is reached, stop processing remaining items if (count($result['items']) == $this->itemsLimit) { break; } } // Calc items count $result['itemsCount'] = isset($result['items']) ? count($result['items']) : 0; return $result; } /** * Fetch RSS/Atom feed from remote URL * @param string $url URL * @return array */ public function get($url) { $this->lastError = ''; // Get feed content $xmlData = $this->loadUrl($url); return $this->parse($xmlData); } /** * Fetch RSS/Atom feed from local file * @param string $filename Local file name * @return array */ public function getFile($filename) { $this->lastError = ''; $xmlData = file_get_contents($filename); return $this->parse($xmlData); } /** * Get last error message * @return string Error description */ public function getLastError() { return $this->lastError; } /** * @param int $limit Maximum number of items. Default is 0 which means "no limit". */ public function setItemsLimit($limit = 0) { $this->itemsLimit = (int) $limit; return $this; } }