Created
October 31, 2023 16:40
-
-
Save mbunal/d4b2dcfd26a4927842fbcceb4b88e192 to your computer and use it in GitHub Desktop.
simple rss reader for php
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * RSS for PHP - small and easy-to-use library for consuming an RSS Feed
 *
 * @copyright Copyright (c) 2008 David Grudl
 * @license New BSD License
 * @version 1.5
 */
class Feed
{
    /** @var int|string cache lifetime: a number of seconds, or a strtotime()-compatible relative interval such as '1 day' */
    public static $cacheExpire = '1 day';

    /** @var string|null directory holding cached feeds; caching is skipped while this is empty */
    public static $cacheDir;

    /** @var string User-Agent value sent with every HTTP request */
    public static $userAgent = 'FeedFetcher-Google';

    /** @var SimpleXMLElement root of the loaded feed (the <channel> for RSS, the document root for Atom) */
    protected $xml;


    /**
     * Loads RSS or Atom feed; the format is detected by the presence of a <channel> element.
     * @param  string  feed URL
     * @param  string  optional user name
     * @param  string  optional password
     * @return Feed
     * @throws FeedException
     */
    public static function load($url, $user = null, $pass = null)
    {
        $xml = self::loadXml($url, $user, $pass);
        if ($xml->channel) {
            return self::fromRss($xml);
        }
        return self::fromAtom($xml);
    }


    /**
     * Loads RSS feed.
     * @param  string  RSS feed URL
     * @param  string  optional user name
     * @param  string  optional password
     * @return Feed
     * @throws FeedException
     */
    public static function loadRss($url, $user = null, $pass = null)
    {
        return self::fromRss(self::loadXml($url, $user, $pass));
    }


    /**
     * Loads Atom feed.
     * @param  string  Atom feed URL
     * @param  string  optional user name
     * @param  string  optional password
     * @return Feed
     * @throws FeedException
     */
    public static function loadAtom($url, $user = null, $pass = null)
    {
        return self::fromAtom(self::loadXml($url, $user, $pass));
    }


    /**
     * Wraps an RSS document and adds 'url' and 'timestamp' tags to every item.
     * @param  SimpleXMLElement  parsed RSS document
     * @return Feed
     * @throws FeedException  when the document has no <channel> element
     */
    private static function fromRss(SimpleXMLElement $xml)
    {
        if (!$xml->channel) {
            throw new FeedException('Invalid feed.');
        }

        self::adjustNamespaces($xml);

        foreach ($xml->channel->item as $item) {
            // expose namespaced children as 'prefix:tag' so they are reachable as plain properties
            self::adjustNamespaces($item);

            // generate 'url' & 'timestamp' tags; dc:date takes precedence over pubDate
            $item->url = (string) $item->link;
            if (isset($item->{'dc:date'})) {
                $item->timestamp = strtotime((string) $item->{'dc:date'});
            } elseif (isset($item->pubDate)) {
                $item->timestamp = strtotime((string) $item->pubDate);
            }
        }

        $feed = new self;
        $feed->xml = $xml->channel;
        return $feed;
    }


    /**
     * Wraps an Atom document and adds 'url' and 'timestamp' tags to every entry.
     * @param  SimpleXMLElement  parsed Atom document
     * @return Feed
     * @throws FeedException  when the document declares neither of the accepted Atom namespaces
     */
    private static function fromAtom(SimpleXMLElement $xml)
    {
        $namespaces = $xml->getDocNamespaces();
        if (!in_array('http://www.w3.org/2005/Atom', $namespaces, true)
            && !in_array('http://purl.org/atom/ns#', $namespaces, true)
        ) {
            throw new FeedException('Invalid feed.');
        }

        // generate 'url' & 'timestamp' tags
        foreach ($xml->entry as $entry) {
            $entry->url = (string) $entry->link['href'];
            $entry->timestamp = strtotime((string) $entry->updated);
        }

        $feed = new self;
        $feed->xml = $xml;
        return $feed;
    }


    /**
     * Returns property value. Do not call directly.
     * @param  string  tag name
     * @return SimpleXMLElement
     */
    public function __get($name)
    {
        return $this->xml->{$name};
    }


    /**
     * Sets value of a property. Do not call directly.
     * @param  string  property name
     * @param  mixed   property value
     * @return void
     * @throws Exception  always; the feed is read-only
     */
    public function __set($name, $value)
    {
        throw new Exception("Cannot assign to a read-only property '$name'.");
    }


    /**
     * Converts a SimpleXMLElement into an array.
     * @param  SimpleXMLElement  element to convert; defaults to the feed root
     * @return array|string  nested array for elements with children, text content for leaf elements
     */
    public function toArray(SimpleXMLElement $xml = null)
    {
        if ($xml === null) {
            $xml = $this->xml;
        }

        if (!$xml->children()) {
            return (string) $xml;
        }

        $arr = [];
        foreach ($xml->children() as $tag => $child) {
            if (count($xml->$tag) === 1) {
                $arr[$tag] = $this->toArray($child);
            } else {
                // a tag that occurs more than once is collected into a list
                $arr[$tag][] = $this->toArray($child);
            }
        }

        return $arr;
    }


    /**
     * Load XML from cache or HTTP.
     *
     * Order of attempts: a cache file younger than $cacheExpire, then a live
     * HTTP request (whose result refreshes the cache), then a cache file of any age.
     * @param  string  feed URL
     * @param  string  user name or null
     * @param  string  password or null
     * @return SimpleXMLElement
     * @throws FeedException  when no data can be obtained or the data is not well-formed XML
     */
    private static function loadXml($url, $user, $pass)
    {
        $expire = self::$cacheExpire;
        // same key as serializing the argument list: URL plus credentials
        $cacheFile = self::$cacheDir . '/feed.' . md5(serialize([$url, $user, $pass])) . '.xml';

        if (self::$cacheDir
            && (time() - @filemtime($cacheFile) <= (is_string($expire) ? strtotime($expire) - time() : $expire))
            && ($data = @file_get_contents($cacheFile))
        ) {
            // fresh cache hit
        } elseif ($data = trim((string) self::httpRequest($url, $user, $pass))) {
            if (self::$cacheDir) {
                file_put_contents($cacheFile, $data);
            }
        } elseif (self::$cacheDir && ($data = @file_get_contents($cacheFile))) {
            // download failed; fall back to the cached copy regardless of its age
        } else {
            throw new FeedException('Cannot load feed.');
        }

        try {
            return new SimpleXMLElement($data, LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOCDATA);
        } catch (Exception $e) {
            // honour the documented contract: parse failures surface as FeedException
            throw new FeedException($e->getMessage(), (int) $e->getCode(), $e);
        }
    }


    /**
     * Process HTTP request, via cURL when the extension is loaded and via the stream wrapper otherwise.
     * @param  string  URL to fetch
     * @param  string  user name or null
     * @param  string  password or null
     * @return string|false  response body, or false on failure
     */
    private static function httpRequest($url, $user, $pass)
    {
        if (extension_loaded('curl')) {
            $curl = curl_init();
            curl_setopt($curl, CURLOPT_URL, $url);
            if ($user !== null || $pass !== null) {
                curl_setopt($curl, CURLOPT_USERPWD, "$user:$pass");
            }
            curl_setopt($curl, CURLOPT_USERAGENT, self::$userAgent); // some feeds require a user agent
            curl_setopt($curl, CURLOPT_HEADER, false);
            curl_setopt($curl, CURLOPT_TIMEOUT, 20);
            curl_setopt($curl, CURLOPT_ENCODING, ''); // empty string = accept every encoding cURL supports
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); // no echo, just return result
            // NOTE(review): presumably guarded because older PHP versions reject
            // CURLOPT_FOLLOWLOCATION while open_basedir is set - confirm
            if (!ini_get('open_basedir')) {
                curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
            }
            $result = curl_exec($curl);

            // only a clean transfer with HTTP 200 counts as success
            return curl_errno($curl) === 0 && curl_getinfo($curl, CURLINFO_HTTP_CODE) === 200
                ? $result
                : false;
        }

        $http = [
            'method' => 'GET',
            'user_agent' => self::$userAgent, // keep parity with the cURL branch
        ];
        if ($user !== null && $pass !== null) {
            $http['header'] = 'Authorization: Basic ' . base64_encode($user . ':' . $pass) . "\r\n";
        }

        return file_get_contents($url, false, stream_context_create(['http' => $http]));
    }


    /**
     * Generates better accessible namespaced tags: every child in a prefixed
     * namespace is copied onto the element under the name 'prefix:tag'.
     * @param  SimpleXMLElement
     * @return void
     */
    private static function adjustNamespaces($el)
    {
        foreach ($el->getNamespaces(true) as $prefix => $ns) {
            if ($prefix === '') {
                continue; // children of the default namespace are already reachable by plain name
            }
            foreach ($el->children($ns) as $tag => $content) {
                $el->{$prefix . ':' . $tag} = $content;
            }
        }
    }
}
/**
 * Exception type raised by Feed when a feed cannot be loaded or is invalid.
 */
class FeedException extends Exception {}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment