9 * @author Greg Beaver <cellog@php.net>
10 * @copyright 1997-2009 The Authors
11 * @license http://opensource.org/licenses/bsd-license.php New BSD License
12 * @link http://pear.php.net/package/PEAR
13 * @since File available since Release 1.4.0a1
17 * For downloading xml files
19 require_once 'PEAR.php';
20 require_once 'PEAR/XMLParser.php';
23 * Intelligently retrieve data, following hyperlinks if necessary, and re-directing
27 * @author Greg Beaver <cellog@php.net>
28 * @copyright 1997-2009 The Authors
29 * @license http://opensource.org/licenses/bsd-license.php New BSD License
30 * @version Release: 1.10.1
31 * @link http://pear.php.net/package/PEAR
32 * @since Class available since Release 1.4.0a1
// Constructor. Stores the caller's configuration object and the options array on the instance.
// The configuration object is queried through get() elsewhere in this file for 'cache_dir',
// 'cache_ttl', 'http_proxy', 'username' and 'password'; the only option key the visible code
// consults is 'offline' (see retrieveData()).
39 function __construct(&$config, $options = array())
// Bound by reference, so this object works on the same configuration instance as the caller.
41 $this->config = &$config;
42 $this->_options = $options;
46 * Retrieve REST data, but always retrieve the local cache if it is available.
48 * This is useful for elements that should never change, such as information on a particular
50 * @param string full URL to this resource
51 * @param array|false contents of the Accept header
52 * @param boolean if true, xml will be returned as a string, otherwise, xml will be
53 * parsed using PEAR_XMLParser
54 * @return string|array
// Cache-first retrieval: return the cached copy of $url when a cache file exists, otherwise
// delegate to retrieveData() with the same arguments.
56 function retrieveCacheFirst($url, $accept = false, $forcestring = false, $channel = false)
// Cache file name: <cache_dir>/<md5 of the URL>rest.cachefile
58 $cachefile = $this->config->get('cache_dir') . DIRECTORY_SEPARATOR .
59 md5($url) . 'rest.cachefile';
// No age/TTL check is visible on this path: an existing cache file is returned as-is.
// NOTE(review): unserialize() runs on whatever the cache file contains; confirm cache_dir
// cannot be written by untrusted users.
61 if (file_exists($cachefile)) {
62 return unserialize(implode('', file($cachefile)));
// Nothing cached yet: fall through to the normal retrieval path.
65 return $this->retrieveData($url, $accept, $forcestring, $channel);
69 * Retrieve a remote REST resource
70 * @param string full URL to this resource
71 * @param array|false contents of the Accept header
72 * @param boolean if true, xml will be returned as a string, otherwise, xml will be
73 * parsed using PEAR_XMLParser
74 * @return string|array
// Retrieve $url: try the local cache first, then an HTTP download (unless the 'offline' option is set),
// then parse the body as XML where appropriate and store the result in the cache.
// NOTE(review): this listing omits some lines of the method (the embedded line numbers skip), so the
// bodies/conditions of a few branches below are not visible and are described only tentatively.
76 function retrieveData($url, $accept = false, $forcestring = false, $channel = false)
// Load the stored cache id record for this URL and let useLocalCache() decide whether it is still usable.
78 $cacheId = $this->getCacheId($url);
79 if ($ret = $this->useLocalCache($url, $cacheId)) {
// $trieddownload records whether a download attempt was actually made.
83 $file = $trieddownload = false;
84 if (!isset($this->_options['offline'])) {
85 $trieddownload = true;
// The stored 'lastChange' value (or false when there is no cache id) is passed as $lastmodified,
// which downloadHttp() turns into If-Modified-Since / If-None-Match request headers.
86 $file = $this->downloadHttp($url, $cacheId ? $cacheId['lastChange'] : false, $accept, $channel);
89 if (PEAR::isError($file)) {
// -9276 is the code downloadHttp() attaches to a failed proxy connection; the handling of every
// other error code is on lines not shown here (presumably the error is returned).
// NOTE(review): a failed direct connection is raised with the fsockopen errno instead, so it would
// not take the fallback below - confirm that is intended.
90 if ($file->getCode() !== -9276) {
94 $trieddownload = false;
95 $file = false; // use local copy if available on socket connect error
// Fallback to the cached copy. The condition guarding this section is not visible; presumably it
// runs when $file is empty/false. getCache() yields a PEAR error when no cache file exists.
99 $ret = $this->getCache($url);
100 if (!PEAR::isError($ret) && $trieddownload) {
101 // reset the age of the cache if the server says it was unmodified
// $nochange = true: saveCache() keeps the stored 'lastChange' value and rewrites the id record.
102 $result = $this->saveCache($url, $ret, null, true, $cacheId);
103 if (PEAR::isError($result)) {
104 return PEAR::raiseError($result->getMessage());
// downloadHttp() returns array($data, $lastmodified, $headers) when caching information is requested;
// index 1 is the validator data. The other assignments of this if/else are on omitted lines.
111 if (is_array($file)) {
113 $lastmodified = $file[1];
117 $lastmodified = false;
// Store the content unparsed. The guarding condition is not shown; presumably this is the
// $forcestring path - TODO confirm.
122 $result = $this->saveCache($url, $content, $lastmodified, false, $cacheId);
123 if (PEAR::isError($result)) {
124 return PEAR::raiseError($result->getMessage());
// Decide how to treat the body from the Content-Type header; anything after ';' (e.g. a charset
// parameter) is dropped before the comparison.
130 if (isset($headers['content-type'])) {
131 $content_type = explode(";", $headers['content-type']);
132 $content_type = $content_type[0];
133 switch ($content_type) {
135 case 'application/xml' :
// A text/plain body is only handed to the XML parser when it starts with an XML declaration.
137 if ($content_type === 'text/plain') {
138 $check = substr($content, 0, 5);
139 if ($check !== '<?xml') {
// Parse with PEAR error handling switched to "return" so a parse failure comes back as a value
// that can be turned into the "Invalid xml" error below.
144 $parser = new PEAR_XMLParser;
145 PEAR::pushErrorHandling(PEAR_ERROR_RETURN);
146 $err = $parser->parse($content);
147 PEAR::popErrorHandling();
148 if (PEAR::isError($err)) {
149 return PEAR::raiseError('Invalid xml downloaded from "' . $url . '": ' .
152 $content = $parser->getData();
155 // use it as a string
// Second parse site: here the result of parse() is not checked. Presumably this is the branch taken
// when no Content-Type header was received (the branching line is not visible) - TODO confirm.
159 $parser = new PEAR_XMLParser;
160 $parser->parse($content);
161 $content = $parser->getData();
// Cache the (possibly parsed) content together with the validator data from the response.
164 $result = $this->saveCache($url, $content, $lastmodified, false, $cacheId);
165 if (PEAR::isError($result)) {
166 return PEAR::raiseError($result->getMessage());
// Return the cached content for $url when the cache id record is younger than the configured
// 'cache_ttl'. $cacheid may be supplied by the caller; when it is null the record is read from disk.
172 function useLocalCache($url, $cacheid = null)
174 if ($cacheid === null) {
// Cache id file name: <cache_dir>/<md5 of the URL>rest.cacheid
175 $cacheidfile = $this->config->get('cache_dir') . DIRECTORY_SEPARATOR .
176 md5($url) . 'rest.cacheid';
// What happens when the id file is missing is on a line not shown (presumably an early return).
177 if (!file_exists($cacheidfile)) {
181 $cacheid = unserialize(implode('', file($cacheidfile)));
184 $cachettl = $this->config->get('cache_ttl');
185 // If cache is newer than $cachettl seconds, we use the cache!
// NOTE(review): $cacheid is indexed as an array here; a non-array value passed in by the caller
// is not guarded against in the visible code.
186 if (time() - $cacheid['age'] < $cachettl) {
187 return $this->getCache($url);
// Read the stored cache id record for $url (callers in this file use its 'age' and 'lastChange' keys).
// The return statements are on lines not shown in this listing.
193 function getCacheId($url)
// Cache id file name: <cache_dir>/<md5 of the URL>rest.cacheid
195 $cacheidfile = $this->config->get('cache_dir') . DIRECTORY_SEPARATOR .
196 md5($url) . 'rest.cacheid';
// Missing file: handled on an omitted line (retrieveData() tests the result for truthiness).
198 if (!file_exists($cacheidfile)) {
202 $ret = unserialize(implode('', file($cacheidfile)));
// Return the unserialized cached content for $url, or a PEAR error when no cache file exists.
206 function getCache($url)
// Cache file name: <cache_dir>/<md5 of the URL>rest.cachefile
208 $cachefile = $this->config->get('cache_dir') . DIRECTORY_SEPARATOR .
209 md5($url) . 'rest.cachefile';
211 if (!file_exists($cachefile)) {
212 return PEAR::raiseError('No cached content available for "' . $url . '"');
// The whole file is read and unserialized; no age check happens at this level.
215 return unserialize(implode('', file($cachefile)));
219 * @param string full URL to REST resource
220 * @param string original contents of the REST resource
221 * @param array HTTP Last-Modified and ETag headers
222 * @param bool if true, then the cache id file should be regenerated to
223 * trigger a new time-to-live value
// Write the cache id record and (unless only the age is being refreshed) the serialized content for $url.
// Returns a PEAR error when the cache directory cannot be created; the remaining return statements
// are on lines not shown in this listing.
225 function saveCache($url, $contents, $lastmodified, $nochange = false, $cacheid = null)
227 $cache_dir = $this->config->get('cache_dir');
// Both files share the prefix <cache_dir>/<md5 of the URL>.
228 $d = $cache_dir . DIRECTORY_SEPARATOR . md5($url);
229 $cacheidfile = $d . 'rest.cacheid';
230 $cachefile = $d . 'rest.cachefile';
// Create the cache directory on demand ('-p' asks System::mkdir for parent directories as well).
// NOTE(review): the System class is not loaded by the require_once lines visible at the top of
// this file; confirm it is loaded elsewhere.
232 if (!is_dir($cache_dir)) {
233 if (System::mkdir(array('-p', $cache_dir)) === false) {
234 return PEAR::raiseError("The value of config option cache_dir ($cache_dir) is not a directory and attempts to create the directory failed.");
238 if (!is_writeable($cache_dir)) {
239 // If writing to the cache dir is not going to work, silently do nothing.
240 // An ugly hack, but retains compat with PEAR 1.9.1 where many commands
241 // work fine as non-root user (w/out write access to default cache dir).
// Refreshing the age without a supplied id record: load the existing record so its 'lastChange'
// value can be carried over.
245 if ($cacheid === null && $nochange) {
246 $cacheid = unserialize(implode('', file($cacheidfile)));
// Build the id record. One entry sits on an omitted line (useLocalCache() reads an 'age' key, so
// presumably the current time); 'lastChange' keeps the old validator when nothing changed.
249 $idData = serialize(array(
251 'lastChange' => ($nochange ? $cacheid['lastChange'] : $lastmodified),
254 $result = $this->saveCacheFile($cacheidfile, $idData);
// Bodies of the two branches below are not visible; presumably the error is returned and the
// $nochange case stops before rewriting the content file - TODO confirm.
255 if (PEAR::isError($result)) {
257 } elseif ($nochange) {
261 $result = $this->saveCacheFile($cachefile, serialize($contents));
// If the content could not be written, remove the id file so it does not point at missing or stale content.
262 if (PEAR::isError($result)) {
263 if (file_exists($cacheidfile)) {
264 @unlink($cacheidfile);
// Write $contents to $file, refusing to write through a symlink. A new file is created exclusively;
// an existing file is opened without truncation, verified, and only then truncated and rewritten.
// Returns a PEAR error on failure; the success return is on a line not shown in this listing.
273 function saveCacheFile($file, $contents)
275 $len = strlen($contents);
277 $cachefile_fp = @fopen($file, 'xb'); // x is the O_CREAT|O_EXCL mode
278 if ($cachefile_fp !== false) { // create file
// A short (or failed) write is treated as an error.
279 if (fwrite($cachefile_fp, $contents, $len) < $len) {
280 fclose($cachefile_fp);
281 return PEAR::raiseError("Could not write $file.");
283 } else { // update file
// Opened read/write without truncating so nothing is destroyed before the symlink checks pass.
284 $cachefile_fp = @fopen($file, 'r+b'); // do not truncate file
285 if (!$cachefile_fp) {
286 return PEAR::raiseError("Could not open $file for writing.");
// Two-stage check: is_link() on the path first, then lstat() of the path is compared with fstat()
// of the handle that was actually opened (same mode, inode and device, and exactly one hard link).
// The conditions joining the two stages are on omitted lines.
290 $not_symlink = !is_link($file); // see bug #18834
292 $cachefile_lstat = lstat($file);
293 $cachefile_fstat = fstat($cachefile_fp);
294 $not_symlink = $cachefile_lstat['mode'] == $cachefile_fstat['mode']
295 && $cachefile_lstat['ino'] == $cachefile_fstat['ino']
296 && $cachefile_lstat['dev'] == $cachefile_fstat['dev']
297 && $cachefile_fstat['nlink'] === 1;
// Presumably reached only when the checks passed (the guarding condition is not visible).
301 ftruncate($cachefile_fp, 0); // NOW truncate
302 if (fwrite($cachefile_fp, $contents, $len) < $len) {
303 fclose($cachefile_fp);
304 return PEAR::raiseError("Could not write $file.");
// Rejection path: close the handle and report the link target when readlink() is available.
307 fclose($cachefile_fp);
308 $link = function_exists('readlink') ? readlink($file) : $file;
309 return PEAR::raiseError('SECURITY ERROR: Will not write to ' . $file . ' as it is symlinked to ' . $link . ' - Possible symlink attack');
313 fclose($cachefile_fp);
318 * Efficiently Download a file through HTTP. Returns downloaded file as a string in-memory
319 * This is best used for small files
321 * If an HTTP proxy has been configured (http_proxy PEAR_Config
322 * setting), the proxy will be used.
324 * @param string $url the URL to download
325 * @param false|string|array $lastmodified header values to check against for caching
326 * use false to return the header values from this download
327 * @param false|array $accept Accept headers to send
328 * @param string|false $channel channel whose username/password settings are sent as HTTP Basic credentials
329 * @return string|array Returns the contents of the downloaded file or a PEAR
330 * error on failure. If the error is caused by
331 * socket-related errors, the error object will
332 * have the fsockopen error code available through
333 * getCode(). If caching is requested, then return the header
// Download $url over a raw socket with a hand-built HTTP/1.1 GET request, optionally through the
// configured 'http_proxy', following redirects by calling itself again.
// Returns array($data, $lastmodified, $headers) at the end of the visible code, or a PEAR error.
// NOTE(review): several lines of this method are not shown in this listing (e.g. the 304 handling
// and the body-accumulation statements); comments about those parts are tentative.
338 function downloadHttp($url, $lastmodified = null, $accept = false, $channel = false)
// Redirect depth is kept in a function-static counter, i.e. shared by every call of this method.
340 static $redirect = 0;
341 // always reset, so we are clean in case of error
342 $wasredirect = $redirect;
// Only absolute http/https URLs with a host component are accepted.
345 $info = parse_url($url);
346 if (!isset($info['scheme']) || !in_array($info['scheme'], array('http', 'https'))) {
347 return PEAR::raiseError('Cannot download non-http URL "' . $url . '"');
350 if (!isset($info['host'])) {
351 return PEAR::raiseError('Cannot download from non-URL "' . $url . '"');
354 $host = isset($info['host']) ? $info['host'] : null;
355 $port = isset($info['port']) ? $info['port'] : null;
// NOTE(review): only the 'path' component is kept; the query component of parse_url() is not
// referenced in the visible code, so the direct (non-proxy) request line below omits any query string.
356 $path = isset($info['path']) ? $info['path'] : null;
357 $schema = (isset($info['scheme']) && $info['scheme'] == 'https') ? 'https' : 'http';
// Proxy settings come from the 'http_proxy' configuration value, parsed as a URL.
359 $proxy_host = $proxy_port = $proxy_user = $proxy_pass = '';
360 if ($this->config->get('http_proxy')&&
361 $proxy = parse_url($this->config->get('http_proxy'))
363 $proxy_host = isset($proxy['host']) ? $proxy['host'] : null;
// NOTE(review): the ssl:// prefix for the proxy connection is chosen from the target URL's scheme,
// not from the proxy's own scheme - confirm this is intended.
364 if ($schema === 'https') {
365 $proxy_host = 'ssl://' . $proxy_host;
// Proxy port defaults to 8080; user and password are URL-decoded.
368 $proxy_port = isset($proxy['port']) ? $proxy['port'] : 8080;
369 $proxy_user = isset($proxy['user']) ? urldecode($proxy['user']) : null;
370 $proxy_pass = isset($proxy['pass']) ? urldecode($proxy['pass']) : null;
// NOTE(review): $proxy_schema is computed here but not used anywhere in the visible lines.
371 $proxy_schema = (isset($proxy['scheme']) && $proxy['scheme'] == 'https') ? 'https' : 'http';
// Default target port by scheme (the guarding condition is not visible; presumably applied only
// when the URL carried no explicit port).
375 $port = (isset($info['scheme']) && $info['scheme'] == 'https') ? 443 : 80;
// Through a proxy the request line carries the absolute URL; a direct request carries only the path.
378 if (isset($proxy['host'])) {
379 $request = "GET $url HTTP/1.1\r\n";
381 $request = "GET $path HTTP/1.1\r\n";
384 $request .= "Host: $host\r\n";
// Conditional-request headers: an array may carry 'Last-Modified' and/or 'ETag'; any other truthy
// value is sent verbatim as If-Modified-Since.
385 $ifmodifiedsince = '';
386 if (is_array($lastmodified)) {
387 if (isset($lastmodified['Last-Modified'])) {
388 $ifmodifiedsince = 'If-Modified-Since: ' . $lastmodified['Last-Modified'] . "\r\n";
391 if (isset($lastmodified['ETag'])) {
392 $ifmodifiedsince .= "If-None-Match: $lastmodified[ETag]\r\n";
395 $ifmodifiedsince = ($lastmodified ? "If-Modified-Since: $lastmodified\r\n" : '');
398 $request .= $ifmodifiedsince .
399 "User-Agent: PEAR/1.10.1/PHP/" . PHP_VERSION . "\r\n";
// Per-channel credentials from the configuration; sent as HTTP Basic auth only when both are set.
401 $username = $this->config->get('username', null, $channel);
402 $password = $this->config->get('password', null, $channel);
404 if ($username && $password) {
405 $tmp = base64_encode("$username:$password");
406 $request .= "Authorization: Basic $tmp\r\n";
// Proxy credentials are added when a proxy host and a proxy user are configured.
409 if ($proxy_host != '' && $proxy_user != '') {
410 $request .= 'Proxy-Authorization: Basic ' .
411 base64_encode($proxy_user . ':' . $proxy_pass) . "\r\n";
// $accept is joined into a single Accept header (the guarding condition is not visible).
415 $request .= 'Accept: ' . implode(', ', $accept) . "\r\n";
// An Accept-Encoding header with an empty value is sent, and the server is asked to close the
// connection after the response.
418 $request .= "Accept-Encoding:\r\n";
419 $request .= "Connection: close\r\n";
// Connect either to the proxy (15 second connect timeout, failure reported with code -9276) ...
422 if ($proxy_host != '') {
423 $fp = @fsockopen($proxy_host, $proxy_port, $errno, $errstr, 15);
425 return PEAR::raiseError("Connection to `$proxy_host:$proxy_port' failed: $errstr", -9276);
// ... or directly to the host (ssl:// transport for https, failure reported with the fsockopen errno).
// NOTE(review): no explicit connect timeout is passed on the direct path, unlike the proxy path.
428 if ($schema === 'https') {
429 $host = 'ssl://' . $host;
432 $fp = @fsockopen($host, $port, $errno, $errstr);
434 return PEAR::raiseError("Connection to `$host:$port' failed: $errstr", $errno);
438 fwrite($fp, $request);
// Read the status line and headers until the first line that trims to an empty (or otherwise
// falsy) string. Header names are lower-cased before being stored.
442 while ($line = trim(fgets($fp, 1024))) {
443 if (preg_match('/^([^:]+):\s+(.*)\s*\\z/', $line, $matches)) {
444 $headers[strtolower($matches[1])] = trim($matches[2]);
445 } elseif (preg_match('|^HTTP/1.[01] ([0-9]{3}) |', $line, $matches)) {
446 $reply = (int)$matches[1];
// 304 Not Modified on a conditional request: the handling is on lines not shown (presumably a
// false return, which retrieveData() answers by using the cached copy).
447 if ($reply == 304 && ($lastmodified || ($lastmodified === false))) {
// Any status other than 200 or one of the listed redirect codes is rejected.
451 if (!in_array($reply, array(200, 301, 302, 303, 305, 307))) {
452 return PEAR::raiseError("File $schema://$host:$port$path not valid (received: $line)");
// Redirect handling (the condition selecting it is not visible): a Location header is required
// and the redirect depth is capped.
458 if (!isset($headers['location'])) {
459 return PEAR::raiseError("File $schema://$host:$port$path not valid (redirected but no location)");
462 if ($wasredirect > 4) {
463 return PEAR::raiseError("File $schema://$host:$port$path not valid (redirection looped more than 5 times)");
// Bump the shared counter and fetch the redirect target with the same arguments.
466 $redirect = $wasredirect + 1;
467 return $this->downloadHttp($headers['location'], $lastmodified, $accept, $channel);
// Declared length, or -1 when the server sent no Content-Length header.
470 $length = isset($headers['content-length']) ? $headers['content-length'] : -1;
// Body is read in 8192-byte chunks until fread() returns something falsy; the statements that
// accumulate the chunks are on omitted lines.
473 while ($chunk = @fread($fp, 8192)) {
// When caching information was requested, rebuild $lastmodified from the response validators:
// an array when an ETag is present (optionally plus Last-Modified), otherwise the bare
// Last-Modified string.
478 if ($lastmodified === false || $lastmodified) {
479 if (isset($headers['etag'])) {
480 $lastmodified = array('ETag' => $headers['etag']);
483 if (isset($headers['last-modified'])) {
484 if (is_array($lastmodified)) {
485 $lastmodified['Last-Modified'] = $headers['last-modified'];
487 $lastmodified = $headers['last-modified'];
491 return array($data, $lastmodified, $headers);