Home » Wordpress » How to fetch full content from RSS url?

How to fetch full content from RSS url?

Posted by: admin November 30, 2017 Leave a comment

Questions:

I’m developing an RSS fetcher API that should fetch the full content of each item from an RSS URL. I have searched a lot, but I am unable to make it work. Currently my API gets only the short content (the description) from the RSS feed URL.

The code I am using:

 $rss = fetch_feed($entry->rss_link);
    $number_of_post = 2;
    $iCount = 0;

    // Create one published 'sfp_forum' post for every item in the feed.
    foreach ($rss->get_items() as $item) {
        // Item text as returned by get_description(); when the entry asks for
        // link removal, only basic formatting/image tags are kept.
        $description = $item->get_description();
        $content = $entry->remove_link
            ? strip_tags($description, '<p><div><i><b><u><img>')
            : $description;

        $iCount++;

        // Assemble the post fields one by one, then hand them to WordPress.
        $sfp_page = array();
        $sfp_page['post_title']   = $item->get_title();
        $sfp_page['post_status']  = 'publish';
        $sfp_page['post_content'] = $content;
        $sfp_page['post_type']    = 'sfp_forum';
        $sfp_page['post_author']  = $entry->author_id;
        $sfp_page['post_date']    = current_time('mysql');

        $rss_id = wp_insert_post($sfp_page);
    }

$content doesn’t have the full content.

Any suggestions?

Answers:

You probably need to do something like the following, because you are overwriting your $content on each iteration, so you only get the last item.

// Collect the body of each feed item into $content (one array entry per item).
$rss = fetch_feed($entry->rss_link);
$number_of_post=2;
$content = array();

// fetch_feed() hands back a WP_Error instead of a feed object when the feed
// cannot be retrieved; calling get_items() on that would be a fatal error.
if (!is_wp_error($rss)) {
    // Only read the first $number_of_post items instead of the whole feed.
    foreach ($rss->get_items(0, $number_of_post) as $item) {
        // get_content() prefers the item's full body and falls back to the
        // summary, whereas get_description() prefers the short summary.
        $body = (string) $item->get_content();
        if ($entry->remove_link) {
            // Keep only basic formatting and image tags.
            $body = strip_tags($body, '<p><div><i><b><u><img>');
        }
        $content[] = $body;
    }
}

// Number of items collected (kept for code that still reads $iCount).
$iCount = count($content);
var_dump($content);

Questions:
Answers:

Here is the best code for fetching content from RSS feeds, by Tarun Narula:

<?php
/**
 * Downloads the page at $url with cURL and extracts its main article markup
 * with the Readability class loaded from the xmlrss plugin directory.
 *
 * Script-level results:
 *   $url      - effective URL reported by cURL after a successful transfer
 *   $html     - downloaded markup with <script> elements removed
 *               (false when every download attempt failed)
 *   $contenti - article HTML returned by Readability ('' if nothing was extracted)
 *   $content  - $contenti cleaned up by Tidy when that extension is available,
 *               otherwise identical to $contenti
 */
$path = ABSPATH . 'wp-content/plugins/xmlrss/readability/Readability.php';
require_once ($path);

$url = '';//    Enter URL Here
$max_attempts = 3; // Upper bound on download retries.
$x = 'error';
$html = false;
$contenti = '';
$content = '';
$ch = curl_init ();

curl_setopt ( $ch, CURLOPT_HEADER, 0 );
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt ( $ch, CURLOPT_CONNECTTIMEOUT, 10 );
curl_setopt ( $ch, CURLOPT_TIMEOUT, 200 );
curl_setopt ( $ch, CURLOPT_REFERER, 'http://www.bing.com/' );
curl_setopt ( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8' );
curl_setopt ( $ch, CURLOPT_MAXREDIRS, 20 ); // Good leeway for redirections.
// NOTE(review): the warning is suppressed on purpose; presumably some server
// configurations refuse this option. Confirm before removing the "@".
@curl_setopt ( $ch, CURLOPT_FOLLOWLOCATION, 1 ); // Many login forms redirect at least once.
curl_setopt ( $ch, CURLOPT_HTTPGET, 1 );
curl_setopt ( $ch, CURLOPT_URL, trim ( html_entity_decode($url)  ) );

// Retry a bounded number of times. A permanent failure (for example an empty
// or unreachable URL) must not keep the request spinning forever.
for ( $attempt = 1; $attempt <= $max_attempts; $attempt++ ) {
    $html = curl_exec ( $ch );
    $x = curl_error ( $ch );
    if ( $html !== false && trim ( $x ) === '' ) {
        break;
    }
}

$result = false;
$readability = null;

// Only parse when the transfer actually produced a document.
if ( is_string ( $html ) && trim ( $x ) === '' ) {
    $url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

    // Drop inline scripts before extraction. preg_replace() returns null on a
    // PCRE error, in which case the unmodified markup is kept.
    $stripped = preg_replace('{<script.*?script>}s', '', $html);
    if ( $stripped !== null ) {
        $html = $stripped;
    }

    $readability = new Readability ( $html, $url );
    $readability->debug = false;
    $result = $readability->init ();
}

if ($result) {
    $contenti = $readability->getContent()->innerHTML;
    // Default to the raw extraction so $content is set even without Tidy.
    $content = $contenti;

    if (function_exists('tidy_parse_string')) {
        $tidy = tidy_parse_string($contenti, array('indent'=>true, 'show-body-only' => true), 'UTF8');
        // tidy_parse_string() returns false when the markup cannot be parsed.
        if ( $tidy !== false ) {
            $tidy->cleanRepair();
            $content = $tidy->value;
        }
    }
}

unset($readability);
unset($result);