CustomFeed::Nowa

うちも、nowaはじめました。http://hakobe.nowa.jp/です。

まぁ、いろいろあってCustomFeed::Nowa書いた、ということをnowaに書いた。ここからちょっとアップデートしたのでこっちに貼付け。ちなみにhttp://my.nowa.jp/friend/をCustomFeedします。

こんな感じに使える。

# Example Plagger configuration using CustomFeed::Nowa together with Publish::Feed.
plugins:
  - module: CustomFeed::Nowa
    config:
      # id / password are sent as nowa_id / password in the plugin's login POST.
      id: id
      password: password
      # article: when true, scraped entries that have a body are added to the feed.
      article: 1
      # nanishiteru: when true, scraped entries without a body are added to the feed.
      # If both article and nanishiteru are false, the plugin turns both on.
      nanishiteru: 0

  # Second plugin in the example; the values below are placeholders.
  - module: Publish::Feed
    config:
      type: RSS
      dir: /path/to/feeds
      filename: nowa.xml

nowaの方にも書いたけどCookie使うのにPlaggerの機構がうまく使えないのでがんばってログインしてCookieとってきてます。Web::Scraper++。あと、DateTime::Durationとか。

package Plagger::Plugin::CustomFeed::Nowa;
use strict;
use warnings;
use utf8;
use base qw( Plagger::Plugin );

use Web::Scraper;
use HTTP::Request::Common;
use HTTP::Cookies;
use DateTime::Duration;

use Plagger::UserAgent;
use Plagger::Util qw( decode_content );

# Plugin entry point: asks the context to call load() for the
# 'subscription.load' hook on behalf of this plugin.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ('subscription.load' => \&load);
    $ctx->register_hook($plugin, %hooks);
}

# Hook for 'subscription.load': adds one feed to the subscription whose
# aggregator callback forwards its arguments to this plugin's aggregate().
sub load {
    my ($plugin, $ctx) = @_;

    my $run_aggregate = sub { $plugin->aggregate(@_) };

    my $placeholder = Plagger::Feed->new;
    $placeholder->aggregator($run_aggregate);

    $ctx->subscription->add($placeholder);
}

# Log in to nowa, fetch the friends page and turn its entries into a feed.
#
# Arguments:
#   $context - the Plagger context; used for logging and to receive the feed.
#   $args    - passed through by the aggregator callback; not used here.
#
# Config keys read from $self->conf:
#   id, password - credentials sent in the login POST.
#   article      - when true, keep scraped entries that have a body.
#   nanishiteru  - when true, keep scraped entries that have no body.
#                  If both flags are false, both are switched on.
#
# Returns nothing when the friends page cannot be fetched; otherwise the
# result of adding the feed to $context->update.
sub aggregate {
    my($self, $context, $args) = @_;

    my $feed = Plagger::Feed->new;
    $feed->type('nowa');
    $feed->title("[nowa] ナニシテル? & 新着記事"); # xxx
    $feed->id('nowa'); # xxx

    # NOTE(review): the cookie file lives at a fixed path under /tmp;
    # consider a per-user location if session cookies end up stored here.
    my $cookie_jar = HTTP::Cookies->new(
        file => '/tmp/cookies.txt',
        autosave => 1,
    );

    my $agent = Plagger::UserAgent->new;
    $agent->cookie_jar($cookie_jar);

    my $login_url = 'http://my.nowa.jp/login/';

    my $login_req = POST($login_url, {
        nowa_id =>  $self->conf->{id},
        password => $self->conf->{password},
    });
    my $login_res = $agent->request($login_req);

    # Previously the login response was never looked at. Report a failed
    # login, but keep going so the existing best-effort flow is preserved;
    # the friends-page fetch below still reports its own failure.
    if ($login_res->is_error) {
        $context->log(error => "POST $login_url failed: " . $login_res->status_line);
    }

    my $url = 'http://my.nowa.jp/friend/';
    my $res = $agent->fetch($url, $self);

    if ($res->is_error) {
        $context->log(error => "GET $url failed: " . $res->http_status);
        return;
    }

    # With neither kind selected the feed would always be empty, so
    # fall back to publishing both kinds.
    if (!$self->conf->{article} && !$self->conf->{nanishiteru}) {
        $self->conf->{article} = 1;
        $self->conf->{nanishiteru} = 1;
    }

    my $content = decode_content($res);
    scraper {
        process 'div[class="friendentrybody"]', 'articles[]' => sub {
            my $node = shift;
            my $entry = $self->_create_entry($node);

            # Entries with a body are governed by 'article', entries
            # without one by 'nanishiteru'.
            my $wanted = $entry->body
                ? $self->conf->{article}
                : $self->conf->{nanishiteru};
            $feed->add_entry($entry) if $wanted;
        };
    }->scrape($content);

    $context->update->add($feed);
}

# Build a Plagger::Entry from one node matched by the scraper in aggregate().
#
# Arguments:
#   $node - the element handed to the 'process' callback.
#
# Returns:
#   A Plagger::Entry with title, link, author, body and date filled in from
#   the scraped 'h2', 'h2 > a', 'a.blue-cms', 'p' and 'span.time' values.
#
# The date is computed as "now minus the age shown in span.time". The age
# text is expected to start with "(<digits>" followed by a unit suffix.
sub _create_entry {
    my ($self, $node) = @_;

    my $entry_scraped = scraper {
        process 'p',          body   => 'TEXT';
        process 'a.blue-cms', author => 'TEXT';
        process 'h2',         title  => 'TEXT';
        process 'h2 > a',     link   => '@href';
        process 'span.time',  time   => 'TEXT';
        result 'body', 'author', 'title', 'link', 'time';
    }->scrape($node);

    # Unit suffix => DateTime::Duration key. Anything without a listed
    # suffix is treated as minutes, as the old catch-all pattern did.
    # NOTE(review): the old minutes and seconds patterns were identical, so
    # the seconds branch could never run. '秒' is assumed to be the seconds
    # suffix used by the page -- confirm against the live markup.
    my %unit_for = (
        '時間' => 'hours',
        '秒'   => 'seconds',
    );

    # Start from a zero duration so a missing or unrecognised age yields
    # "now" instead of subtracting undef from the date.
    my $duration = DateTime::Duration->new;
    my $time_str = $entry_scraped->{time};
    if (defined $time_str && $time_str =~ m/^\((\d+)(時間|秒)?/xms) {
        my ($amount, $suffix) = ($1, $2);
        my $unit = defined $suffix ? $unit_for{$suffix} : 'minutes';
        $duration = DateTime::Duration->new($unit => $amount);
    }
    my $date = Plagger::Date->now() - $duration;

    my $entry = Plagger::Entry->new;
    $entry->title($entry_scraped->{title});
    $entry->link($entry_scraped->{link});
    $entry->author($entry_scraped->{author});
    $entry->body($entry_scraped->{body});
    $entry->date($date);

    return $entry;
}

1;