# filter for slashdot.jp
# -*- coding: utf-8 -*-

import re

# Patterns are plain u'' literals with doubled backslashes rather than ur''
# raw literals: the `ur` prefix is a SyntaxError on Python 3, while this
# spelling produces the same pattern text on Python 2 and Python 3.3+.

# The "read all" (すべて読む) paragraph: a <p> that starts with that link and
# runs to the first closing </p> on the same line ('.' does not match newlines).
re_read_all = re.compile(u'''<p>\\s*<a href=['"][^'"]+['"]>\\s*すべて読む\\s*</a>.*?</p>''')
# The "related stories" (関連ストーリー) paragraph.
re_related = re.compile(u'''<p>\\s*関連ストーリー：.*?</p>''')
# A topic link; group 1 is the link text. The dot in the host name is escaped
# so that it only matches a literal '.'.
re_topics = re.compile(u'''<a href="http://slashdot\\.jp/stories/\\w+">(.*?)</a>''')

def entry_filter(entry):
    """Strip the "read all" and "related stories" paragraphs from an entry.

    The link texts of topic links found inside the first "read all"
    paragraph of ``entry['body']`` are collected as tags. They are appended
    to ``entry['tags']`` when that key exists, otherwise stored as a new
    list under it. The cleaned text is written back to ``entry['body']``.

    Returns the same ``entry`` object, modified in place.
    """
    original = entry['body']

    # Harvest the topic names before the paragraph holding them is removed.
    # Only the first "read all" paragraph is inspected.
    hit = re_read_all.search(original)
    found_topics = re_topics.findall(hit.group(0)) if hit else []

    # Drop every "read all" paragraph, then every "related stories" one.
    cleaned = re_related.sub('', re_read_all.sub('', original))

    if 'tags' not in entry:
        entry['tags'] = found_topics
    else:
        entry['tags'].extend(found_topics)
    entry['body'] = cleaned

    return entry

