#!/usr/bin/env python
# download-feedbooks
# Download all of Feedbooks
# (c) Reuben Thomas   26th December 2010
# Released under the GPL version 3, or (at your option) any later
# version.

# FIXME: Download files once each only, and link into category subdirs

import os
import fileinput
import re
import urllib
import subprocess

# Build the category list from the HTML page given on the command line
# (or piped on stdin): each entry maps the human-readable category name
# to the query-string code used in Feedbooks URLs.
categories = []
category_pattern = re.compile('category=([^&]+).*range=week">([^<]+)')
for html_line in fileinput.input():
    match = category_pattern.search(html_line)
    if match:
        categories.append({'name': match.group(2), 'code': match.group(1)})

for c in categories:
    print('Category: ' + c['name'])

    # Collect book base URLs by walking the category's "top" listing one
    # page at a time, stopping at the first page that yields no matches.
    books = []
    i = 1
    while True:
        found = 0
        page = 'http://www.feedbooks.com/books/top?category=' + c['code'] + '&lang=en&page=' + str(i)
        for line in urllib.urlopen(page).readlines():
            m = re.search(r'(http://.*)\.epub', line)
            if m:
                books.append(m.group(1))
                found += 1
        # BUG FIX: the stop test and page advance were nested inside the
        # match branch, where found is always >= 1, so the loop never
        # advanced past page 1 and never terminated.  Decide only after
        # the whole page has been scanned.
        if found == 0:
            break
        print('page ' + str(i))
        i += 1

    # Download the books for this category into a subdirectory named
    # after it (the .epub base URL also serves the .mobi variant).
    os.mkdir(c['name'])
    os.chdir(c['name'])
    for b in books:
        subprocess.check_call(['wget', b + '.mobi'])
    os.chdir('..')

# Last updated 2011/01/15