#!/usr/bin/env python3
# download-feedbooks
# Download all of Feedbooks
# (c) Reuben Thomas 26th December 2010
# Released under the GPL version 3, or (at your option) any later
# version.
#
# FIXME: Download each file once only, and link into category subdirs
# (currently books are de-duplicated only within a single category).

"""Scrape Feedbooks category pages and download every book as .mobi.

Reads an HTML page (file named on the command line, or stdin) listing
category links, extracts the category codes, then walks each category's
paginated "top books" listing and fetches every book with wget into a
per-category subdirectory.
"""

import fileinput
import os
import re
import subprocess
import urllib.request

# Matches a weekly-top category link; captures (code, display name).
_CATEGORY_RE = re.compile(r'category=([^&]+).*range=week">([^<]+)')
# Matches a book's .epub download link; group 1 is the URL stem
# (the same stem with ".mobi" appended is the Mobipocket download).
_EPUB_RE = re.compile(r'(http://.*)\.epub')


def parse_categories(lines):
    """Return [{'name': ..., 'code': ...}] for each category link in *lines*.

    *lines* is any iterable of HTML text lines.
    """
    categories = []
    for line in lines:
        m = _CATEGORY_RE.search(line)
        if m:
            categories.append({'name': m.group(2), 'code': m.group(1)})
    return categories


def extract_book_stems(lines):
    """Return the .epub URL stems found in *lines* (one page of HTML)."""
    matches = (_EPUB_RE.search(line) for line in lines)
    return [m.group(1) for m in matches if m]


def collect_book_stems(code):
    """Walk the paginated top-books listing for category *code*.

    Returns unique URL stems in first-seen order.  Pagination stops at
    the first page that yields no .epub links, mirroring the original
    "found == 0" termination test.
    """
    stems = []
    seen = set()
    page_no = 1
    while True:
        url = ('http://www.feedbooks.com/books/top?category=' + code
               + '&lang=en&page=' + str(page_no))
        with urllib.request.urlopen(url) as resp:
            page = resp.read().decode('utf-8', 'replace')
        found = extract_book_stems(page.splitlines())
        if not found:
            break
        for stem in found:
            if stem not in seen:  # de-duplicate within this category
                seen.add(stem)
                stems.append(stem)
        print('page ' + str(page_no))
        page_no += 1
    return stems


def download_category(category):
    """Download every book in *category* as .mobi into a subdirectory.

    *category* is a dict with 'name' (used as the directory name) and
    'code' (the Feedbooks category code).  The working directory is
    restored even if a wget invocation fails.
    """
    print('Category: ' + category['name'])
    stems = collect_book_stems(category['code'])
    os.makedirs(category['name'], exist_ok=True)  # tolerate reruns
    previous_dir = os.getcwd()
    os.chdir(category['name'])
    try:
        for stem in stems:
            subprocess.check_call(['wget', stem + '.mobi'])
    finally:
        os.chdir(previous_dir)  # restore cwd even on wget failure


def main():
    # Category page comes from a file named on the command line, or stdin.
    categories = parse_categories(fileinput.input())
    for category in categories:
        download_category(category)


if __name__ == '__main__':
    main()