#!/usr/bin/python
#
"""Read FILEMAN page and download flash.

usage: fileman < html-list
"""
#
# Copyright (c) 2005 Satoshi Fukutomi.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

import os
import sys
from time import sleep
from threading import Thread
from urlparse import urlparse
from os.path import basename, exists

from crawler import crawl, agent


class Reader(Thread):
    """Worker thread: crawls queued pages and downloads linked .swf files."""

    def __init__(self):
        Thread.__init__(self)
        self.query = []
        self.stopflag = False
        self.setDaemon(True)   # do not block interpreter exit

    def run(self):
        while True:
            if self.query:
                url = self.query[0]
                self.read(url)
                self.query.pop(0)
            elif self.stopflag:
                return
            sleep(5)

    def download(self, url):
        """Download url into the current directory, skipping existing files."""
        fname = basename(urlparse(url)[2])
        if exists(fname):
            print "%s: exists" % fname
            return
        print "%s -> %s" % (url, fname)
        rfile = agent.open(url)
        wfile = file(fname, "wb")
        size = int(rfile.info().getheader("Content-Length", 0))
        count = 0
        while True:
            buf = rfile.read(256*1024)
            if buf == "":
                break
            wfile.write(buf)
            count += len(buf)
            # Print progress after each 256 KB chunk.
            if size:
                print "%s %d/%s (%d%%)" % \
                      (fname, count, size, count*100/size)
            else:
                print "%s %d" % (fname, count)
        rfile.close()
        wfile.close()

    def read(self, url):
        """Crawl url and download every .swf link found on the page."""
        urls = crawl(url)
        for u in urls:
            if u.endswith(".swf"):
                self.download(u)

    def append(self, url):
        self.query.append(url)

    def stop(self):
        self.stopflag = True

    def print_query(self):
        """Print any URLs that were still queued when we stopped."""
        if self.query:
            print "----------------------"
            for q in self.query:
                print q


def main():
    reader = Reader()
    try:
        reader.start()
        while True:
            line = sys.stdin.readline()
            if line == "":
                reader.stop()
                break
            line = line.strip()
            if line:
                reader.append(line)
        reader.join()
    finally:
        reader.print_query()


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        sys.exit(1)
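
# ---------------------------------------------------------------------------
# Note: the `crawler` module imported above is not part of this file.  Judging
# only from how it is used here, it is assumed to expose roughly the interface
# sketched below; the names `crawl` and `agent` come from the import, but the
# implementation shown is hypothetical.
#
#     # crawler.py -- minimal sketch, assuming urllib2-style semantics
#     import urllib2
#
#     # agent.open(url) must return a file-like response whose .info()
#     # supports .getheader("Content-Length", 0), as download() expects.
#     agent = urllib2.build_opener()
#
#     def crawl(url):
#         """Fetch url and return an iterable of absolute link URLs."""
#         ...
#
# Typical invocation, with one FILEMAN page URL per line on stdin:
#
#     $ fileman < html-list
# ---------------------------------------------------------------------------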