#! /usr/bin/env python2.2

# This is an ugly little script for automatically grabbing all the datasheets
# for avr devices from atmel's website. Summary doc files are skipped.
# I don't intend for this to be robust in any way.

import os
import re

# Extra wget flag for the listing-page fetch; set to '-nv' to quieten wget.
non_verbose = ''
#non_verbose = '-nv'

# Shell command that dumps the product listing page to stdout.
get_page = 'wget %s -O - http://www.atmel.com/atmel/products/prod200.htm' % (non_verbose)

# Directory on the server that the datasheet files are fetched from.
base_url = 'http://www.atmel.com/atmel/acrobat/%s'

# NOTE(review): the two page-scraping patterns below look like they have lost
# their literal HTML markup somewhere along the way (re_file exposes only two
# groups, although the download loop used to read a third one).  They are kept
# exactly as found; restore the tags from the upstream script before relying
# on what they match -- TODO confirm.
re_body = re.compile(r'(.*)', re.S)
re_file = re.compile(r'.*?(.*?)(.*?)' '\n', re.S)
re_rm = re.compile(r'\s*\(.*?\)')  # for removing parens


def output_name(device, file_name):
    """Return the local file name used to store a device's datasheet.

    The device name is stripped, parenthesised remarks are removed, spaces
    and slashes become underscores, dots are dropped, and the remote file
    name is appended after a dash.
    """
    name = re_rm.sub('', device.strip())
    for old, new in ((' ', '_'), ('.', ''), ('/', '_')):
        name = name.replace(old, new)
    return name + '-' + file_name


def download_args(device, file_name):
    """Return the wget argument vector that downloads one datasheet.

    A list is returned (rather than a shell command string) so that text
    scraped from the web page is never interpreted by a shell.
    """
    return ['wget', '-O', output_name(device, file_name), base_url % (file_name)]


def fetch_page():
    """Run the get_page command and return everything it wrote to stdout."""
    pipe = os.popen(get_page)
    try:
        return pipe.read()
    finally:
        # Always release the pipe, even if reading fails.
        pipe.close()


def main():
    """Fetch the listing page and download every datasheet found in it."""
    pg = fetch_page()

    found = re_body.search(pg)
    if found is None:
        # No recognisable body section: scan the whole page instead of
        # crashing on a missing match object.
        body = pg
    else:
        body = found.group(1)

    # For debugging
    #open('prod200.html','w').write(body)

    for mo in re_file.finditer(body):
        file_name = mo.group(1)
        device = mo.group(2)
        if not file_name:
            # Without a file name the URL would point at the bare directory.
            continue
        args = download_args(device, file_name)
        # spawnvp runs wget directly (no shell) and waits for it to finish.
        os.spawnvp(os.P_WAIT, args[0], args)


if __name__ == '__main__':
    main()