#!/usr/bin/env python
"""
html2s3.py, by David Wolever: http://wolever.net/~wolever
With bits stolen from http://www.holovaty.com/code/update_s3.py

Requires S3.py:
    http://developer.amazonwebservices.com/connect/entry.jspa?externalID=134&categoryID=47

Usage:
    find WebPage -name '*.html' | html2s3 -b BUCKET_NAME
"""
import S3  # You need S3.py for this; see above link
import mimetypes,  os.path,  sys,  shutil,  re
from optparse import OptionParser

# Deliberate guard: the script prints this notice and exits with status 2
# until the line is removed, so it cannot run with the placeholder keys below.
print "Enter your access keys and then delete this line."; sys.exit(2)
# Credentials handed to S3.AWSAuthConnection when the connection is created.
AWS_ACCESS_KEY_ID = None # Change these!
AWS_SECRET_ACCESS_KEY = None

# Matches an <img ...> tag and captures the value of its src attribute in
# group 1.  The quotes around the value are optional; the captured value stops
# at the first quote, '>' or whitespace character.  DOTALL lets '.' match
# newlines, so a tag may span several lines.
image_regex = re.compile("<img.*?src\s*=\s*['\"]?([^'\">\s]*)['\"]?.*?>", re.MULTILINE | re.DOTALL)

def upload_file(filename, remote_path = None):
  """Upload a local file to the S3 bucket as a publicly readable object.

  filename    -- path of the local file; it is read in binary mode.
  remote_path -- key to store the object under.  Defaults to `filename`
                 when omitted or empty.

  Uses the module-level `conn` and `bucket`, which the script sets up before
  any entry is processed.  Errors raised while opening or reading the local
  file propagate to the caller.
  """
  remote_path = remote_path or filename

  # Close the handle deterministically instead of leaving it to the garbage
  # collector; a run over many pages would otherwise keep descriptors open.
  local_file = open(filename, 'rb')
  try:
    filedata = local_file.read()
  finally:
    local_file.close()

  # guess_type() returns (type, encoding); the type is None when the
  # extension is unknown, in which case we fall back to plain text.
  content_type = mimetypes.guess_type(filename)[0] or 'text/plain'
  conn.put(bucket, remote_path, S3.S3Object(filedata),
      {'x-amz-acl': 'public-read', 'Content-Type': content_type})

def process_entry(line):
  file_name = os.path.normpath(line.strip())
  file = open(file_name, 'r')
  data = file.read()
  file.close()

  if not no_backup and os.path.exists(file_name + ".S3BACKUP"):
    print "File exists: %s.S3BACKUP! Please delete or restore this backup (hint: to delete it, use `find <directory> -name '*.S3BACKUP'  -exec rm \{} \;`)" %(file_name)
    sys.exit(1)

  if not no_backup:
    shutil.copy(file_name, file_name + ".S3BACKUP")

  file_dir = os.path.dirname(file_name) + "/"
  if file_dir == "/": file_dir = ""

  for image in image_regex.findall(data):
    if image[:4] == "http":
      print "Skipping image %s..." %(image)
    else:
      image_path = os.path.normpath(image)
      while image_path[0] == "/": image_path = image_path[1:]
      image_path = os.path.normpath(file_dir + image_path)

      remote_path = os.path.normpath(remote_base_path + image_path)

      print "Uploading", image_path, "to", remote_path + "...",
      upload_file(image_path, remote_path)

      # Finally, replace the old image with the new one and write out the file
      p = re.compile(r'(<img.*?src\s*=\s*[\'"]?)%s([\'"]?.*?>)' %(image), re.MULTILINE | re.DOTALL)
      data = p.sub(r'\1%s\2' %("http://" + bucket + "/" + remote_path), data)
      new_file = open(file_name, 'w')
      new_file.write(data)
      new_file.close()

      print "Done! \nStored at http://" + bucket + "/" + remote_path 

# Parse the command line arguments
parser = OptionParser(usage = "usage: %prog [options] [FILES]\nWill process HTML files FILES or read a list of files, separated by newlines, from stdin.")
parser.add_option("-b", "--bucket", action="store", dest="bucket", help="BUCKET to put files in.", metavar="BUCKET")
parser.add_option("-k", "--key-prefix", dest="remote_base_path", help="Prefix key (file name) with PREFIX", metavar="PREFIX")
parser.add_option("-n", "--no-backup", dest="no_backup", action="store_true", help="Do not create backups")
(options, args) = parser.parse_args()

bucket = options.bucket
if not bucket:
  print "You must specify a bucket!"
  sys.exit(1)
remote_base_path = options.remote_base_path
no_backup = options.no_backup

if not remote_base_path:
  remote_base_path = ""
else:
  # Strip all leading / and make sure there is a trailing /
  while remote_base_path[0] == "/": remote_base_path = remote_base_path[1:]
  remote_base_path = remote_base_path + "/"  

# Make the connection, check the bucket
conn = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
if conn.list_bucket(bucket).http_response.status > 299: # Not an HTTP OK, bad bucket
  print "Bad bucket!  Do you have permission to access it? Have you spelt it correctly?"
  sys.exit(1)

try:
  source = args or sys.stdin.xreadlines() # Process args, if they exist, otherwise read from stdin
  for line in source:
    process_entry(line)
except KeyboardInterrupt:
  print
  print "Oops! You killed me!  Don't forget about the backup files."
  sys.exit(1)

print "All done! Don't forget to delete backups and original images! (hint: `find <dirname> -name '*.S3BACKUP'  -exec rm \{} \;`)"
