amazon web services - Why is this python boto S3 multipart upload code not working? -


I am trying to upload a 10 GB file to AWS S3, and someone said to use S3 multipart upload, so I stumbled upon someone's GitHub gist:

import os
import sys
import glob
import subprocess
import contextlib
import functools
import multiprocessing
from multiprocessing.pool import IMapIterator
from optparse import OptionParser
from boto.s3.connection import S3Connection

#import rfc822

import boto

# Placeholder credentials; replace with real values (or, preferably, load them
# from the environment / boto configuration instead of hard-coding them).
aws_access_key_id = 'key id here'
aws_secret_access_key = 'access key here'


def main(transfer_file, bucket_name, s3_key_name=None, use_rr=True,
         make_public=True, cores=None):
    """Upload ``transfer_file`` to the S3 bucket ``bucket_name``.

    The bucket is created when it does not exist yet.  Files smaller than
    10 MB are sent with a single request; anything larger goes through a
    multipart upload.

    Args:
        transfer_file: Path of the local file to upload.
        bucket_name: Name of the destination bucket.
        s3_key_name: Key to store the file under; defaults to the base name
            of ``transfer_file``.
        use_rr: Store the object with reduced redundancy.
        make_public: Set the uploaded key's ACL to ``public-read``.
        cores: Number of worker processes for the multipart upload.
    """
    if s3_key_name is None:
        s3_key_name = os.path.basename(transfer_file)

    conn = S3Connection(aws_access_key_id, aws_secret_access_key)
    bucket = conn.lookup(bucket_name)
    if bucket is None:
        bucket = conn.create_bucket(bucket_name)

    mb_size = os.path.getsize(transfer_file) / 1e6
    if mb_size < 10:
        _standard_transfer(bucket, s3_key_name, transfer_file, use_rr)
    else:
        _multipart_upload(bucket, s3_key_name, transfer_file, mb_size, use_rr,
                          cores)
    s3_key = bucket.get_key(s3_key_name)
    if make_public:
        s3_key.set_acl("public-read")


def upload_cb(complete, total):
    """Progress callback: print one dot each time it is invoked."""
    sys.stdout.write(".")
    sys.stdout.flush()


def _standard_transfer(bucket, s3_key_name, transfer_file, use_rr):
    """Upload ``transfer_file`` to ``bucket`` in a single request."""
    print(" Upload with standard transfer, not multipart", end=' ')
    new_s3_item = bucket.new_key(s3_key_name)
    new_s3_item.set_contents_from_filename(transfer_file, reduced_redundancy=use_rr,
                                           cb=upload_cb, num_cb=10)
    print()


def map_wrap(f):
    """Adapt ``f`` so it can be driven by ``Pool.imap``.

    ``imap`` hands every work item to the callable as ONE positional
    argument (here: a tuple).  The wrapper unpacks that tuple into the
    positional parameters ``f`` actually declares.
    """
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # As posted, this line read ``return f(*args, **kwargs)``, which
        # forwards the whole tuple as the first parameter and raises the
        # "transfer_part() missing 4 required positional arguments"
        # TypeError quoted in the question.  Unpacking ``args[0]`` is the
        # Python 3 equivalent of Python 2's ``apply(f, *args, **kwargs)``.
        return f(*args[0], **kwargs)
    return wrapper


def mp_from_ids(mp_id, mp_keyname, mp_bucketname):
    """Get the multipart upload from the bucket and multipart IDs.

    This allows us to reconstitute a connection to the upload
    from within multiprocessing functions.
    """
    conn = S3Connection(aws_access_key_id, aws_secret_access_key)
    bucket = conn.lookup(mp_bucketname)
    mp = boto.s3.multipart.MultiPartUpload(bucket)
    mp.key_name = mp_keyname
    mp.id = mp_id
    return mp


@map_wrap
def transfer_part(mp_id, mp_keyname, mp_bucketname, i, part):
    """Transfer a part of a multipart upload. Designed to be run in parallel.

    ``i`` is the zero-based index of the part and ``part`` the path of the
    split file holding its data; the split file is removed after upload.
    """
    mp = mp_from_ids(mp_id, mp_keyname, mp_bucketname)
    print(" Transferring", i, part)
    # Binary mode: the parts are raw byte slices of the original file and
    # must not be decoded as text.
    with open(part, "rb") as t_handle:
        # S3 part numbers start at 1.
        mp.upload_part_from_file(t_handle, i + 1)
    os.remove(part)


def _multipart_upload(bucket, s3_key_name, tarball, mb_size, use_rr=True,
                      cores=None):
    """Upload large files using Amazon's multipart upload functionality.

    The file is cut into pieces with the external ``split`` command and the
    pieces are uploaded in parallel by a pool of ``cores`` processes.
    """
    def split_file(in_file, mb_size, split_num=5):
        # ``cores`` may legitimately be None; fall back to the default.
        if split_num is None:
            split_num = 5
        prefix = os.path.join(os.path.dirname(in_file),
                              "%sS3PART" % (os.path.basename(s3_key_name)))
        # require a split size between 5MB (AWS minimum) and 250MB
        split_size = int(max(min(mb_size / (split_num * 2.0), 250), 5))
        # Skip splitting when the first piece ("...aa") already exists.
        if not os.path.exists("%saa" % prefix):
            cl = ["split", "-b%sm" % split_size, in_file, prefix]
            subprocess.check_call(cl)
        return sorted(glob.glob("%s*" % prefix))

    mp = bucket.initiate_multipart_upload(s3_key_name, reduced_redundancy=use_rr)
    print(mp.id)
    print(mp.key_name)
    with multimap(cores) as pmap:
        # Only picklable identifiers are sent to the workers; each worker
        # rebuilds its own connection via mp_from_ids().
        work_items = ((mp.id, mp.key_name, mp.bucket_name, i, part)
                      for (i, part) in
                      enumerate(split_file(tarball, mb_size, cores)))
        for _ in pmap(transfer_part, work_items):
            pass

    mp.complete_upload()


@contextlib.contextmanager
def multimap(cores=None):
    """Provide multiprocessing imap like function.

    The context manager handles setting up the pool, working around
    interrupt issues and terminating the pool on completion.
    """
    if cores is None:
        cores = max(multiprocessing.cpu_count() - 1, 1)

    def wrapper(func):
        def wrap(self, timeout=None):
            # Always pass a (huge) timeout instead of None.
            # NOTE(review): presumably the usual workaround for Ctrl-C not
            # interrupting a blocking wait on Python 2 - confirm.
            return func(self, timeout=timeout if timeout is not None else 1e100)
        return wrap
    IMapIterator.next = wrapper(IMapIterator.next)
    pool = multiprocessing.Pool(cores)
    try:
        yield pool.imap
    finally:
        # Terminate the workers even when the body raised.
        pool.terminate()


if __name__ == "__main__":
    parser = OptionParser()
    parser.add_option("-r", "--norr", dest="use_rr",
                      action="store_false", default=True)
    parser.add_option("-p", "--public", dest="make_public",
                      action="store_true", default=False)
    parser.add_option("-c", "--cores", dest="cores",
                      default=multiprocessing.cpu_count())
    (options, args) = parser.parse_args()
    if len(args) < 2:
        print("no args")
        sys.exit()
    kwargs = dict(use_rr=options.use_rr, make_public=options.make_public,
                  cores=int(options.cores))
    main(*args, **kwargs)

But it is not working, and I am not sure how to fix this error: "TypeError: transfer_part() missing 4 required positional arguments: 'mp_keyname', 'mp_bucketname', 'i', and 'part'"

edit:

Full error trace, as requested:

multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "test.py", line 53, in wrapper
    return f(*args, **kwargs)
TypeError: transfer_part() missing 4 required positional arguments: 'mp_keyname', 'mp_bucketname', 'i', and 'part'
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "test.py", line 132, in <module>
    main(*args, **kwargs)
  File "test.py", line 34, in main
    cores)
  File "test.py", line 96, in _multipart_upload
    for _ in pmap(transfer_part, ((mp.id, mp.key_name, mp.bucket_name, i, part) for (i, part) in enumerate(split_file(tarball, mb_size, cores)))):
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/pool.py", line 689, in next
    raise value
TypeError: transfer_part() missing 4 required positional arguments: 'mp_keyname', 'mp_bucketname', 'i', and 'part'

If it fits your use case, you may want to use the AWS Command-Line Interface (CLI), which can automatically use multipart upload for you.

aws s3 cp file.txt s3://bucket/file.txt


Comments

Popular posts from this blog

python - How to create jsonb index using GIN on SQLAlchemy? -

PHP DOM loadHTML() method unusual warning -

c# - TransactionScope not rolling back although no complete() is called -