The script now officially works!
author     tilly-Q <nattilypigeonfowl@gmail.com>
Wed, 12 Feb 2014 19:37:00 +0000 (14:37 -0500)
committer  tilly-Q <nattilypigeonfowl@gmail.com>
Mon, 21 Apr 2014 16:10:18 +0000 (12:10 -0400)
It works in many different situations, whether the media to be uploaded is
stored locally or on the web. Still have to clean up the code and look for
errors. I may also refactor some of this into a function to be used with a GUI
frontend in another project. Lastly, I need to merge this with the metadata
branch I've been working on, and convert the metadata.csv information into the
proper format for the new metadata column.
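
For reference, a minimal sketch of the two CSV files this command looks for,
based only on the columns the new code reads (media:id and media:original in
location.csv; media:id plus dcterms:* fields in metadata.csv). The ids, paths
and values below are purely illustrative:

    location.csv
        media:id,media:original
        1,images/flower.jpg
        2,http://example.org/static/bird.png

    metadata.csv
        media:id,dcterms:title,dcterms:description,dcterms:license
        1,A flower,Macro shot of a flower,http://creativecommons.org/licenses/by-sa/3.0/
        2,A bird,A bird on a wire,http://creativecommons.org/licenses/by-sa/3.0/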

mediagoblin/gmg_commands/batchaddmedia.py

index 1c0f6784f5ccc12f92d18467b39b5e153b3e4810..7d7a2d4fe1f7ddf0a4dc1b7554095df483b60b4f 100644
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import os
+import json, tempfile, urllib, tarfile, shutil
+from csv import reader as csv_reader
+from urlparse import urlparse
+from pyld import jsonld
 
 from mediagoblin.gmg_commands import util as commands_util
 from mediagoblin.submit.lib import (
@@ -22,20 +26,26 @@ from mediagoblin.submit.lib import (
     FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
 
 from mediagoblin import mg_globals
-import json, csv
 
 def parser_setup(subparser):
     subparser.add_argument(
         'username',
         help="Name of user this media entry belongs to")
-    subparser.add_argument(
-        'locationfile',
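+    # The upload target is either a directory (the default) or a tar archive;
+    # the two options below are mutually exclusive.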
+    target_type = subparser.add_mutually_exclusive_group()
+    target_type.add_argument('-d',
+        '--directory', action='store_const',
+        const='directory', dest='target_type',
+        default='directory', help=(
+"Target is a directory."))
+    target_type.add_argument('-a',
+        '--archive', action='store_const',
+        const='archive', dest='target_type',
         help=(
-"Local file on filesystem with the address of all the files to be uploaded"))
+"Target is an archive."))
     subparser.add_argument(
-        'metadatafile',
+        'target_path',
         help=(
-"Local file on filesystem with metadata of all the files to be uploaded"))
+"Path to a local archive or directory containing a location.csv and metadata.csv file"))
     subparser.add_argument(
         "-l", "--license",
         help=(
@@ -59,19 +69,36 @@ def batchaddmedia(args):
     if user is None:
         print "Sorry, no user by username '%s'" % args.username
         return
+
+    upload_limit, max_file_size = get_upload_file_limits(user)
+    temp_files = []
+
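+    # When the target is an archive, unpack it into a temporary directory;
+    # teardown() removes that directory again once every file is processed.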
+    if args.target_type == 'archive':
+        dir_path = tempfile.mkdtemp()
+        temp_files.append(dir_path)
+        tar = tarfile.open(args.target_path)
+        tar.extractall(path=dir_path)
+        tar.close()
+
+    elif args.target_type == 'directory':
+        dir_path = args.target_path
+
+    location_file_path = "{dir_path}/location.csv".format(
+        dir_path=dir_path)
+    metadata_file_path = "{dir_path}/metadata.csv".format(
+        dir_path=dir_path)
     
     # check for the location file, if it exists...
-    location_filename = os.path.split(args.locationfile)[-1]
-    abs_location_filename = os.path.abspath(args.locationfile)
+    location_filename = os.path.split(location_file_path)[-1]
+    abs_location_filename = os.path.abspath(location_file_path)
     if not os.path.exists(abs_location_filename):
-        print "Can't find a file with filename '%s'" % args.locationfile
+        print "Can't find a file with filename '%s'" % location_file_path
         return
 
-    # check for the location file, if it exists...
-    metadata_filename = os.path.split(args.metadatafile)[-1]
-    abs_metadata_filename = os.path.abspath(args.metadatafile)
+    # check for the metadata file, if it exists...
+    metadata_filename = os.path.split(metadata_file_path)[-1]
+    abs_metadata_filename = os.path.abspath(metadata_file_path)
     if not os.path.exists(abs_metadata_filename):
-        print "Can't find a file with filename '%s'" % args.metadatafile
+        print "Can't find a file with filename '%s'" % metadata_file_path
         return
 
     upload_limit, max_file_size = get_upload_file_limits(user)
@@ -91,20 +118,85 @@ def batchaddmedia(args):
         contents = all_metadata.read()
         media_metadata = parse_csv_file(contents)
 
+    dcterms_context = { 'dcterms':'http://purl.org/dc/terms/' }
+
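+    # Each row of location.csv is matched to its metadata.csv row through the
+    # shared media:id column; the dcterms context above compacts that metadata
+    # into JSON-LD.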
+    for media_id in media_locations.keys():
+        file_metadata = media_metadata[media_id]
+        json_ld_metadata = jsonld.compact(file_metadata, dcterms_context)
+        original_location = media_locations[media_id]['media:original']
+        url = urlparse(original_location)
+
+        title = file_metadata.get('dcterms:title')
+        description = file_metadata.get('dcterms:description')
+        license = file_metadata.get('dcterms:license')
+        filename = url.path.split('/')[-1]
+        print "Working with {filename}".format(filename=filename)
+
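+        # Remote originals are downloaded into a temporary file; local paths
+        # are resolved relative to the target directory.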
+        if url.scheme in ('http', 'https'):
+            print "Downloading {filename}...".format(
+                filename=filename)
+            media_file = tempfile.TemporaryFile()
+            res = urllib.urlopen(url.geturl())
+            media_file.write(res.read())
+            media_file.seek(0)
+
+        elif url.scheme == '':
+            path = url.path
+            if os.path.isabs(path):
+                file_abs_path = os.path.abspath(path)
+            else:
+                file_path = "{dir_path}/{local_path}".format(
+                    dir_path=dir_path,
+                    local_path=path)
+                file_abs_path = os.path.abspath(file_path)
+            try:
+                media_file = open(file_abs_path, 'rb')
+            except IOError:
+                print "Local file {filename} could not be accessed.".format(
+                    filename=filename)
+                print "Skipping it."
+                continue
+        print "Submitting {filename}...".format(filename=filename)
+        try:
+            submit_media(
+                mg_app=app,
+                user=user,
+                submitted_file=media_file,
+                filename=filename,
+                title=maybe_unicodeify(title),
+                description=maybe_unicodeify(description),
+                license=maybe_unicodeify(license),
+                tags_string=u"",
+                upload_limit=upload_limit, max_file_size=max_file_size)
+            print "Successfully uploading {filename}!".format(filename=filename)
+            print ""
+        except FileUploadLimit:
+            print "This file is larger than the upload limits for this site."
+        except UserUploadLimit:
+            print "This file will put this user past their upload limits."
+        except UserPastUploadLimit:
+            print "This user is already past their upload limits."
+    teardown(temp_files)
+
+
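+# parse_csv_file now returns a dictionary keyed by each row's media:id value so
+# that the two CSV files can be cross-referenced by id.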
 def parse_csv_file(file_contents):
     list_of_contents = file_contents.split('\n')
     key, lines = (list_of_contents[0].split(','),
                   list_of_contents[1:])
-    list_of_objects = []
+    objects_dict = {}
 
     # Build a dictionary
     for line in lines:
         if line.isspace() or line == '': continue
-        values = csv.reader([line]).next()
-        new_dict = dict([(key[i], val)
+        values = csv_reader([line]).next()
+        line_dict = dict([(key[i], val)
             for i, val in enumerate(values)])
-        list_of_objects.append(new_dict)
+        media_id = line_dict['media:id']
+        objects_dict[media_id] = line_dict
 
-    return list_of_objects
+    return objects_dict
 
-    
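+# Remove any temporary directories created while unpacking an archive target.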
+def teardown(temp_files):
+    for temp_file in temp_files:
+        shutil.rmtree(temp_file)
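
A rough idea of how the command would be invoked, assuming it is wired up as a
sub-command of MediaGoblin's gmg command-line tool (the user name and paths
below are made up):

    gmg batchaddmedia alice ./my_batch          # target is a directory (default)
    gmg batchaddmedia alice -a ./my_batch.tar   # target is a tar archive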