Merge branch 'bulk-upload' into metadata
author tilly-Q <nattilypigeonfowl@gmail.com>
Sun, 23 Mar 2014 20:43:20 +0000 (16:43 -0400)
committer tilly-Q <nattilypigeonfowl@gmail.com>
Sun, 23 Mar 2014 20:43:20 +0000 (16:43 -0400)
mediagoblin/gmg_commands/__init__.py
mediagoblin/gmg_commands/batchaddmedia.py [new file with mode: 0644]

index a1eb599db09cc1c28c8280813bcdbe036e47d5ab..55e85116c2f934b49a94f3f60062cdbc368452c2 100644 (file)
@@ -53,6 +53,10 @@ SUBCOMMAND_MAP = {
         'setup': 'mediagoblin.gmg_commands.addmedia:parser_setup',
         'func': 'mediagoblin.gmg_commands.addmedia:addmedia',
         'help': 'Reprocess media entries'},
+    'batchaddmedia': {
+        'setup': 'mediagoblin.gmg_commands.batchaddmedia:parser_setup',
+        'func': 'mediagoblin.gmg_commands.batchaddmedia:batchaddmedia',
+        'help': 'Add many media entries at once'},
     # 'theme': {
     #     'setup': 'mediagoblin.gmg_commands.theme:theme_parser_setup',
     #     'func': 'mediagoblin.gmg_commands.theme:theme',
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py
new file mode 100644 (file)
index 0000000..678c8ab
--- /dev/null
@@ -0,0 +1,217 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import shutil
+import tempfile
+import urllib
+import tarfile
+import zipfile
+
+from csv import reader as csv_reader
+from urlparse import urlparse
+from pyld import jsonld
+
+from mediagoblin.gmg_commands import util as commands_util
+from mediagoblin.submit.lib import (
+    submit_media, get_upload_file_limits,
+    FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
+
+from mediagoblin import mg_globals
+
+def parser_setup(subparser):
+    subparser.description = """\
+This command allows the administrator to upload many media files at once."""
+    subparser.add_argument(
+        'username',
+        help="Name of user these media entries belong to")
+    subparser.add_argument(
+        'target_path',
+        help=("""\
+Path to a local archive or directory containing a "location.csv" and a
+"metadata.csv" file. These are CSV (comma-separated value) files with the
+locations and metadata of the files to be uploaded. The location must be
+listed as either the URL of the remote media file or the filesystem path of
+a local file. The metadata should be provided with one column for each of
+the 15 Dublin Core properties (http://dublincore.org/documents/dces/). Both
+"location.csv" and "metadata.csv" must begin with a row naming the columns.
+We have provided an example of these files at <url to be added>
+"""))
+    subparser.add_argument(
+        "-l", "--license",
+        help=(
+           "License these media entry will be released under, if all the same. "
+           "Should be a URL."))
+    subparser.add_argument(
+        '--celery',
+        action='store_true',
+        help="Don't process eagerly, pass off to celery")
+
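+# A hedged, illustrative sketch of what the two CSV files might look like.
+# The filenames and values in the rows below are hypothetical examples, not
+# files shipped with this command. The "media:id" column ties each metadata
+# row to its location row, and the first row of each file names its columns:
+#
+#   location.csv:
+#       media:id,media:original
+#       1,http://example.net/path/to/nap.png
+#       2,images/night_sky.jpg
+#
+#   metadata.csv:
+#       media:id,dcterms:title,dcterms:description,dcterms:license
+#       1,Goblin napping,A goblin taking a nap,http://creativecommons.org/licenses/by-sa/3.0/
+#       2,Winter night sky,,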
+
+def batchaddmedia(args):
+    # Run eagerly unless explicitly told not to
+    if not args.celery:
+        os.environ['CELERY_ALWAYS_EAGER'] = 'true'
+
+    app = commands_util.setup_app(args)
+
+    files_uploaded, files_attempted = 0, 0
+
+    # get the user
+    user = app.db.User.query.filter_by(username=args.username.lower()).first()
+    if user is None:
+        print "Sorry, no user by username '%s' exists" % args.username
+        return
+
+    upload_limit, max_file_size = get_upload_file_limits(user)
+    temp_files = []
+
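+    # Work out what target_path points at: a directory is used in place,
+    # while tar and zip archives are unpacked into a temporary directory
+    # that teardown() removes at the end of the run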
+    if os.path.isdir(args.target_path):
+        dir_path = args.target_path
+
+    elif tarfile.is_tarfile(args.target_path):
+        dir_path = tempfile.mkdtemp()
+        temp_files.append(dir_path)
+        tar = tarfile.open(args.target_path)
+        tar.extractall(path=dir_path)
+
+    elif zipfile.is_zipfile(args.target_path):
+        dir_path = tempfile.mkdtemp()
+        temp_files.append(dir_path)
+        zipped_file = zipfile.ZipFile(args.target_path)
+        zipped_file.extractall(path=dir_path)
+
+    else:
+        print "Couldn't recognize the file. This script only accepts "\
+            "tar files, zip files and directories."
+        return
+
+    if dir_path.endswith('/'):
+        dir_path = dir_path[:-1]
+
+    location_file_path = "{dir_path}/location.csv".format(
+        dir_path=dir_path)
+    metadata_file_path = "{dir_path}/metadata.csv".format(
+        dir_path=dir_path)
+
+    # Make sure the location file exists
+    abs_location_filename = os.path.abspath(location_file_path)
+    if not os.path.exists(abs_location_filename):
+        print "Can't find a file with filename '%s'" % location_file_path
+        return
+
+    # Make sure the metadata file exists
+    abs_metadata_filename = os.path.abspath(metadata_file_path)
+    if not os.path.exists(abs_metadata_filename):
+        print "Can't find a file with filename '%s'" % metadata_file_path
+        return
+
+    def maybe_unicodeify(some_string):
+        # Convert the string to unicode, passing None through untouched
+        if some_string is None:
+            return None
+        else:
+            return unicode(some_string)
+
+    with open(abs_location_filename, 'r') as all_locations:
+        contents = all_locations.read()
+        media_locations = parse_csv_file(contents)
+
+    with open(abs_metadata_filename, 'r') as all_metadata:
+        contents = all_metadata.read()
+        media_metadata = parse_csv_file(contents)
+
+    dcterms_context = {'dcterms': 'http://purl.org/dc/terms/'}
+
+    for media_id in media_locations.keys():
+        file_metadata = media_metadata[media_id]
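+        # Compact the row's metadata into JSON-LD form using the Dublin
+        # Core terms context declared above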
+        json_ld_metadata = jsonld.compact(file_metadata, dcterms_context)
+        original_location = media_locations[media_id]['media:original']
+        url = urlparse(original_location)
+
+        title = file_metadata.get('dcterms:title')
+        description = file_metadata.get('dcterms:description')
+        license = file_metadata.get('dcterms:license')
+        filename = url.path.split('/')[-1]
+        files_attempted += 1
+
+        if url.scheme in ('http', 'https'):
+            # Download the remote file into a local temporary file so it
+            # can be rewound and re-read during submission
+            media_file = tempfile.TemporaryFile()
+            res = urllib.urlopen(url.geturl())
+            media_file.write(res.read())
+            media_file.seek(0)
+
+        elif url.scheme == '':
+            path = url.path
+            if os.path.isabs(path):
+                file_abs_path = os.path.abspath(path)
+            else:
+                file_path = "{dir_path}/{local_path}".format(
+                    dir_path=dir_path,
+                    local_path=path)
+                file_abs_path = os.path.abspath(file_path)
+            try:
+                # Media files are binary, so open them in binary mode
+                media_file = open(file_abs_path, 'rb')
+            except IOError:
+                print "FAIL: Local file {filename} could not be "\
+                    "accessed.".format(filename=filename)
+                print "Skipping it."
+                continue
+
+        else:
+            print "FAIL: Can't handle the scheme '{scheme}' of "\
+                "{filename}.".format(scheme=url.scheme, filename=filename)
+            print "Skipping it."
+            continue
+        try:
+            submit_media(
+                mg_app=app,
+                user=user,
+                submitted_file=media_file,
+                filename=filename,
+                title=maybe_unicodeify(title),
+                description=maybe_unicodeify(description),
+                license=maybe_unicodeify(license),
+                tags_string=u"",
+                upload_limit=upload_limit, max_file_size=max_file_size)
+            print "Successfully uploading {filename}!".format(filename=filename)
+            print ""
+            files_uploaded += 1
+        except FileUploadLimit:
+            print "FAIL: This file is larger than the upload limits for this site."
+        except UserUploadLimit:
+            print "FAIL: This file will put this user past their upload limits."
+        except UserPastUploadLimit:
+            print "FAIL: This user is already past their upload limits."
+    print "\
+{files_uploaded} out of {files_attempted} files successfully uploaded".format(
+        files_uploaded=files_uploaded,
+        files_attempted=files_attempted)
+    teardown(temp_files)
+
+
+def parse_csv_file(file_contents):
+    list_of_contents = file_contents.split('\n')
+    # Parse the header row with the csv reader as well, so that quoted
+    # column names are handled the same way as the data rows
+    key, lines = (csv_reader([list_of_contents[0]]).next(),
+                  list_of_contents[1:])
+    objects_dict = {}
+
+    # Build a dictionary mapping each row's "media:id" to that row's values
+    for line in lines:
+        if line.isspace() or line == '':
+            continue
+        values = csv_reader([line]).next()
+        line_dict = dict(zip(key, values))
+        media_id = line_dict['media:id']
+        objects_dict[media_id] = line_dict
+
+    return objects_dict
+
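+# As a hypothetical illustration of the parsing above: a file containing
+#
+#     media:id,media:original
+#     1,images/night_sky.jpg
+#
+# would come back from parse_csv_file as
+#
+#     {'1': {'media:id': '1', 'media:original': 'images/night_sky.jpg'}}
+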
+def teardown(temp_files):
+    # Remove any temporary directories created while unpacking archives
+    for temp_file in temp_files:
+        shutil.rmtree(temp_file)