Merge remote-tracking branch 'gsoc2016/Subtitle-1'
[mediagoblin.git] / mediagoblin / gmg_commands / batchaddmedia.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 from __future__ import print_function
18
19 import codecs
20 import csv
21 import os
22 import sys
23
24 import requests
25 import six
26
27 from six.moves.urllib.parse import urlparse
28
29 from mediagoblin.db.models import LocalUser
30 from mediagoblin.gmg_commands import util as commands_util
31 from mediagoblin.submit.lib import (
32 submit_media, FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
33 from mediagoblin.tools.metadata import compact_and_validate
34 from mediagoblin.tools.translate import pass_to_ugettext as _
35 from jsonschema.exceptions import ValidationError
36
37
def parser_setup(subparser):
    """Attach the batchaddmedia command's arguments to *subparser*."""
    description = """\
This command allows the administrator to upload many media files at once."""
    epilog = _(u"""For more information about how to properly run this
script (and how to format the metadata csv file), read the MediaGoblin
documentation page on command line uploading
<http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""")
    subparser.description = description
    subparser.epilog = epilog
    # Positional arguments (order matters to argparse).
    subparser.add_argument(
        'username',
        help=_(u"Name of user these media entries belong to"))
    subparser.add_argument(
        'metadata_path',
        help=_(
            u"""Path to the csv file containing metadata information."""))
    # Optional flag: defer processing to celery instead of running eagerly.
    subparser.add_argument(
        '--celery',
        action='store_true',
        help=_(u"Don't process eagerly, pass off to celery"))
56
57
def batchaddmedia(args):
    """Upload many media files at once, driven by a csv metadata file.

    Looks up the user named by ``args.username``, reads the csv manifest at
    ``args.metadata_path``, and submits every row's file (a local path or an
    http/https URL in the 'location' column) together with its validated
    json-ld metadata.  Prints per-file progress and a final summary.

    :param args: parsed argparse namespace with ``username``,
                 ``metadata_path`` and ``celery`` attributes.
    """
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.LocalUser.query.filter(
        LocalUser.username==args.username.lower()
    ).first()
    if user is None:
        print(_(u"Sorry, no user by username '{username}' exists".format(
                    username=args.username)))
        return

    if os.path.isfile(args.metadata_path):
        metadata_path = args.metadata_path
    else:
        error = _(u'File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(metadata_path)
    # Relative file locations in the csv are resolved against the csv's dir.
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)

    def maybe_unicodeify(some_string):
        # Coerce to text, passing None straight through.
        if some_string is None:
            return None
        else:
            return six.text_type(some_string)

    with codecs.open(
            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    for media_id, file_metadata in media_metadata.items():
        files_attempted += 1
        # In case the metadata was not uploaded initialize an empty dictionary.
        json_ld_metadata = compact_and_validate({})

        # 'location' names the file to upload; the remaining entries are
        # metadata that gets validated as json-ld below.
        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description') or
            file_metadata.get('dc:description'))
        collection_slug = file_metadata.get('collection-slug')

        license = file_metadata.get('license')
        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            # Report the first offending field and skip this entry.
            error = _(u"""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
                media_id=media_id,
                error_path=exc.path[0],
                error_msg=exc.message))
            print(error)
            continue

        url = urlparse(original_location)
        # BUGFIX: split on '/' (not whitespace) so we get the path's
        # basename; .split() with no argument returned the whole path.
        filename = url.path.split('/')[-1]

        if url.scheme in ('http', 'https'):
            # Stream the remote file rather than buffering it all in memory.
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw
        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                # NOTE(review): this handle is passed to submit_media and is
                # not explicitly closed here.
                media_file = open(file_abs_path, 'rb')
            except IOError:
                print(_(u"""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue
        else:
            # BUGFIX: previously unhandled schemes fell through with
            # media_file unbound (or stale from the prior iteration).
            print(_(u"FAIL: Unsupported scheme '{scheme}' for {location}".format(
                scheme=url.scheme, location=original_location)))
            continue
        try:
            submit_media(
                mg_app=app,
                user=user,
                submitted_file=media_file,
                filename=filename,
                title=maybe_unicodeify(title),
                description=maybe_unicodeify(description),
                collection_slug=maybe_unicodeify(collection_slug),
                license=maybe_unicodeify(license),
                metadata=json_ld_metadata,
                tags_string=u"")
            print(_(u"""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(_(
                u"FAIL: This file is larger than the upload limits for this site."))
        except UserUploadLimit:
            print(_(
                "FAIL: This file will put this user past their upload limits."))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))
    print(_(
        "{files_uploaded} out of {files_attempted} files successfully submitted".format(
            files_uploaded=files_uploaded,
            files_attempted=files_attempted)))
177
178
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """Yield Unicode rows from an iterable of Unicode csv lines.

    csv.py doesn't do Unicode (on Python 2), so the input is temporarily
    encoded as UTF-8 for parsing and each cell is decoded back afterwards.
    TODO: this probably won't be necessary in Python 3.
    """
    reader = csv.reader(
        utf_8_encoder(unicode_csv_data), dialect=dialect, **kwargs)
    for parsed_row in reader:
        # decode UTF-8 back to Unicode, cell by cell
        yield [six.text_type(cell, 'utf-8') for cell in parsed_row]
187
def utf_8_encoder(unicode_csv_data):
    """Lazily encode each Unicode line of *unicode_csv_data* as UTF-8 bytes."""
    return (line.encode('utf-8') for line in unicode_csv_data)
191
def parse_csv_file(file_contents):
    """
    Convert csv text into a dictionary keyed by each row's 'id' value.

    The first line is the header naming each column; every following
    non-blank line becomes one {column: value} dict.  A row whose 'id'
    cell is missing or empty is keyed by its 0-based position among the
    data lines instead.

    :param file_contents: the whole csv file as one (unicode) string
    :returns: dict mapping media id -> metadata dict for that row
    """
    list_of_contents = file_contents.split('\n')
    header_line, lines = list_of_contents[0], list_of_contents[1:]
    # BUGFIX: parse the header with the csv module too, so quoted column
    # names containing commas are handled; a plain split(',') broke them.
    key = next(_csv_row_reader([header_line]), [])
    objects_dict = {}

    # Build a dictionary
    for index, line in enumerate(lines):
        if line.isspace() or line == u'':
            continue
        values = next(_csv_row_reader([line]))
        line_dict = dict([(key[i], val)
            for i, val in enumerate(values)])
        media_id = line_dict.get('id') or index
        objects_dict[media_id] = line_dict

    return objects_dict


def _csv_row_reader(lines):
    # Pick the right csv reader for this Python version, in one place.
    if sys.version_info[0] == 3:
        # Python 3's csv.py supports Unicode out of the box.
        return csv.reader(lines)
    return unicode_csv_reader(lines)