Merge remote-tracking branch 'gsoc2016/Subtitle-1'
[mediagoblin.git] / mediagoblin / gmg_commands / batchaddmedia.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 from __future__ import print_function
18
19 import codecs
20 import csv
21 import os
22 import sys
23
24 import requests
25 import six
26
27 from six.moves.urllib.parse import urlparse
28
29 from mediagoblin.db.models import LocalUser
30 from mediagoblin.gmg_commands import util as commands_util
31 from mediagoblin.submit.lib import (
32 submit_media, FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
33 from mediagoblin.tools.metadata import compact_and_validate
34 from mediagoblin.tools.translate import pass_to_ugettext as _
35 from jsonschema.exceptions import ValidationError
36
37
def parser_setup(subparser):
    """Attach the batchaddmedia command's arguments to *subparser*."""
    description = """\
This command allows the administrator to upload many media files at once."""
    epilog = _(u"""For more information about how to properly run this
script (and how to format the metadata csv file), read the MediaGoblin
documentation page on command line uploading
<http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""")
    subparser.description = description
    subparser.epilog = epilog
    # Positional arguments (order matters to argparse).
    subparser.add_argument(
        'username',
        help=_(u"Name of user these media entries belong to"))
    subparser.add_argument(
        'metadata_path',
        help=_(
            u"""Path to the csv file containing metadata information."""))
    # Optional flag: defer processing to celery instead of running eagerly.
    subparser.add_argument(
        '--celery',
        action='store_true',
        help=_(u"Don't process eagerly, pass off to celery"))
56
57
def batchaddmedia(args):
    """Upload many media files at once, driven by a csv metadata file.

    Looks up the user named by ``args.username``, reads the csv manifest at
    ``args.metadata_path``, and submits every row's file (a local path or an
    http/https URL in the 'location' column) together with its validated
    json-ld metadata.  Prints per-file progress and a final summary.

    :param args: parsed argparse namespace with ``username``,
                 ``metadata_path`` and ``celery`` attributes.
    """
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.LocalUser.query.filter(
        LocalUser.username==args.username.lower()
    ).first()
    if user is None:
        print(_(u"Sorry, no user by username '{username}' exists".format(
                    username=args.username)))
        return

    if os.path.isfile(args.metadata_path):
        metadata_path = args.metadata_path
    else:
        error = _(u'File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(metadata_path)
    # Relative file locations in the csv are resolved against the csv's dir.
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)

    def maybe_unicodeify(some_string):
        # Coerce to text, passing None straight through.
        if some_string is None:
            return None
        else:
            return six.text_type(some_string)

    with codecs.open(
            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    for media_id, file_metadata in media_metadata.items():
        files_attempted += 1
        # In case the metadata was not uploaded initialize an empty dictionary.
        json_ld_metadata = compact_and_validate({})

        # 'location' names the file to upload; the remaining entries are
        # metadata that gets validated as json-ld below.
        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description') or
            file_metadata.get('dc:description'))
        collection_slug = file_metadata.get('collection-slug')

        license = file_metadata.get('license')
        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            # Report the first offending field and skip this entry.
            error = _(u"""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
                media_id=media_id,
                error_path=exc.path[0],
                error_msg=exc.message))
            print(error)
            continue

        url = urlparse(original_location)
        # BUGFIX: split on '/' (not whitespace) so we get the path's
        # basename; .split() with no argument returned the whole path.
        filename = url.path.split('/')[-1]

        if url.scheme in ('http', 'https'):
            # Stream the remote file rather than buffering it all in memory.
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw
        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                # NOTE(review): this handle is passed to submit_media and is
                # not explicitly closed here.
                media_file = open(file_abs_path, 'rb')
            except IOError:
                print(_(u"""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue
        else:
            # BUGFIX: previously unhandled schemes fell through with
            # media_file unbound (or stale from the prior iteration).
            print(_(u"FAIL: Unsupported scheme '{scheme}' for {location}".format(
                scheme=url.scheme, location=original_location)))
            continue
        try:
            submit_media(
                mg_app=app,
                user=user,
                submitted_file=media_file,
                filename=filename,
                title=maybe_unicodeify(title),
                description=maybe_unicodeify(description),
                collection_slug=maybe_unicodeify(collection_slug),
                license=maybe_unicodeify(license),
                metadata=json_ld_metadata,
                tags_string=u"")
            print(_(u"""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(_(
                u"FAIL: This file is larger than the upload limits for this site."))
        except UserUploadLimit:
            print(_(
                "FAIL: This file will put this user past their upload limits."))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))
    print(_(
        "{files_uploaded} out of {files_attempted} files successfully submitted".format(
            files_uploaded=files_uploaded,
            files_attempted=files_attempted)))
177
178
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """Yield Unicode rows from an iterable of Unicode csv lines.

    csv.py doesn't do Unicode (on Python 2), so the input is temporarily
    encoded as UTF-8 for parsing and each cell is decoded back afterwards.
    TODO: this probably won't be necessary in Python 3.
    """
    reader = csv.reader(
        utf_8_encoder(unicode_csv_data), dialect=dialect, **kwargs)
    for parsed_row in reader:
        # decode UTF-8 back to Unicode, cell by cell
        yield [six.text_type(cell, 'utf-8') for cell in parsed_row]
187
def utf_8_encoder(unicode_csv_data):
    """Lazily encode each Unicode line of *unicode_csv_data* as UTF-8 bytes."""
    return (line.encode('utf-8') for line in unicode_csv_data)
191
def parse_csv_file(file_contents):
    """
    Convert csv text into a dictionary keyed by each row's 'id' value.

    The first line is the header naming each column; every following
    non-blank line becomes one {column: value} dict.  A row whose 'id'
    cell is missing or empty is keyed by its 0-based position among the
    data lines instead.

    :param file_contents: the whole csv file as one (unicode) string
    :returns: dict mapping media id -> metadata dict for that row
    """
    list_of_contents = file_contents.split('\n')
    header_line, lines = list_of_contents[0], list_of_contents[1:]
    # BUGFIX: parse the header with the csv module too, so quoted column
    # names containing commas are handled; a plain split(',') broke them.
    key = next(_csv_row_reader([header_line]), [])
    objects_dict = {}

    # Build a dictionary
    for index, line in enumerate(lines):
        if line.isspace() or line == u'':
            continue
        values = next(_csv_row_reader([line]))
        line_dict = dict([(key[i], val)
            for i, val in enumerate(values)])
        media_id = line_dict.get('id') or index
        objects_dict[media_id] = line_dict

    return objects_dict


def _csv_row_reader(lines):
    # Pick the right csv reader for this Python version, in one place.
    if sys.version_info[0] == 3:
        # Python 3's csv.py supports Unicode out of the box.
        return csv.reader(lines)
    return unicode_csv_reader(lines)