# mediagoblin/gmg_commands/batchaddmedia.py
8aa01597 1# GNU MediaGoblin -- federated, autonomous media hosting
2# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
import os
import shutil
import subprocess
import tarfile
import tempfile
import zipfile
from csv import reader as csv_reader
from urlparse import urlparse

import requests
from jsonschema.exceptions import ValidationError
from pyld import jsonld

from mediagoblin.gmg_commands import util as commands_util
from mediagoblin.submit.lib import (
    submit_media, get_upload_file_limits,
    FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
from mediagoblin.tools.metadata import compact_and_validate
8aa01597 31
def parser_setup(subparser):
    """Register the command-line arguments for the ``batchaddmedia`` command.

    Args:
        subparser: the argparse subparser for this gmg command; arguments
            are added to it in place.
    """
    subparser.description = """\
This command allows the administrator to upload many media files at once."""
    subparser.add_argument(
        'username',
        help="Name of user these media entries belong to")
    subparser.add_argument(
        'target_path',
        # Fixed typo: "comma seperated value" -> "comma separated value"
        help=("""\
Path to a local archive or directory containing a "location.csv" and a
"metadata.csv" file. These are csv (comma separated value) files with the
locations and metadata of the files to be uploaded. The location must be listed
with either the URL of the remote media file or the filesystem path of a local
file. The metadata should be provided with one column for each of the 15 Dublin
Core properties (http://dublincore.org/documents/dces/). Both "location.csv" and
"metadata.csv" must begin with a row demonstrating the order of the columns. We
have provided an example of these files at <url to be added>
"""))
    subparser.add_argument(
        '--celery',
        action='store_true',
        help="Don't process eagerly, pass off to celery")
55
def batchaddmedia(args):
    """Upload many media files at once on behalf of one user.

    Reads a "location.csv" and a "metadata.csv" out of ``args.target_path``
    (a directory, tarball, or zip archive), validates each row's metadata
    as JSON-LD, then submits every listed file (local path or http(s) URL)
    through the normal submission pipeline. Prints per-file progress and a
    final uploaded/attempted summary.

    Bug fixes vs. the previous revision:
      * unrecognized target types now return instead of falling through
        with ``dir_path`` unbound (NameError);
      * filenames are derived by splitting the URL path on '/' rather
        than on whitespace;
      * the per-file FAIL/success messages had literal "(unknown)" where
        the ``{filename}`` placeholder belonged;
      * https and other unhandled URL schemes are skipped explicitly
        instead of crashing on an unbound ``media_file``;
      * removed a duplicated ``get_upload_file_limits`` call.
    """
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.User.query.filter_by(username=args.username.lower()).first()
    if user is None:
        print("Sorry, no user by username '%s' exists" % args.username)
        return

    upload_limit, max_file_size = get_upload_file_limits(user)
    temp_files = []

    if os.path.isdir(args.target_path):
        dir_path = args.target_path

    elif tarfile.is_tarfile(args.target_path):
        dir_path = tempfile.mkdtemp()
        temp_files.append(dir_path)
        tar = tarfile.open(args.target_path)
        tar.extractall(path=dir_path)

    elif zipfile.is_zipfile(args.target_path):
        dir_path = tempfile.mkdtemp()
        temp_files.append(dir_path)
        zipped_file = zipfile.ZipFile(args.target_path)
        zipped_file.extractall(path=dir_path)

    else:
        # Must bail out here: without a dir_path nothing below can work.
        print("Couldn't recognize the file. This script only accepts tar "
              "files, zip files and directories")
        return

    if dir_path.endswith('/'):
        dir_path = dir_path[:-1]

    location_file_path = os.path.join(dir_path, "location.csv")
    metadata_file_path = os.path.join(dir_path, "metadata.csv")

    # check for the location file, if it exists...
    abs_location_filename = os.path.abspath(location_file_path)
    if not os.path.exists(abs_location_filename):
        print("Can't find a file with filename '%s'" % location_file_path)
        return

    # check for the metadata file, if it exists...
    abs_metadata_filename = os.path.abspath(metadata_file_path)
    if not os.path.exists(abs_metadata_filename):
        print("Can't find a file with filename '%s'" % metadata_file_path)
        return

    def maybe_unicodeify(some_string):
        # csv hands back byte strings; submit_media wants unicode (or None).
        if some_string is None:
            return None
        return unicode(some_string)

    with file(abs_location_filename, 'r') as all_locations:
        media_locations = parse_csv_file(all_locations.read())

    with file(abs_metadata_filename, 'r') as all_metadata:
        media_metadata = parse_csv_file(all_metadata.read())

    for media_id in media_locations.keys():
        files_attempted += 1

        file_metadata = media_metadata[media_id]
        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            print("Error with '%s' value '%s': %s" % (
                media_id, exc.path[0], exc.message))
            continue

        original_location = media_locations[media_id]['media:original']
        url = urlparse(original_location)

        title = json_ld_metadata.get('dcterms:title')
        description = json_ld_metadata.get('dcterms:description')

        license = json_ld_metadata.get('license')
        # Split on '/' to take the last URL path segment; a whitespace
        # split here previously mangled most filenames.
        filename = url.path.split('/')[-1]

        if url.scheme in ('http', 'https'):
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw

        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(dir_path, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                media_file = file(file_abs_path, 'r')
            except IOError:
                print("\
FAIL: Local file {filename} could not be accessed.".format(filename=filename))
                print("Skipping it.")
                continue

        else:
            # Previously fell through with media_file unbound -> NameError.
            print("FAIL: Unsupported url scheme '%s' for %s." % (
                url.scheme, original_location))
            print("Skipping it.")
            continue

        try:
            submit_media(
                mg_app=app,
                user=user,
                submitted_file=media_file,
                filename=filename,
                title=maybe_unicodeify(title),
                description=maybe_unicodeify(description),
                license=maybe_unicodeify(license),
                metadata=json_ld_metadata,
                tags_string=u"",
                upload_limit=upload_limit, max_file_size=max_file_size)
            print("Successfully uploaded {filename}!".format(filename=filename))
            print("")
            files_uploaded += 1
        except FileUploadLimit:
            print("FAIL: This file is larger than the upload limits for this site.")
        except UserUploadLimit:
            print("FAIL: This file will put this user past their upload limits.")
        except UserPastUploadLimit:
            print("FAIL: This user is already past their upload limits.")
    print("\
{files_uploaded} out of {files_attempted} files successfully uploaded".format(
        files_uploaded=files_uploaded,
        files_attempted=files_attempted))
    teardown(temp_files)
191
28ecc53a 192
def parse_csv_file(file_contents):
    """Parse a batch-upload csv file into a dict keyed by "media:id".

    The first row names the columns; every subsequent non-blank row becomes
    a ``{column_name: value}`` dict stored under that row's "media:id" value.

    Args:
        file_contents: the entire csv file as one string.

    Returns:
        dict mapping each row's media:id to its {column: value} dict;
        empty dict for empty/blank input.
    """
    if not file_contents.strip():
        return {}

    rows = file_contents.split('\n')
    # Parse the header through the csv reader too, so quoted column names
    # containing commas survive. next() builtin replaces the py2-only
    # reader.next() method.
    columns = next(csv_reader([rows[0]]))
    objects_dict = {}

    for row in rows[1:]:
        if row.isspace() or row == '':
            continue
        values = next(csv_reader([row]))
        line_dict = dict(zip(columns, values))
        objects_dict[line_dict['media:id']] = line_dict

    return objects_dict
8aa01597 209
77d51d4f 210
268f2430 211def teardown(temp_files):
212 for temp_file in temp_files:
213 subprocess.call(['rm','-r',temp_file])