Commit | Line | Data |
---|---|---|
8aa01597 | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
2 | # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. | |
3 | # | |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
17 | import os | |
6fab7734 | 18 | import tempfile, tarfile, zipfile, subprocess, requests |
268f2430 | 19 | from csv import reader as csv_reader |
20 | from urlparse import urlparse | |
21 | from pyld import jsonld | |
8aa01597 | 22 | |
23 | from mediagoblin.gmg_commands import util as commands_util | |
24 | from mediagoblin.submit.lib import ( | |
25 | submit_media, get_upload_file_limits, | |
26 | FileUploadLimit, UserUploadLimit, UserPastUploadLimit) | |
6fab7734 | 27 | from mediagoblin.tools.metadata import compact_and_validate |
8aa01597 | 28 | |
26b3d6cf | 29 | from jsonschema.exceptions import ValidationError |
af3a9107 | 30 | |
8aa01597 | 31 | |
def parser_setup(subparser):
    """Configure ``subparser`` with the batch upload command's interface.

    Sets the parser description and registers, in order, the positional
    ``username`` and ``target_path`` arguments followed by the optional
    ``--celery`` flag.
    """
    # Long help text kept in a local so the add_argument calls stay readable.
    target_path_help = """\
Path to a local archive or directory containing a "location.csv" and a
"metadata.csv" file. These are csv (comma seperated value) files with the
locations and metadata of the files to be uploaded. The location must be listed
with either the URL of the remote media file or the filesystem path of a local
file. The metadata should be provided with one column for each of the 15 Dublin
Core properties (http://dublincore.org/documents/dces/). Both "location.csv" and
"metadata.csv" must begin with a row demonstrating the order of the columns. We
have provided an example of these files at <url to be added>
"""

    subparser.description = (
        "This command allows the administrator to upload many media files "
        "at once.")

    # Positional arguments: registration order defines command-line order.
    subparser.add_argument(
        'username', help="Name of user these media entries belong to")
    subparser.add_argument('target_path', help=target_path_help)

    # Optional switch; defaults to False (store_true).
    subparser.add_argument(
        '--celery',
        help="Don't process eagerly, pass off to celery",
        action='store_true')
54 | ||
55 | ||
def _resolve_batch_directory(target_path, temp_files):
    """Return the directory that holds the batch's csv files, or None.

    A directory is used as-is.  A tar or zip archive is unpacked into a fresh
    temporary directory, which is appended to ``temp_files`` so the caller can
    remove it afterwards.  ``None`` is returned when ``target_path`` is not a
    directory, a tar archive or a zip archive.
    """
    if os.path.isdir(target_path):
        return target_path

    # tarfile.is_tarfile() opens the path and lets the resulting I/O error
    # propagate for a missing file, so rule that case out first.
    if not os.path.isfile(target_path):
        return None

    if tarfile.is_tarfile(target_path):
        dir_path = tempfile.mkdtemp()
        temp_files.append(dir_path)
        tar = tarfile.open(target_path)
        try:
            tar.extractall(path=dir_path)
        finally:
            tar.close()
        return dir_path

    if zipfile.is_zipfile(target_path):
        dir_path = tempfile.mkdtemp()
        temp_files.append(dir_path)
        zipped_file = zipfile.ZipFile(target_path)
        try:
            zipped_file.extractall(path=dir_path)
        finally:
            zipped_file.close()
        return dir_path

    return None


def _read_csv(csv_path):
    """Read the csv file at ``csv_path`` and parse it with parse_csv_file."""
    with open(csv_path, 'r') as csv_file:
        return parse_csv_file(csv_file.read())


def _open_media_source(url, dir_path, filename):
    """Open the media a parsed location points at.

    ``url`` is the ``urlparse`` result for the location.  http(s) locations
    are fetched with ``requests``; locations without a scheme are treated as
    local paths, resolved against ``dir_path`` when relative.

    Returns a file-like object, or ``None`` (after printing the reason) when
    the media cannot be opened or the scheme is not supported.
    """
    if url.scheme in ('http', 'https'):
        try:
            res = requests.get(url.geturl(), stream=True)
            # Don't upload an error page as if it were the media file.
            res.raise_for_status()
        except requests.exceptions.RequestException:
            print("FAIL: Remote file {filename} could not be fetched.".format(
                filename=filename))
            print("Skipping it.")
            return None
        return res.raw

    if url.scheme == '':
        path = url.path
        if not os.path.isabs(path):
            path = os.path.join(dir_path, path)
        file_abs_path = os.path.abspath(path)
        try:
            # Media files are binary data.
            return open(file_abs_path, 'rb')
        except IOError:
            print("FAIL: Local file {filename} could not be accessed.".format(
                filename=filename))
            print("Skipping it.")
            return None

    print("FAIL: Unsupported location scheme '{scheme}' for {filename}.".format(
        scheme=url.scheme, filename=filename))
    print("Skipping it.")
    return None


def batchaddmedia(args):
    """Upload every media file described by a batch directory or archive.

    ``args`` is the parsed command line; this function reads
    ``args.username``, ``args.target_path`` and ``args.celery``.  The target
    must be a directory, tar archive or zip archive containing
    "location.csv" and "metadata.csv".  Each row of the location file is
    matched to the metadata row with the same ``media:id``, validated, and
    submitted on behalf of the named user.  Progress and failures are printed;
    a failing entry is skipped rather than aborting the batch.  Any temporary
    directory created for an unpacked archive is removed before returning.
    """
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.User.query.filter_by(username=args.username.lower()).first()
    if user is None:
        print("Sorry, no user by username '%s' exists" % args.username)
        return

    upload_limit, max_file_size = get_upload_file_limits(user)
    temp_files = []

    def maybe_unicodeify(some_string):
        # this is kinda terrible
        if some_string is None:
            return None
        else:
            return unicode(some_string)

    try:
        dir_path = _resolve_batch_directory(args.target_path, temp_files)
        if dir_path is None:
            print("Couldn't recognize the file. This script only accepts "
                  "tar files, zip files and directories")
            return

        location_file_path = os.path.join(dir_path, "location.csv")
        metadata_file_path = os.path.join(dir_path, "metadata.csv")

        # check for the location file, if it exists...
        abs_location_filename = os.path.abspath(location_file_path)
        if not os.path.exists(abs_location_filename):
            print("Can't find a file with filename '%s'" % location_file_path)
            return

        # check for the metadata file, if it exists...
        abs_metadata_filename = os.path.abspath(metadata_file_path)
        if not os.path.exists(abs_metadata_filename):
            print("Can't find a file with filename '%s'" % metadata_file_path)
            return

        media_locations = _read_csv(abs_location_filename)
        media_metadata = _read_csv(abs_metadata_filename)

        for media_id in media_locations:
            files_attempted += 1

            file_metadata = media_metadata.get(media_id)
            if file_metadata is None:
                print("FAIL: No metadata row found for media id '%s'."
                      % media_id)
                print("Skipping it.")
                continue

            try:
                json_ld_metadata = compact_and_validate(file_metadata)
            except ValidationError as exc:
                # The error path may be empty (error not tied to a single
                # property), so don't index it blindly.
                bad_field = exc.path[0] if exc.path else ''
                print("Error with '%s' value '%s': %s" % (
                    media_id, bad_field, exc.message))
                continue

            original_location = media_locations[media_id]['media:original']
            url = urlparse(original_location)

            title = json_ld_metadata.get('dcterms:title')
            description = json_ld_metadata.get('dcterms:description')
            license = json_ld_metadata.get('license')

            # Last path component, so directories (and spaces in names) do
            # not end up mangling the submitted filename.
            filename = os.path.basename(url.path)
            if not filename:
                print("FAIL: No file name in location '%s'."
                      % original_location)
                print("Skipping it.")
                continue

            media_file = _open_media_source(url, dir_path, filename)
            if media_file is None:
                continue

            try:
                submit_media(
                    mg_app=app,
                    user=user,
                    submitted_file=media_file,
                    filename=filename,
                    title=maybe_unicodeify(title),
                    description=maybe_unicodeify(description),
                    license=maybe_unicodeify(license),
                    metadata=json_ld_metadata,
                    tags_string=u"",
                    upload_limit=upload_limit, max_file_size=max_file_size)
                print("Successfully uploading {filename}!".format(
                    filename=filename))
                print("")
                files_uploaded += 1
            except FileUploadLimit:
                print("FAIL: This file is larger than the upload limits "
                      "for this site.")
            except UserUploadLimit:
                print("FAIL: This file will put this user past their "
                      "upload limits.")
            except UserPastUploadLimit:
                print("FAIL: This user is already past their upload limits.")
            finally:
                # submit_media has returned (or failed), so this handle is
                # not used again here; release it instead of leaking one
                # open file/connection per entry.
                media_file.close()

        print("{files_uploaded} out of {files_attempted} files "
              "successfully uploaded".format(
                  files_uploaded=files_uploaded,
                  files_attempted=files_attempted))
    finally:
        # Runs on every exit path, including the early returns above.
        teardown(temp_files)
191 | ||
28ecc53a | 192 | |
def parse_csv_file(file_contents):
    """Parse csv text into a dictionary of rows keyed by ``media:id``.

    The first line names the columns.  Every following non-blank line becomes
    a dictionary mapping column name to value, stored under that row's
    ``media:id`` value (a later row with the same id replaces an earlier one).

    :param file_contents: the whole text of the csv file.
    :returns: ``{media_id: {column_name: value, ...}, ...}``.
    :raises KeyError: if a row has no ``media:id`` value.
    :raises IndexError: if a row has more values than there are columns.
    """
    # Drop a trailing "\r" so files with "\r\n" line endings do not end up
    # with a carriage return glued to the last column name.
    rows = [row.rstrip('\r') for row in file_contents.split('\n')]

    # Parse the header with the csv reader as well, so quoted column names
    # are handled the same way as quoted values.
    columns = next(csv_reader([rows[0]]))
    objects_dict = {}

    # Build a dictionary for each data row
    for line in rows[1:]:
        if not line.strip():
            continue
        values = next(csv_reader([line]))
        line_dict = dict(
            (columns[i], val) for i, val in enumerate(values))
        objects_dict[line_dict['media:id']] = line_dict

    return objects_dict
8aa01597 | 209 | |
77d51d4f | 210 | |
268f2430 | 211 | def teardown(temp_files): |
212 | for temp_file in temp_files: | |
213 | subprocess.call(['rm','-r',temp_file]) |