1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 from __future__
import print_function
27 from six
.moves
.urllib
.parse
import urlparse
29 from mediagoblin
.db
.models
import LocalUser
30 from mediagoblin
.gmg_commands
import util
as commands_util
31 from mediagoblin
.submit
.lib
import (
32 submit_media
, FileUploadLimit
, UserUploadLimit
, UserPastUploadLimit
)
33 from mediagoblin
.tools
.metadata
import compact_and_validate
34 from mediagoblin
.tools
.translate
import pass_to_ugettext
as _
35 from jsonschema
.exceptions
import ValidationError
38 def parser_setup(subparser
):
39 subparser
.description
= """\
40 This command allows the administrator to upload many media files at once."""
41 subparser
.epilog
= _(u
"""For more information about how to properly run this
42 script (and how to format the metadata csv file), read the MediaGoblin
43 documentation page on command line uploading
44 <http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""")
45 subparser
.add_argument(
47 help=_(u
"Name of user these media entries belong to"))
48 subparser
.add_argument(
51 u
"""Path to the csv file containing metadata information."""))
52 subparser
.add_argument(
55 help=_(u
"Don't process eagerly, pass off to celery"))
58 def batchaddmedia(args
):
59 # Run eagerly unless explicitly set not to
61 os
.environ
['CELERY_ALWAYS_EAGER'] = 'true'
63 app
= commands_util
.setup_app(args
)
65 files_uploaded
, files_attempted
= 0, 0
68 user
= app
.db
.LocalUser
.query
.filter(
69 LocalUser
.username
==args
.username
.lower()
72 print(_(u
"Sorry, no user by username '{username}' exists".format(
73 username
=args
.username
)))
76 if os
.path
.isfile(args
.metadata_path
):
77 metadata_path
= args
.metadata_path
80 error
= _(u
'File at {path} not found, use -h flag for help'.format(
81 path
=args
.metadata_path
))
85 abs_metadata_filename
= os
.path
.abspath(metadata_path
)
86 abs_metadata_dir
= os
.path
.dirname(abs_metadata_filename
)
88 def maybe_unicodeify(some_string
):
89 # this is kinda terrible
90 if some_string
is None:
93 return six
.text_type(some_string
)
96 abs_metadata_filename
, 'r', encoding
='utf-8') as all_metadata
:
97 contents
= all_metadata
.read()
98 media_metadata
= parse_csv_file(contents
)
100 for media_id
, file_metadata
in media_metadata
.items():
102 # In case the metadata was not uploaded initialize an empty dictionary.
103 json_ld_metadata
= compact_and_validate({})
105 # Get all metadata entries starting with 'media' as variables and then
106 # delete them because those are for internal use only.
107 original_location
= file_metadata
['location']
109 ### Pull the important media information for mediagoblin from the
110 ### metadata, if it is provided.
111 title
= file_metadata
.get('title') or file_metadata
.get('dc:title')
112 description
= (file_metadata
.get('description') or
113 file_metadata
.get('dc:description'))
114 collection_slug
= file_metadata
.get('collection-slug')
116 license
= file_metadata
.get('license')
118 json_ld_metadata
= compact_and_validate(file_metadata
)
119 except ValidationError
as exc
:
120 error
= _(u
"""Error with media '{media_id}' value '{error_path}': {error_msg}
121 Metadata was not uploaded.""".format(
123 error_path
=exc
.path
[0],
124 error_msg
=exc
.message
))
128 url
= urlparse(original_location
)
129 filename
= url
.path
.split()[-1]
131 if url
.scheme
== 'http':
132 res
= requests
.get(url
.geturl(), stream
=True)
135 elif url
.scheme
== '':
137 if os
.path
.isabs(path
):
138 file_abs_path
= os
.path
.abspath(path
)
140 file_path
= os
.path
.join(abs_metadata_dir
, path
)
141 file_abs_path
= os
.path
.abspath(file_path
)
143 media_file
= open(file_abs_path
, 'rb')
146 FAIL: Local file {filename} could not be accessed.
147 {filename} will not be uploaded.""".format(filename
=filename
)))
153 submitted_file
=media_file
,
155 title
=maybe_unicodeify(title
),
156 description
=maybe_unicodeify(description
),
157 collection_slug
=maybe_unicodeify(collection_slug
),
158 license
=maybe_unicodeify(license
),
159 metadata
=json_ld_metadata
,
161 print(_(u
"""Successfully submitted {filename}!
162 Be sure to look at the Media Processing Panel on your website to be sure it
163 uploaded successfully.""".format(filename
=filename
)))
165 except FileUploadLimit
:
167 u
"FAIL: This file is larger than the upload limits for this site."))
168 except UserUploadLimit
:
170 "FAIL: This file will put this user past their upload limits."))
171 except UserPastUploadLimit
:
172 print(_("FAIL: This user is already past their upload limits."))
174 "{files_uploaded} out of {files_attempted} files successfully submitted".format(
175 files_uploaded
=files_uploaded
,
176 files_attempted
=files_attempted
)))
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """Yield the rows of ``unicode_csv_data`` as lists of text cells.

    :param unicode_csv_data: iterable of text (unicode) lines of CSV data.
    :param dialect: csv dialect handed to ``csv.reader``; defaults to
        ``csv.excel``.
    :param kwargs: extra formatting parameters forwarded to ``csv.reader``.
    :returns: a generator producing one list of text cells per CSV row.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import sys

    if sys.version_info[0] >= 3:
        # Python 3's csv module reads text directly. Feeding it UTF-8 bytes
        # (as the Python 2 path below does) would make csv.reader raise, and
        # the cells are already text, so there is nothing to decode.
        for row in csv.reader(unicode_csv_data, dialect=dialect, **kwargs):
            yield row
        return

    # Python 2's csv.py doesn't do Unicode; encode temporarily as UTF-8:
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
        # decode UTF-8 back to Unicode, cell by cell:
        yield [six.text_type(cell, 'utf-8') for cell in row]
def utf_8_encoder(unicode_csv_data):
    """Lazily encode each text line of ``unicode_csv_data`` as UTF-8.

    :param unicode_csv_data: iterable of text lines.
    :returns: a generator producing the UTF-8 encoded form of every line,
        in the same order as the input.
    """
    for text_line in unicode_csv_data:
        encoded_line = text_line.encode('utf-8')
        yield encoded_line
192 def parse_csv_file(file_contents
):
194 The helper function which converts the csv file into a dictionary where each
195 item's key is the provided value 'id' and each item's value is another
198 list_of_contents
= file_contents
.split('\n')
199 key
, lines
= (list_of_contents
[0].split(','),
200 list_of_contents
[1:])
204 for index
, line
in enumerate(lines
):
205 if line
.isspace() or line
== u
'': continue
206 if (sys
.version_info
[0] == 3):
207 # Python 3's csv.py supports Unicode out of the box.
208 reader
= csv
.reader([line
])
210 reader
= unicode_csv_reader([line
])
211 values
= next(reader
)
212 line_dict
= dict([(key
[i
], val
)
213 for i
, val
in enumerate(values
)])
214 media_id
= line_dict
.get('id') or index
215 objects_dict
[media_id
] = (line_dict
)