From 446cece470328610916f40c5dd58889e15fe1648 Mon Sep 17 00:00:00 2001
From: Natalie Foust-Pilcher
Date: Sat, 21 Jun 2014 15:26:23 -0400
Subject: [PATCH] Used the codecs library to read the csv file in batchaddmedia as unicode.

---
 mediagoblin/gmg_commands/batchaddmedia.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py
index b7f2569c..4931bda2 100644
--- a/mediagoblin/gmg_commands/batchaddmedia.py
+++ b/mediagoblin/gmg_commands/batchaddmedia.py
@@ -15,8 +15,8 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import os
-import requests
-from csv import reader as csv_reader
+import requests, codecs
+import csv
 from urlparse import urlparse
 
 from mediagoblin.gmg_commands import util as commands_util
@@ -87,7 +87,8 @@ def batchaddmedia(args):
         else:
             return unicode(some_string)
 
-    with file(abs_metadata_filename, 'r') as all_metadata:
+    with codecs.open(
+            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
         contents = all_metadata.read()
         media_metadata = parse_csv_file(contents)
 
@@ -169,6 +170,18 @@ u"FAIL: This file is larger than the upload limits for this site.")
                 files_attempted=files_attempted))
 
 
+def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
+    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
+    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
+                            dialect=dialect, **kwargs)
+    for row in csv_reader:
+        # decode UTF-8 back to Unicode, cell by cell:
+        yield [unicode(cell, 'utf-8') for cell in row]
+
+def utf_8_encoder(unicode_csv_data):
+    for line in unicode_csv_data:
+        yield line.encode('utf-8')
+
 def parse_csv_file(file_contents):
     """
     The helper function which converts the csv file into a dictionary where each
@@ -182,8 +195,8 @@ def parse_csv_file(file_contents):
 
     # Build a dictionary
     for index, line in enumerate(lines):
-        if line.isspace() or line == '': continue
-        values = csv_reader([line]).next()
+        if line.isspace() or line == u'': continue
+        values = unicode_csv_reader([line]).next()
         line_dict = dict([(key[i], val)
             for i, val in enumerate(values)])
         media_id = line_dict.get('id') or index
-- 
2.25.1