Add garbage collection task
author Jessica Tallon <jessica@megworld.co.uk>
Tue, 15 Jul 2014 20:24:25 +0000 (21:24 +0100)
committer Jessica Tallon <jessica@megworld.co.uk>
Tue, 22 Jul 2014 22:13:16 +0000 (23:13 +0100)
mediagoblin.ini
mediagoblin/federation/routing.py
mediagoblin/federation/task.py [new file with mode: 0755]
mediagoblin/federation/views.py
mediagoblin/init/celery/__init__.py

index 5e2477a4b533588839240fd278b3053d6c7d8bce..6ccfa4f78599ed47415433f3e90ce8bf33e5a49d 100644 (file)
@@ -23,6 +23,10 @@ allow_registration = true
 # Set to false to disable the ability for users to report offensive content
 allow_reporting = true
 
+# How often garbage collection will run (set to 0 or false to disable).
+# The value is in minutes.
+garbage_collection = 60
+
 ## Uncomment this to put some user-overriding templates here
 # local_templates = %(here)s/user_dev/templates/
 
index 544edc689d5463c4ab6e77360b46168669287214..c5fa5ce82c87c650042d5c76fd13d081ebcbd1ab 100644 (file)
@@ -21,32 +21,32 @@ add_route(
     "mediagoblin.federation.user",
     "/api/user/<string:username>/",
     "mediagoblin.federation.views:user"
-    )
+)
 
 add_route(
     "mediagoblin.federation.user.profile",
     "/api/user/<string:username>/profile",
     "mediagoblin.federation.views:profile"
-    )
+)
 
 # Inbox and Outbox (feed)
 add_route(
     "mediagoblin.federation.feed",
     "/api/user/<string:username>/feed",
     "mediagoblin.federation.views:feed"
-    )
+)
 
 add_route(
     "mediagoblin.federation.user.uploads",
     "/api/user/<string:username>/uploads",
     "mediagoblin.federation.views:uploads"
-    )
+)
 
 add_route(
     "mediagoblin.federation.inbox",
     "/api/user/<string:username>/inbox",
     "mediagoblin.federation.views:feed"
-    )
+)
 
 # object endpoints
 add_route(
@@ -58,22 +58,22 @@ add_route(
     "mediagoblin.federation.object.comments",
     "/api/<string:objectType>/<string:uuid>/comments",
     "mediagoblin.federation.views:object_comments"
-    )
+)
 
 add_route(
     "mediagoblin.webfinger.well-known.host-meta",
     "/.well-known/host-meta",
     "mediagoblin.federation.views:host_meta"
-    )
+)
 
 add_route(
     "mediagoblin.webfinger.well-known.host-meta.json",
     "/.well-known/host-meta.json",
     "mediagoblin.federation.views:host_meta"
-    )
+)
 
 add_route(
     "mediagoblin.webfinger.whoami",
     "/api/whoami",
     "mediagoblin.federation.views:whoami"
-    )
+)
diff --git a/mediagoblin/federation/task.py b/mediagoblin/federation/task.py
new file mode 100755 (executable)
index 0000000..1d42e85
--- /dev/null
@@ -0,0 +1,49 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import celery
+import datetime
+import logging
+import pytz
+
+from mediagoblin.db.models import MediaEntry
+
+_log = logging.getLogger(__name__)
+logging.basicConfig()  # NOTE(review): configures root logging as an import side effect
+_log.setLevel(logging.DEBUG)  # NOTE(review): forces DEBUG for this logger; confirm intended
+
+@celery.task()
+def collect_garbage():
+    """
+    Garbage collection to clean up media.
+
+    Deletes every MediaEntry that was created more than one day ago and
+    is still in the "unprocessed" state, logging the ID of each one.
+    Entries newer than one day are kept: they may still get processed.
+    """
+    _log.info("Garbage collection is running.")
+    # Entries created before this cutoff have had a full day to process.
+    cutoff = datetime.datetime.now(pytz.UTC) - datetime.timedelta(days=1)
+    garbage = MediaEntry.query.filter(MediaEntry.created < cutoff)
+    garbage = garbage.filter(MediaEntry.state == "unprocessed")
+
+    for entry in garbage.all():
+        _log.info("Garbage media found with ID '{0}'".format(entry.id))
+        entry.delete()
+
+
+
+
index af81cbcb9eed2e531edc32596a2ff65b92871868..c383b3ef5e2275e671564de8c112e712ae145362 100644 (file)
@@ -1,3 +1,19 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 import json
 import io
 import mimetypes
@@ -135,7 +151,7 @@ def feed(request):
                 media.license = obj["license"]
 
             media.save()
-            manager = media.media_manager.api_add_to_feed(request, media)
+            media.media_manager.api_add_to_feed(request, media)
 
             return json_response({
                 "verb": "post",
@@ -263,7 +279,7 @@ def feed(request):
             "actor": request.user.serialize(request),
             "content": "{0} posted a picture".format(request.user.username),
             "id": 1,
-            })
+        })
         feed["items"][-1]["updated"] = feed["items"][-1]["object"]["updated"]
         feed["items"][-1]["published"] = feed["items"][-1]["object"]["published"]
         feed["items"][-1]["url"] = feed["items"][-1]["object"]["url"]
@@ -319,7 +335,6 @@ def object_comments(request):
 
     return response
 
-
 ##
 # Well known
 ##
@@ -331,19 +346,19 @@ def host_meta(request):
     links.append({
         "ref": "registration_endpoint",
         "href": request.urlgen("mediagoblin.oauth.client_register", qualified=True),
-        })
+    })
     links.append({
         "ref": "http://apinamespace.org/oauth/request_token",
         "href": request.urlgen("mediagoblin.oauth.request_token", qualified=True),
-        })
+    })
     links.append({
         "ref": "http://apinamespace.org/oauth/authorize",
         "href": request.urlgen("mediagoblin.oauth.authorize", qualified=True),
-        })
+    })
     links.append({
         "ref": "http://apinamespace.org/oauth/access_token",
         "href": request.urlgen("mediagoblin.oauth.access_token", qualified=True),
-        })
+    })
 
     return json_response({"links": links})
 
@@ -353,6 +368,6 @@ def whoami(request):
         "mediagoblin.federation.user.profile",
         username=request.user.username,
         qualified=True
-        )
+    )
 
     return redirect(request, location=profile)
index 57242bf6bd7a7f2116fa7c7b82db73d2c68c9f96..214d00c3c961456a9b7dad35c500c1f4090ce2bc 100644 (file)
@@ -16,6 +16,7 @@
 
 import os
 import sys
+import datetime
 import logging
 
 from celery import Celery
@@ -58,6 +59,18 @@ def get_celery_settings_dict(app_config, global_config,
         celery_settings['CELERY_ALWAYS_EAGER'] = True
         celery_settings['CELERY_EAGER_PROPAGATES_EXCEPTIONS'] = True
 
+    # Garbage collection periodic task (minutes; 0 or false disables it)
+    frequency = app_config.get('garbage_collection', 60)
+    if frequency and str(frequency).strip().lower() not in ('0', 'false'):
+        celery_settings['CELERYBEAT_SCHEDULE'] = {
+            'garbage-collection': {
+                # Must match the name celery registers: module + function.
+                'task': 'mediagoblin.federation.task.collect_garbage',
+                'schedule': datetime.timedelta(minutes=int(frequency)),
+            }
+        }
+        celery_settings['BROKER_HEARTBEAT'] = 1
+
     return celery_settings