diff --git a/core/management/commands/cleanup_old_data.py b/core/management/commands/cleanup_old_data.py
new file mode 100644
index 0000000..63b3f5e
--- /dev/null
+++ b/core/management/commands/cleanup_old_data.py
@@ -0,0 +1,296 @@
+import os
+from datetime import datetime, timedelta
+from django.core.management.base import BaseCommand, CommandError
+from django.conf import settings
+from django.utils import timezone
+from django.db import transaction
+from cv.models import CVQuestionnaire, AIResponse
+
+
+class Command(BaseCommand):
+    """Delete stale AI responses and, optionally, orphaned resume files."""
+
+    help = "Clean up old AI responses and orphaned resume files to save storage space"
+
+    def add_arguments(self, parser):
+        """Register the command-line options accepted by this command."""
+        parser.add_argument(
+            '--days',
+            type=int,
+            default=90,
+            help='Delete AI responses older than this many days (default: 90)'
+        )
+        parser.add_argument(
+            '--dry-run',
+            action='store_true',
+            help='Show what would be deleted without actually deleting'
+        )
+        parser.add_argument(
+            '--force',
+            action='store_true',
+            help='Skip confirmation prompts'
+        )
+        parser.add_argument(
+            '--cleanup-files',
+            action='store_true',
+            help='Also clean up orphaned resume files'
+        )
+
+    def handle(self, *args, **options):
+        """Run the cleanup steps selected on the command line.
+
+        Raises:
+            CommandError: if ``--days`` is not positive, or if any cleanup
+                step reported a failure, so the exit status is non-zero.
+        """
+        days = options['days']
+        dry_run = options['dry_run']
+        force = options['force']
+        cleanup_files = options['cleanup_files']
+
+        if days <= 0:
+            raise CommandError('Days must be a positive integer')
+
+        cutoff_date = timezone.now() - timedelta(days=days)
+
+        self.stdout.write(self.style.WARNING("Starting cleanup process..."))
+        self.stdout.write(f" Cutoff date: {cutoff_date.strftime('%Y-%m-%d %H:%M:%S')}")
+
+        if dry_run:
+            self.stdout.write(
+                self.style.NOTICE(" Running in DRY-RUN mode - no changes will be made")
+            )
+
+        # Run every requested step even if an earlier one failed, then report
+        # the combined outcome so a failure is never announced as success.
+        succeeded = self._cleanup_ai_responses(cutoff_date, dry_run, force)
+
+        if cleanup_files:
+            files_ok = self._cleanup_orphaned_files(dry_run, force)
+            succeeded = succeeded and files_ok
+
+        if not succeeded:
+            raise CommandError('Cleanup finished with errors; see the messages above')
+
+        self.stdout.write(
+            self.style.SUCCESS("Cleanup process completed successfully!")
+        )
+
+    def _confirm(self, prompt):
+        """Prompt on stdin and return True only for an explicit y/yes answer.
+
+        A closed or non-interactive stdin (EOFError) counts as "no", so an
+        unattended run without --force skips the deletion instead of dying
+        with a traceback.
+        """
+        try:
+            answer = input(prompt)
+        except EOFError:
+            return False
+        return answer.strip().lower() in ('y', 'yes')
+
+    def _cleanup_ai_responses(self, cutoff_date, dry_run, force):
+        """Delete AI responses created before ``cutoff_date``.
+
+        Returns:
+            bool: False if the deletion raised an error; True otherwise,
+            including dry runs, declined prompts and empty result sets.
+        """
+        self.stdout.write("\nAnalyzing AI responses...")
+
+        old_responses = AIResponse.objects.filter(created_at__lt=cutoff_date)
+        count = old_responses.count()
+
+        if count == 0:
+            self.stdout.write(" No old AI responses found.")
+            return True
+
+        self.stdout.write(f" Found {count} AI response(s) older than cutoff date")
+
+        if dry_run:
+            self.stdout.write(" [DRY-RUN] Would delete these AI responses:")
+            for response in old_responses[:10]:  # Show first 10
+                self.stdout.write(f" - Response {response.id} from {response.created_at}")
+            if count > 10:
+                self.stdout.write(f" ... and {count - 10} more")
+            return True
+
+        # Confirm deletion unless force is used
+        prompt = f"\nWarning: Delete {count} AI response(s)? [y/N]: "
+        if not force and not self._confirm(prompt):
+            self.stdout.write(" Skipped AI response cleanup.")
+            return True
+
+        try:
+            with transaction.atomic():
+                _, deleted_by_model = old_responses.delete()
+        except Exception as e:
+            # Boundary handler: report the failure and let handle() turn it
+            # into a non-zero exit once the remaining steps have run.
+            self.stdout.write(
+                self.style.ERROR(f" Error deleting AI responses: {e}")
+            )
+            return False
+
+        # QuerySet.delete() also counts rows removed by cascades; report only
+        # the AIResponse rows so the figure matches the prompt above.
+        deleted_count = deleted_by_model.get(AIResponse._meta.label, 0)
+        self.stdout.write(
+            self.style.SUCCESS(f" Deleted {deleted_count} AI response(s)")
+        )
+        return True
+
+    def _cleanup_orphaned_files(self, dry_run, force):
+        """Remove files under MEDIA_ROOT/resumes that no questionnaire references.
+
+        Returns:
+            bool: False if the directory could not be read or a file could
+            not be deleted; True otherwise.
+        """
+        self.stdout.write("\nAnalyzing uploaded files...")
+
+        media_root = settings.MEDIA_ROOT
+        resumes_dir = os.path.join(media_root, 'resumes')
+
+        if not os.path.exists(resumes_dir):
+            self.stdout.write(" Resumes directory does not exist.")
+            return True
+
+        # Get all files in resumes directory
+        try:
+            all_files = []
+            for root, _dirs, files in os.walk(resumes_dir):
+                for file_name in files:
+                    file_path = os.path.join(root, file_name)
+                    # Stored FileField names use "/" separators, so normalise
+                    # the on-disk path; otherwise every file would look
+                    # orphaned (and be deleted) on Windows.
+                    relative_path = os.path.relpath(file_path, media_root).replace(os.sep, '/')
+                    all_files.append((file_path, relative_path))
+        except Exception as e:
+            self.stdout.write(
+                self.style.ERROR(f" Error reading resumes directory: {e}")
+            )
+            return False
+
+        if not all_files:
+            self.stdout.write(" No files found in resumes directory.")
+            return True
+
+        self.stdout.write(f" Found {len(all_files)} file(s) in resumes directory")
+
+        # Fetch only the stored names instead of full model instances.
+        # NOTE(review): an upload whose file is already on disk but whose row
+        # is not yet committed would look orphaned here - avoid running this
+        # while uploads are in flight.
+        active_resume_paths = set(
+            CVQuestionnaire.objects
+            .exclude(resume='')
+            .exclude(resume__isnull=True)
+            .values_list('resume', flat=True)
+        )
+
+        self.stdout.write(f" Found {len(active_resume_paths)} active resume reference(s)")
+
+        # Find orphaned files
+        orphaned_files = []
+        total_size = 0
+
+        for file_path, relative_path in all_files:
+            if relative_path in active_resume_paths:
+                continue
+            try:
+                file_size = os.path.getsize(file_path)
+            except OSError:
+                # File might have been deleted or is inaccessible
+                continue
+            orphaned_files.append((file_path, relative_path, file_size))
+            total_size += file_size
+
+        if not orphaned_files:
+            self.stdout.write(" No orphaned files found.")
+            return True
+
+        self.stdout.write(f" Found {len(orphaned_files)} orphaned file(s)")
+        self.stdout.write(f" Total size: {self._format_file_size(total_size)}")
+
+        if dry_run:
+            self.stdout.write(" [DRY-RUN] Would delete these orphaned files:")
+            for _path, relative_path, file_size in orphaned_files[:10]:
+                self.stdout.write(f" - {relative_path} ({self._format_file_size(file_size)})")
+            if len(orphaned_files) > 10:
+                self.stdout.write(f" ... and {len(orphaned_files) - 10} more")
+            return True
+
+        # Confirm deletion unless force is used
+        prompt = (
+            f"\nWarning: Delete {len(orphaned_files)} orphaned file(s) "
+            f"({self._format_file_size(total_size)})? [y/N]: "
+        )
+        if not force and not self._confirm(prompt):
+            self.stdout.write(" Skipped orphaned files cleanup.")
+            return True
+
+        deleted_count = 0
+        deleted_size = 0
+        failed_count = 0
+
+        for file_path, relative_path, file_size in orphaned_files:
+            try:
+                os.remove(file_path)
+            except OSError as e:
+                failed_count += 1
+                self.stdout.write(
+                    self.style.WARNING(f" Failed to delete {relative_path}: {e}")
+                )
+                continue
+            deleted_count += 1
+            deleted_size += file_size
+            self.stdout.write(f" Deleted: {relative_path}")
+
+        self.stdout.write(
+            self.style.SUCCESS(
+                f" Deleted {deleted_count} orphaned file(s) "
+                f"({self._format_file_size(deleted_size)} freed)"
+            )
+        )
+
+        # Clean up empty directories
+        self._cleanup_empty_directories(resumes_dir)
+
+        return failed_count == 0
+
+    def _cleanup_empty_directories(self, directory):
+        """Remove empty directories below ``directory``, deepest first."""
+        try:
+            for root, dirs, _files in os.walk(directory, topdown=False):
+                for dir_name in dirs:
+                    dir_path = os.path.join(root, dir_name)
+                    try:
+                        if not os.listdir(dir_path):  # Directory is empty
+                            os.rmdir(dir_path)
+                            self.stdout.write(f" Removed empty directory: {dir_path}")
+                    except OSError:
+                        pass  # Best effort: unreadable or concurrently changed
+        except Exception as e:
+            self.stdout.write(
+                self.style.WARNING(f" Warning: Could not clean up empty directories: {e}")
+            )
+
+    def _format_file_size(self, size_bytes):
+        """Return ``size_bytes`` as a human-readable string such as "1.5 MB".
+
+        Values beyond the largest unit stay in that unit rather than failing.
+        """
+        if size_bytes == 0:
+            return "0 B"
+
+        size_names = ["B", "KB", "MB", "GB", "TB"]
+        i = 0
+        size = float(size_bytes)
+
+        while size >= 1024.0 and i < len(size_names) - 1:
+            size /= 1024.0
+            i += 1
+
+        return f"{size:.1f} {size_names[i]}"
diff --git a/core/management/commands/create_test_data.py b/core/management/commands/create_test_data.py
new file mode 100644
index 0000000..99b099a
--- /dev/null
+++ b/core/management/commands/create_test_data.py
@@ -0,0 +1,88 @@
+from datetime import datetime, timedelta
+from django.core.management.base import BaseCommand, CommandError
+from django.utils import timezone
+from django.contrib.auth import get_user_model
+from cv.models import CVQuestionnaire, AIResponse
+from django.core.files.uploadedfile import SimpleUploadedFile
+
+User = get_user_model()
+
+
+class Command(BaseCommand):
+    """Seed backdated questionnaires and AI responses for cleanup testing."""
+
+    help = "Create test data for cleanup command testing"
+
+    def add_arguments(self, parser):
+        """Register the --count option."""
+        parser.add_argument(
+            '--count',
+            type=int,
+            default=5,
+            help='Number of old records to create (default: 5)'
+        )
+
+    def handle(self, *args, **options):
+        """Create --count questionnaires, each with a 100-day-old AI response.
+
+        Raises:
+            CommandError: if --count is less than 1.
+        """
+        count = options['count']
+
+        if count < 1:
+            raise CommandError('Count must be a positive integer')
+
+        self.stdout.write("🔧 Creating test data for cleanup testing...")
+
+        # Get or create a test user
+        user, created = User.objects.get_or_create(
+            username='cleanup_test_user',
+            defaults={
+                'email': 'test@example.com',
+                'first_name': 'Test',
+                'last_name': 'User'
+            }
+        )
+
+        if created:
+            # This account only owns fixture rows and never needs to log in,
+            # so give it no usable password rather than a well-known one.
+            user.set_unusable_password()
+            user.save()
+            self.stdout.write(" Created test user")
+
+        # Create old questionnaires and AI responses
+        old_date = timezone.now() - timedelta(days=100)
+
+        for i in range(count):
+            # Create questionnaire
+            questionnaire = CVQuestionnaire.objects.create(
+                user=user,
+                position=f'Test Position {i+1}',
+                industry='Technology',
+                experience_level='3-5',
+                company_size='medium',
+                location='Remote',
+                application_timeline='1-3 months',
+                job_description=f'Test job description {i+1}'
+            )
+
+            # Create AI response with old date
+            ai_response = AIResponse.objects.create(
+                questionnaire=questionnaire,
+                response_text=f'This is a test AI response {i+1} that should be cleaned up.'
+            )
+
+            # Backdate via update(): presumably created_at is filled in
+            # automatically on save, so it cannot be passed to create().
+            AIResponse.objects.filter(id=ai_response.id).update(created_at=old_date)
+
+            self.stdout.write(f" Created test questionnaire and AI response {i+1}")
+
+        self.stdout.write(
+            self.style.SUCCESS(f"Created {count} test records dated {old_date.strftime('%Y-%m-%d')}")
+        )
+        self.stdout.write(
+            self.style.NOTICE("You can now test the cleanup command with --dry-run")
+        )
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
old mode 100644
new mode 100755