From f6655097c62efd69bdc014d0e50aa1199b987c17 Mon Sep 17 00:00:00 2001
From: Vlad0n20
Date: Wed, 29 Oct 2025 14:35:49 +0200
Subject: [PATCH 1/2] Update fix

---
 osf/models/preprint.py  |  8 ++++++--
 tests/test_preprints.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/osf/models/preprint.py b/osf/models/preprint.py
index 864f5558e8b..9e5be2b038d 100644
--- a/osf/models/preprint.py
+++ b/osf/models/preprint.py
@@ -383,14 +383,18 @@ def create(cls, provider, title, creator, description, manual_guid=None, manual_
 
     def get_last_not_rejected_version(self):
         """Get the last version that is not rejected.
+        Returns None if all versions are rejected.
         """
-        return self.get_guid().versions.filter(is_rejected=False).order_by('-version').first().referent
+        last_not_rejected = self.get_guid().versions.filter(is_rejected=False).order_by('-version').first()
+        return last_not_rejected.referent if last_not_rejected else None
 
     def has_unpublished_pending_version(self):
         """Check if preprint has pending unpublished version.
         Note: use `.check_unfinished_or_unpublished_version()` if checking both types
         """
         last_not_rejected_version = self.get_last_not_rejected_version()
+        if not last_not_rejected_version:
+            return False
         return not last_not_rejected_version.date_published and last_not_rejected_version.machine_state == 'pending'
 
     def has_initiated_but_unfinished_version(self):
@@ -801,7 +805,7 @@ def date_created_first_version(self):
         if not base_guid:
             return self.created
 
-        first_version = base_guid.versions.filter(is_rejected=False).order_by('version').first()
+        first_version = base_guid.versions.order_by('version').first()
         if first_version and first_version.referent:
             return first_version.referent.created
 
diff --git a/tests/test_preprints.py b/tests/test_preprints.py
index 13d44d362b5..c71199b49b3 100644
--- a/tests/test_preprints.py
+++ b/tests/test_preprints.py
@@ -2701,6 +2701,38 @@ def test_preprint_version_withdrawal_request_post_mod(self, make_withdrawal_requ
         assert new_version.is_published is True
         assert new_version.machine_state == ReviewStates.WITHDRAWN.value
 
+    def test_date_created_first_version_with_rejected_v1(self, creator, moderator):
+        v1 = PreprintFactory(reviews_workflow='pre-moderation', is_published=False, creator=creator)
+        v1.run_submit(creator)
+        v1.run_reject(moderator, 'Rejecting v1')
+        v1.reload()
+
+        assert v1.machine_state == ReviewStates.REJECTED.value
+        assert v1.versioned_guids.first().is_rejected is True
+        v1_created = v1.created
+
+        v2 = PreprintFactory.create_version(
+            create_from=v1,
+            creator=creator,
+            final_machine_state='initial',
+            is_published=False,
+            set_doi=False
+        )
+        v2.run_submit(creator)
+        v2.run_accept(moderator, 'Accepting v2')
+        v2.reload()
+
+        assert v2.machine_state == ReviewStates.ACCEPTED.value
+        assert v2.is_published is True
+        v2_created = v2.created
+
+        assert v2_created > v1_created
+
+        assert v2.date_created_first_version == v1_created
+        assert v2.date_created_first_version != v2_created
+
+        assert v1.date_created_first_version == v1_created
+
 
 class TestEmberRedirect(OsfTestCase):
 

From 19754f11ee4904af6414340a31a6e840d4e05a4f Mon Sep 17 00:00:00 2001
From: Vlad0n20
Date: Mon, 10 Nov 2025 18:48:02 +0200
Subject: [PATCH 2/2] add manage command to manual reindex preprints

---
 .../commands/reindex_versioned_preprints.py   | 131 ++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 osf/management/commands/reindex_versioned_preprints.py

diff --git a/osf/management/commands/reindex_versioned_preprints.py b/osf/management/commands/reindex_versioned_preprints.py
new file mode 100644
index 00000000000..b48e53487e6
--- /dev/null
+++ b/osf/management/commands/reindex_versioned_preprints.py
@@ -0,0 +1,131 @@
+import logging
+from django.core.management.base import BaseCommand
+
+from osf.models import Preprint
+
+logger = logging.getLogger(__name__)
+
+
+def _version_label(preprint):
+    """Return the version of the preprint's first versioned guid, or 'N/A' if it has none."""
+    versioned_guid = preprint.versioned_guids.first()
+    return versioned_guid.version if versioned_guid else 'N/A'
+
+
+def reindex_versioned_preprints(dry_run=False, batch_size=100, provider_id=None, guids=None):
+    """Re-index published versioned preprints by calling ``update_search()`` on each one.
+
+    :param bool dry_run: when True, only log what would be re-indexed
+    :param int batch_size: chunk size passed to ``QuerySet.iterator()``
+    :param str provider_id: optional provider ``_id`` used to narrow the queryset
+    :param list guids: optional guid ``_id`` values; when given, only matching preprints are considered
+    """
+    if guids:
+        preprints = Preprint.objects.filter(guids___id__in=guids)
+    else:
+        preprints = Preprint.objects.filter(versioned_guids__isnull=False).distinct()
+
+    if provider_id:
+        preprints = preprints.filter(provider___id=provider_id)
+
+    preprints = preprints.filter(is_published=True)
+
+    prefix = '[DRY RUN] ' if dry_run else ''
+    total_count = preprints.count()
+    logger.info(f'{prefix}Found {total_count} versioned preprints to re-index')
+
+    if total_count == 0:
+        logger.info('No preprints to re-index')
+        return
+
+    processed = 0
+    failed = 0
+    for preprint in preprints.iterator(chunk_size=batch_size):
+        processed += 1
+
+        if dry_run:
+            logger.info(
+                f'[DRY RUN] Would re-index preprint {preprint._id} '
+                f'(version {_version_label(preprint)}, '
+                f'date_created_first_version={preprint.date_created_first_version}) '
+                f'[{processed}/{total_count}]'
+            )
+            continue
+
+        try:
+            preprint.update_search()
+        except Exception:
+            # Best effort: keep going, but record the failure together with its traceback.
+            failed += 1
+            logger.exception(f'Failed to re-index preprint {preprint._id}')
+            continue
+
+        # Progress is only logged for every 10th preprint to keep the output manageable.
+        if processed % 10 == 0:
+            logger.info(
+                f'Re-indexed preprint {preprint._id} '
+                f'(version {_version_label(preprint)}) '
+                f'[{processed}/{total_count}]'
+            )
+
+    summary = (
+        f'{prefix}Completed. '
+        f'{"Would have re-indexed" if dry_run else "Re-indexed"} {processed - failed} preprints'
+    )
+    if failed:
+        # Failed preprints are reported separately instead of being counted as re-indexed.
+        summary += f' ({failed} failed)'
+    logger.info(summary)
+
+
+class Command(BaseCommand):
+    help = (
+        'Re-index all versioned preprints to Elasticsearch to ensure computed properties '
+        'like date_created_first_version are up to date.'
+    )
+
+    def add_arguments(self, parser):
+        """Register the --dry-run, --batch-size, --provider and --guids options."""
+        super().add_arguments(parser)
+        parser.add_argument(
+            '--dry-run',
+            action='store_true',
+            dest='dry_run',
+            help='Preview what would be re-indexed without actually making changes',
+        )
+        parser.add_argument(
+            '--batch-size',
+            type=int,
+            default=100,
+            help='Number of preprints to process in each batch (default: 100)',
+        )
+        parser.add_argument(
+            '--provider',
+            type=str,
+            help='Optional provider ID to filter preprints',
+        )
+        parser.add_argument(
+            '--guids',
+            type=str,
+            nargs='+',
+            help='Optional list of specific preprint GUIDs to re-index',
+        )
+
+    def handle(self, *args, **options):
+        """Read the parsed options and run ``reindex_versioned_preprints`` with them."""
+        dry_run = options.get('dry_run', False)
+        batch_size = options.get('batch_size', 100)
+        provider_id = options.get('provider')
+        guids = options.get('guids')
+
+        if dry_run:
+            logger.info('=' * 60)
+            logger.info('DRY RUN MODE - No changes will be made')
+            logger.info('=' * 60)
+
+        reindex_versioned_preprints(
+            dry_run=dry_run,
+            batch_size=batch_size,
+            provider_id=provider_id,
+            guids=guids
+        )