@@ -717,11 +717,12 @@ def calculate_retry_delay(attempt, headers):
717717 return delay + random .uniform (0 , delay * 0.1 )
718718
719719
720- def retrieve_data (args , template , query_args = None , paginated = True ):
720+ def retrieve_data (args , template , query_args = None , paginated = True , lazy = False ):
721721 """
722722 Fetch the data from GitHub API.
723723
724- Handle both single requests and pagination with yield of individual dicts.
724+ Handle both single requests and pagination. Returns a list by default, or
725+ a generator when lazy=True so callers can stop before fetching every page.
725726 Handles throttling, retries, read errors, and DMCA takedowns.
726727 """
727728 query_args = query_args or {}
@@ -851,6 +852,9 @@ def _extract_legal_url(response_body_bytes):
851852 ):
852853 break # No more data
853854
855+ if lazy :
856+ return fetch_all ()
857+
854858 return list (fetch_all ())
855859
856860
@@ -2229,7 +2233,7 @@ def retrieve_discussion_summaries(args, repository, since=None):
22292233 if updated_at and (newest_seen is None or updated_at > newest_seen ):
22302234 newest_seen = updated_at
22312235
2232- if since and updated_at and updated_at < since :
2236+ if since and updated_at and updated_at <= since :
22332237 stop = True
22342238 break
22352239
@@ -2650,26 +2654,28 @@ def track_newest_pull_update(pull):
26502654 newest_pull_update = updated_at
26512655
26522656 def pull_is_due_for_repository_checkpoint (pull ):
2653- return not repository_since or pull ["updated_at" ] >= repository_since
2657+ return not repository_since or pull ["updated_at" ] > repository_since
26542658
26552659 if not args .include_pull_details :
26562660 pull_states = ["open" , "closed" ]
26572661 for pull_state in pull_states :
26582662 query_args ["state" ] = pull_state
2659- _pulls = retrieve_data (args , _pulls_template , query_args = query_args )
2660- for pull in _pulls :
2663+ for pull in retrieve_data (
2664+ args , _pulls_template , query_args = query_args , lazy = True
2665+ ):
26612666 track_newest_pull_update (pull )
2662- if pulls_since and pull ["updated_at" ] < pulls_since :
2667+ if pulls_since and pull ["updated_at" ] <= pulls_since :
26632668 break
2664- if not pulls_since or pull ["updated_at" ] >= pulls_since :
2669+ if not pulls_since or pull ["updated_at" ] > pulls_since :
26652670 pulls [pull ["number" ]] = pull
26662671 else :
2667- _pulls = retrieve_data (args , _pulls_template , query_args = query_args )
2668- for pull in _pulls :
2672+ for pull in retrieve_data (
2673+ args , _pulls_template , query_args = query_args , lazy = True
2674+ ):
26692675 track_newest_pull_update (pull )
2670- if pulls_since and pull ["updated_at" ] < pulls_since :
2676+ if pulls_since and pull ["updated_at" ] <= pulls_since :
26712677 break
2672- if not pulls_since or pull ["updated_at" ] >= pulls_since :
2678+ if not pulls_since or pull ["updated_at" ] > pulls_since :
26732679 if pull_is_due_for_repository_checkpoint (pull ):
26742680 pulls [pull ["number" ]] = retrieve_data (
26752681 args ,
@@ -2913,7 +2919,12 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
29132919 written_count += 1
29142920
29152921 if include_assets and not skip_assets :
2916- assets = retrieve_data (args , release ["assets_url" ])
2922+ # The releases list API already includes release asset metadata. Use
2923+ # it to avoid an extra /releases/{id}/assets request per release.
2924+ # Keep a fallback for older/enterprise responses that might omit it.
2925+ assets = release .get ("assets" )
2926+ if assets is None :
2927+ assets = retrieve_data (args , release ["assets_url" ])
29172928 if len (assets ) > 0 :
29182929 # give release asset files somewhere to live & download them (not including source archives)
29192930 release_assets_cwd = os .path .join (release_cwd , release_name_safe )
0 commit comments