From 173e77a64a3154348385330e05acb0b56d7052df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Miquel?= Date: Tue, 17 Mar 2026 14:34:55 +0100 Subject: [PATCH] Deal with quota exhausted gracefully --- correction.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/correction.py b/correction.py index cf374a7..75119e8 100644 --- a/correction.py +++ b/correction.py @@ -226,6 +226,7 @@ io_lock = threading.Lock() pro_lock = threading.Lock() pro_count = 0 flash_count = 0 +pro_quota_exhausted = False if overwrite: if output_path.exists(): @@ -243,10 +244,17 @@ else: completed_set = set((str(f), l) for f, l in completed_tasks) tasks_to_process = [t for t in tasks if (str(t[0]), t[1]) not in completed_set] -def call_gemini_with_retries(model_id, contents, config): - """Handles requests to Gemini with a 1min and 5min retry mechanism.""" +def call_gemini_with_retries(model_id, contents, config, + fallback_model_id=MODEL_ID_flash): + """Handles requests to Gemini with a 1min and 5min retry mechanism, and quota fallback.""" + global pro_quota_exhausted delays = [60, 300] + for attempt in range(3): + # Switch to fallback immediately if quota was exhausted by another thread + if model_id == MODEL_ID_pro and pro_quota_exhausted and fallback_model_id: + model_id = fallback_model_id + try: full_response_text = "" for chunk in client.models.generate_content_stream( @@ -258,6 +266,16 @@ def call_gemini_with_retries(model_id, contents, config): full_response_text += chunk.text return full_response_text except Exception as e: + error_msg = str(e).lower() + is_quota_error = "429" in error_msg or "quota" in error_msg or "exhausted" in error_msg + + # Immediately fallback to Flash without waiting if it's a Pro quota error + if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id: + print(f"\tGemini Pro quota hit ({e}). Falling back to Flash permanently...") + model_id = fallback_model_id + pro_quota_exhausted = True + continue # Retry immediately with Flash + if attempt < 2: print(f"\tGemini API failure: {e}. Retrying in {delays[attempt]} seconds...") time.sleep(delays[attempt]) @@ -481,7 +499,7 @@ Here is a list of all possible labels. You need to answer with a list one of the return new_tasks def process_single_task(task_tuple): - global pro_count, flash_count + global pro_count, flash_count, pro_quota_exhausted file_path = task_tuple[0] label = task_tuple[1] can_spawn_tasks = task_tuple[2] if len(task_tuple) > 2 else True @@ -500,7 +518,9 @@ def process_single_task(task_tuple): if not use_flash: with pro_lock: - if limit is None or pro_count < limit: + if pro_quota_exhausted: + use_flash = True + elif limit is None or pro_count < limit: pro_count += 1 else: use_flash = True