From 173e77a64a3154348385330e05acb0b56d7052df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Miquel?= <sebastien.miquel@posteo.eu>
Date: Tue, 17 Mar 2026 14:34:55 +0100
Subject: [PATCH] Deal with quota exhaustion gracefully

---
 correction.py | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/correction.py b/correction.py
index cf374a7..75119e8 100644
--- a/correction.py
+++ b/correction.py
@@ -226,6 +226,7 @@ io_lock = threading.Lock()
 pro_lock = threading.Lock()
 pro_count = 0
 flash_count = 0
+pro_quota_exhausted = False
 
 if overwrite:
     if output_path.exists():
@@ -243,10 +244,17 @@ else:
 completed_set = set((str(f), l) for f, l in completed_tasks)
 tasks_to_process = [t for t in tasks if (str(t[0]), t[1]) not in completed_set]
 
-def call_gemini_with_retries(model_id, contents, config):
-    """Handles requests to Gemini with a 1min and 5min retry mechanism."""
+def call_gemini_with_retries(model_id, contents, config,
+                             fallback_model_id=MODEL_ID_flash):
+    """Handles requests to Gemini with a 1min and 5min retry mechanism, and quota fallback."""
+    global pro_quota_exhausted
     delays = [60, 300]
+
     for attempt in range(3):
+        # Switch to fallback immediately if quota was exhausted by another thread
+        if model_id == MODEL_ID_pro and pro_quota_exhausted and fallback_model_id:
+            model_id = fallback_model_id
+
         try:
             full_response_text = ""
             for chunk in client.models.generate_content_stream(
@@ -258,6 +266,16 @@ def call_gemini_with_retries(model_id, contents, config):
                     full_response_text += chunk.text
             return full_response_text
         except Exception as e:
+            error_msg = str(e).lower()
+            is_quota_error = "429" in error_msg or "quota" in error_msg or "exhausted" in error_msg
+
+            # Immediately fall back to Flash without waiting if it's a Pro quota error
+            if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id:
+                print(f"\tGemini Pro quota hit ({e}). Falling back to Flash permanently...")
+                model_id = fallback_model_id
+                pro_quota_exhausted = True
+                continue # Retry immediately with Flash
+
             if attempt < 2:
                 print(f"\tGemini API failure: {e}. Retrying in {delays[attempt]} seconds...")
                 time.sleep(delays[attempt])
@@ -481,7 +499,7 @@ Here is a list of all possible labels. You need to answer with a list one of the
     return new_tasks
 
 def process_single_task(task_tuple):
-    global pro_count, flash_count
+    global pro_count, flash_count, pro_quota_exhausted
     file_path = task_tuple[0]
     label = task_tuple[1]
     can_spawn_tasks = task_tuple[2] if len(task_tuple) > 2 else True
@@ -500,7 +518,9 @@ def process_single_task(task_tuple):
 
     if not use_flash:
         with pro_lock:
-            if limit is None or pro_count < limit:
+            if pro_quota_exhausted:
+                use_flash = True
+            elif limit is None or pro_count < limit:
                 pro_count += 1
             else:
                 use_flash = True