Handle quota exhaustion gracefully
parent
38ea0069af
commit
173e77a64a
|
|
@ -226,6 +226,7 @@ io_lock = threading.Lock()
|
||||||
pro_lock = threading.Lock()
|
pro_lock = threading.Lock()
|
||||||
pro_count = 0
|
pro_count = 0
|
||||||
flash_count = 0
|
flash_count = 0
|
||||||
|
pro_quota_exhausted = False
|
||||||
|
|
||||||
if overwrite:
|
if overwrite:
|
||||||
if output_path.exists():
|
if output_path.exists():
|
||||||
|
|
@ -243,10 +244,17 @@ else:
|
||||||
completed_set = set((str(f), l) for f, l in completed_tasks)
|
completed_set = set((str(f), l) for f, l in completed_tasks)
|
||||||
tasks_to_process = [t for t in tasks if (str(t[0]), t[1]) not in completed_set]
|
tasks_to_process = [t for t in tasks if (str(t[0]), t[1]) not in completed_set]
|
||||||
|
|
||||||
def call_gemini_with_retries(model_id, contents, config):
|
def call_gemini_with_retries(model_id, contents, config,
|
||||||
"""Handles requests to Gemini with a 1min and 5min retry mechanism."""
|
fallback_model_id=MODEL_ID_flash):
|
||||||
|
"""Handles requests to Gemini with a 1min and 5min retry mechanism, and quota fallback."""
|
||||||
|
global pro_quota_exhausted
|
||||||
delays = [60, 300]
|
delays = [60, 300]
|
||||||
|
|
||||||
for attempt in range(3):
|
for attempt in range(3):
|
||||||
|
# Switch to fallback immediately if quota was exhausted by another thread
|
||||||
|
if model_id == MODEL_ID_pro and pro_quota_exhausted and fallback_model_id:
|
||||||
|
model_id = fallback_model_id
|
||||||
|
|
||||||
try:
|
try:
|
||||||
full_response_text = ""
|
full_response_text = ""
|
||||||
for chunk in client.models.generate_content_stream(
|
for chunk in client.models.generate_content_stream(
|
||||||
|
|
@ -258,6 +266,16 @@ def call_gemini_with_retries(model_id, contents, config):
|
||||||
full_response_text += chunk.text
|
full_response_text += chunk.text
|
||||||
return full_response_text
|
return full_response_text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
error_msg = str(e).lower()
|
||||||
|
is_quota_error = "429" in error_msg or "quota" in error_msg or "exhausted" in error_msg
|
||||||
|
|
||||||
|
# Immediately fallback to Flash without waiting if it's a Pro quota error
|
||||||
|
if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id:
|
||||||
|
print(f"\tGemini Pro quota hit ({e}). Falling back to Flash permanently...")
|
||||||
|
model_id = fallback_model_id
|
||||||
|
pro_quota_exhausted = True
|
||||||
|
continue # Retry immediately with Flash
|
||||||
|
|
||||||
if attempt < 2:
|
if attempt < 2:
|
||||||
print(f"\tGemini API failure: {e}. Retrying in {delays[attempt]} seconds...")
|
print(f"\tGemini API failure: {e}. Retrying in {delays[attempt]} seconds...")
|
||||||
time.sleep(delays[attempt])
|
time.sleep(delays[attempt])
|
||||||
|
|
@ -481,7 +499,7 @@ Here is a list of all possible labels. You need to answer with a list one of the
|
||||||
return new_tasks
|
return new_tasks
|
||||||
|
|
||||||
def process_single_task(task_tuple):
|
def process_single_task(task_tuple):
|
||||||
global pro_count, flash_count
|
global pro_count, flash_count, pro_quota_exhausted
|
||||||
file_path = task_tuple[0]
|
file_path = task_tuple[0]
|
||||||
label = task_tuple[1]
|
label = task_tuple[1]
|
||||||
can_spawn_tasks = task_tuple[2] if len(task_tuple) > 2 else True
|
can_spawn_tasks = task_tuple[2] if len(task_tuple) > 2 else True
|
||||||
|
|
@ -500,7 +518,9 @@ def process_single_task(task_tuple):
|
||||||
|
|
||||||
if not use_flash:
|
if not use_flash:
|
||||||
with pro_lock:
|
with pro_lock:
|
||||||
if limit is None or pro_count < limit:
|
if pro_quota_exhausted:
|
||||||
|
use_flash = True
|
||||||
|
elif limit is None or pro_count < limit:
|
||||||
pro_count += 1
|
pro_count += 1
|
||||||
else:
|
else:
|
||||||
use_flash = True
|
use_flash = True
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue