Better group sizes (bigger, slightly more homogeneous)

master
Sébastien Miquel 2026-03-10 15:23:49 +01:00
parent 40ac6d58c2
commit 4584993106
1 changed file with 24 additions and 6 deletions

View File

@ -11,8 +11,8 @@ import annotating
import annotating_with_checks import annotating_with_checks
# Roughly 10 A4 pages at 100 DPI # Roughly 10 A4 pages at 100 DPI
MAX_HEIGHT_PX = 11690 # MAX_HEIGHT_PX = 11690
# MAX_HEIGHT_PX = 18000 MAX_HEIGHT_PX = 17000 # Can be increased by 10%.
# MAX_HEIGHT_PX = 16000 # MAX_HEIGHT_PX = 16000
def render_item(item): def render_item(item):
@ -147,24 +147,42 @@ def main():
batches = [] batches = []
current_batch = [] current_batch = []
current_h = 0 current_h = 0
for r in rendered: for r in rendered:
sid = r[0] sid = r[0]
img_h = r[2].height img_h = r[2].height
# Split if we exceed max height AND we are on a new student # Split if we exceed max height AND we are on a new student
if current_batch and current_h + img_h > MAX_HEIGHT_PX and sid != last_sid: if current_batch and current_h + img_h > MAX_HEIGHT_PX and sid != last_sid:
batches.append(current_batch) batches.append(current_batch)
current_batch = [] current_batch = []
current_h = 0 current_h = 0
current_batch.append(r) current_batch.append(r)
current_h += img_h current_h += img_h
last_sid = sid last_sid = sid
if current_batch: if current_batch:
batches.append(current_batch) batches.append(current_batch)
batches2 = []
current_batch2 = []
current_h2 = 0
last_sid2 = None
for r in rendered:
sid = r[0]
img_h = r[2].height
# Split if we exceed max height AND we are on a new student
if current_batch2 and current_h2 + img_h > 1.1 *MAX_HEIGHT_PX \
and sid != last_sid2:
batches2.append(current_batch2)
current_batch2 = []
current_h2 = 0
current_batch2.append(r)
current_h2 += img_h
last_sid2 = sid
if current_batch2:
batches2.append(current_batch2)
if len(batches2) < len(batches):
batches = batches2
for i, batch in enumerate(batches, 1): for i, batch in enumerate(batches, 1):
save_batch(batch, prefix, i, root_dir, args.overwrite) save_batch(batch, prefix, i, root_dir, args.overwrite)