Minor modifications to grouping
parent
e994655e58
commit
b95e55d088
24
grouping.py
24
grouping.py
|
|
@ -11,26 +11,12 @@ from pdf2image import convert_from_path, pdfinfo_from_path
|
||||||
DPI = 200 # Good balance for readability and size
|
DPI = 200 # Good balance for readability and size
|
||||||
A4_HEIGHT_INCHES = 11.69
|
A4_HEIGHT_INCHES = 11.69
|
||||||
FULL_PAGE_PX = int(A4_HEIGHT_INCHES * DPI)
|
FULL_PAGE_PX = int(A4_HEIGHT_INCHES * DPI)
|
||||||
MAX_GROUP_HEIGHT = 2. * FULL_PAGE_PX
|
MAX_GROUP_HEIGHT = 1.75 * FULL_PAGE_PX
|
||||||
MAX_GROUP_COUNT = 15
|
MAX_GROUP_COUNT = 8
|
||||||
SEPARATOR_HEIGHT = 20
|
SEPARATOR_HEIGHT = 20
|
||||||
LABEL_HEIGHT = 50
|
LABEL_HEIGHT = 50
|
||||||
MAX_FILE_SIZE_BYTES = 2.5 * 1024 * 1024 # 2.5 MB
|
MAX_FILE_SIZE_BYTES = 2.5 * 1024 * 1024 # 2.5 MB
|
||||||
|
|
||||||
|
|
||||||
# def get_pdf_height(path):
|
|
||||||
# """Returns height in pixels at defined DPI without rendering."""
|
|
||||||
# try:
|
|
||||||
# info = pdfinfo_from_path(path)
|
|
||||||
# # info["Page size"] is usually "width height pts"
|
|
||||||
# # 1 pt = 1/72 inch
|
|
||||||
# # We assume single page PDFs as per prompt implication, or take the first page
|
|
||||||
# pts_height = float(info['Page size'].split(' ')[2]) if 'Page size' in info else 0
|
|
||||||
# return int((pts_height / 72.0) * DPI)
|
|
||||||
# except Exception as e:
|
|
||||||
# print(f"Error reading {path}: {e}")
|
|
||||||
# return 0
|
|
||||||
|
|
||||||
def get_pdf_height(path):
|
def get_pdf_height(path):
|
||||||
"""Returns total height of all pages in pixels at defined DPI."""
|
"""Returns total height of all pages in pixels at defined DPI."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -103,7 +89,7 @@ def group_files(file_list):
|
||||||
overhead = (SEPARATOR_HEIGHT + 30) if group['items'] else 0
|
overhead = (SEPARATOR_HEIGHT + 30) if group['items'] else 0
|
||||||
|
|
||||||
# Check Height Constraint
|
# Check Height Constraint
|
||||||
if (group['current_height'] + height + overhead) <= MAX_GROUP_HEIGHT:
|
if group['current_height'] + height + overhead <= MAX_GROUP_HEIGHT:
|
||||||
group['items'].append(item)
|
group['items'].append(item)
|
||||||
group['current_height'] += height + overhead
|
group['current_height'] += height + overhead
|
||||||
placed = True
|
placed = True
|
||||||
|
|
@ -181,8 +167,6 @@ def create_jpg(identifier, group_index, group, root_dir):
|
||||||
try:
|
try:
|
||||||
# Convert pdf to image
|
# Convert pdf to image
|
||||||
imgs = convert_from_path(path, dpi=DPI)
|
imgs = convert_from_path(path, dpi=DPI)
|
||||||
# if imgs:
|
|
||||||
# images.append((dd, imgs[0])) # Assume 1 page per pdf !! ??
|
|
||||||
if imgs:
|
if imgs:
|
||||||
# Concatenate multi-page PDFs into one single image object
|
# Concatenate multi-page PDFs into one single image object
|
||||||
combined_img = stitch_pdf_pages(imgs)
|
combined_img = stitch_pdf_pages(imgs)
|
||||||
|
|
@ -256,7 +240,7 @@ def create_jpg(identifier, group_index, group, root_dir):
|
||||||
break
|
break
|
||||||
quality -= 5
|
quality -= 5
|
||||||
|
|
||||||
print(f"Saved {output_path} ({os.path.getsize(output_path)/1024/1024:.2f} MB)")
|
print(f"Saved {output_path} with {len(group)} ({os.path.getsize(output_path)/1024/1024:.2f} MB)")
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue