import os
import json
import re
import sys
import shutil
from pathlib import Path
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from PIL import Image, ImageDraw, ImageFont
from pdf2image import convert_from_path, pdfinfo_from_path

# Configuration
# All pixel values below are expressed at the rendering resolution DPI.
DPI = 200  # Good balance for readability and size
A4_HEIGHT_INCHES = 11.69
# Height of one A4 page in pixels at DPI (truncated to an int)
FULL_PAGE_PX = int(A4_HEIGHT_INCHES * DPI)
# Height budget of one group: one and a half A4 pages (a float)
MAX_GROUP_HEIGHT = 1.5 * FULL_PAGE_PX
# Maximum number of PDFs placed in one group
MAX_GROUP_COUNT = 8
# Height in pixels of the black bar drawn between two stacked images
SEPARATOR_HEIGHT = 20
# Height in pixels of the band reserved above each image for its "ID: dd" label
LABEL_HEIGHT = 50
MAX_FILE_SIZE_BYTES = 2.5 * 1024 * 1024  # 2.5 MiB target size for each saved JPEG

def get_pdf_height(path):
    """
    Estimates the stacked height, in pixels at DPI, of every page of a PDF.

    The estimate is (height of the reported 'Page size') x (reported page
    count); it therefore treats all pages as having that one size.
    Any failure is printed and reported as a height of 0.
    """
    try:
        pdf_info = pdfinfo_from_path(path)

        # Missing page count is treated as a single page
        page_count = 1
        if "Pages" in pdf_info:
            page_count = int(pdf_info["Pages"])

        # 'Page size' is split on spaces and the third token is read as the
        # height in points (presumably "<w> x <h> pts ..." - verify against pdfinfo)
        height_pts = 0
        if 'Page size' in pdf_info:
            height_pts = float(pdf_info['Page size'].split(' ')[2])

        # 72 points per inch, DPI pixels per inch
        page_px = int((height_pts / 72.0) * DPI)
        return page_px * page_count
    except Exception as err:
        print(f"Error reading {path}: {err}")
        return 0

def collect_files(root_dir):
    """
    Scans Dir/Copiedd/identifier.pdf
    Returns dict: {identifier: [(dd, path, height), ...]}

    Every directory below root_dir whose name starts with 'Copie' followed by
    two digits contributes its *.pdf files (extension matched
    case-insensitively). The identifier is the file name without extension,
    dd is the two-digit string captured from the folder name and height
    comes from get_pdf_height().

    Directories and files are visited in sorted name order so that the
    per-identifier lists come out in the same order on every run and on
    every filesystem (os.walk itself gives no ordering guarantee).
    """
    data = defaultdict(list)

    # Regex to match 'Copie' followed by 2 digits.
    # NOTE: match() only anchors at the start, so a folder such as
    # 'Copie123' is accepted too and yields dd == '12'.
    folder_pattern = re.compile(r'Copie(\d{2})')

    for root, dirs, files in os.walk(root_dir):
        # In-place sort: os.walk (top-down) descends in the order of this list
        dirs.sort()

        match = folder_pattern.match(os.path.basename(root))
        if not match:
            continue

        dd = match.group(1)
        for file in sorted(files):
            if not file.lower().endswith('.pdf'):
                continue

            identifier = os.path.splitext(file)[0]
            full_path = os.path.join(root, file)

            # Store triple (dd, path, height)
            data[identifier].append((dd, full_path, get_pdf_height(full_path)))
    return data

def group_files(file_list, *, max_group_height=None, max_group_count=None,
                separator_height=None, label_height=None):
    """
    Groups files using the First Fit Decreasing heuristic to keep the group count low.

    file_list: iterable of (dd, path, height) triples; only the height
        (index 2) is used for packing.
    max_group_height, max_group_count, separator_height, label_height:
        optional overrides; when left as None the module constants
        MAX_GROUP_HEIGHT, MAX_GROUP_COUNT, SEPARATOR_HEIGHT and LABEL_HEIGHT
        are used.

    Returns a list of groups, each a list of the original triples.

    The height charged to a group mirrors the canvas create_jpg builds:
    every image carries a label band of label_height above it, and every
    image after the first is preceded by a separator bar of
    separator_height. A group is therefore accepted only while
        sum(heights) + n * label_height + (n - 1) * separator_height
    stays within max_group_height. An item that fits nowhere (including one
    that is too tall on its own) always gets a fresh group.
    """
    if max_group_height is None:
        max_group_height = MAX_GROUP_HEIGHT
    if max_group_count is None:
        max_group_count = MAX_GROUP_COUNT
    if separator_height is None:
        separator_height = SEPARATOR_HEIGHT
    if label_height is None:
        label_height = LABEL_HEIGHT

    # 1. Sort by height DESCENDING. Large items are hardest to fit, handle them first.
    #    sorted() is stable, so equal heights keep their input order.
    sorted_files = sorted(file_list, key=lambda entry: entry[2], reverse=True)

    # Each group is a dict: {'items': [...], 'current_height': rendered height so far}
    groups = []

    for item in sorted_files:
        height = item[2]
        # Cost of appending to an existing (hence non-empty) group
        appended_cost = height + separator_height + label_height

        # 2. Try to fit item into an existing group (First Fit)
        for group in groups:
            if len(group['items']) >= max_group_count:
                continue
            if group['current_height'] + appended_cost <= max_group_height:
                group['items'].append(item)
                group['current_height'] += appended_cost
                break
        else:
            # 3. Nothing could take it: open a new group (its first image
            #    still needs a label band, but no separator)
            groups.append({
                'items': [item],
                'current_height': height + label_height,
            })

    # Return list of lists (strip the bookkeeping)
    return [group['items'] for group in groups]

def stitch_pdf_pages(images_list):
    """
    Stacks page images top to bottom, with no gap, into one RGB image.

    Returns None for an empty (or falsy) input and the sole image itself,
    untouched, when there is only one. Otherwise a new white canvas as wide
    as the widest page is created and the pages are pasted left-aligned.
    """
    if not images_list:
        return None
    if len(images_list) == 1:
        return images_list[0]

    widths = [page.width for page in images_list]
    heights = [page.height for page in images_list]

    sheet = Image.new('RGB', (max(widths), sum(heights)), 'white')

    top = 0
    for page, page_height in zip(images_list, heights):
        sheet.paste(page, (0, top))
        top += page_height

    return sheet

def create_jpg(identifier, group_index, group, root_dir):
    """
    Renders one group of PDFs into a single labelled JPEG plus a JSON sidecar.

    identifier: name of the sub-folder created under root_dir; it is also
        stored in every metadata row.
    group_index: zero-based group number; the files are named
        Group_<group_index+1>.jpg and Group_<group_index+1>.json.
    group: list of (dd, path, height) triples; the height is not used here.
    root_dir: folder under which <identifier>/ is created.

    Each PDF is rendered at DPI, its pages are stitched vertically, and the
    results are stacked on a white canvas: a LABEL_HEIGHT band with the text
    "ID: <dd>" above every image and a black SEPARATOR_HEIGHT bar between
    consecutive images. The JSON file holds one row per image:
    [dd, h_min, h_max, image_width / canvas_width, identifier], where
    h_min/h_max are the image's top and bottom y coordinates on the canvas.

    PDFs that fail to convert are reported and skipped. Returns None; nothing
    is written when no PDF could be rendered.
    """
    images = []
    metadata = []  # Rows of (dd, h_min, h_max, width ratio, identifier)

    # Render PDFs to images
    for dd, path, _ in group:
        try:
            imgs = convert_from_path(path, dpi=DPI)
            if imgs:
                # Concatenate multi-page PDFs into one single image object
                combined_img = stitch_pdf_pages(imgs)
                if combined_img:
                    images.append((dd, combined_img))
        except Exception as e:
            print(f"Failed to convert {path}: {e}")

    if not images:
        return

    # Canvas size: widest image, all heights, separators between images,
    # and one LABEL_HEIGHT band per image
    total_width = max(img.width for _, img in images)
    total_height = sum(img.height for _, img in images) + ((len(images) - 1) * SEPARATOR_HEIGHT)
    total_height += len(images) * LABEL_HEIGHT

    canvas = Image.new('RGB', (total_width, total_height), 'white')
    draw = ImageDraw.Draw(canvas)

    # Try loading a font, fallback to default
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 40)
    except IOError:
        print("font not found")
        font = ImageFont.load_default()

    y_offset = 0

    for i, (dd, img) in enumerate(images):
        # Draw separator if not first image
        if i > 0:
            # rectangle() includes both corner points, hence the -1: the bar
            # must cover exactly SEPARATOR_HEIGHT rows and not spill into the
            # label band that starts right below it
            draw.rectangle(
                [0, y_offset, total_width - 1, y_offset + SEPARATOR_HEIGHT - 1],
                fill='black',
            )
            y_offset += SEPARATOR_HEIGHT

        # Draw Text (dd)
        text = f"ID: {dd}"
        draw.text((10, y_offset + 5), text, fill='black', font=font)
        y_offset += LABEL_HEIGHT  # Space for text

        # Record Image Coordinates
        h_min = y_offset
        h_max = y_offset + img.height
        # identifier should be a label
        metadata.append((dd, h_min, h_max, img.width/total_width, identifier))

        # Draw Image (left-aligned)
        canvas.paste(img, (0, y_offset))
        y_offset += img.height

    target_folder = os.path.join(root_dir, identifier)
    os.makedirs(target_folder, exist_ok=True)

    # Save JSON metadata
    json_filename = f"Group_{group_index+1}.json"
    json_path = os.path.join(target_folder, json_filename)
    with open(json_path, 'w') as f:
        json.dump(metadata, f)

    # Save with size constraints: lower the JPEG quality in steps of 5
    # (90, 85, ..., 15) until the file fits MAX_FILE_SIZE_BYTES
    output_filename = f"Group_{group_index+1}.jpg"
    output_path = os.path.join(target_folder, output_filename)

    quality = 90
    while quality > 10:
        canvas.save(output_path, "JPEG", quality=quality, optimize=True)
        if os.path.getsize(output_path) <= MAX_FILE_SIZE_BYTES:
            if quality < 90:
                print("quality : ", quality)
            break
        quality -= 5
    else:
        # Loop ran out without a fit: the file from the last attempt is kept,
        # but say so instead of exceeding the limit silently
        print(
            f"Warning: {output_path} still exceeds "
            f"{MAX_FILE_SIZE_BYTES/1024/1024:.2f} MB at quality {quality + 5}"
        )

    print(f"Saved {output_path} with {len(group)} ({os.path.getsize(output_path)/1024/1024:.2f} MB)")

from utils import natural_key


def process_identifier(identifier, files_info, output_dir):
    """
    Rebuilds the output folder of one identifier and renders all its groups.

    identifier: name of the sub-folder under output_dir
    files_info: list of (dd, path, height) triples for this identifier
    output_dir: parent folder of the per-identifier output folders
    """
    destination = os.path.join(output_dir, identifier)

    # Wipe whatever a previous run left behind, then recreate the folder empty
    if os.path.exists(destination):
        shutil.rmtree(destination)
    os.makedirs(destination, exist_ok=True)

    # One JPEG (and JSON) per group, numbered in the order group_files returns them
    for position, members in enumerate(group_files(files_info)):
        create_jpg(identifier, position, members, output_dir)

def main():
    """
    Command-line entry point: python app.py <Path_to_Dir>

    Reads PDFs from <Path_to_Dir>/Copies, and writes one folder per
    identifier under <Path_to_Dir>/Par label, processing identifiers on a
    pool of 8 threads. Exits with status 1 when the directory argument is
    missing. Identifiers whose processing raised are listed on stderr once
    all work has finished.
    """
    if len(sys.argv) < 2:
        print("Usage: python app.py <Path_to_Dir>")
        sys.exit(1)

    root_dir = Path(sys.argv[1])

    copies_dir = root_dir / "Copies"
    par_label_dir = root_dir / "Par label"

    print("Scanning files...")
    data = collect_files(copies_dir)

    print(f"Found {len(data)} identifiers. Processing...")

    # Sort identifiers naturally
    sorted_identifiers = sorted(data.keys(), key=natural_key)

    # Process using 8 threads. The futures are kept: an exception raised in a
    # worker is stored on its future and would otherwise vanish unnoticed.
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = {
            identifier: executor.submit(process_identifier, identifier,
                                        data[identifier], par_label_dir)
            for identifier in sorted_identifiers
        }

    # Leaving the with-block waits for every task, so all futures are done here
    failed = 0
    for identifier, future in futures.items():
        error = future.exception()
        if error is not None:
            failed += 1
            print(f"Failed to process {identifier}: {error!r}", file=sys.stderr)
    if failed:
        print(f"{failed} identifier(s) failed.", file=sys.stderr)

    print("Done.")

if __name__ == "__main__":
    main()