import json
from datetime import datetime, timezone
from pathlib import Path
import uuid

def convert_to_label_studio_format(dls_jsonl_path, ls_json_path, input_folder, output_images_folder, ls_document_root, project_id=1):
    """
    Convert DLS (Data Labeling Service) JSONL annotations to Label Studio JSON.

    Each non-blank line of the DLS file is parsed as one JSON record and turned
    into one Label Studio task holding a single annotation. Every entity of the
    record's first annotation becomes one "choices" result whose ``item_index``
    is the entity's position in the list. The resulting list of tasks is
    written to ``ls_json_path`` as indented JSON.

    Document and page references are emitted as ``/data/local-files/?d=...``
    URLs whose path part is relative to ``ls_document_root``.

    Args:
        dls_jsonl_path (str): Path to the input DLS JSONL annotation file.
        ls_json_path (str): Path where the converted Label Studio JSON file will be saved.
        input_folder (str): Root directory containing the input documents (e.g., PDFs, images).
            Must be located at or below ``ls_document_root``.
        output_images_folder (str): Directory where the processed image files are or will be stored.
            Must be located at or below ``ls_document_root``.
        ls_document_root (str): The document root prefix expected by Label Studio for image references.
        project_id (int, optional): ID of the Label Studio project. Defaults to 1.

    Raises:
        ValueError: If ``input_folder`` or ``output_images_folder`` is not
            inside ``ls_document_root`` (raised by ``Path.relative_to``).
        json.JSONDecodeError: If a non-blank line of the input is not valid JSON.
        KeyError: If a record lacks ``sourceDetails.path`` or an entity of a
            PDF record lacks ``documentEntityMetadata.pageNumber``.

    Example:
        convert_to_label_studio_format(
            "annotations.dls.jsonl",
            "annotations.ls.json",
            "/datasets/input_pdfs",
            "/datasets/out_input_pdfs",
            "/datasets",
            project_id=2
        )
    """
    input_folder = Path(input_folder).resolve()
    output_images_folder = Path(output_images_folder).resolve()
    ls_document_root = Path(ls_document_root).resolve()

    # as_posix() keeps forward slashes in the generated URLs on every platform.
    input_folder_name = input_folder.relative_to(ls_document_root).as_posix()
    output_images_folder_name = output_images_folder.relative_to(ls_document_root).as_posix()

    # Blank lines (typically a trailing newline at the end of the file) are
    # skipped instead of being handed to json.loads, which would reject them.
    with open(dls_jsonl_path, 'r', encoding='utf-8') as infile:
        records = [json.loads(line) for line in infile if line.strip()]

    tasks = []
    # Task ids and annotation ids both start at 1 and advance together, so a
    # single counter serves for both.
    for task_id, record in enumerate(records, start=1):
        doc_path = record['sourceDetails']['path']
        doc_name = Path(doc_path).stem
        doc_ext = Path(doc_path).suffix.lower()

        # Only the first annotation of a record is converted. A record without
        # annotations yields a task whose annotation has an empty result list.
        annotations = record.get('annotations') or []
        entities = (annotations[0].get('entities') or []) if annotations else []

        # Prepare document and pages paths
        document_url = f"/data/local-files/?d={input_folder_name}/{doc_path}"
        if doc_ext == '.pdf':
            # One page image reference per entity, taken from the entity's page number.
            pages = [
                f"/data/local-files/?d={output_images_folder_name}/{doc_name}/page_{entity['documentEntityMetadata']['pageNumber']}.png"
                for entity in entities
            ]
        else:
            # Non-PDF documents are referenced directly as their own single page.
            pages = [document_url]

        # Build annotation results
        results = []
        for idx, entity in enumerate(entities):
            labels = entity.get('labels')
            label_name = labels[0]['label_name'] if labels else 'unknown'
            results.append({
                "value": {
                    "choices": [label_name]
                },
                "id": uuid.uuid4().hex[:10],
                "from_name": "choices",
                "to_name": "pdf",
                "type": "choices",
                "origin": "manual",
                "item_index": idx
            })

        current_time = datetime.now(timezone.utc).isoformat()

        task = {
            "id": task_id,
            "annotations": [
                {
                    "id": task_id,
                    "completed_by": 1,
                    "result": results,
                    "was_cancelled": False,
                    "ground_truth": False,
                    "created_at": current_time,
                    "updated_at": current_time,
                    "draft_created_at": current_time,
                    "lead_time": 10.0,
                    "prediction": {},
                    "result_count": len(results),
                    "unique_id": str(uuid.uuid4()),
                    "import_id": None,
                    "last_action": None,
                    "bulk_created": False,
                    "task": task_id,
                    "project": project_id,
                    "updated_by": 1,
                    "parent_prediction": None,
                    "parent_annotation": None,
                    "last_created_by": None
                }
            ],
            "file_upload": "generated-file.json",
            "drafts": [],
            "predictions": [],
            "data": {
                "document": document_url,
                "pages": pages,
                "ls_document_root": str(ls_document_root)
            },
            "meta": {},
            "created_at": current_time,
            "updated_at": current_time,
            # NOTE(review): inner_id is the same constant for every task;
            # confirm whether Label Studio expects a per-task value here.
            "inner_id": 1,
            "total_annotations": 1,
            "cancelled_annotations": 0,
            "total_predictions": 0,
            "comment_count": 0,
            "unresolved_comment_count": 0,
            "last_comment_updated_at": None,
            "project": project_id,
            "updated_by": 1,
            "comment_authors": []
        }

        tasks.append(task)

    with open(ls_json_path, 'w', encoding='utf-8') as outfile:
        json.dump(tasks, outfile, indent=2)

    print(f"Conversion completed. Output written to {ls_json_path}")

## Example usage
## Replace the following paths and filenames with your actual files and directories.
## input_folder and output_images_folder must be located inside ls_document_root.
ls_document_root=r"/home/raraushk/LS_integration/label_studio/datasets"
input_folder=r"/home/raraushk/LS_integration/label_studio/datasets/input_pdfs"
output_images_folder=r"/home/raraushk/LS_integration/label_studio/datasets/output_images"
dls_jsonl_path=r"/home/raraushk/LS_integration/label_studio/annotation_records.jsonl"
ls_json_path=r"ls_annotations.json"

# Run the example only when this file is executed as a script, so that
# importing the module does not read or write any files.
if __name__ == "__main__":
    convert_to_label_studio_format(dls_jsonl_path, ls_json_path, input_folder, output_images_folder, ls_document_root)