# [PTDT-4832] Prelabels for text subclasses under global text classes by lgluszek · Pull Request #2040 · Labelbox/labelbox-python

import json
import os
from uuid import uuid4

import labelbox as lb
import labelbox.types as lb_types
from labelbox.schema.tool_building.types import FeatureSchemaAttribute

# Initialize the Labelbox client from environment configuration.
# Both variables are required; a missing one raises KeyError up front
# rather than failing later with an opaque auth error.
api_key = os.environ["API_KEY"]
endpoint = os.environ["ENDPOINT"]
client = lb.Client(api_key=api_key, endpoint=endpoint)

# Step 1: Define the data row JSON payload.
# This is a v2 model-chat-evaluation conversation: a single human turn
# (an image attachment plus a question) answered by a single model turn.
print("Defining data row...")

# Human turn: references the data-row attachment "image_0" and asks the question.
_user_turn = {
    "actorId": "user1",
    "content": [
        {"type": "dataRowAttachment", "attachmentName": "image_0"},
        {"type": "text", "content": "**Question:**\n\nDescribe the visual elements and composition of this image."},
    ],
    "childMessageIds": ["message2"],
}

# Model turn: the assistant's long-form description of the image (leaf message).
_model_turn = {
    "actorId": "model1",
    "content": [{"type": "text", "content": "The image displays an interior space, likely an art gallery, with a polished wooden floor and white walls. There are three large paintings prominently featured on the walls. The left painting has a predominantly turquoise color with white and black strokes, depicting abstract forms. The central painting is smaller and has a darker, muted color palette with indistinct forms. The right painting is similar in size to the left one and features abstract figures in blue and white tones.\n\nAbove the paintings, there are multiple spotlights affixed to the ceiling, providing illumination to the artworks. The ceiling is industrial in style, with visible ductwork and a series of recessed lights. The gallery space is well-lit, with the lighting creating a warm ambiance.\n\nThe text \"artasiapacific A Gift Like Kung Pao Chicken Xie Nanxing\" is overlaid on the image at the top, suggesting the title of the artwork or exhibition and the artist's name. The gallery's name, \"artasiapacific,\" is also visible, indicating the venue or the organizer of the exhibition. The overall composition of the image is balanced, with the artworks evenly spaced and the spotlights symmetrically arranged."}],
    "childMessageIds": [],
}

row_data = {
    "type": "application/vnd.labelbox.conversational.model-chat-evaluation",
    "version": 2,
    "actors": {
        "user1": {"role": "human", "metadata": {"name": "User"}},
        "model1": {"role": "model", "metadata": {"modelConfigName": "Assistant"}},
    },
    "messages": {"message1": _user_turn, "message2": _model_turn},
    "rootMessageIds": ["message1"],
}

# Step 2: Create dataset and upload data row
print("Creating dataset...")
dataset = client.create_dataset(name="AI Automotive Assistant Tool Verification Dataset")
print(f"Created dataset: {dataset.name} (ID: {dataset.uid})")

# Unique per run so re-running the script never collides on global_key.
global_key = f"automotive-tool-verification-{uuid4()}"

print("Uploading data row...")
# row_data must be serialized to a JSON string for conversational assets.
task = dataset.create_data_rows(
    [{"global_key": global_key, "row_data": json.dumps(row_data)}]
)
task.wait_till_done()
print(f"Data row uploaded with global_key: {global_key}")

# Get the data row ID for prelabels.
# NOTE(review): this assumes the dataset holds exactly the one row just
# uploaded — confirm if the dataset may be reused across runs.
uploaded_rows = list(dataset.data_rows())
data_row_id = uploaded_rows[0].uid
print(f"Data row ID: {data_row_id}")

# Step 3: Create an offline model-evaluation (MMC) project to host the
# prelabels; "offline" means conversations are uploaded as data rows rather
# than generated live.
print("\nCreating offline MMC project...")
project = client.create_offline_model_evaluation_project(
    name="MMC Offline Prelabels Project 2",
    description="Offline MMC project with nested ontology and prelabels demo",
)
print(f"Created project: {project.name} (ID: {project.uid})")

# Step 4: Create ontology with nested text classifications.
# Built bottom-up: attribute -> nested text classification -> global text
# classification -> builder.
print("\nCreating ontology...")

# Free-text instructions shown to labelers on the nested criteria field.
_rubric_instructions_attr = FeatureSchemaAttribute(
    attributeName="instructions",
    attributeValue="For this competitor response please respond in a JSON/dictionary format that has the key-value pairs as criterion name (copied from above) and PASS/FAIL\n\ne.g.\n\n{\nwrites in childish tone: PASS,\noutput in haiku: FAIL,\nsummarises main points of conversation: PASS\n}",
)

# Text subclass nested under the global text classification.
_rubric_criteria = lb.Classification(
    class_type=lb.Classification.Type.TEXT,
    name="non_verifiable_rubric_criteria",
    instructions="Non-Verifiable Rubric Criteria",
    required=True,
    attributes=[_rubric_instructions_attr],
)

# Global-scope text classification that carries the nested subclass.
_competitor_response = lb.Classification(
    class_type=lb.Classification.Type.TEXT,
    name="competitor_a_response_1",
    instructions="Competitor_A_Response_1",
    scope=lb.Classification.Scope.GLOBAL,
    required=True,
    options=[_rubric_criteria],
)

ontology_builder = lb.OntologyBuilder(tools=[], classifications=[_competitor_response])

ontology = client.create_ontology(
    "AI Automotive Assistant Tool Verification Ontology",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Conversational,
    ontology_kind=lb.OntologyKind.ModelEvaluation,
)
print(f"Created ontology: {ontology.name} (ID: {ontology.uid})")

# Step 5: Attach the ontology so the project exposes its classifications.
print("\nConnecting ontology to project...")
project.connect_ontology(ontology)
print("Successfully connected ontology to project!")

# Step 6: Batch the uploaded row into the project (looked up by global key).
print("\nCreating batch...")
project.create_batch(
    name="prelabels-batch",
    global_keys=[global_key],
    priority=1,  # highest labeling priority
)
print("Batch created successfully!")

# Step 7: Create prelabels
print("\nCreating prelabels...")

# NOTE(review): these annotation names match the ontology classifications'
# `instructions` display text ("Competitor_A_Response_1", "Non-Verifiable
# Rubric Criteria"), not their `name` fields — confirm MAL import matches on
# display name for this ontology before relying on it.
_rubric_annotation = lb_types.ClassificationAnnotation(
    name="Non-Verifiable Rubric Criteria",
    value=lb_types.Text(answer="{\nwrites in childish tone: PASS,\noutput in haiku: FAIL\n}"),
)

# Global text answer carrying the nested rubric answer as a subclassification.
competitor_label = lb_types.ClassificationAnnotation(
    name="Competitor_A_Response_1",
    value=lb_types.Text(
        answer="Sample competitor response text",
        classifications=[_rubric_annotation],
    ),
)

labels = [
    lb_types.Label(data={"uid": data_row_id}, annotations=[competitor_label])
]

# Import the prelabels via Model-Assisted Labeling (MAL); the random suffix
# keeps import names unique across runs.
upload_job = lb.MALPredictionImport.create_from_objects(
    client=client,
    project_id=str(project.uid),
    name="import-mmc-labels-" + str(uuid4()),
    predictions=labels,
)
upload_job.wait_till_done()

print(f"\nPrelabel upload complete!")
print(f"Errors: {upload_job.errors}")

# Fail loudly on any import error. A plain `assert` is stripped under
# `python -O`, and `len(...)` would raise TypeError if `errors` is None,
# so use an explicit truthiness check and raise.
if upload_job.errors:
    raise RuntimeError(f"Upload had errors: {upload_job.errors}")

print(f"Statuses: {upload_job.statuses}")

# Print a closing summary of every resource created by this run.
_banner = "=" * 50
print("\n" + _banner)
print("SUMMARY")
print(_banner)
for _field, _value in (
    ("Dataset ID", dataset.uid),
    ("Data Row ID", data_row_id),
    ("Project ID", project.uid),
    ("Ontology ID", ontology.uid),
):
    print(f"{_field}: {_value}")

# (PR template remnant) Please delete options that are not relevant.