Capture and annotate images for training

Prerequisites

A machine connected to Viam
Add a new machine on Viam. On the machine’s page, follow the setup instructions to install viam-server on the computer you’re using for your project. Wait until your machine has successfully connected to Viam.
A camera, connected to your machine, to capture images
Follow the guide to configure a webcam or similar camera component.

Capture images

Capture individual images

You can add images to a dataset directly from a camera or vision service feed in the machine’s CONTROL or CONFIGURATION tabs.

To add an image directly to a dataset from a visual feed, complete the following steps:

  1. Open the TEST panel of any camera component or vision service to view a feed of images from the camera.

  2. Click the button marked with the camera icon to save the currently displayed image to a dataset:

    A button marked with the outline of a camera, emphasized in red
  3. Select an existing dataset.

  4. Click Add to add the image to the selected dataset.

  5. When you see a success notification that reads “Saved image to dataset”, you have successfully added the image to the dataset.

To view images added to your dataset, go to the DATA page, open the DATASETS tab, then select your dataset.

To capture an image and add it to your DATA page, fetch an image from your camera through your machine. Pass that image and an appropriate set of metadata to data_client.binary_data_capture_upload:

import asyncio
import sys

from datetime import datetime, timezone
from viam.rpc.dial import DialOptions, Credentials
from viam.app.viam_client import ViamClient
from viam.components.camera import Camera
from viam.robot.client import RobotClient

# Configuration constants – replace with your actual values
API_KEY = ""  # API key, find or create in your organization settings
API_KEY_ID = ""  # API key ID, find or create in your organization settings
MACHINE_ADDRESS = ""  # the address of the machine you want to capture images from
CAMERA_NAME = ""  # the name of the camera you want to capture images from
PART_ID = ""  # the part ID of the binary data you want to add to the dataset

async def connect() -> ViamClient:
    """Establish a connection to the Viam client using API credentials."""
    dial_options = DialOptions(
        credentials=Credentials(
            type="api-key",
            payload=API_KEY,
        ),
        auth_entity=API_KEY_ID
    )
    return await ViamClient.create_from_dial_options(dial_options)

async def connect_machine() -> RobotClient:
    """Establish a connection to the robot using the robot address."""
    machine_opts = RobotClient.Options.with_api_key(
        api_key=API_KEY,
        api_key_id=API_KEY_ID
    )
    return await RobotClient.at_address(MACHINE_ADDRESS, machine_opts)

async def main() -> int:
    """Capture one image from the camera and upload it to Viam.

    Returns:
        Process exit code: 0 on success.
    """
    viam_client = await connect()
    machine_client = await connect_machine()
    try:
        data_client = viam_client.data_client
        camera = Camera.from_robot(machine_client, CAMERA_NAME)

        # Capture image
        image_frame = await camera.get_image()

        # Upload image. Use a single timezone-aware timestamp for both request
        # times: datetime.utcnow() is deprecated since Python 3.12.
        capture_time = datetime.now(timezone.utc)
        file_id = await data_client.binary_data_capture_upload(
            part_id=PART_ID,
            component_type="camera",
            component_name=CAMERA_NAME,
            method_name="GetImage",
            data_request_times=[capture_time, capture_time],
            file_extension=".jpg",
            binary_data=image_frame.data
        )
        print(f"Uploaded image: {file_id}")
    finally:
        # Always release both connections, even if capture or upload raises;
        # the original leaked them on any exception.
        viam_client.close()
        await machine_client.close()

    return 0

if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code; the original
    # discarded it.
    sys.exit(asyncio.run(main()))

To capture an image and add it to your DATA page, fetch an image from your camera through your machine. Pass that image and an appropriate set of metadata to DataClient.BinaryDataCaptureUpload:

package main

import (
	"context"
	"fmt"
	"time"

	"go.viam.com/rdk/app"
	"go.viam.com/rdk/components/camera"
	"go.viam.com/rdk/logging"
	"go.viam.com/rdk/robot/client"
	"go.viam.com/rdk/utils"
	"go.viam.com/utils/rpc"
)


func main() {
	apiKey := ""
	apiKeyID := ""
	machineAddress := ""
	cameraName := ""
	partID := ""


	logger := logging.NewDebugLogger("client")
	ctx := context.Background()

	viamClient, err := app.CreateViamClientWithAPIKey(
		ctx, app.Options{}, apiKey, apiKeyID, logger)
	if err != nil {
		logger.Fatal(err)
	}
	defer viamClient.Close()

	machine, err := client.New(
		context.Background(),
		machineAddress,
		logger,
		client.WithDialOptions(rpc.WithEntityCredentials(
			apiKeyID,
			rpc.Credentials{
				Type:    rpc.CredentialsTypeAPIKey,
				Payload: apiKey,
			})),
	)
	if err != nil {
		logger.Fatal(err)
	}

	// Capture image from camera
	cam, err := camera.FromRobot(machine, cameraName)
	if err != nil {
		logger.Fatal(err)
	}

	image, _, err := cam.Image(ctx, utils.MimeTypeJPEG, nil)
	if err != nil {
		logger.Fatal(err)
	}

	// Upload image to Viam
	dataClient := viamClient.DataClient()

	binaryDataID, err := dataClient.BinaryDataCaptureUpload(
		ctx,
		image,
		partID,
		"camera",
		cameraName,
		"GetImage",
		".jpg",
		&app.BinaryDataCaptureUploadOptions{
			DataRequestTimes: &[2]time.Time{time.Now(), time.Now()},
		},
	)
	if err != nil {
		logger.Fatal(err)
	}

	fmt.Printf("Uploaded image: %s\n", binaryDataID)

}

To capture an image and add it to your DATA page, fetch an image from your camera through your machine. Pass that image and an appropriate set of metadata to dataClient.binaryDataCaptureUpload:

import { createViamClient, createRobotClient, RobotClient, CameraClient } from "@viamrobotics/sdk";

// Configuration constants – replace with your actual values.
// Declared const: these are never reassigned.
const API_KEY = "";  // API key, find or create in your organization settings
const API_KEY_ID = "";  // API key ID, find or create in your organization settings
const MACHINE_ADDRESS = "";  // the address of the machine you want to capture images from
const CAMERA_NAME = "";  // the name of the camera you want to capture images from
const PART_ID = "";  // the part ID of the binary data you want to add to the dataset


async function connect(): Promise<any> {
    // Establish a connection to the Viam app client using API credentials.
    return await createViamClient({
        credentials: {
            type: "api-key",
            authEntity: API_KEY_ID,
            payload: API_KEY,
        },
    });
}

async function connectMachine(): Promise<RobotClient> {
    // Establish a connection to the robot using the machine address.
    return await createRobotClient({
        host: MACHINE_ADDRESS,
        credentials: {
            type: 'api-key',
            payload: API_KEY,
            authEntity: API_KEY_ID,
        },
        signalingAddress: 'https://app.viam.com:443',
    });
}

async function main(): Promise<number> {
    const viamClient = await connect();
    const dataClient = viamClient.dataClient;
    const machine = await connectMachine();

    try {
        const camera = new CameraClient(machine, CAMERA_NAME);

        // Capture image
        const imageFrame = await camera.getImage();

        // Upload image, using one timestamp for both request start and end.
        const captureTime = new Date();
        const fileId = await dataClient.binaryDataCaptureUpload(
            imageFrame,
            PART_ID,
            "camera",
            CAMERA_NAME,
            "GetImage",
            ".jpg",
            [captureTime, captureTime]
        );
        console.log(`Uploaded image: ${fileId}`);

        return 0;
    } finally {
        // Close the robot connection so the process can exit cleanly;
        // otherwise the open session may keep it alive.
        await machine.disconnect();
    }
}

// Propagate main()'s numeric return value as the process exit code; the
// original discarded it.
main()
    .then((code) => process.exit(code))
    .catch((error) => {
        console.error("Script failed:", error);
        process.exit(1);
    });

Once you’ve captured enough images for training, you must annotate the images before you can use them to train a model.

Capture images over time

To capture a large number of images for training an ML model, use the data management service to capture and sync image data from your camera.

When you sync with data management, Viam stores the images saved by capture and sync on the DATA page, but does not add the images to a dataset. To use your captured images for training, add the images to a dataset and annotate them, so you can use them to train a model.

You can either manually add annotations through the Viam web UI, or add annotations with an existing ML model.

Annotate images

You must annotate images in order to train an ML model on them. Viam supports two ways to annotate an image:

Add tags to an image

Use tags to add metadata about an entire image, for example if the quality of a manufacturing output is good or bad.

The DATA page provides an interface for annotating images.

To tag an image:

  1. Click on an image, then click the + next to the Tags option.

  2. Add one or more tags to your image.

Repeat these steps for all images in the dataset.

Use an ML model to generate tags for an image or set of images. Then, pass the tags and image IDs to data_client.add_tags_to_binary_data_by_ids:

import asyncio
import sys

from viam.rpc.dial import DialOptions, Credentials
from viam.app.viam_client import ViamClient
from viam.media.video import ViamImage
from viam.robot.client import RobotClient
from viam.services.vision import VisionClient

# Configuration constants – replace with your actual values
API_KEY = ""  # API key, find or create in your organization settings
API_KEY_ID = ""  # API key ID, find or create in your organization settings
MACHINE_ADDRESS = ""  # the address of the machine you want to capture images from
CLASSIFIER_NAME = ""  # the name of the classifier you want to use
BINARY_DATA_ID = ""  # the ID of the image you want to label


async def connect() -> ViamClient:
    """Establish a connection to the Viam client using API credentials."""
    dial_options = DialOptions(
        credentials=Credentials(
            type="api-key",
            payload=API_KEY,
        ),
        auth_entity=API_KEY_ID
    )
    return await ViamClient.create_from_dial_options(dial_options)

async def connect_machine() -> RobotClient:
    """Establish a connection to the robot using the robot address."""
    machine_opts = RobotClient.Options.with_api_key(
        api_key=API_KEY,
        api_key_id=API_KEY_ID
    )
    return await RobotClient.at_address(MACHINE_ADDRESS, machine_opts)

async def main() -> int:
    """Classify a stored image and write the labels back as tags.

    Returns:
        Process exit code: 0 on success, 1 if the classifier found no tags.
    """
    viam_client = await connect()
    machine = await connect_machine()
    try:
        data_client = viam_client.data_client
        classifier = VisionClient.from_robot(machine, CLASSIFIER_NAME)

        # Get image from data in Viam
        data = await data_client.binary_data_by_ids([BINARY_DATA_ID])
        binary_data = data[0]

        # Convert binary data to ViamImage, reusing its stored MIME type
        mime_type = binary_data.metadata.capture_metadata.mime_type
        image = ViamImage(binary_data.binary, mime_type)

        # Get up to two classifications for the image
        tags = await classifier.get_classifications(
            image=image, image_format=mime_type, count=2)

        if not tags:
            print("No tags found")
            return 1

        for tag in tags:
            await data_client.add_tags_to_binary_data_by_ids(
                tags=[tag.class_name],
                binary_ids=[BINARY_DATA_ID]
            )
            # Print the label itself; interpolating the whole Classification
            # object printed its repr in the original.
            print(f"Added tag to image: {tag.class_name}")
    finally:
        # Always release both connections — the original leaked them on the
        # early "no tags" return path.
        viam_client.close()
        await machine.close()

    return 0

if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    sys.exit(asyncio.run(main()))

Use an ML model to generate tags for an image or set of images. Then, pass the tags and image IDs to DataClient.AddTagsToBinaryDataByIDs:

package main

import (
	"context"
	"fmt"
	"image/jpeg"
	"bytes"

	"go.viam.com/rdk/app"
	"go.viam.com/rdk/logging"
	"go.viam.com/rdk/robot/client"
	"go.viam.com/rdk/services/vision"
	"go.viam.com/utils/rpc"
)


func main() {
	apiKey := ""
	apiKeyID := ""
	machineAddress := ""
	classifierName := ""
	binaryDataID := ""


	logger := logging.NewDebugLogger("client")
	ctx := context.Background()

	viamClient, err := app.CreateViamClientWithAPIKey(
		ctx, app.Options{}, apiKey, apiKeyID, logger)
	if err != nil {
		logger.Fatal(err)
	}
	defer viamClient.Close()

	machine, err := client.New(
		context.Background(),
		machineAddress,
		logger,
		client.WithDialOptions(rpc.WithEntityCredentials(
			apiKeyID,
			rpc.Credentials{
				Type:    rpc.CredentialsTypeAPIKey,
				Payload: apiKey,
			})),
	)

	if err != nil {
		logger.Fatal(err)
	}

	dataClient := viamClient.DataClient()


	data, err := dataClient.BinaryDataByIDs(ctx, []string{binaryDataID})
	if err != nil {
		logger.Fatal(err)
	}
	binaryData := data[0]

	// Convert binary data to image.Image
	img, err := jpeg.Decode(bytes.NewReader(binaryData.Binary))
	if err != nil {
		logger.Fatal(err)
	}

	// Get classifications using the image
	classifier, err := vision.FromRobot(machine, classifierName)
	if err != nil {
		logger.Fatal(err)
	}

	classifications, err := classifier.Classifications(ctx, img, 2, nil)
	if err != nil {
		logger.Fatal(err)
	}

    if len(classifications) == 0 {
        logger.Fatal(err)
    } else {
        for _, classification := range classifications {
			err := dataClient.AddTagsToBinaryDataByIDs(ctx, []string{classification.Label()}, []string{binaryDataID})
			if err != nil {
				logger.Fatal(err)
			}
			fmt.Printf("Added tag to image: %s\n", classification.Label())
		}
	}

}

Use an ML model to generate tags for an image or set of images. Then, pass the tags and image IDs to dataClient.addTagsToBinaryDataByIds:

import { createViamClient, createRobotClient, RobotClient, VisionClient } from "@viamrobotics/sdk";

// Configuration constants – replace with your actual values.
// Declared const: these are never reassigned.
const API_KEY = "";  // API key, find or create in your organization settings
const API_KEY_ID = "";  // API key ID, find or create in your organization settings
const MACHINE_ADDRESS = "";  // the address of the machine you want to capture images from
const CLASSIFIER_NAME = "";  // the name of the classifier you want to use
const BINARY_DATA_ID = "";  // the ID of the image you want to label

async function connect(): Promise<any> {
    // Establish a connection to the Viam app client using API credentials.
    return await createViamClient({
        credentials: {
            type: "api-key",
            authEntity: API_KEY_ID,
            payload: API_KEY,
        },
    });
}

async function connectMachine(): Promise<RobotClient> {
    // Establish a connection to the robot using the machine address.
    return await createRobotClient({
        host: MACHINE_ADDRESS,
        credentials: {
            type: 'api-key',
            payload: API_KEY,
            authEntity: API_KEY_ID,
        },
        signalingAddress: 'https://app.viam.com:443',
    });
}

async function main(): Promise<number> {
    const viamClient = await connect();
    const dataClient = viamClient.dataClient;
    const machine = await connectMachine();

    try {
        const classifier = new VisionClient(machine, CLASSIFIER_NAME);

        // Get image from data in Viam
        const data = await dataClient.binaryDataByIds([BINARY_DATA_ID]);
        const binaryData = data[0];

        // The stored binary payload; should be a Uint8Array.
        const image = binaryData.binary;

        // Get up to two classifications for the image.
        const tags = await classifier.getClassifications(
            image,
            binaryData.metadata.captureMetadata.width ?? 0,
            binaryData.metadata.captureMetadata.height ?? 0,
            binaryData.metadata.captureMetadata.mimeType ?? "",
            2
        );

        if (tags.length === 0) {
            console.log("No tags found");
            return 1;
        }

        for (const tag of tags) {
            await dataClient.addTagsToBinaryDataByIds(
                [tag.className ?? ""],
                [BINARY_DATA_ID]
            );
            console.log(`Added tag to image: ${tag.className}`);
        }

        return 0;
    } finally {
        // Close the robot connection so the process can exit cleanly;
        // the original left it open.
        await machine.disconnect();
    }
}

// Propagate main()'s numeric return value as the process exit code; the
// original discarded it.
main()
    .then((code) => process.exit(code))
    .catch((error) => {
        console.error("Script failed:", error);
        process.exit(1);
    });

Once you’ve annotated your dataset, you can train an ML model to make inferences.

Label objects within an image

Use labels to add metadata about objects within an image, for example by drawing bounding boxes around each bicycle in a street scene and adding the bicycle label.

The DATA page provides an interface for annotating images.

To label an object with a bounding box:

  1. Click on an image, then click the Annotate button in the right-side menu.

  2. Choose an existing label or create a new label.

  3. Holding the command key (on macOS), or the control key (on Linux and Windows), click and drag on the image to create the bounding box:

Repeat these steps for all images in the dataset.

Use an ML model to generate bounding boxes for an image. Then, separately pass each bounding box and the image ID to data_client.add_bounding_box_to_image_by_id:

import asyncio
import sys

from viam.rpc.dial import DialOptions, Credentials
from viam.app.viam_client import ViamClient
from viam.media.video import ViamImage
from viam.robot.client import RobotClient
from viam.services.vision import VisionClient

# Configuration constants – replace with your actual values
API_KEY = ""  # API key, find or create in your organization settings
API_KEY_ID = ""  # API key ID, find or create in your organization settings
MACHINE_ADDRESS = ""  # the address of the machine you want to capture images from
DETECTOR_NAME = ""  # the name of the detector you want to use
BINARY_DATA_ID = ""  # the ID of the image you want to label


async def connect() -> ViamClient:
    """Establish a connection to the Viam client using API credentials."""
    dial_options = DialOptions(
        credentials=Credentials(
            type="api-key",
            payload=API_KEY,
        ),
        auth_entity=API_KEY_ID
    )
    return await ViamClient.create_from_dial_options(dial_options)

async def connect_machine() -> RobotClient:
    """Establish a connection to the robot using the robot address."""
    machine_opts = RobotClient.Options.with_api_key(
        api_key=API_KEY,
        api_key_id=API_KEY_ID
    )
    return await RobotClient.at_address(MACHINE_ADDRESS, machine_opts)

async def main() -> int:
    """Run a detector on a stored image and upload its bounding boxes.

    Returns:
        Process exit code: 0 on success, 1 if the detector found nothing.
    """
    viam_client = await connect()
    machine = await connect_machine()
    try:
        data_client = viam_client.data_client
        detector = VisionClient.from_robot(machine, DETECTOR_NAME)

        # Get image from data in Viam
        data = await data_client.binary_data_by_ids([BINARY_DATA_ID])
        binary_data = data[0]

        # Convert binary data to ViamImage, reusing its stored MIME type
        mime_type = binary_data.metadata.capture_metadata.mime_type
        image = ViamImage(binary_data.binary, mime_type)

        # Get detections using the image
        detections = await detector.get_detections(
            image=image, image_format=mime_type)

        if not detections:
            print("No detections found")
            return 1

        for detection in detections:
            # Skip bounding boxes too small to be useful
            if (detection.x_max_normalized - detection.x_min_normalized <= 0.01
                    or detection.y_max_normalized - detection.y_min_normalized <= 0.01):
                continue
            bbox_id = await data_client.add_bounding_box_to_image_by_id(
                binary_id=BINARY_DATA_ID,
                label=detection.class_name,
                x_min_normalized=detection.x_min_normalized,
                y_min_normalized=detection.y_min_normalized,
                x_max_normalized=detection.x_max_normalized,
                y_max_normalized=detection.y_max_normalized
            )
            print(f"Added bounding box to image: {bbox_id}")
    finally:
        # Always release both connections — the original leaked them on the
        # early "no detections" return path.
        viam_client.close()
        await machine.close()

    return 0

if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    sys.exit(asyncio.run(main()))

Use an ML model to generate bounding boxes for an image. Then, separately pass each bounding box and the image ID to DataClient.AddBoundingBoxToImageByID:

package main

import (
	"context"
	"fmt"
	"image/jpeg"
	"bytes"

	"go.viam.com/rdk/app"
	"go.viam.com/rdk/logging"
	"go.viam.com/rdk/robot/client"
	"go.viam.com/rdk/services/vision"
	"go.viam.com/utils/rpc"
)


func main() {
	apiKey := ""
	apiKeyID := ""
	machineAddress := ""
	detectorName := ""
	binaryDataID := ""


	logger := logging.NewDebugLogger("client")
	ctx := context.Background()

	viamClient, err := app.CreateViamClientWithAPIKey(
		ctx, app.Options{}, apiKey, apiKeyID, logger)
	if err != nil {
		logger.Fatal(err)
	}
	defer viamClient.Close()

	machine, err := client.New(
		context.Background(),
		machineAddress,
		logger,
		client.WithDialOptions(rpc.WithEntityCredentials(
			apiKeyID,
			rpc.Credentials{
				Type:    rpc.CredentialsTypeAPIKey,
				Payload: apiKey,
			})),
	)
	if err != nil {
		logger.Fatal(err)
	}

	dataClient := viamClient.DataClient()


	detector, err := vision.FromRobot(machine, detectorName)
	if err != nil {
		logger.Fatal(err)
	}

	data, err := dataClient.BinaryDataByIDs(ctx, []string{binaryDataID})
	if err != nil {
		logger.Fatal(err)
	}
	binaryData := data[0]

	// Convert binary data to image.Image
	img, err := jpeg.Decode(bytes.NewReader(binaryData.Binary))
	if err != nil {
		logger.Fatal(err)
	}

	// Get detections using the image
	detections, err := detector.Detections(ctx, img, nil)
	if err != nil {
		logger.Fatal(err)
	}

    if len(detections) == 0 {
        logger.Fatal(err)
    } else {
        for _, detection := range detections {
            // Ensure bounding box is big enough to be useful
			if float64(detection.NormalizedBoundingBox()[2]-detection.NormalizedBoundingBox()[0]) <= 0.01 ||
				float64(detection.NormalizedBoundingBox()[3]-detection.NormalizedBoundingBox()[1]) <= 0.01 {
				continue
			}
			bboxID, err := dataClient.AddBoundingBoxToImageByID(
				ctx,
				binaryDataID,
				detection.Label(),
				float64(detection.NormalizedBoundingBox()[0]),
				float64(detection.NormalizedBoundingBox()[1]),
				float64(detection.NormalizedBoundingBox()[2]),
				float64(detection.NormalizedBoundingBox()[3]),
			)
			if err != nil {
				logger.Fatal(err)
			}
			fmt.Printf("Added bounding box to image: %s\n", bboxID)
		}
	}

}

Use an ML model to generate bounding boxes for an image. Then, separately pass each bounding box and the image ID to dataClient.addBoundingBoxToImageById:

import { createViamClient, createRobotClient, RobotClient, VisionClient } from "@viamrobotics/sdk";

// Configuration constants – replace with your actual values.
// Declared const: these are never reassigned.
const API_KEY = "";  // API key, find or create in your organization settings
const API_KEY_ID = "";  // API key ID, find or create in your organization settings
const MACHINE_ADDRESS = "";  // the address of the machine you want to capture images from
const DETECTOR_NAME = "";  // the name of the detector you want to use
const BINARY_DATA_ID = "";  // the ID of the image you want to label

async function connect(): Promise<any> {
    // Establish a connection to the Viam app client using API credentials.
    return await createViamClient({
        credentials: {
            type: "api-key",
            authEntity: API_KEY_ID,
            payload: API_KEY,
        },
    });
}

async function connectMachine(): Promise<RobotClient> {
    // Establish a connection to the robot using the machine address.
    return await createRobotClient({
        host: MACHINE_ADDRESS,
        credentials: {
            type: 'api-key',
            payload: API_KEY,
            authEntity: API_KEY_ID,
        },
        signalingAddress: 'https://app.viam.com:443',
    });
}

async function main(): Promise<number> {
    const viamClient = await connect();
    const dataClient = viamClient.dataClient;
    const machine = await connectMachine();

    try {
        const detector = new VisionClient(machine, DETECTOR_NAME);

        // Get image from data in Viam
        const data = await dataClient.binaryDataByIds([BINARY_DATA_ID]);
        const binaryData = data[0];

        // The stored binary payload; should be a Uint8Array.
        const image = binaryData.binary;

        // Get detections using the image.
        const detections = await detector.getDetections(
            image,
            binaryData.metadata.captureMetadata.width ?? 0,
            binaryData.metadata.captureMetadata.height ?? 0,
            binaryData.metadata.captureMetadata.mimeType ?? ""
        );

        if (detections.length === 0) {
            console.log("No detections found");
            return 1;
        }

        for (const detection of detections) {
            // Skip bounding boxes too small to be useful.
            if (detection.xMaxNormalized - detection.xMinNormalized <= 0.01 ||
                detection.yMaxNormalized - detection.yMinNormalized <= 0.01) {
                continue;
            }
            const bboxId = await dataClient.addBoundingBoxToImageById(
                BINARY_DATA_ID,
                detection.className,
                detection.xMinNormalized,
                detection.yMinNormalized,
                detection.xMaxNormalized,
                detection.yMaxNormalized
            );
            console.log(`Added bounding box to image: ${bboxId}`);
        }

        return 0;
    } finally {
        // Close the robot connection so the process can exit cleanly;
        // the original left it open.
        await machine.disconnect();
    }
}

// Propagate main()'s numeric return value as the process exit code; the
// original discarded it.
main()
    .then((code) => process.exit(code))
    .catch((error) => {
        console.error("Script failed:", error);
        process.exit(1);
    });

Once you’ve annotated your dataset, you can train an ML model to make inferences.

Capture, annotate, and add images to a dataset in a single script

The following example demonstrates how you can capture an image, use an ML model to generate annotations, and then add the image to a dataset. You can use this logic to expand and improve your datasets continuously over time. Check the annotation accuracy in the DATA tab, then re-train your ML model on the improved dataset to improve the ML model.

import asyncio
import sys

from datetime import datetime, timezone
from viam.rpc.dial import DialOptions, Credentials
from viam.app.viam_client import ViamClient
from viam.components.camera import Camera
from viam.media.video import ViamImage
from viam.robot.client import RobotClient
from viam.services.vision import VisionClient

# Configuration constants – replace with your actual values
API_KEY = ""  # API key, find or create in your organization settings
API_KEY_ID = ""  # API key ID, find or create in your organization settings
DATASET_ID = ""  # the ID of the dataset you want to add the image to
MACHINE_ADDRESS = ""  # the address of the machine you want to capture images from
CLASSIFIER_NAME = ""  # the name of the classifier you want to use
CAMERA_NAME = ""  # the name of the camera you want to capture images from
PART_ID = ""  # the part ID of the binary data you want to add to the dataset


async def connect() -> ViamClient:
    """Establish a connection to the Viam client using API credentials."""
    dial_options = DialOptions(
        credentials=Credentials(
            type="api-key",
            payload=API_KEY,
        ),
        auth_entity=API_KEY_ID
    )
    return await ViamClient.create_from_dial_options(dial_options)

async def connect_machine() -> RobotClient:
    """Establish a connection to the robot using the robot address."""
    machine_opts = RobotClient.Options.with_api_key(
        api_key=API_KEY,
        api_key_id=API_KEY_ID
    )
    return await RobotClient.at_address(MACHINE_ADDRESS, machine_opts)

async def main() -> int:
    """Capture an image, upload it, auto-tag it, and add it to a dataset.

    Returns:
        Process exit code: 0 on success, 1 if the classifier found no tags.
    """
    viam_client = await connect()
    machine = await connect_machine()
    try:
        data_client = viam_client.data_client
        camera = Camera.from_robot(machine, CAMERA_NAME)
        classifier = VisionClient.from_robot(machine, CLASSIFIER_NAME)

        # Capture image
        image_frame = await camera.get_image()

        # Upload data. Use a single timezone-aware timestamp for both request
        # times: datetime.utcnow() is deprecated since Python 3.12.
        capture_time = datetime.now(timezone.utc)
        file_id = await data_client.binary_data_capture_upload(
            part_id=PART_ID,
            component_type="camera",
            component_name=CAMERA_NAME,
            method_name="GetImage",
            data_request_times=[capture_time, capture_time],
            file_extension=".jpg",
            binary_data=image_frame.data
        )
        print(f"Uploaded image: {file_id}")

        # Annotate image with a placeholder tag
        await data_client.add_tags_to_binary_data_by_ids(
            tags=["test"],
            binary_ids=[file_id]
        )

        # Get image from data in Viam
        data = await data_client.binary_data_by_ids([file_id])
        binary_data = data[0]

        # Convert binary data to ViamImage, reusing its stored MIME type
        mime_type = binary_data.metadata.capture_metadata.mime_type
        image = ViamImage(binary_data.binary, mime_type)

        # Get up to two tags using the image
        tags = await classifier.get_classifications(
            image=image, image_format=mime_type, count=2)

        if not tags:
            print("No tags found")
            return 1

        for tag in tags:
            await data_client.add_tags_to_binary_data_by_ids(
                tags=[tag.class_name],
                binary_ids=[file_id]
            )
            # Print the label itself; interpolating the whole Classification
            # object printed its repr in the original.
            print(f"Added tag to image: {tag.class_name}")

        print("Adding image to dataset...")
        await data_client.add_binary_data_to_dataset_by_ids(
            binary_ids=[file_id],
            dataset_id=DATASET_ID
        )
        print(f"Added image to dataset: {file_id}")
    finally:
        # Always release both connections — the original leaked them on the
        # early "no tags" return path.
        viam_client.close()
        await machine.close()
    return 0

if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code. (The unused
    # `import time` from the original has been removed.)
    sys.exit(asyncio.run(main()))
package main

import (
	"context"
	"fmt"
	"time"
	"image/jpeg"
	"bytes"

	"go.viam.com/rdk/app"
	"go.viam.com/rdk/logging"
	"go.viam.com/rdk/robot/client"
	"go.viam.com/rdk/services/vision"
	"go.viam.com/rdk/components/camera"
	"go.viam.com/rdk/utils"
	"go.viam.com/utils/rpc"
)


func main() {
	apiKey := ""
	apiKeyID := ""
	datasetID := ""
	machineAddress := ""
	classifierName := ""
	cameraName := ""
	partID := ""


	logger := logging.NewDebugLogger("client")
	ctx := context.Background()

	viamClient, err := app.CreateViamClientWithAPIKey(
		ctx, app.Options{}, apiKey, apiKeyID, logger)

	machine, err := client.New(
		context.Background(),
		machineAddress,
		logger,
		client.WithDialOptions(rpc.WithEntityCredentials(
			apiKeyID,
			rpc.Credentials{
				Type:    rpc.CredentialsTypeAPIKey,
				Payload: apiKey,
			})),
	)
	if err != nil {
		logger.Fatal(err)
	}

	if err != nil {
		logger.Fatal(err)
	}
	defer viamClient.Close()

	// Capture image from camera
	cam, err := camera.FromRobot(machine, cameraName)
	if err != nil {
		logger.Fatal(err)
	}

	image, _, err := cam.Image(ctx, utils.MimeTypeJPEG, nil)
	if err != nil {
		logger.Fatal(err)
	}

	dataClient := viamClient.DataClient()


	// Upload image to Viam
	binaryDataID, err := dataClient.BinaryDataCaptureUpload(
		ctx,
		image,
		partID,
		"camera",
		cameraName,
		"GetImage",
		".jpg",
		&app.BinaryDataCaptureUploadOptions{
			DataRequestTimes: &[2]time.Time{time.Now(), time.Now()},
		},
	)

	fmt.Printf("Uploaded image: %s\n", binaryDataID)

	// Convert binary data to image.Image
	img, err := jpeg.Decode(bytes.NewReader(image))
	if err != nil {
		logger.Fatal(err)
	}

	// Get classifications using the image
	classifier, err := vision.FromRobot(machine, classifierName)
	if err != nil {
		logger.Fatal(err)
	}

	classifications, err := classifier.Classifications(ctx, img, 2, nil)
	if err != nil {
		logger.Fatal(err)
	}

    if len(classifications) == 0 {
        logger.Fatal(err)
    } else {
        for _, classification := range classifications {
			err := dataClient.AddTagsToBinaryDataByIDs(ctx, []string{classification.Label()}, []string{binaryDataID})
			if err != nil {
				logger.Fatal(err)
			}
			fmt.Printf("Added tag to image: %s\n", classification.Label())
		}
	}

	// Add image to dataset
	err = dataClient.AddBinaryDataToDatasetByIDs(ctx, []string{binaryDataID}, datasetID)
	if err != nil {
		logger.Fatal(err)
	}
	fmt.Printf("Added image to dataset: %s\n", binaryDataID)

}
import { createViamClient, createRobotClient, RobotClient, VisionClient, CameraClient } from "@viamrobotics/sdk";

// Configuration constants – replace with your actual values.
// Declared const: these are never reassigned.
const API_KEY = "";  // API key, find or create in your organization settings
const API_KEY_ID = "";  // API key ID, find or create in your organization settings
const DATASET_ID = "";  // the ID of the dataset you want to add the image to
const MACHINE_ADDRESS = "";  // the address of the machine you want to capture images from
const CLASSIFIER_NAME = "";  // the name of the classifier you want to use
const CAMERA_NAME = "";  // the name of the camera you want to capture images from
const PART_ID = "";  // the part ID of the binary data you want to add to the dataset

async function connect(): Promise<any> {
    // Establish a connection to the Viam app client using API credentials.
    return await createViamClient({
        credentials: {
            type: "api-key",
            authEntity: API_KEY_ID,
            payload: API_KEY,
        },
    });
}

async function connectMachine(): Promise<RobotClient> {
    // Establish a connection to the robot using the machine address.
    return await createRobotClient({
        host: MACHINE_ADDRESS,
        credentials: {
            type: 'api-key',
            payload: API_KEY,
            authEntity: API_KEY_ID,
        },
        signalingAddress: 'https://app.viam.com:443',
    });
}

async function main(): Promise<number> {
    const viamClient = await connect();
    const dataClient = viamClient.dataClient;
    const machine = await connectMachine();

    try {
        const camera = new CameraClient(machine, CAMERA_NAME);
        const classifier = new VisionClient(machine, CLASSIFIER_NAME);

        // Capture image
        const imageFrame = await camera.getImage();

        // Upload data, using one timestamp for both request start and end.
        const captureTime = new Date();
        const fileId = await dataClient.binaryDataCaptureUpload(
            imageFrame,
            PART_ID,
            "camera",
            CAMERA_NAME,
            "GetImage",
            ".jpg",
            [captureTime, captureTime]
        );
        console.log(`Uploaded image: ${fileId}`);

        // Annotate image with a placeholder tag.
        await dataClient.addTagsToBinaryDataByIds(["test"], [fileId]);

        // Get image from data in Viam
        const data = await dataClient.binaryDataByIds([fileId]);
        const binaryData = data[0];

        // The stored binary payload; should be a Uint8Array.
        const image = binaryData.binary;

        // Get up to two tags using the image.
        const tags = await classifier.getClassifications(
            image,
            binaryData.metadata.captureMetadata.width ?? 0,
            binaryData.metadata.captureMetadata.height ?? 0,
            binaryData.metadata.captureMetadata.mimeType ?? "",
            2
        );

        if (tags.length === 0) {
            console.log("No tags found");
            return 1;
        }

        for (const tag of tags) {
            await dataClient.addTagsToBinaryDataByIds(
                [tag.className ?? ""],
                [fileId]
            );
            // Log the label itself; interpolating the object printed
            // "[object Object]" in the original.
            console.log(`Added tag to image: ${tag.className}`);
        }

        console.log("Adding image to dataset...");
        await dataClient.addBinaryDataToDatasetByIds([fileId], DATASET_ID);
        console.log(`Added image to dataset: ${fileId}`);

        return 0;
    } finally {
        // Close the robot connection so the process can exit cleanly;
        // the original left it open.
        await machine.disconnect();
    }
}

// Propagate main()'s numeric return value as the process exit code; the
// original discarded it.
main()
    .then((code) => process.exit(code))
    .catch((error) => {
        console.error("Script failed:", error);
        process.exit(1);
    });