Next
Capture and annotate images
To train a machine learning model, you will need a dataset that meets the following conditions:
When you label your dataset, include:
You can create a dataset using the web UI, the CLI, or one of the SDKs:
Navigate to the DATA page and open the DATASETS tab.
Click the + Create dataset button.
Enter a unique name for the dataset.
Click Create dataset.
Run the following Viam CLI command to create a dataset, replacing the <org-id>
and <name>
placeholders with your organization ID and a unique name for the dataset:
viam dataset create --org-id=<org-id> --name=<name>
To create a dataset, pass a unique dataset name and organization ID to data_client.create_dataset
:
import asyncio
from viam.rpc.dial import DialOptions, Credentials
from viam.app.viam_client import ViamClient
# Configuration constants – replace with your actual values before running
API_KEY = "" # API key secret (sent as the credentials payload); find or create in your organization settings
API_KEY_ID = "" # ID of that API key (sent as the auth entity); find or create in your organization settings
ORG_ID = "" # ID of the organization the dataset is created in; find in your organization settings
DATASET_NAME = "" # a unique, new name for the dataset you want to create
async def connect() -> ViamClient:
    """Create and return a ViamClient authenticated with the configured API key."""
    api_key_credentials = Credentials(type="api-key", payload=API_KEY)
    options = DialOptions(
        credentials=api_key_credentials,
        auth_entity=API_KEY_ID,
    )
    client = await ViamClient.create_from_dial_options(options)
    return client
async def main() -> int:
    """Create the dataset DATASET_NAME in the organization ORG_ID.

    Returns:
        0 if the dataset was created, 1 if the create call raised.
    """
    viam_client = await connect()
    try:
        data_client = viam_client.data_client

        print("Creating dataset...")
        try:
            dataset_id = await data_client.create_dataset(
                name=DATASET_NAME,
                organization_id=ORG_ID,
            )
        except Exception as e:
            print("Error creating dataset. It may already exist.")
            print("See: https://app.viam.com/data/datasets")
            print(f"Exception: {e}")
            return 1

        print(f"Created dataset: {dataset_id}")
        return 0
    finally:
        # Close the connection on the failure path too, not only on success.
        viam_client.close()
if __name__ == "__main__":
    # Use main()'s return value as the process exit status so that a failed
    # run is visible to shells and CI instead of always exiting with 0.
    raise SystemExit(asyncio.run(main()))
To create a dataset, pass a unique dataset name and organization ID to DataClient.CreateDataset
:
package main
import (
"context"
"fmt"
"os"
"go.viam.com/rdk/app"
"go.viam.com/rdk/logging"
)
// main connects to Viam with an API key and creates a dataset named
// datasetName in the organization orgID, printing the new dataset's ID.
// It exits with status 1 if the dataset cannot be created.
func main() {
	// Replace these empty values with your own before running.
	apiKey := ""
	apiKeyID := ""
	orgID := ""
	datasetName := ""

	logger := logging.NewDebugLogger("client")
	ctx := context.Background()
	viamClient, err := app.CreateViamClientWithAPIKey(
		ctx, app.Options{}, apiKey, apiKeyID, logger)
	if err != nil {
		logger.Fatal(err)
	}
	defer viamClient.Close()

	dataClient := viamClient.DataClient()

	fmt.Println("Creating dataset...")
	datasetID, err := dataClient.CreateDataset(ctx, datasetName, orgID)
	if err != nil {
		fmt.Println("Error creating dataset. It may already exist.")
		fmt.Printf("Exception: %v\n", err)
		// os.Exit skips deferred calls, so close the client explicitly
		// before reporting the failure through the exit status.
		viamClient.Close()
		os.Exit(1)
	}
	fmt.Printf("Created dataset: %s\n", datasetID)
}
To create a dataset, pass a unique dataset name and organization ID to dataClient.createDataset
:
import { createViamClient } from "@viamrobotics/sdk";
// Configuration constants – replace with your actual values.
// Declared with `const` because the script never reassigns them.
const API_KEY = ""; // API key, find or create in your organization settings
const API_KEY_ID = ""; // API key ID, find or create in your organization settings
const ORG_ID = ""; // your organization ID, find in your organization settings
const DATASET_NAME = ""; // a unique, new name for the dataset you want to create
async function connect(): Promise<any> {
  // Authenticate with the configured API key and hand back the connected client
  const client = await createViamClient({
    credentials: {
      type: "api-key",
      authEntity: API_KEY_ID,
      payload: API_KEY,
    },
  });
  return client;
}
async function main(): Promise<number> {
  // Connect, create the dataset, and report the outcome as 0 (ok) or 1 (failed)
  const client = await connect();
  const datasets = client.dataClient;

  console.log("Creating dataset...");
  try {
    const createdId = await datasets.createDataset(DATASET_NAME, ORG_ID);
    console.log(`Created dataset: ${createdId}`);
    return 0;
  } catch (failure) {
    console.log("Error creating dataset. It may already exist.");
    console.log("See: https://app.viam.com/data/datasets");
    console.log(`Exception: ${failure}`);
    return 1;
  }
}
// Run the script and report main()'s result as the process exit status,
// so a handled failure (main resolving to 1) is not reported as success.
main()
  .then((exitCode) => {
    process.exitCode = exitCode;
  })
  .catch((error) => {
    console.error("Script failed:", error);
    process.exit(1);
  });
You can now add images to your dataset.
You can add images to a dataset from the Images tab of the DATA page:
Click to select the images you would like to add to your dataset.
Click the Add to dataset button in the top right.
From the Dataset dropdown, select the name of your dataset.
Click Add <n> images to add the selected images to the dataset.
To select a range of images, select one image, then hold Ctrl/Cmd while clicking another image. This will select both images as well as the entire range of images between those images.
Use the Viam CLI to filter images by label and add the filtered images to a dataset:
First, create a dataset, if you haven’t already.
If you just created a dataset, use the dataset ID output by the creation command. If your dataset already exists, run the following command to get a list of dataset names and corresponding IDs:
viam dataset list
Run the following command to add all images labeled with a subset of tags to the dataset, replacing the <dataset-id>
placeholder with the dataset ID output by the command in the previous step:
viam dataset data add filter --dataset-id=<dataset-id> --tags=red_star,blue_square
To add an image to a dataset, find the binary data ID for the image and the dataset ID.
Pass both IDs to data_client.add_binary_data_to_dataset_by_ids
:
import asyncio
from viam.rpc.dial import DialOptions, Credentials
from viam.app.viam_client import ViamClient
# Configuration constants – replace with your actual values before running
API_KEY = "" # API key secret (sent as the credentials payload); find or create in your organization settings
API_KEY_ID = "" # ID of that API key (sent as the auth entity); find or create in your organization settings
DATASET_ID = "" # the ID of the existing dataset you want to add the image to
BINARY_DATA_ID = "" # the binary data ID of the image you want to add to the dataset
async def connect() -> ViamClient:
    """Create and return a ViamClient authenticated with the configured API key."""
    api_key_credentials = Credentials(type="api-key", payload=API_KEY)
    options = DialOptions(
        credentials=api_key_credentials,
        auth_entity=API_KEY_ID,
    )
    client = await ViamClient.create_from_dial_options(options)
    return client
async def main() -> int:
    """Add the image BINARY_DATA_ID to the dataset DATASET_ID.

    Returns:
        0 once the image has been added. Errors from the API call propagate
        to the caller after the client has been closed.
    """
    viam_client = await connect()
    try:
        data_client = viam_client.data_client

        print("Adding image to dataset...")
        await data_client.add_binary_data_to_dataset_by_ids(
            binary_ids=[BINARY_DATA_ID],
            dataset_id=DATASET_ID,
        )
    finally:
        # Release the connection even when the add call raises.
        viam_client.close()
    return 0
if __name__ == "__main__":
    # main() is declared to return an int status; hand it to the interpreter
    # as the process exit code instead of discarding it.
    raise SystemExit(asyncio.run(main()))
To add an image to a dataset, find the binary data ID for the image and the dataset ID.
Pass both IDs to DataClient.AddBinaryDataToDatasetByIDs
:
package main
import (
"context"
"fmt"
"go.viam.com/rdk/app"
"go.viam.com/rdk/logging"
)
// main connects to Viam with an API key and adds the single image
// binaryDataID to the dataset datasetID, printing the outcome.
func main() {
	// Replace these empty values with your own before running.
	var (
		apiKey       = ""
		apiKeyID     = ""
		datasetID    = ""
		binaryDataID = ""
	)

	logger := logging.NewDebugLogger("client")
	ctx := context.Background()

	client, err := app.CreateViamClientWithAPIKey(
		ctx, app.Options{}, apiKey, apiKeyID, logger)
	if err != nil {
		logger.Fatal(err)
	}
	defer client.Close()

	data := client.DataClient()

	fmt.Println("Adding image to dataset...")
	imageIDs := []string{binaryDataID}
	if err := data.AddBinaryDataToDatasetByIDs(ctx, imageIDs, datasetID); err != nil {
		fmt.Println("Error adding image to dataset.")
		fmt.Printf("Exception: %v\n", err)
		return
	}

	fmt.Println("Image added to dataset successfully")
}
To add an image to a dataset, find the binary data ID for the image and the dataset ID.
Pass both IDs to dataClient.addBinaryDataToDatasetByIds
:
import { createViamClient } from "@viamrobotics/sdk";
// Configuration constants – replace with your actual values.
// Declared with `const` because the script never reassigns them.
const API_KEY = ""; // API key, find or create in your organization settings
const API_KEY_ID = ""; // API key ID, find or create in your organization settings
const DATASET_ID = ""; // the ID of the dataset you want to add the image to
const BINARY_DATA_ID = ""; // the ID of the image you want to add to the dataset
async function connect(): Promise<any> {
  // Authenticate with the configured API key and hand back the connected client
  const client = await createViamClient({
    credentials: {
      type: "api-key",
      authEntity: API_KEY_ID,
      payload: API_KEY,
    },
  });
  return client;
}
async function main(): Promise<number> {
  // Connect, then attach the one configured image to the configured dataset
  const client = await connect();
  const { dataClient } = client;

  console.log("Adding image to dataset...");
  const imageIds = [BINARY_DATA_ID];
  await dataClient.addBinaryDataToDatasetByIds(imageIds, DATASET_ID);

  return 0;
}
// Entry point: start main() and exit with status 1 if it rejects
function reportFailure(error: unknown): void {
  console.error("Script failed:", error);
  process.exit(1);
}

main().catch(reportFailure);
You can add images to a dataset from the Images tab of the DATA page:
To select a range of images, select one image, then hold Ctrl/Cmd while clicking another image. This will select both images as well as the entire range of images between those images.
The following script adds images captured by a specific machine part to an existing dataset, up to a configurable maximum number of images:
import asyncio
from typing import List, Optional
from viam.utils import create_filter
from viam.rpc.dial import DialOptions, Credentials
from viam.app.viam_client import ViamClient
# Configuration constants – replace with your actual values before running
API_KEY = "" # API key secret (sent as the credentials payload); find or create in your organization settings
API_KEY_ID = "" # ID of that API key (sent as the auth entity); find or create in your organization settings
ORG_ID = "" # your organization ID, find in your organization settings (not referenced by the code in this script)
PART_ID = "" # the ID of the machine part whose captured data you want to add to the dataset
DATASET_ID = "" # the ID of the existing dataset you want to add the images to
MAX_MATCHES = 50 # the maximum number of binary data objects to fetch
async def connect() -> ViamClient:
    """Create and return a ViamClient authenticated with the configured API key."""
    api_key_credentials = Credentials(type="api-key", payload=API_KEY)
    options = DialOptions(
        credentials=api_key_credentials,
        auth_entity=API_KEY_ID,
    )
    client = await ViamClient.create_from_dial_options(options)
    return client
async def fetch_binary_data_ids(data_client, part_id: str) -> list:
    """Fetch metadata for up to MAX_MATCHES binary data objects from one part.

    Despite the function's name, the returned items are the objects yielded
    by ``binary_data_by_filter`` (callers read ``obj.metadata.binary_data_id``
    from each one), not bare ID strings.

    Args:
        data_client: The Viam data client used to run the query.
        part_id: ID of the machine part whose data should be matched.

    Returns:
        A list of at most MAX_MATCHES matching objects, fetched without
        their binary payloads.
    """
    data_filter = create_filter(part_id=part_id)
    all_matches = []
    last: Optional[str] = None

    print("Getting data for part...")

    while len(all_matches) < MAX_MATCHES:
        print("Fetching more data...")
        # Ask only for what is still needed so the total never exceeds
        # MAX_MATCHES, while keeping pages at 50 items or fewer.
        remaining = MAX_MATCHES - len(all_matches)
        data, _, last = await data_client.binary_data_by_filter(
            data_filter,
            limit=min(50, remaining),
            last=last,
            include_binary_data=False,
        )
        if not data:
            break
        all_matches.extend(data)

    # Guard against a page that returned more items than requested.
    return all_matches[:MAX_MATCHES]
async def main() -> int:
    """Add data captured by the part PART_ID to the dataset DATASET_ID.

    Returns:
        0 when the run completes, including when no matching data is found.
        Errors from the API calls propagate to the caller after the client
        has been closed.
    """
    viam_client = await connect()
    try:
        data_client = viam_client.data_client

        matching_data = await fetch_binary_data_ids(data_client, PART_ID)
        if not matching_data:
            # Nothing to add; skip the API call rather than send an empty list.
            print("No matching data found for part.")
            return 0

        await data_client.add_binary_data_to_dataset_by_ids(
            binary_ids=[
                obj.metadata.binary_data_id for obj in matching_data
            ],
            dataset_id=DATASET_ID,
        )

        print("Added files to dataset:")
        print(f"https://app.viam.com/data/datasets?id={DATASET_ID}")
        return 0
    finally:
        # Release the connection even when fetching or adding raises.
        viam_client.close()
if __name__ == "__main__":
    # main() is declared to return an int status; hand it to the interpreter
    # as the process exit code instead of discarding it.
    raise SystemExit(asyncio.run(main()))
The following script adds images captured by a specific machine part within the last 200 hours to an existing dataset, up to a configurable maximum number of images:
package main
import (
	"context"
	"fmt"
	"time"

	"go.viam.com/rdk/app"
	"go.viam.com/rdk/logging"
)
// fetchBinaryDataIDs returns the binary data IDs of at most maxMatches items
// captured by the machine part partID during the last 200 hours.
//
// Results are requested in pages of 5 without their binary payloads, and
// paging stops once enough IDs are collected or a page comes back empty.
// An error from the data client is returned wrapped.
func fetchBinaryDataIDs(
	ctx context.Context,
	dataClient *app.DataClient,
	partID string,
	maxMatches int) ([]string, error) {
	// Take one timestamp so both ends of the interval share a reference point.
	now := time.Now()
	filter := &app.Filter{
		PartID: partID,
		Interval: app.CaptureInterval{
			Start: now.Add(-200 * time.Hour),
			End:   now,
		},
	}

	var allMatches []string
	last := ""

	fmt.Println("Getting data for part...")

	for len(allMatches) < maxMatches {
		fmt.Println("Fetching more data...")

		resp, err := dataClient.BinaryDataByFilter(
			ctx, false, &app.DataByFilterOptions{
				Filter:              filter,
				Limit:               5,
				Last:                last,
				IncludeInternalData: false,
			},
		)
		if err != nil {
			return nil, fmt.Errorf("failed to fetch binary data: %w", err)
		}
		if len(resp.BinaryData) == 0 {
			break
		}
		for _, data := range resp.BinaryData {
			allMatches = append(allMatches, data.Metadata.BinaryDataID)
		}
		// Continue the next page after the last item of this one.
		last = resp.Last
	}

	// The final page can overshoot the cap; trim so callers never receive
	// more than maxMatches IDs.
	if len(allMatches) > maxMatches {
		allMatches = allMatches[:maxMatches]
	}

	fmt.Println("All matches:")
	fmt.Println(allMatches)
	return allMatches, nil
}
// main connects to Viam with an API key, collects up to maxMatches image IDs
// for the machine part partID, and adds them to the dataset datasetID.
func main() {
	// Replace these empty values with your own before running.
	var (
		apiKey     = ""
		apiKeyID   = ""
		partID     = ""
		datasetID  = ""
		maxMatches = 50
	)

	logger := logging.NewDebugLogger("client")
	ctx := context.Background()

	client, err := app.CreateViamClientWithAPIKey(
		ctx, app.Options{}, apiKey, apiKeyID, logger)
	if err != nil {
		logger.Fatal(err)
	}
	defer client.Close()

	data := client.DataClient()

	fmt.Println("Fetching machine images...")
	imageIDs, err := fetchBinaryDataIDs(ctx, data, partID, maxMatches)
	if err != nil {
		fmt.Println("Error fetching machine images.")
		fmt.Printf("Exception: %v\n", err)
		return
	}
	fmt.Printf("Fetched %d machine images.\n", len(imageIDs))

	fmt.Println("Adding machine images to dataset...")
	if err := data.AddBinaryDataToDatasetByIDs(ctx, imageIDs, datasetID); err != nil {
		fmt.Println("Error adding machine images to dataset.")
		fmt.Printf("Exception: %v\n", err)
		return
	}

	fmt.Println("Machine images added to dataset successfully")
}
The following script adds images captured by a specific machine part to an existing dataset, up to a configurable maximum number of images:
import { createViamClient } from "@viamrobotics/sdk";
// Configuration constants – replace with your actual values.
// All are declared with `const`, matching MAX_MATCHES, because the script
// never reassigns them.
const API_KEY = ""; // API key, find or create in your organization settings
const API_KEY_ID = ""; // API key ID, find or create in your organization settings
const ORG_ID = ""; // your organization ID, find in your organization settings
const PART_ID = ""; // the part ID of the binary data you want to add to the dataset
const DATASET_ID = ""; // the ID of the dataset you want to add the image to
const MAX_MATCHES = 50; // the maximum number of binary data objects to fetch
async function connect(): Promise<any> {
  // Authenticate with the configured API key and hand back the connected client
  const client = await createViamClient({
    credentials: {
      type: "api-key",
      authEntity: API_KEY_ID,
      payload: API_KEY,
    },
  });
  return client;
}
/**
 * Fetch metadata for up to MAX_MATCHES binary data objects from one part.
 *
 * Despite the function's name, the resolved items are the objects returned
 * by `binaryDataByFilter` (callers read `obj.metadata.binaryDataId` from
 * each one), not bare ID strings, so the return type is `any[]`.
 *
 * @param dataClient - the Viam data client used to run the query
 * @param partId - ID of the machine part whose data should be matched
 * @returns at most MAX_MATCHES matching objects, fetched without binary payloads
 */
async function fetchBinaryDataIds(dataClient: any, partId: string): Promise<any[]> {
  const dataFilter = { partId: partId };
  const allMatches: any[] = [];
  let last: string | undefined = undefined;

  console.log("Getting data for part...");

  while (allMatches.length < MAX_MATCHES) {
    console.log("Fetching more data...");
    // Ask only for what is still needed so the total never exceeds
    // MAX_MATCHES, while keeping pages at 50 items or fewer.
    const pageSize = Math.min(50, MAX_MATCHES - allMatches.length);
    const result = await dataClient.binaryDataByFilter(
      dataFilter,
      pageSize,
      0,
      last,
      false // includeBinary = false to allow limit > 1
    );

    const data = result.data || result;
    const newLast = result.last;

    if (!data || data.length === 0) {
      break;
    }
    allMatches.push(...data);
    last = newLast;
  }

  // Guard against a page that returned more items than requested.
  return allMatches.slice(0, MAX_MATCHES);
}
async function main(): Promise<number> {
  // Connect, collect the matching data for the part, and add it to the dataset
  const client = await connect();
  const { dataClient } = client;

  const matches = await fetchBinaryDataIds(dataClient, PART_ID);
  console.log(`Found ${matches.length} matching data objects`);
  console.log(matches);

  const binaryIds = matches.map(obj => obj.metadata.binaryDataId);
  await dataClient.addBinaryDataToDatasetByIds(binaryIds, DATASET_ID);

  console.log("Added files to dataset:");
  console.log(`https://app.viam.com/data/datasets?id=${DATASET_ID}`);
  return 0;
}
// Entry point: start main() and exit with status 1 if it rejects
function reportFailure(error: unknown): void {
  console.error("Script failed:", error);
  process.exit(1);
}

main().catch(reportFailure);
If you have used the viam dataset export
command to export a dataset, or if someone else has given you an exported dataset, you can use the following script to import it.
If you have a dataset that was not exported with Viam, you will need to make changes to this script.
# Assumption: The dataset was exported using the `viam dataset export` command.
# This script is being run from the `destination` directory.
import asyncio
import os
import json
from viam.rpc.dial import DialOptions, Credentials
from viam.app.viam_client import ViamClient
# Configuration constants – replace with your actual values before running
API_KEY = "" # API key secret (sent as the credentials payload); find or create in your organization settings
API_KEY_ID = "" # ID of that API key (sent as the auth entity); find or create in your organization settings
ORG_ID = "" # the ID of the organization the new dataset is created in
PART_ID = "" # the ID of the machine part the uploaded images are associated with
LOCATION_ID = "" # the ID of the location you want to add the image to (not referenced by the code in this script)
DATASET_NAME = "" # the name of the dataset the script creates and adds the images to
FOLDER_NAME = "" # the folder holding the exported dataset; the script reads its metadata/ and data/ subfolders
async def connect() -> ViamClient:
    """Create and return a ViamClient authenticated with the configured API key."""
    api_key_credentials = Credentials(type="api-key", payload=API_KEY)
    options = DialOptions(
        credentials=api_key_credentials,
        auth_entity=API_KEY_ID,
    )
    client = await ViamClient.create_from_dial_options(options)
    return client
async def main():
    """Create a dataset and import an exported dataset folder into it.

    For every metadata file in ``FOLDER_NAME/metadata`` the matching image in
    ``FOLDER_NAME/data`` is uploaded for part PART_ID, its bounding boxes and
    tags are re-applied, and finally all uploaded images are added to the
    newly created dataset DATASET_NAME.

    Returns:
        0 on success, 1 if the dataset could not be created. Other errors
        propagate to the caller after the client has been closed.
    """
    viam_client = await connect()
    try:
        data_client = viam_client.data_client

        print("Creating dataset...")
        try:
            dataset_id = await data_client.create_dataset(
                name=DATASET_NAME,
                organization_id=ORG_ID,
            )
            print(f"Created dataset: {dataset_id}")
        except Exception as e:
            print("Error creating dataset. It may already exist.")
            print("See: https://app.viam.com/data/datasets")
            print(f"Exception: {e}")
            return 1

        metadata_dir = os.path.join(FOLDER_NAME, "metadata")
        data_dir = os.path.join(FOLDER_NAME, "data")
        file_ids = []

        for file_name in os.listdir(metadata_dir):
            # Only the parse needs the file handle; close it before uploading.
            with open(os.path.join(metadata_dir, file_name)) as f:
                try:
                    data = json.load(f)
                except Exception as e:
                    print(
                        f"Skipping file: {file_name} "
                        "because it is not valid JSON"
                    )
                    print(f"Exception: {e}")
                    continue

            tags = None
            if "captureMetadata" in data:
                if "tags" in data["captureMetadata"]:
                    tags = data["captureMetadata"]["tags"]

            annotations = data.get("annotations")
            print(data)
            print(annotations)

            # The image name is the metadata file name minus its trailing
            # ".json"; strip only the suffix, not every occurrence.
            if file_name.endswith(".json"):
                image_file = file_name[:-len(".json")]
            else:
                image_file = file_name

            print("Uploading: " + image_file)

            file_id = await data_client.file_upload_from_path(
                part_id=PART_ID,
                tags=tags,
                filepath=os.path.join(data_dir, image_file),
            )
            print("FileID: " + file_id)

            if annotations:
                # Annotations may carry no bounding boxes at all.
                for box in annotations.get("bboxes") or []:
                    await data_client.add_bounding_box_to_image_by_id(
                        binary_id=file_id,
                        label=box["label"],
                        x_min_normalized=box["xMinNormalized"],
                        y_min_normalized=box["yMinNormalized"],
                        x_max_normalized=box["xMaxNormalized"],
                        y_max_normalized=box["yMaxNormalized"],
                    )

            if tags:
                await data_client.add_tags_to_binary_data_by_ids(
                    tags=tags,
                    binary_ids=[file_id],
                )

            file_ids.append(file_id)

        await data_client.add_binary_data_to_dataset_by_ids(
            binary_ids=file_ids,
            dataset_id=dataset_id,
        )
        print("Added files to dataset.")
        print("https://app.viam.com/data/datasets?id=" + dataset_id)
        return 0
    finally:
        # Release the connection on every exit path, including failures.
        viam_client.close()
if __name__ == '__main__':
    # main() returns 1 when the dataset cannot be created; pass its result
    # to the interpreter as the exit status instead of discarding it.
    raise SystemExit(asyncio.run(main()))
Was this page helpful?
Glad to hear it! If you have any other feedback please let us know:
We're sorry about that. To help us improve, please tell us what we can do better:
Thank you!