Skip to content

Smart Datasets (🧪Beta)

albert.collections.smart_datasets.SmartDatasetCollection

SmartDatasetCollection(*, session: AlbertSession)

Bases: BaseCollection

A collection for managing smart datasets in the Albert platform (🧪Beta).

Beta Feature!

Please do not use in production or without explicit guidance from Albert. You might otherwise have a bad experience. This feature currently falls outside of the Albert support contract, but we'd love your feedback!

Parameters:

Name Type Description Default
session AlbertSession

The Albert session instance.

required

Attributes:

Name Type Description
base_path str

The base URL for smart dataset API requests.

Methods:

Name Description
create

Creates a new smart dataset entity.

get_all

Lists all smart datasets for the tenant.

get_by_id

Retrieves a smart dataset by its ID.

update

Updates a smart dataset.

delete

Deletes a smart dataset by its ID.

get_data

Retrieves the experiment data for a smart dataset.

Parameters:

Name Type Description Default
session AlbertSession

The Albert session instance.

required
Source code in src/albert/collections/smart_datasets.py
def __init__(self, *, session: AlbertSession):
    """
    Set up the collection and derive its smart dataset endpoint path.

    Parameters
    ----------
    session : AlbertSession
        The Albert session instance, forwarded to the base collection.
    """
    super().__init__(session=session)
    # The API version is read from the class, not the instance.
    api_version = SmartDatasetCollection._api_version
    self.base_path = f"/api/{api_version}/smartdatasets"

base_path

base_path = f'/api/{_api_version}/smartdatasets'

create

create(
    *,
    scope: SmartDatasetScope,
    parent_id: ProjectId | None = None,
    build: bool = True,
) -> SmartDataset

Creates a new smart dataset entity.

Parameters:

Name Type Description Default
scope SmartDatasetScope

The scope of the smart dataset.

required
parent_id ProjectId

The ID of the parent project to inherit the ACL policy from. When set, the smart dataset inherits its ACL policy from the referenced project.

None
build bool

Whether to populate the smart dataset with data from Albert.

True

Returns:

Type Description
SmartDataset

The created smart dataset entity.

Source code in src/albert/collections/smart_datasets.py
@validate_call
def create(
    self,
    *,
    scope: SmartDatasetScope,
    parent_id: ProjectId | None = None,
    build: bool = True,
) -> SmartDataset:
    """
    Creates a new smart dataset entity.

    The scope is serialized (by alias, JSON mode, nulls kept) into the
    request body under ``scope``. ``parentId`` is added to the body only
    when a parent project is given, and ``build`` is sent as a query
    parameter.

    Parameters
    ----------
    scope : SmartDatasetScope
        The scope of the smart dataset.
    parent_id : ProjectId, optional
        The ID of the parent project to inherit the ACL policy from. When set,
        the smart dataset inherits its ACL policy from the referenced project.
    build : bool, optional
        Whether to populate the smart dataset with data from Albert.

    Returns
    -------
    SmartDataset
        The created smart dataset entity, built from the response JSON.
    """
    serialized_scope = scope.model_dump(by_alias=True, exclude_none=False, mode="json")
    payload = {"scope": serialized_scope}
    if parent_id is not None:
        # Omit the key entirely instead of sending an explicit null.
        payload["parentId"] = parent_id
    query = {"build": build}
    response = self.session.post(self.base_path, json=payload, params=query)
    return SmartDataset(**response.json())

get_all

get_all() -> list[SmartDataset]

Lists all smart datasets for the tenant.

Returns:

Type Description
list[SmartDataset]

A list of SmartDataset entities.

Source code in src/albert/collections/smart_datasets.py
def get_all(self) -> list[SmartDataset]:
    """
    Lists all smart datasets for the tenant.

    Returns
    -------
    list[SmartDataset]
        A list of SmartDataset entities. The list is empty when the
        response has no ``Items`` key or when that key is null.
    """
    payload = self.session.get(self.base_path).json()
    # ``.get("Items", [])`` only covers a *missing* key; an explicit JSON
    # null would come back as None and break iteration, so fall back with ``or``.
    records = payload.get("Items") or []
    return [SmartDataset(**record) for record in records]

get_by_id

get_by_id(
    *,
    id: SmartDatasetId,
    parent_id: ProjectId | None = None,
) -> SmartDataset

Retrieves a smart dataset by its ID.

Parameters:

Name Type Description Default
id SmartDatasetId

The ID of the smart dataset to retrieve.

required
parent_id ProjectId

The ID of the parent project to inherit the ACL policy from when the caller does not own the smart dataset record.

None

Returns:

Type Description
SmartDataset

The SmartDataset entity.

Source code in src/albert/collections/smart_datasets.py
@validate_call
def get_by_id(self, *, id: SmartDatasetId, parent_id: ProjectId | None = None) -> SmartDataset:
    """
    Retrieves a smart dataset by its ID.

    Parameters
    ----------
    id : SmartDatasetId
        The ID of the smart dataset to retrieve.
    parent_id : ProjectId, optional
        The ID of the parent project to inherit the ACL policy from when
        the caller does not own the smart dataset record. Sent as the
        ``parentId`` query parameter when provided.

    Returns
    -------
    SmartDataset
        The SmartDataset entity built from the response JSON.
    """
    # No query parameters at all (None) unless a parent project was given.
    query = None if parent_id is None else {"parentId": parent_id}
    response = self.session.get(f"{self.base_path}/{id}", params=query)
    return SmartDataset(**response.json())

update

update(*, smart_dataset: SmartDataset) -> SmartDataset

Updates a smart dataset.

Parameters:

Name Type Description Default
smart_dataset SmartDataset

The smart dataset with updated fields. Must have an id set.

required

Returns:

Type Description
SmartDataset

The updated SmartDataset.

Source code in src/albert/collections/smart_datasets.py
@validate_call
def update(
    self,
    *,
    smart_dataset: SmartDataset,
) -> SmartDataset:
    """
    Updates a smart dataset.

    The current server-side record is fetched and compared with
    ``smart_dataset`` to build a patch payload. A PATCH request is sent only
    when that payload contains changes; the record is then fetched again
    and returned.

    Parameters
    ----------
    smart_dataset : SmartDataset
        The smart dataset with updated fields. Must have an id set.

    Returns
    -------
    SmartDataset
        The updated SmartDataset, as re-read from the API.
    """
    dataset_id = smart_dataset.id
    parent = smart_dataset.parent_id

    current = self.get_by_id(id=dataset_id, parent_id=parent)
    patch = self._generate_patch_payload(existing=current, updated=smart_dataset)
    if patch.data:
        # Skip the request entirely when there is nothing to change.
        patch_body = patch.model_dump(mode="json", by_alias=True, exclude_none=False)
        self.session.patch(url=f"{self.base_path}/{dataset_id}", json=patch_body)
    return self.get_by_id(id=dataset_id, parent_id=parent)

delete

delete(*, id: SmartDatasetId) -> None

Deletes a smart dataset by its ID.

Parameters:

Name Type Description Default
id SmartDatasetId

The ID of the smart dataset to delete.

required

Returns:

Type Description
None
Source code in src/albert/collections/smart_datasets.py
@validate_call
def delete(self, *, id: SmartDatasetId) -> None:
    """
    Deletes a smart dataset by its ID.

    Issues a DELETE request against ``{base_path}/{id}``; the response is
    not inspected here.

    Parameters
    ----------
    id : SmartDatasetId
        The ID of the smart dataset to delete.

    Returns
    -------
    None
    """
    self.session.delete(f"{self.base_path}/{id}")

get_data

get_data(
    *,
    id: SmartDatasetId,
    parent_id: ProjectId | None = None,
    aggregate_by: SmartDatasetAggregateBy = PTD,
    ids: list[str] | None = None,
    variables: list[str] | None = None,
) -> SmartDatasetData

Retrieves the experiment data for a smart dataset.

Parameters:

Name Type Description Default
id SmartDatasetId

The ID of the smart dataset.

required
parent_id ProjectId

The ID of the parent project to inherit the ACL policy from when the caller does not own the smart dataset record.

None
aggregate_by SmartDatasetAggregateBy

The aggregation level for the returned data. Defaults to ptd.

PTD
ids list[str]

Filter results to these identifier keys.

None
variables list[str]

Filter results to these variable keys.

None

Returns:

Type Description
SmartDatasetData

The experiment data matrix.

Source code in src/albert/collections/smart_datasets.py
@validate_call
def get_data(
    self,
    *,
    id: SmartDatasetId,
    parent_id: ProjectId | None = None,
    aggregate_by: SmartDatasetAggregateBy = SmartDatasetAggregateBy.PTD,
    ids: list[str] | None = None,
    variables: list[str] | None = None,
) -> SmartDatasetData:
    """
    Retrieves the experiment data for a smart dataset.

    The smart dataset is looked up first, and the data request is made only
    when its build state is ``READY``.

    Parameters
    ----------
    id : SmartDatasetId
        The ID of the smart dataset.
    parent_id : ProjectId, optional
        The ID of the parent project to inherit the ACL policy from when
        the caller does not own the smart dataset record.
    aggregate_by : SmartDatasetAggregateBy, optional
        The aggregation level for the returned data. Defaults to ``ptd``.
    ids : list[str], optional
        Filter results to these identifier keys.
    variables : list[str], optional
        Filter results to these variable keys.

    Returns
    -------
    SmartDatasetData
        The experiment data matrix.

    Raises
    ------
    ValueError
        If the smart dataset's build state is not ``READY``.
    """
    # NOTE(review): parent_id is used only for this readiness lookup and is
    # not sent with the data request below — confirm that is intended.
    dataset = self.get_by_id(id=id, parent_id=parent_id)
    if dataset.build_state != SmartDatasetBuildState.READY:
        raise ValueError("Smart dataset is not ready")

    query: dict = {"aggregate_by": aggregate_by.to_api_value()}
    # Optional filters are attached only when the caller supplied them.
    if ids is not None:
        query["id"] = ids
    if variables is not None:
        query["variable"] = variables

    data_url = f"{self.base_path}/{id}/experiments/data"
    payload = self.session.get(data_url, params=query).json()
    # Convert the API's aggregation value back to the SDK form before
    # building the model.
    payload["aggregate_by"] = aggregate_by.from_api_value(payload["aggregate_by"])
    return SmartDatasetData(**payload)