Substances V4 (🧪Beta)

albert.collections.substance_v4.SubstanceV4Collection

SubstanceV4Collection(*, session: AlbertSession)

Bases: BaseCollection

SubstanceV4Collection manages substance entities in the Albert platform (🧪Beta).

Beta Feature!

Please do not use in production or without explicit guidance from Albert. You might otherwise have a bad experience. This feature currently falls outside of the Albert support contract, but we'd love your feedback!

Parameters:

Name	Type	Description	Default
`session`	`AlbertSession`	The Albert session instance.	required

Attributes:

Name	Type	Description
`base_path`	`str`	The base URL for substance API requests.

Methods:

Name	Description
`get_by_ids`	Retrieves substances by CAS IDs, substance IDs, or external IDs.
`get_by_id`	Retrieves a single substance by CAS ID, substance ID, or external ID.
`search`	Searches substances by keyword or advanced filters.
`create`	Creates a new substance record.
`update_metadata`	Updates metadata fields on a substance.

Source code in src/albert/collections/substance_v4.py

def __init__(self, *, session: AlbertSession):
    """Bind the collection to ``session`` and derive the substances base path."""
    super().__init__(session=session)
    api_version = SubstanceV4Collection._api_version
    # All substance requests are issued relative to this versioned path.
    self.base_path = f"/api/{api_version}/substances"

base_path

base_path = (
    f"/api/{SubstanceV4Collection._api_version}/substances"
)

get_by_ids

get_by_ids(
    *,
    cas_ids: list[str] | None = None,
    sub_ids: list[str] | None = None,
    external_ids: list[str] | None = None,
    region: str = "global",
    catch_errors: bool | None = None,
    language: str | None = None,
    classification_type: str | None = None,
) -> list[SubstanceV4Info]

Retrieve substances by their identifiers.

At least one of cas_ids, sub_ids, or external_ids must be provided.

Parameters:

Name	Type	Description	Default
`cas_ids`	`list[str] \| None`	CAS numbers to look up.	`None`
`sub_ids`	`list[str] \| None`	Substance IDs to look up.	`None`
`external_ids`	`list[str] \| None`	External IDs to look up.	`None`
`region`	`str`	Region for hazard data. Common values: `"global"`, `"EU"`, `"US"`, `"UK"`. Defaults to `"global"`.	`'global'`
`catch_errors`	`bool \| None`	Whether to suppress errors for unknown substances, by default None.	`None`
`language`	`str \| None`	BCP-47 language code for name translation (e.g. `"EN"`, `"DE"`, `"FR"`), by default None.	`None`
`classification_type`	`str \| None`	Filter by classification type. Accepted values: `"HARMONISED"`, `"NOTIFIED"`, `"SELF_CLASSIFIED"`; or their display labels `"Harmonised C&L"`, `"Notified C&L"`, `"Self Classified"`, by default None.	`None`

Returns:

Type	Description
`list[SubstanceV4Info]`	The matching substances.

Source code in src/albert/collections/substance_v4.py

@validate_call
def get_by_ids(
    self,
    *,
    cas_ids: list[str] | None = None,
    sub_ids: list[str] | None = None,
    external_ids: list[str] | None = None,
    region: str = "global",
    catch_errors: bool | None = None,
    language: str | None = None,
    classification_type: str | None = None,
) -> list[SubstanceV4Info]:
    """Fetch substances matching any of the supplied identifier lists.

    A non-empty ``cas_ids``, ``sub_ids``, or ``external_ids`` list is required.
    Each list is sent as a comma-separated query parameter in a single GET request.

    Parameters
    ----------
    cas_ids : list[str] | None
        CAS numbers to look up.
    sub_ids : list[str] | None
        Substance IDs to look up.
    external_ids : list[str] | None
        External IDs to look up.
    region : str, optional
        Region for hazard data. Common values: ``"global"``, ``"EU"``, ``"US"``,
        ``"UK"``. Defaults to ``"global"``.
    catch_errors : bool | None, optional
        Whether to suppress errors for unknown substances. Omitted from the
        request when None (the default).
    language : str | None, optional
        BCP-47 language code for name translation (e.g. ``"EN"``, ``"DE"``,
        ``"FR"``), by default None.
    classification_type : str | None, optional
        Filter by classification type. Accepted values: ``"HARMONISED"``,
        ``"NOTIFIED"``, ``"SELF_CLASSIFIED"``; or their display labels
        ``"Harmonised C&L"``, ``"Notified C&L"``, ``"Self Classified"``,
        by default None.

    Returns
    -------
    list[SubstanceV4Info]
        The matching substances.

    Raises
    ------
    ValueError
        If all three identifier lists are missing or empty.
    """
    if not (cas_ids or sub_ids or external_ids):
        raise ValueError("At least one of cas_ids, sub_ids, or external_ids must be provided.")

    query: dict = {"region": region}

    # Identifier lists are joined with commas; empty or missing lists are left out.
    for wire_key, id_list in (
        ("casIDs", cas_ids),
        ("subIDs", sub_ids),
        ("externalIDs", external_ids),
    ):
        if id_list:
            query[wire_key] = ",".join(id_list)

    # Serialise the flag as a JSON literal ("true"/"false") rather than Python's repr.
    if catch_errors is not None:
        query["catchErrors"] = json.dumps(catch_errors)

    for wire_key, text in (
        ("language", language),
        ("classificationType", classification_type),
    ):
        if text:
            query[wire_key] = text

    body = self.session.get(self.base_path, params=query).json()
    parsed = SubstanceV4Response.model_validate(body)
    return parsed.substances

get_by_id

get_by_id(
    *,
    cas_id: str | None = None,
    sub_id: str | None = None,
    external_id: str | None = None,
    region: str = "global",
    catch_errors: bool | None = None,
    language: str | None = None,
    classification_type: str | None = None,
) -> SubstanceV4Info

Retrieve a single substance by its identifier.

Provide exactly one of cas_id, sub_id, or external_id.

Parameters:

Name	Type	Description	Default
`cas_id`	`str \| None`	The CAS number.	`None`
`sub_id`	`str \| None`	The substance ID.	`None`
`external_id`	`str \| None`	The external ID.	`None`
`region`	`str`	Region for hazard data. Common values: `"global"`, `"EU"`, `"US"`, `"UK"`. Defaults to `"global"`.	`'global'`
`catch_errors`	`bool \| None`	Whether to suppress errors for unknown substances, by default None.	`None`
`language`	`str \| None`	BCP-47 language code for name translation (e.g. `"EN"`, `"DE"`, `"FR"`), by default None.	`None`
`classification_type`	`str \| None`	Filter by classification type. Accepted values: `"HARMONISED"`, `"NOTIFIED"`, `"SELF_CLASSIFIED"`; or their display labels `"Harmonised C&L"`, `"Notified C&L"`, `"Self Classified"`, by default None.	`None`

Returns:

Type	Description
`SubstanceV4Info`	The matching substance.

Source code in src/albert/collections/substance_v4.py

@validate_call
def get_by_id(
    self,
    *,
    cas_id: str | None = None,
    sub_id: str | None = None,
    external_id: str | None = None,
    region: str = "global",
    catch_errors: bool | None = None,
    language: str | None = None,
    classification_type: str | None = None,
) -> SubstanceV4Info:
    """Retrieve a single substance by its identifier.

    Provide exactly one of ``cas_id``, ``sub_id``, or ``external_id``. The lookup
    is delegated to ``get_by_ids`` and the first returned substance is used.

    Parameters
    ----------
    cas_id : str | None
        The CAS number.
    sub_id : str | None
        The substance ID.
    external_id : str | None
        The external ID.
    region : str, optional
        Region for hazard data. Common values: ``"global"``, ``"EU"``, ``"US"``,
        ``"UK"``. Defaults to ``"global"``.
    catch_errors : bool | None, optional
        Whether to suppress errors for unknown substances, by default None.
    language : str | None, optional
        BCP-47 language code for name translation (e.g. ``"EN"``, ``"DE"``,
        ``"FR"``), by default None.
    classification_type : str | None, optional
        Filter by classification type. Accepted values: ``"HARMONISED"``,
        ``"NOTIFIED"``, ``"SELF_CLASSIFIED"``; or their display labels
        ``"Harmonised C&L"``, ``"Notified C&L"``, ``"Self Classified"``,
        by default None.

    Returns
    -------
    SubstanceV4Info
        The matching substance.

    Raises
    ------
    ValueError
        If zero or more than one identifier is given, if the given identifier is
        an empty string, or if the lookup returns no substance.
    """
    identifiers = (cas_id, sub_id, external_id)
    if sum(value is not None for value in identifiers) != 1:
        raise ValueError("Exactly one of cas_id, sub_id, or external_id must be provided.")
    if not any(identifiers):
        # The single identifier passed is "". Reject it here; otherwise it would be
        # dropped below and get_by_ids would complain about its *list* arguments,
        # which the caller never supplied.
        raise ValueError("The provided identifier must be a non-empty string.")

    results = self.get_by_ids(
        cas_ids=[cas_id] if cas_id is not None else None,
        sub_ids=[sub_id] if sub_id is not None else None,
        external_ids=[external_id] if external_id is not None else None,
        region=region,
        catch_errors=catch_errors,
        language=language,
        classification_type=classification_type,
    )
    if not results:
        raise ValueError("No substance found for the provided identifier.")
    return results[0]

search

search(
    *,
    search_key: str | None = None,
    cas: str | None = None,
    ec: str | None = None,
    name: str | None = None,
    region: str = "global",
    classification_type: str | None = None,
    start_key: int = 0,
    max_items: int = 100,
) -> Iterator[SubstanceV4SearchItem]

Search for substances by keyword or advanced filters.

At least one of search_key, cas, ec, or name must be provided. If both search_key and advanced filters are provided, the advanced filters take precedence.

Parameters:

Name	Type	Description	Default
`search_key`	`str \| None`	Free-text search term.	`None`
`cas`	`str \| None`	Filter by CAS identifier.	`None`
`ec`	`str \| None`	Filter by EC identifier.	`None`
`name`	`str \| None`	Filter by substance name.	`None`
`region`	`str`	Region for hazard data. Common values: `"global"`, `"EU"`, `"US"`, `"UK"`. Defaults to `"global"`.	`'global'`
`classification_type`	`str \| None`	Filter by classification type. Accepted values: `"HARMONISED"`, `"NOTIFIED"`, `"SELF_CLASSIFIED"`; or their display labels `"Harmonised C&L"`, `"Notified C&L"`, `"Self Classified"`, by default None.	`None`
`start_key`	`int`	Offset to resume pagination from, by default 0.	`0`
`max_items`	`int`	Maximum number of items to yield, by default 100.	`100`

Yields:

Type	Description
`SubstanceV4SearchItem`	Matching substance search records.

Source code in src/albert/collections/substance_v4.py

@validate_call
def search(
    self,
    *,
    search_key: str | None = None,
    cas: str | None = None,
    ec: str | None = None,
    name: str | None = None,
    region: str = "global",
    classification_type: str | None = None,
    start_key: int = 0,
    max_items: int = 100,
) -> Iterator[SubstanceV4SearchItem]:
    """Search for substances by keyword or advanced filters.

    At least one of ``search_key``, ``cas``, ``ec``, or ``name`` must be provided.
    If both ``search_key`` and advanced filters are provided, the advanced filters
    take precedence.

    The arguments are checked when this method is called; the paginated requests
    themselves only start once the returned iterator is consumed.

    Parameters
    ----------
    search_key : str | None
        Free-text search term.
    cas : str | None
        Filter by CAS identifier.
    ec : str | None
        Filter by EC identifier.
    name : str | None
        Filter by substance name.
    region : str, optional
        Region for hazard data. Common values: ``"global"``, ``"EU"``, ``"US"``,
        ``"UK"``. Defaults to ``"global"``.
    classification_type : str | None, optional
        Filter by classification type. Accepted values: ``"HARMONISED"``,
        ``"NOTIFIED"``, ``"SELF_CLASSIFIED"``; or their display labels
        ``"Harmonised C&L"``, ``"Notified C&L"``, ``"Self Classified"``,
        by default None.
    start_key : int, optional
        Offset to resume pagination from, by default 0.
    max_items : int, optional
        Maximum number of items to yield, by default 100.

    Returns
    -------
    Iterator[SubstanceV4SearchItem]
        A lazy generator over the matching substance search records.

    Raises
    ------
    ValueError
        If none of ``search_key``, ``cas``, ``ec``, or ``name`` is provided.
        Raised at call time, before any iteration.
    """
    # Validate here, outside the generator: a check placed in a generator body
    # would not run until the caller first iterates the result.
    if not any([search_key, cas, ec, name]):
        raise ValueError("At least one of search_key, cas, ec, or name must be provided.")

    params: dict = {"region": region, "startKey": start_key}
    if search_key:
        params["searchKey"] = search_key
    if cas:
        params["cas"] = cas
    if ec:
        params["ec"] = ec
    if name:
        params["name"] = name
    if classification_type:
        params["classificationType"] = classification_type

    def _iter_results() -> Iterator[SubstanceV4SearchItem]:
        # The paginator is created on first iteration, keeping the search lazy.
        yield from SubstanceV4SearchPaginator(
            path=f"{self.base_path}/search",
            session=self.session,
            params=params,
            max_items=max_items,
        )

    return _iter_results()

create

create(
    *, substance: SubstanceV4Create
) -> SubstanceV4CreateResult

Create a new substance record.

Parameters:

Name	Type	Description	Default
`substance`	`SubstanceV4Create`	The substance data to create.	required

Returns:

Type	Description
`SubstanceV4CreateResult`	The result containing created, failed, and existing items.

Source code in src/albert/collections/substance_v4.py

@validate_call
def create(self, *, substance: SubstanceV4Create) -> SubstanceV4CreateResult:
    """Submit one substance for creation and return the parsed outcome.

    Parameters
    ----------
    substance : SubstanceV4Create
        The substance data to create.

    Returns
    -------
    SubstanceV4CreateResult
        The result containing created, failed, and existing items.
    """
    # Serialise with wire aliases and drop unset (None) fields.
    record = substance.model_dump(by_alias=True, mode="json", exclude_none=True)
    # The request body is a one-element list wrapping the single record.
    reply = self.session.post(self.base_path, json=[record]).json()
    return SubstanceV4CreateResult.model_validate(reply)

update_metadata

update_metadata(
    *,
    id: str,
    notes: str | _UnsetType = _UNSET,
    description: str | _UnsetType = _UNSET,
    cas_smiles: str | _UnsetType = _UNSET,
    inchi_key: str | _UnsetType = _UNSET,
    iupac_name: str | _UnsetType = _UNSET,
    cactus_status: str | _UnsetType = _UNSET,
    metadata: dict[str, MetadataItem | None]
    | _UnsetType = _UNSET,
) -> None

Update metadata fields on a substance.

Only the keyword arguments you pass are updated — all others are left unchanged. The current state is fetched automatically.

Parameters:

Name	Type	Description	Default
`id`	`str`	The substance ID to update.	required
`notes`	`str`	Free-text notes.	`_UNSET`
`description`	`str`	Substance description.	`_UNSET`
`cas_smiles`	`str`	SMILES notation for the structure.	`_UNSET`
`inchi_key`	`str`	InChIKey identifier.	`_UNSET`
`iupac_name`	`str`	IUPAC name.	`_UNSET`
`cactus_status`	`str`	CACTUS resolver status.	`_UNSET`
`metadata`	`dict[str, MetadataItem \| None]`	Custom tenant metadata fields to update. Only the keys listed in this dict are touched; all other custom fields on the substance are left unchanged. Value types by field kind: String / number fields — pass the value directly (`"5 mg/mL"`, `42`). Single-select fields — pass an `EntityLink`; use `client.lists.get_matching_item()` to look up the ID. Multi-select fields — pass a list of `EntityLink` objects; only the changed items are sent. Delete a field — pass `None` as the value (works for all field types).	`_UNSET`

Notes

The following fields can be updated: notes, description, cas_smiles, inchi_key, iupac_name, cactus_status, and any custom metadata fields configured for the tenant.

Examples:

Update a scalar field and a custom string field:

client.substances_v4.update_metadata(
    id="SUB123",
    notes="new notes",
    metadata={"solubility": "5 mg/mL"},
)

Set a single-select custom field:

client.substances_v4.update_metadata(
    id="SUB123",
    metadata={"cmr_eu": EntityLink(id="LST1253")},
)

Update a multi-select custom field (becomes exactly this set):

client.substances_v4.update_metadata(
    id="SUB123",
    metadata={"amide_category": [EntityLink(id="LST1256"), EntityLink(id="LST1257")]},
)

Delete a custom field:

client.substances_v4.update_metadata(id="SUB123", metadata={"old_key": None})

Source code in src/albert/collections/substance_v4.py

@validate_call
def update_metadata(
    self,
    *,
    id: str,
    notes: str | _UnsetType = _UNSET,
    description: str | _UnsetType = _UNSET,
    cas_smiles: str | _UnsetType = _UNSET,
    inchi_key: str | _UnsetType = _UNSET,
    iupac_name: str | _UnsetType = _UNSET,
    cactus_status: str | _UnsetType = _UNSET,
    metadata: dict[str, MetadataItem | None] | _UnsetType = _UNSET,
) -> None:
    """Patch selected metadata fields of a substance.

    Arguments that are not passed are never touched. The substance's current
    state is fetched first so that each passed value can be compared against it;
    values that already match are skipped, and no PATCH request is sent when
    nothing differs. If no field argument is passed at all, the method returns
    without making any request.

    Parameters
    ----------
    id : str
        The substance ID to update. A ``"SUB"`` prefix is added when missing.
    notes : str, optional
        Free-text notes.
    description : str, optional
        Substance description.
    cas_smiles : str, optional
        SMILES notation for the structure.
    inchi_key : str, optional
        InChIKey identifier.
    iupac_name : str, optional
        IUPAC name.
    cactus_status : str, optional
        CACTUS resolver status.
    metadata : dict[str, MetadataItem | None], optional
        Custom tenant metadata fields to update. Only the keys listed in this dict
        are touched; all other custom fields on the substance are left unchanged.

        Value types by field kind:

        - **String / number fields** — pass the value directly (``"5 mg/mL"``, ``42``).
        - **Single-select fields** — pass an ``EntityLink``; use
          ``client.lists.get_matching_item()`` to look up the ID.
        - **Multi-select fields** — pass a list of ``EntityLink`` objects; only the
          changed items are sent.
        - **Delete a field** — pass ``None`` as the value (works for all field types).

    Notes
    -----
    The following fields can be updated: ``notes``, ``description``, ``cas_smiles``,
    ``inchi_key``, ``iupac_name``, ``cactus_status``, and any custom metadata fields
    configured for the tenant.

    Examples
    --------
    Update a scalar field and a custom string field:

        client.substances_v4.update_metadata(
            id="SUB123",
            notes="new notes",
            metadata={"solubility": "5 mg/mL"},
        )

    Set a single-select custom field:

        client.substances_v4.update_metadata(
            id="SUB123",
            metadata={"cmr_eu": EntityLink(id="LST1253")},
        )

    Update a multi-select custom field (becomes exactly this set):

        client.substances_v4.update_metadata(
            id="SUB123",
            metadata={"amide_category": [EntityLink(id="LST1256"), EntityLink(id="LST1257")]},
        )

    Delete a custom field:

        client.substances_v4.update_metadata(id="SUB123", metadata={"old_key": None})
    """
    # (python attribute, wire attribute, requested value) for every scalar field,
    # in the order the patch operations are emitted.
    scalar_fields = (
        ("notes", "notes", notes),
        ("description", "description", description),
        ("cas_smiles", "casSmiles", cas_smiles),
        ("inchi_key", "inchiKey", inchi_key),
        ("iupac_name", "iUpacName", iupac_name),
        ("cactus_status", "cactusStatus", cactus_status),
    )
    any_scalar_passed = any(value is not _UNSET for _, _, value in scalar_fields)
    if not any_scalar_passed and metadata is _UNSET:
        return

    sub_id = f"SUB{id}" if not id.startswith("SUB") else id
    try:
        current = self.get_by_id(sub_id=sub_id, catch_errors=True)
    except AlbertHTTPError:
        # The fetch failed (e.g. the substance has no hazard data yet). Continue
        # with an empty current state, which turns every change into an "add".
        current = None

    patch_ops = []
    for attr_name, wire_name, desired in scalar_fields:
        if desired is _UNSET:
            continue
        # getattr on None falls back to the default, covering the failed-fetch case.
        existing = getattr(current, attr_name, None)
        if existing == desired:
            continue
        if existing is None:
            op = {"operation": "add", "attribute": wire_name, "newValue": desired}
        else:
            op = {
                "operation": "update",
                "attribute": wire_name,
                "oldValue": existing,
                "newValue": desired,
            }
        patch_ops.append(op)

    if metadata is not _UNSET and metadata:
        # Re-validate the stored metadata so raw JSON dicts become EntityLink
        # objects; _generate_metadata_diff reads .id on select values.
        stored = current.metadata if current is not None else {}
        parsed = SubstanceV4Metadata.model_validate({"metadata": stored or {}})
        known = parsed.metadata or {}
        # Restrict the diff to the keys the caller named. Keys mapped to None are
        # left out of the updated side only.
        diff = self._generate_metadata_diff(
            existing_metadata={key: val for key, val in known.items() if key in metadata},
            updated_metadata={key: val for key, val in metadata.items() if val is not None},
        )
        for patch in diff:
            patch_ops.append(patch.model_dump(by_alias=True, mode="json", exclude_none=True))

    if patch_ops:
        self.session.patch(f"{self.base_path}/metadata/{sub_id}", json={"data": patch_ops})