The `zenodo` submodule

This module provides functions to:

- handle the publication of datasets after they have been validated using safedata_validate, including the generation of HTML descriptions of datasets.
- maintain local copies of datasets in the folder structure expected by the safedata R package.
- compile a RIS format bibliographic file for published datasets.

`create_deposit(concept_id=None, resources=None)`

Create a new deposit.

Creates a new deposit draft, possibly as a new version of an existing published record.

Parameters:

Name	Type	Description	Default
`concept_id`	`int \| None`	An optional concept id of a published record to create a new version of an existing dataset.	`None`
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`ZenodoFunctionResponseType`	See here.

Source code in safedata_validator/zenodo.py

def create_deposit(
    concept_id: int | None = None, resources: Resources | None = None
) -> ZenodoFunctionResponseType:
    """Create a new deposit draft.

    With no ``concept_id``, a brand new deposit is requested. When a
    ``concept_id`` is given, a new version of that published record is requested
    instead, and the resulting draft is then retrieved with a second API call.

    Args:
        concept_id: An optional concept id of a published record, used to create a
            new version of an existing dataset.
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Returns:
        See [here][safedata_validator.zenodo.ZenodoFunctionResponseType].
    """

    # Resolve the API root and the authentication parameters
    config = _resources_to_zenodo_api(resources)
    api_root = config["zapi"]
    token = config["ztoken"]

    # New records and new versions of existing records use different endpoints
    endpoint = f"{api_root}/deposit/depositions"
    if concept_id is not None:
        endpoint = f"{endpoint}/{concept_id}/actions/newversion"

    created = requests.post(endpoint, params=token, json={})

    # Anything other than 201 (created) is reported as a failure
    if created.status_code != 201:
        return {}, _zenodo_error_message(created)

    # For a brand new deposit, the response is the draft itself
    if concept_id is None:
        return created.json(), None

    # For a new version, the response describes the existing record, so the new
    # draft has to be fetched separately from the link it provides.
    draft_url = created.json()["links"]["latest_draft"]
    draft = requests.get(draft_url, params=token, json={})

    # Successful retrieval of the new version draft returns 200
    if draft.status_code == 200:
        return draft.json(), None

    return {}, _zenodo_error_message(draft)

`get_deposit(deposit_id, resources=None)`

Download the metadata of a Zenodo deposit.

Parameters:

Name	Type	Description	Default
`deposit_id`	`int`	The Zenodo record id of an existing dataset.	required
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`ZenodoFunctionResponseType`	See here.

Source code in safedata_validator/zenodo.py

def get_deposit(
    deposit_id: int, resources: Resources | None = None
) -> ZenodoFunctionResponseType:
    """Download the metadata of a Zenodo deposit.

    Args:
        deposit_id: The Zenodo record id of an existing dataset.
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Returns:
        See [here][safedata_validator.zenodo.ZenodoFunctionResponseType].
    """

    # Resolve the API root and the authentication parameters
    config = _resources_to_zenodo_api(resources)
    api_root = config["zapi"]
    token = config["ztoken"]

    response = requests.get(
        f"{api_root}/deposit/depositions/{deposit_id}", params=token, json={}
    )

    # Only a 200 response carries the deposit metadata
    if response.status_code != 200:
        return {}, _zenodo_error_message(response)

    return response.json(), None

`upload_metadata(metadata, zenodo, resources=None)`

Upload dataset metadata.

Takes a dictionary of dataset metadata, converts it to a JSON payload of Zenodo metadata and uploads it to a deposit.

Parameters:

Name	Type	Description	Default
`metadata`	`dict`	The metadata dictionary for a dataset	required
`zenodo`	`dict`	The zenodo metadata dictionary for a deposit	required
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`ZenodoFunctionResponseType`	See here.

Source code in safedata_validator/zenodo.py

def upload_metadata(
    metadata: dict, zenodo: dict, resources: Resources | None = None
) -> ZenodoFunctionResponseType:
    """Upload dataset metadata.

    Takes a dictionary of dataset metadata, converts it to a JSON payload of Zenodo
    metadata and uploads it to a deposit.

    Args:
        metadata: The metadata dictionary for a dataset
        zenodo: The zenodo metadata dictionary for a deposit
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Returns:
        See [here][safedata_validator.zenodo.ZenodoFunctionResponseType]. The
        response dictionary is always empty: only the error element reports
        whether the upload succeeded.

    Raises:
        ValueError: if the dataset access status is not one of ``embargo``,
            ``open`` or ``restricted`` (case insensitive).
    """

    # Get resource configuration
    zres = _resources_to_zenodo_api(resources)

    # basic contents
    zen_metadata: dict = {
        "upload_type": "dataset",
        "title": metadata["title"],
        "keywords": metadata["keywords"],
        "license": "cc-by",
        "communities": [{"identifier": zres["zcomm"]}],
    }

    # Add a contact name to contributors if provided in config
    if zres["zcname"] is not None:
        zen_metadata["contributors"] = [
            {
                "name": zres["zcname"],
                "type": "ContactPerson",
                "affiliation": zres["zcaffil"],
                "orcid": zres["zcorc"],
            }
        ]

    # set up the access rights
    dataset_access = metadata["access"].lower()
    if dataset_access == "embargo":
        zen_metadata["access_right"] = "embargoed"
        zen_metadata["embargo_date"] = metadata["embargo_date"]
    elif dataset_access == "open":
        zen_metadata["access_right"] = "open"
    elif dataset_access == "restricted":
        zen_metadata["access_right"] = "restricted"
        zen_metadata["access_conditions"] = metadata["access_conditions"]
    else:
        raise ValueError("Unknown access status")

    # set up the dataset creators - the format has already been checked and names
    # should be present and correct. Everything else is optional, so strip None
    # values and pass the rest to Zenodo. Author emails are never passed on.
    zen_metadata["creators"] = [
        {
            key: value
            for key, value in author.items()
            if value is not None and key != "email"
        }
        for author in metadata["authors"]
    ]

    zen_metadata["description"] = dataset_description(
        metadata, zenodo, render=True, resources=resources
    )

    # attach the metadata to the deposit resource
    mtd = requests.put(
        zenodo["links"]["self"],
        params=zres["ztoken"],
        json={"metadata": zen_metadata},
    )

    # trap errors in uploading metadata, reporting them in the same format as the
    # other functions in this module rather than just the bare HTTP reason.
    if mtd.status_code != 200:
        return {}, _zenodo_error_message(mtd)

    return {}, None

`update_published_metadata(zenodo, resources=None)`

Update published deposit metadata.

Updates the metadata on a published deposit, for example to modify the access status of a deposit. In general, metadata should be updated by releasing a new version of the dataset, and this function should only be used where it is essential that the published version be altered.

Parameters:

Name	Type	Description	Default
`zenodo`	`dict`	A Zenodo metadata dictionary, with an updated metadata section	required
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`ZenodoFunctionResponseType`	See here.

Source code in safedata_validator/zenodo.py

def update_published_metadata(
    zenodo: dict,
    resources: Resources | None = None,
) -> ZenodoFunctionResponseType:
    """Update published deposit metadata.

    Updates the metadata on a published deposit, for example to modify the access
    status of a deposit. In general, metadata should be updated by releasing a new
    version of the dataset, and this function should only be used where it is
    essential that the published version be altered.

    The record is unlocked for editing, the new metadata is uploaded and the record
    is then republished. If the upload or the republication fails, an attempt is
    made to discard the edit so that the record is not left locked in edit mode.

    Args:
        zenodo: A Zenodo metadata dictionary, with an updated metadata section
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Returns:
        See [here][safedata_validator.zenodo.ZenodoFunctionResponseType]. On
        success, the response dictionary is the JSON returned by the publish
        request. On failure, the error describes the most recent failed request.
    """

    # Get resource configuration
    zres = _resources_to_zenodo_api(resources)
    params = zres["ztoken"]

    links = zenodo["links"]

    # Serialise the payload before unlocking the record, so that a malformed
    # zenodo dictionary fails here rather than leaving the record in edit mode.
    payload = simplejson.dumps({"metadata": zenodo["metadata"]})

    # Unlock the published deposit for editing
    edt = requests.post(links["edit"], params=params)

    if edt.status_code != 201:
        return {}, _zenodo_error_message(edt)

    # If any API calls from now fail, we need to tidy up the edit
    # status of the record, or it will block subsequent attempts
    upd = requests.put(
        links["self"],
        params=params,
        headers={"Content-Type": "application/json"},
        data=payload,
    )
    failed = upd

    # Republish to save the changes
    if upd.status_code == 200:
        pub = requests.post(links["publish"], params=params)
        if pub.status_code == 202:
            return pub.json(), None
        failed = pub

    # Something failed: try to discard the edits and report the most recent
    # failure, which is the discard itself if that also fails.
    dsc = requests.post(links["discard"], params=params)
    if dsc.status_code != 201:
        failed = dsc

    return {}, _zenodo_error_message(failed)

`upload_file(metadata, filepath, zenodo_filename=None, progress_bar=True, resources=None)`

Upload a file to Zenodo.

Uploads the contents of a specified file to an unpublished Zenodo deposit, optionally using an alternative filename. If the file already exists in the deposit, it will be replaced.

Parameters:

Name	Type	Description	Default
`metadata`	`dict`	The Zenodo metadata dictionary for a deposit	required
`filepath`	`str`	The path to the file to be uploaded	required
`zenodo_filename`	`str \| None`	An optional alternative file name to be used on Zenodo	`None`
`progress_bar`	`bool`	Should the upload progress be displayed	`True`
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`ZenodoFunctionResponseType`	See here.

Source code in safedata_validator/zenodo.py

def upload_file(
    metadata: dict,
    filepath: str,
    zenodo_filename: str | None = None,
    progress_bar: bool = True,
    resources: Resources | None = None,
) -> ZenodoFunctionResponseType:
    """Upload a file to Zenodo.

    Sends the contents of a local file to the file bucket of an unpublished Zenodo
    deposit, optionally under a different name. If the file already exists in the
    deposit, it will be replaced. After the upload, the MD5 checksum reported by
    Zenodo is compared with the checksum of the local file.

    Args:
        metadata: The Zenodo metadata dictionary for a deposit
        filepath: The path to the file to be uploaded
        zenodo_filename: An optional alternative file name to be used on Zenodo
        progress_bar: Should the upload progress be displayed
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Returns:
        See [here][safedata_validator.zenodo.ZenodoFunctionResponseType].

    Raises:
        OSError: if the file path does not exist or is not a file.
    """

    # Authentication parameters from the resource configuration
    config = _resources_to_zenodo_api(resources)
    token = config["ztoken"]

    # The path must point at an existing regular file
    filepath = os.path.abspath(filepath)
    if not os.path.exists(filepath) or not os.path.isfile(filepath):
        raise OSError(f"The file path is either a directory or not found: {filepath} ")

    # Use the local file name unless an alternative has been requested
    remote_name = (
        os.path.basename(filepath) if zenodo_filename is None else zenodo_filename
    )

    # upload the file
    # - https://gist.github.com/tyhoff/b757e6af83c1fd2b7b83057adf02c139
    n_bytes = os.stat(filepath).st_size
    target = f"{metadata['links']['bucket']}/{remote_name}"

    with open(filepath, "rb") as handle:
        if not progress_bar:
            response = requests.put(target, data=handle, params=token)
        else:
            with tqdm(
                total=n_bytes, unit="B", unit_scale=True, unit_divisor=1024
            ) as monitor:
                # Wrap the file so that each read updates the progress bar
                response = requests.put(
                    target,
                    data=CallbackIOWrapper(monitor.update, handle, "read"),
                    params=token,
                )

    # trap errors in uploading file - anything other than 201 is a failure
    if response.status_code != 201:
        return {}, _zenodo_error_message(response)

    # TODO - could this be inside with above? - both are looping over the file contents
    # https://medium.com/codex/chunked-uploads-with-binary-files-in-python-f0c48e373a91
    local_checksum = f"md5:{_compute_md5(filepath)}"
    uploaded = response.json()

    if uploaded["checksum"] == local_checksum:
        return uploaded, None

    return {}, "Mismatch in local and uploaded MD5 hashes"

`discard_deposit(metadata, resources=None)`

Discard a deposit.

Deposits can be discarded - the associated files and metadata will be deleted and the Zenodo ID no longer exists. Once deposits are published to records, they cannot be deleted via the API - contact the Zenodo team for help.

Parameters:

Name	Type	Description	Default
`metadata`	`dict`	The Zenodo metadata dictionary for a deposit	required
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`ZenodoFunctionResponseType`	See here.

Source code in safedata_validator/zenodo.py

def discard_deposit(
    metadata: dict, resources: Resources | None = None
) -> ZenodoFunctionResponseType:
    """Discard a deposit.

    Deposits can be discarded - the associated files and metadata will be deleted and
    the Zenodo ID no longer exists. Once deposits are published to records, they cannot
    be deleted via the API - contact the Zenodo team for help.

    Args:
        metadata: The Zenodo metadata dictionary for a deposit
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Returns:
        See [here][safedata_validator.zenodo.ZenodoFunctionResponseType].
    """

    # Authentication parameters from the resource configuration
    config = _resources_to_zenodo_api(resources)
    token = config["ztoken"]

    response = requests.delete(metadata["links"]["self"], params=token)

    # A successful deletion is signalled by a 204 response
    if response.status_code != 204:
        return {}, _zenodo_error_message(response)

    return {"result": "success"}, None

`publish_deposit(zenodo, resources=None)`

Publish a created deposit.

Parameters:

Name	Type	Description	Default
`zenodo`	`dict`	The dataset metadata dictionary for a deposit	required
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`ZenodoFunctionResponseType`	See here.

Source code in safedata_validator/zenodo.py

def publish_deposit(
    zenodo: dict, resources: Resources | None = None
) -> ZenodoFunctionResponseType:
    """Publish a created deposit.

    Args:
        zenodo: The dataset metadata dictionary for a deposit
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Returns:
        See [here][safedata_validator.zenodo.ZenodoFunctionResponseType]. On
        success, the response dictionary is the JSON returned by the publish
        request.
    """

    # Get resource configuration
    zres = _resources_to_zenodo_api(resources)
    params = zres["ztoken"]

    # publish
    pub = requests.post(zenodo["links"]["publish"], params=params)

    # trap errors in publishing, reporting them in the same format as the other
    # functions in this module, otherwise return the publication metadata
    if pub.status_code != 202:
        return {}, _zenodo_error_message(pub)

    return pub.json(), None

`delete_file(metadata, filename, resources=None)`

Delete an uploaded file from an unpublished Zenodo deposit.

Parameters:

Name	Type	Description	Default
`metadata`	`dict`	The Zenodo metadata dictionary for a deposit	required
`filename`	`str`	The file to delete from the deposit	required
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`ZenodoFunctionResponseType`	See here.

Source code in safedata_validator/zenodo.py

def delete_file(
    metadata: dict, filename: str, resources: Resources | None = None
) -> ZenodoFunctionResponseType:
    """Delete an uploaded file from an unpublished Zenodo deposit.

    The current file listing is requested from Zenodo first and the file is then
    removed using the link given in that listing.

    Args:
        metadata: The Zenodo metadata dictionary for a deposit
        filename: The file to delete from the deposit
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Returns:
        See [here][safedata_validator.zenodo.ZenodoFunctionResponseType].
    """

    # Authentication parameters from the resource configuration
    config = _resources_to_zenodo_api(resources)
    token = config["ztoken"]

    # Ask for a fresh file listing, as the listing in metadata might be outdated
    listing = requests.get(metadata["links"]["files"], params=token)

    if listing.status_code != 200:
        # failed to get the files
        return {}, _zenodo_error_message(listing)

    # Map each file name in the deposit onto its own API link
    file_links = {}
    for entry in listing.json():
        file_links[entry["filename"]] = entry["links"]["self"]

    if filename not in file_links:
        return {}, f"{filename} is not a file in the deposit"

    # Call delete on the link for the requested file
    removal = requests.delete(file_links[filename], params=token)

    if removal.status_code == 204:
        return {"result": "success"}, None

    return {}, _zenodo_error_message(removal)

`dataset_description(dataset_metadata, zenodo_metadata, render=True, extra=None, resources=None)`

Create an HTML dataset description.

This function turns a dataset metadata JSON into html for inclusion in published datasets. This content is used to populate the dataset description section in the Zenodo metadata. Zenodo has a limited set of permitted HTML tags, so this is quite simple HTML.

The available tags are: a, p, br, blockquote, strong, b, u, i, em, ul, ol, li, sub, sup, div, strike. Note that <a> is currently only available on Zenodo when descriptions are uploaded programmatically as a bug in their web interface strips links.

The description can be modified for specific uses by including HTML via the extra argument. This content is inserted below the dataset description.

Parameters:

Name	Type	Description	Default
`dataset_metadata`	`dict`	The dataset metadata	required
`zenodo_metadata`	`dict`	The Zenodo deposit metadata	required
`render`	`bool`	Should the html be returned as text or as the underlying dominate.tags.div object.	`True`
`extra`	`str \| None`	Additional HTML content to include in the description.	`None`
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`

Returns:

Type	Description
`div \| str`	Either a string of rendered HTML or a dominate.tags.div object.

Source code in safedata_validator/zenodo.py

def dataset_description(
    dataset_metadata: dict,
    zenodo_metadata: dict,
    render: bool = True,
    extra: str | None = None,
    resources: Resources | None = None,
) -> tags.div | str:
    """Create an HTML dataset description.

    This function turns a dataset metadata JSON into html for inclusion in
    published datasets. This content is used to populate the dataset description
    section in the Zenodo metadata. Zenodo has a limited set of permitted HTML
    tags, so this is quite simple HTML.

    The available tags are: a, p, br, blockquote, strong, b, u, i, em, ul, ol,
    li, sub, sup, div, strike. Note that `<a>` is currently only available on
    Zenodo when descriptions are uploaded programmatically as a bug in their
    web interface strips links.

    The description can be modified for specific uses by including HTML via the
    extra argument. This content is inserted below the dataset description.

    Args:
        dataset_metadata: The dataset metadata
        zenodo_metadata: The Zenodo deposit metadata. This is not currently used
            when building the description.
        render: Should the html be returned as text or as the underlying
            dominate.tags.div object.
        extra: Additional HTML content to include in the description.
        resources: The safedata_validator resource configuration to be used. This
            is not currently used when building the description.

    Returns:
        Either a string of rendered HTML or a dominate.tags.div object.
    """

    # PROJECT Title and authors are added by Zenodo from zenodo metadata
    # TODO - option to include here?

    desc = tags.div()

    # Dataset summary - plain strings are escaped when added to a tag, so line
    # breaks in the description have to be added as explicit <br> tags rather
    # than being embedded as markup in the text.
    desc += tags.b("Description: ")
    summary = tags.p()
    for line_idx, line in enumerate(dataset_metadata["description"].split("\n")):
        if line_idx > 0:
            summary += tags.br()
        summary += line
    desc += summary

    # Extra - inserted as is, without escaping
    if extra is not None:
        desc += raw(extra)

    # Funding information
    if dataset_metadata["funders"]:
        funder_info = []

        for fnd in dataset_metadata["funders"]:
            funder_details = [fnd["body"], "(", fnd["type"]]

            if fnd["ref"]:
                funder_details.append(str(fnd["ref"]))
            if fnd["url"]:
                funder_details.append(tags.a(fnd["url"], _href=fnd["url"]))

            funder_details.append(")")
            funder_info.append(tags.li(funder_details))

        desc += [
            tags.p(
                tags.b("Funding: "),
                "These data were collected as part of research funded by: ",
                tags.ul(funder_info),
            ),
            tags.p(
                "This dataset is released under the CC-BY 4.0 licence, requiring that "
                "you cite the dataset in any outputs, but has the additional condition "
                "that you acknowledge the contribution of these funders in any outputs."
            ),
        ]

    # Permits
    if dataset_metadata["permits"]:
        desc += tags.p(
            tags.b("Permits: "),
            "These data were collected under permit from the following authorities:",
            tags.ul(
                [
                    tags.li(
                        f"{pmt['authority']} ({pmt['type']} licence {pmt['number']})"
                    )
                    for pmt in dataset_metadata["permits"]
                ]
            ),
        )

    # Present a description of the file or files including 'external' files
    # (data files loaded directly to Zenodo).
    ds_files = [dataset_metadata["filename"]]
    n_ds_files = 1
    ex_files = []

    if dataset_metadata["external_files"]:
        ex_files = dataset_metadata["external_files"]
        ds_files += [f["file"] for f in ex_files]
        n_ds_files += len(ex_files)

    desc += tags.p(
        tags.b("Files: "),
        f"This dataset consists of {n_ds_files} files: ",
        ", ".join(ds_files),
    )

    # Group the sheets into a dictionary keyed by their 'external' source file.
    # Sheets in the submitted workbook have no external file and are collected
    # under the key False; get() is used for older data where external was not
    # present. The groups are built directly, keeping the original sheet order
    # within each group: this avoids sorting the metadata list in place and
    # avoids comparing False with file names, which cannot be ordered.
    tables_by_source: dict = {}
    for sheet in dataset_metadata["dataworksheets"]:
        source = sheet.get("external") or False
        tables_by_source.setdefault(source, []).append(sheet)

    # We've now got a set of files (worksheet + externals) and a dictionary of table
    # descriptions that might have an entry for each file.

    # Report the worksheet first
    desc += tags.p(tags.b(dataset_metadata["filename"]))

    # Report internal tables
    if False in tables_by_source:
        int_tabs = tables_by_source[False]
        desc += tags.p(
            f"This file contains dataset metadata and {len(int_tabs)} data tables:"
        )
        desc += tags.ol([tags.li(table_description(tab)) for tab in int_tabs])
    else:
        # No internal tables at all.
        desc += tags.p("This file only contains metadata for the files below")

    # Report on the other files
    for exf in ex_files:
        desc += tags.p(
            tags.b(exf["file"]), tags.p(f"Description: {exf['description']}")
        )

        if exf["file"] in tables_by_source:
            # Report table description
            ext_tabs = tables_by_source[exf["file"]]
            desc += tags.p(f"This file contains {len(ext_tabs)} data tables:")
            desc += tags.ol([tags.li(table_description(tab)) for tab in ext_tabs])

    # Add extents if populated - only the first ten characters of each temporal
    # extent value are reported.
    if dataset_metadata["temporal_extent"] is not None:
        desc += tags.p(
            tags.b("Date range: "),
            "{0[0]} to {0[1]}".format(
                [x[:10] for x in dataset_metadata["temporal_extent"]]
            ),
        )
    if dataset_metadata["latitudinal_extent"] is not None:
        desc += tags.p(
            tags.b("Latitudinal extent: "),
            "{0[0]:.4f} to {0[1]:.4f}".format(dataset_metadata["latitudinal_extent"]),
        )
    if dataset_metadata["longitudinal_extent"] is not None:
        desc += tags.p(
            tags.b("Longitudinal extent: "),
            "{0[0]:.4f} to {0[1]:.4f}".format(dataset_metadata["longitudinal_extent"]),
        )

    # Find taxa data from each database (if they exist)
    gbif_taxon_index = dataset_metadata.get("gbif_taxa")
    ncbi_taxon_index = dataset_metadata.get("ncbi_taxa")

    # The general taxonomic statement is only needed if either index is populated
    if gbif_taxon_index or ncbi_taxon_index:
        desc += tags.p(
            tags.b("Taxonomic coverage: "),
            tags.br(),
            "This dataset contains data associated with taxa and these have been "
            "validated against appropriate taxonomic authority databases.",
        )

    if gbif_taxon_index:
        desc += tags.p(
            tags.u("GBIF taxa details: "),
            tags.br(),
            tags.br(),
            "The following taxa were validated against the GBIF backbone dataset."
            " If a dataset uses a synonym, the accepted usage is shown followed by the "
            "dataset usage in brackets. Taxa that cannot be validated, including new "
            "species and other unknown taxa, morphospecies, functional groups and "
            "taxonomic levels not used in the GBIF backbone are shown in square "
            "brackets.",
            taxon_index_to_text(gbif_taxon_index, True, auth="GBIF"),
        )

    if ncbi_taxon_index:
        desc += tags.p(
            tags.u("NCBI taxa details: "),
            tags.br(),
            tags.br(),
            "The following taxa were validated against the NCBI taxonomy dataset."
            " If a dataset uses a synonym, the accepted usage is shown followed by the "
            "dataset usage in brackets. Taxa that cannot be validated, e.g. new or "
            "unknown species are shown in square brackets. Non-backbone taxonomic "
            "ranks (e.g. strains or subphyla) can be validated using the NCBI "
            "database. However, they will only be shown if the user explicitly "
            "provided a non-backbone taxon. When they are shown they will be "
            "accompanied by a message stating their rank.",
            taxon_index_to_text(ncbi_taxon_index, True, auth="NCBI"),
        )

    if render:
        return desc.render()

    return desc

`table_description(tab)`

Convert a dict containing table contents into an HTML table.

Function to return a description for an individual source file in a dataset. Typically datasets only have a single source file - the Excel workbook that also contains the metadata - but they may also report on external files loaded directly to Zenodo, which use the same mechanism.

Parameters:

Name	Type	Description	Default
`tab`	`dict`	A dict describing a data table	required

Returns:

Type	Description
`div`	A `dominate.tags.div` instance containing an HTML description of the table

Source code in safedata_validator/zenodo.py

def table_description(tab: dict) -> tags.div:
    """Build an HTML description of a single data table.

    The description reports the table title and worksheet name, its description,
    the number of fields and data rows, and then lists the name, description and
    type of each field. The same format is used for tables in the submitted
    workbook and for tables in external files loaded directly to Zenodo.

    Args:
        tab: A dict describing a data table

    Returns:
        A `dominate.tags.div` instance containing an HTML description of the table
    """

    # Heading, description and field count
    summary = tags.div()
    summary += tags.p(
        tags.b(tab["title"]), f" (described in worksheet {tab['name']})"
    )
    summary += tags.p(f"Description: {tab['description']}")
    summary += tags.p(f"Number of fields: {tab['max_col'] - 1}")

    # Row count - the explicit n_data_row key isn't available for older records,
    # where the count is taken from the row and descriptor counts instead.
    if "n_data_row" not in tab:
        row_text = f"Number of data rows: {tab['max_row'] - len(tab['descriptors'])}"
    elif tab["n_data_row"] == 0:
        row_text = "Number of data rows: Unavailable (table metadata description only)."
    else:
        row_text = f"Number of data rows: {tab['n_data_row']}"

    summary += tags.p(row_text)

    # Field listing: one bullet per field
    summary += tags.p("Fields: ")
    summary += tags.ul(
        [
            tags.li(
                tags.b(field["field_name"]),
                f": {field['description']} (Field type: {field['field_type']})",
            )
            for field in tab["fields"]
        ]
    )

    return summary

`generate_inspire_xml(dataset_metadata, zenodo_metadata, resources, lineage_statement=None)`

Convert dataset and zenodo metadata into GEMINI XML.

Produces an INSPIRE/GEMINI formatted XML record from dataset metadata, and Zenodo record metadata using a template XML file. The dataset URL defaults to the Zenodo record but can be replaced if a separate URL (such as a project specific website) is used. The Gemini XML standard requires a statement about the lineage of a dataset - this is automatically taken from the package configuration but can be overridden for individual datasets, for example to add dataset specific links, using the lineage_statement argument.

Parameters:

Name	Type	Description	Default
`dataset_metadata`	`dict`	A dictionary of the dataset metadata	required
`zenodo_metadata`	`dict`	A dictionary of the Zenodo record metadata	required
`resources`	`Resources`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	required
`lineage_statement`	`str \| None`	An optional alternative lineage statement about the data.	`None`

Returns:

Type	Description
`str`	A string containing GEMINI compliant XML.

Source code in safedata_validator/zenodo.py

def generate_inspire_xml(
    dataset_metadata: dict,
    zenodo_metadata: dict,
    resources: Resources,
    lineage_statement: str | None = None,
) -> str:
    """Convert dataset and zenodo metadata into GEMINI XML.

    Produces an INSPIRE/GEMINI formatted XML record from dataset metadata,
    and Zenodo record metadata using a template XML file. The dataset URL
    defaults to the Zenodo record but can be replaced if a separate URL (such as
    a project specific website) is used. The Gemini XML standard requires a
    statement about the lineage of a dataset - this is automatically taken from the
    package configuration but can be overridden for individual datasets, for example to
    add dataset specific links, using the `lineage_statement` argument.

    Args:
        dataset_metadata: A dictionary of the dataset metadata
        zenodo_metadata: A dictionary of the Zenodo record metadata
        resources: The safedata_validator resource configuration to be used. This is
            required: the ``xml`` and ``zenodo`` sections are read directly.
        lineage_statement: An optional alternative lineage statement about the data.

    Returns:
        A string containing GEMINI compliant XML.
    """

    template_path = il_resources.files("safedata_validator.templates").joinpath(
        "gemini_xml_template.xml"
    )

    # Get the Jinja environment and load the template
    # - mypy: importlib returns a Traversable, which is a protocol that Path complies
    #         with, but the attribute isn't being recognized
    env = Environment(
        loader=FileSystemLoader(template_path.parent),  # type: ignore [attr-defined]
        autoescape=select_autoescape(),
    )

    template = env.get_template(template_path.name)

    # Build some reused values from the metadata
    # URIs -  form the DOI URL from the prereserved DOI metadata
    doi_url = f"https://doi.org/{zenodo_metadata['metadata']['prereserve_doi']['doi']}"

    # A true "publication" date is not available until a record is published, so use the
    # creation date of the deposit as a reasonable replacement, with the caveat that you
    # should generate the XML and publish on the same day.
    pub_date = dt.fromisoformat(zenodo_metadata["created"]).date()

    # A citation string: authors are comma separated with an ampersand before the last
    # author. The string is assembled from the list rather than by text replacement, so
    # that an earlier author whose name starts with the last author's name is not
    # accidentally rewritten.
    authors = [au["name"] for au in dataset_metadata["authors"]]
    if len(authors) > 1:
        author_string = ", ".join(authors[:-1]) + " & " + authors[-1]
    else:
        author_string = ", ".join(authors)

    citation_string = (
        f"{author_string} ({pub_date.year}) "
        f"{dataset_metadata['title']} [Dataset] {doi_url}"
    )

    # Resource constraints text
    if dataset_metadata["access"] == "embargo":
        access_statement = (
            f"This data is under embargo until {dataset_metadata['embargo_date']}. "
            "After that date there are no restrictions to public access."
        )
    elif dataset_metadata["access"] == "restricted":
        access_statement = (
            "This dataset is currently not publicly available, please contact the "
            "Zenodo community owner to request access."
        )
    else:
        access_statement = "There are no restrictions to public access."

    # Get a copy of the project wide XML configuration from the resources and update it
    # with the file specific elements from the zenodo and dataset metadata
    context_dict = resources.xml.copy()

    context_dict.update(
        # Values also used on the Zenodo information or duplicated in the xml
        contactName=resources.zenodo.contact_name,
        contactOrcID=resources.zenodo.contact_orcid,
        pointofcontactName=resources.zenodo.contact_name,
        pointofcontactCountry=resources.xml.contactCountry,
        pointofcontactEmail=resources.xml.contactEmail,
        # The point of contact ORCID is the contact ORCID, not the contact name
        pointofcontactOrcID=resources.zenodo.contact_orcid,
        # Dataset specific information
        citationRSIdentifier=doi_url,
        dateStamp=pub_date.isoformat(),
        publicationDate=pub_date.isoformat(),
        fileIdentifier=str(zenodo_metadata["id"]),
        title=dataset_metadata["title"],
        authors=dataset_metadata["authors"],
        abstract=dataset_metadata["description"],
        keywords=dataset_metadata["keywords"],
        citationString=citation_string,
        embargoValue=access_statement,
        # Only the leading ten characters of the temporal extent values are used
        startDate=dataset_metadata["temporal_extent"][0][:10],
        endDate=dataset_metadata["temporal_extent"][1][:10],
        westBoundLongitude=_min_dp(dataset_metadata["longitudinal_extent"][0], 2),
        eastBoundLongitude=_min_dp(dataset_metadata["longitudinal_extent"][1], 2),
        southBoundLatitude=_min_dp(dataset_metadata["latitudinal_extent"][0], 2),
        northBoundLatitude=_min_dp(dataset_metadata["latitudinal_extent"][1], 2),
        downloadLink=doi_url,
    )

    # Override global lineage statement
    if lineage_statement is not None:
        context_dict["lineageStatement"] = lineage_statement

    return template.render(context_dict)

`download_ris_data(resources=None, ris_file=None)`

Downloads Zenodo records into a RIS format bibliography file.

This function is used to maintain a bibliography file of the records uploaded to a safedata community on Zenodo. It accesses the Zenodo community specified in the resource configuration and downloads all records. It then optionally checks the list of downloaded DOIs against the content of an existing RIS file and then downloads citations for all new DOIs from datacite.org.

Parameters:

Name	Type	Description	Default
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`
`ris_file`	`str \| None`	The path to an existing RIS format file containing previously downloaded records.	`None`

Returns:

Type	Description
`None`	This function returns nothing: any new RIS formatted citation data is written to `ris_file` when a path is provided.

Source code in safedata_validator/zenodo.py

def download_ris_data(
    resources: Resources | None = None, ris_file: str | None = None
) -> None:
    """Download Zenodo records into a RIS format bibliography file.

    This function is used to maintain a bibliography file of the records
    uploaded to a safedata community on Zenodo. It accesses the Zenodo community
    specified in the resource configuration and downloads all records. It then
    optionally checks the list of downloaded DOIs against the content of an
    existing RIS file and then downloads citations for all new DOIs from
    datacite.org.

    The function does not return the citation data: the RIS entries for new records
    are written to ``ris_file`` (appended if the file already exists) and are only
    written at all when a path is provided.

    Args:
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.
        ris_file: The path to an existing RIS format file containing previously
            downloaded records.

    Raises:
        OSError: If a request to the Zenodo records API does not return a 200 status.
    """

    if resources is None:
        resources = Resources()

    # Get the known record ids from an existing RIS file if one is provided. The
    # record id is taken as the final path element of each entry URL. A set is used
    # because the ids are only needed for membership tests.
    known_recids = set()
    new_doi = []

    if ris_file and os.path.exists(ris_file):
        with open(ris_file) as bibliography_file:
            for entry in rispy.load(bibliography_file):
                known_recids.add(int(entry["url"].split("/")[-1]))

    # Zenodo API call to return the records associated with the SAFE community
    zres = _resources_to_zenodo_api(resources)
    z_api = zres["zapi"]
    z_cname = zres["zcomm"]

    api = f"{z_api}/records/?q=communities:{z_cname}"

    # Provide feedback on DOI collection
    LOGGER.info(f"Fetching record DOIs from {api}:")
    FORMATTER.push()

    # The API is paged - it contains a set of records and a link that points
    # to the next page of records, so keep looping until there are no more next
    n_records = 0
    while True:
        # Get the data
        safe_data = requests.get(api)

        if safe_data.status_code != 200:
            # Restore the logging indentation before bailing out
            FORMATTER.pop()
            raise OSError("Cannot access Zenodo API")

        # Retrieve the record data and store the DOI for each unknown record
        safe_data_dict = safe_data.json()
        page_hits = safe_data_dict["hits"]["hits"]
        new_doi.extend(hit["doi"] for hit in page_hits if hit["id"] not in known_recids)

        # Reporting
        n_records += len(page_hits)
        LOGGER.info(f"{n_records}")

        # Update the link for the next page, unless there is no next page
        if "next" not in safe_data_dict["links"]:
            break

        api = safe_data_dict["links"]["next"]

    # Nothing to do if all records are already known. The formatter is popped here so
    # that the early return does not leave the logging indentation pushed.
    if not new_doi:
        LOGGER.info("No new DOIs found")
        FORMATTER.pop()
        return

    # Use the datacite API to retrieve the citation data associated with each DOI
    data = []

    FORMATTER.pop()
    LOGGER.info(
        f"Retrieving citation data from Datacite for {len(new_doi)} new records"
    )
    FORMATTER.push()

    for doi in new_doi:
        ris_data = requests.get(
            f"https://data.datacite.org/application/x-research-info-systems/{doi}"
        )

        if ris_data.status_code != 200:
            LOGGER.warning(f"DOI {doi} not found in datacite.org")
        else:
            # Write the response content to the data list. It comes in as byte
            # data so needs to be decoded to a string variable
            LOGGER.info(f"Retrieved citation for DOI {doi}")
            data.append(ris_data.content.decode("utf-8") + "\r\n")

    FORMATTER.pop()

    # Writing only occurs if a ris file path has actually been provided
    if ris_file:
        if os.path.exists(ris_file):
            LOGGER.info(f"Appending RIS data for {len(data)} new records to {ris_file}")
            write_mode = "a"
        else:
            LOGGER.info(f"Writing RIS data for {len(data)} records to {ris_file}")
            write_mode = "w"

        with open(ris_file, write_mode) as ris_file_out:
            ris_file_out.writelines(data)

`sync_local_dir(datadir, xlsx_only=True, replace_modified=False, resources=None)`

Synchronise a local data directory with a Zenodo community.

The safedata R package defines a directory structure used to store metadata and files downloaded from a safedata community on Zenodo and from a safedata metadata server. This tool allows a safedata developer or community maintainer to create or update such a directory with all of the resources in the Zenodo community, regardless of their public access status. This forms a backup (although Zenodo is heavily backed up) but also provides local copies of the files for testing and development of the code packages.

This function requires that the resources are configured with access tokens for Zenodo and the details of the metadata server.

Parameters:

Name	Type	Description	Default
`datadir`	`str`	The path to a local directory containing an existing safedata directory or an empty folder in which to create one.	required
`resources`	`Resources \| None`	The safedata_validator resource configuration to be used. If none is provided, the standard locations are checked.	`None`
`xlsx_only`	`bool`	Should the download ignore large non-xlsx files, defaulting to True.	`True`
`replace_modified`	`bool`	Should the synchronisation replace locally modified files with the archived version. By default, modified local files are left alone.	`False`

Source code in safedata_validator/zenodo.py

def sync_local_dir(
    datadir: str,
    xlsx_only: bool = True,
    replace_modified: bool = False,
    resources: Resources | None = None,
) -> None:
    """Synchronise a local data directory with a Zenodo community.

    The safedata R package defines a directory structure used to store metadata and
    files downloaded from a safedata community on Zenodo and from a safedata metadata
    server. This tool allows a safedata developer or community maintainer to create or
    update such a directory with _all_ of the resources in the Zenodo community,
    regardless of their public access status. This forms a backup (although Zenodo is
    heavily backed up) but also provides local copies of the files for testing and
    development of the code packages.

    This function requires that the resources are configured with access tokens for
    Zenodo and the details of the metadata server.

    Args:
        datadir: The path to a local directory containing an existing safedata
            directory or an empty folder in which to create one.
        xlsx_only: Should the download ignore large non-xlsx files, defaulting
            to True.
        replace_modified: Should the synchronisation replace locally modified files with
            the archived version. By default, modified local files are left alone.
        resources: The safedata_validator resource configuration to be used. If
            none is provided, the standard locations are checked.

    Raises:
        OSError: If ``datadir`` is not an existing directory.
        RuntimeError: If the metadata API recorded in an existing ``url.json`` file
            differs from the configured API, or if the Zenodo deposit listing request
            fails.
    """

    # Private helper functions
    def _get_file(url: str, outf: str, params: dict | None = None) -> None:
        """Stream the content at a URL into a local file."""
        # The response is used as a context manager so that the streamed connection
        # is always closed, even if writing the file fails part way through.
        with requests.get(url, params=params, stream=True) as resource:
            with open(outf, "wb") as outf_obj:
                shutil.copyfileobj(resource.raw, outf_obj)

    # Get resource configuration
    zres = _resources_to_zenodo_api(resources)
    zenodo_api = zres["zapi"]
    # Take a copy of the token parameters: a page number is added below and that
    # should not leak back into the mapping provided by the resource configuration.
    params = dict(zres["ztoken"])

    # The dir argument should be an existing path
    if not os.path.isdir(datadir):
        raise OSError(f"{datadir} is not an existing directory")

    # Get the configured metadata api
    api = zres["mdapi"]

    # Check for an existing API url file and check it is congruent with config
    url_file = os.path.join(datadir, "url.json")

    if os.path.exists(url_file):
        with open(url_file) as urlf:
            dir_api = simplejson.load(urlf)["url"][0]

        if api != dir_api:
            raise RuntimeError(
                "Configured api does not match existing api in directory"
            )
    else:
        with open(url_file, "w") as urlf:
            simplejson.dump({"url": [api]}, urlf)

    # Download index files - don't bother to check for updates, this isn't
    # a frequent thing to do
    LOGGER.info("Downloading index files")
    _get_file(f"{api}/api/index", os.path.join(datadir, "index.json"))
    _get_file(f"{api}/api/gazetteer", os.path.join(datadir, "gazetteer.geojson"))
    _get_file(
        f"{api}/api/location_aliases", os.path.join(datadir, "location_aliases.csv")
    )

    # Get the deposits associated with the account, which includes a list of download
    # links. The listing is paged, so keep requesting pages until an empty one is found.
    params["page"] = 1
    deposits = []

    LOGGER.info("Scanning Zenodo deposits")
    while True:
        this_page = requests.get(
            zenodo_api + "/deposit/depositions",
            params=params,
            json={},
            headers={"Content-Type": "application/json"},
        )

        if not this_page.ok:
            raise RuntimeError("Could not connect to Zenodo API. Invalid token?")

        # Parse the response body once per page
        page_deposits = this_page.json()
        if not page_deposits:
            break

        deposits += page_deposits
        LOGGER.info(f"Page {params['page']}")
        params["page"] += 1

    LOGGER.info(f"Processing {len(deposits)} deposits")

    # Download the files
    for dep in deposits:
        con_rec_id = str(dep["conceptrecid"])
        rec_id = str(dep["record_id"])

        if not dep["submitted"]:
            LOGGER.info(f"Unsubmitted draft {con_rec_id}/{rec_id}")
            continue

        LOGGER.info(f"Processing deposit {con_rec_id}/{rec_id}")
        FORMATTER.push()

        # Create the directory structure if needed
        rec_dir = os.path.join(datadir, con_rec_id, rec_id)
        if not os.path.exists(rec_dir):
            LOGGER.info("Creating directory")
            os.makedirs(rec_dir)
        else:
            LOGGER.info("Directory found")

        # loop over the files in the record
        for this_file in dep["files"]:
            if xlsx_only and not this_file["filename"].endswith(".xlsx"):
                LOGGER.info(f"Skipping non-excel file {this_file['filename']}")
                continue

            LOGGER.info(f"Processing {this_file['filename']}")
            FORMATTER.push()

            outf = os.path.join(rec_dir, this_file["filename"])

            if not os.path.exists(outf):
                LOGGER.info("Downloading")
                _get_file(this_file["links"]["download"], outf, params=params)
            elif _compute_md5(outf) != this_file["checksum"]:
                # A local copy exists but differs from the archived checksum
                if replace_modified:
                    LOGGER.info("Replacing locally modified file")
                    _get_file(this_file["links"]["download"], outf, params=params)
                else:
                    LOGGER.warning("Local copy modified")
            else:
                LOGGER.info("Already present")

            FORMATTER.pop()

        # Get the metadata json
        metadata = os.path.join(rec_dir, f"{rec_id}.json")
        if os.path.exists(metadata):
            LOGGER.info("JSON Metadata found")
        else:
            LOGGER.info("Downloading JSON metadata ")
            _get_file(f"{api}/api/record/{rec_id}", metadata)

        FORMATTER.pop()

`taxon_index_to_text(taxa, html=False, indent_width=4, auth='GBIF')`

Render a taxon index as text or html.

This function takes a taxon index and renders the contents into either a text or html representation of the taxonomic hierarchy used in the dataset. Taxonomic ranks are indented to render a nested hierarchy. The auth argument is used to set whether the taxa are validated using GBIF or NCBI and this only affects the formatting of the names in the representation.

Parameters:

Name	Type	Description	Default
`taxa`	`list[dict]`	A list of taxon dictionaries containing the taxa for a dataset.	required
`html`	`bool`	Render as html or text.	`False`
`indent_width`	`int`	The indentation width to use for successive taxonomic ranks.	`4`
`auth`	`str`	The taxonomic authority that the taxa are taken from.	`'GBIF'`

Returns:

Type	Description
`str \| div`	Either a HTML or text representation of the taxa tree.

Source code in safedata_validator/taxa.py

def taxon_index_to_text(
    taxa: list[dict], html: bool = False, indent_width: int = 4, auth: str = "GBIF"
) -> str | tags.div:
    """Render a taxon index as text or html.

    This function takes a taxon index and renders the contents into either a text or
    html representation of the taxonomic hierarchy used in the dataset. Taxonomic ranks
    are indented to render a nested hierarchy. The `auth` argument is used to set
    whether the taxa are validated using GBIF or NCBI and this only affects the
    formatting of the names in the representation.

    The input list is not modified: sorting and the removal of repeated entries are
    carried out on a copy. An empty taxon index gives an empty representation.

    Args:
        taxa: A list of taxon dictionaries containing the taxa for a dataset.
        html: Render as html or text.
        indent_width: The indentation width to use for successive taxonomic ranks.
        auth: The taxonomic authority that the taxa are taken from.

    Returns:
        Either a HTML or text representation of the taxa tree.

    Raises:
        ValueError: If `auth` is not one of "GBIF" or "NCBI" and there are taxa to
            format.
    """

    def _indent(n: int, use_html: bool = html):
        """Get the indentation for a taxon at depth n in the hierarchy."""
        if use_html:
            return raw("&ensp;-&ensp;" * n)

        return " " * indent_width * (n - 1)

    # The auth default is bound to the outer argument so that the requested authority
    # is actually applied when names are formatted.
    def _format_name(tx: dict, use_html: bool = html, auth: str = auth):
        """Format the name of a taxon following the conventions of the authority."""
        if auth == "GBIF":
            # format the canonical name
            if tx["taxon_rank"] in ["genus", "species", "subspecies"]:
                if use_html:
                    return tags.i(tx["taxon_name"])
                return f"_{tx['taxon_name']}_"

            if tx["taxon_rank"] in ["morphospecies", "functional group"]:
                return f"[{tx['worksheet_name']}, {tx['taxon_rank']}]"

            return tx["taxon_name"]

        if auth == "NCBI":
            # format the canonical name
            if tx["taxon_status"] == "user":
                if tx["taxon_rank"] in NCBI_BACKBONE_RANKS:
                    return f"[{tx['taxon_name']}]"
                return f"[{tx['taxon_name']}]  (non-backbone rank: {tx['taxon_rank']})"

            if tx["taxon_rank"] in ["genus", "species", "subspecies"]:
                if use_html:
                    return tags.i(tx["taxon_name"])
                return f"_{tx['taxon_name']}_"

            if tx["taxon_rank"] not in NCBI_BACKBONE_RANKS:
                return f"{tx['taxon_name']} (non-backbone rank: {tx['taxon_rank']})"

            return tx["taxon_name"]

        raise ValueError(f"Unknown auth value: {auth}")

    # Container type depends on whether or not html output is required
    output = tags.div() if html else StringIO()

    # Order by parent taxon, substituting 0 for None, with a secondary alphabetic
    # order on taxon name. A sorted copy is used so the caller's list is left alone.
    taxa = sorted(taxa, key=lambda x: (x["parent_id"] or 0, x["taxon_name"]))

    # Keys that match in unwanted repeated entries
    match_keys = (
        "taxon_id",
        "parent_id",
        "taxon_name",
        "taxon_rank",
        "taxon_status",
    )

    def _identity(tx: dict) -> tuple:
        """Get the values that identify repeated entries of the same taxon."""
        return tuple(tx[key] for key in match_keys)

    # Single pass to count the repeats of each taxon and to record the first
    # non-None worksheet name used for it.
    n_entries: dict = {}
    first_ws_name: dict = {}
    for tx in taxa:
        ident = _identity(tx)
        n_entries[ident] = n_entries.get(ident, 0) + 1
        if tx["worksheet_name"] is not None:
            first_ws_name.setdefault(ident, tx["worksheet_name"])

    def _is_surplus(tx: dict) -> bool:
        """Is this a repeated entry not using the first worksheet name of the taxon.

        Repeated taxa where no entry has a worksheet name are all retained, as there
        is no way to choose between them.
        """
        ident = _identity(tx)
        return (
            n_entries[ident] > 1
            and ident in first_ws_name
            and tx["worksheet_name"] != first_ws_name[ident]
        )

    taxa = [tx for tx in taxa if not _is_surplus(tx)]

    # group taxa by their parent id, retaining the sort order within each group
    grouped: dict = {}
    for tx in taxa:
        grouped.setdefault(tx["parent_id"], []).append(tx)

    # Start the stack with root taxa, which will have None as a parent (kingdoms for
    # GBIF and superkingdoms for NCBI). Each stack entry pairs the taxon currently
    # being rendered with the siblings that remain to be rendered after it.
    stack = []
    if taxa:
        roots = grouped[None]
        stack.append((roots[0], roots[1:]))

    while stack:
        # Handle the current top of the stack: format the canonical name
        current, siblings = stack[-1]
        depth = len(stack)
        canon_name = _format_name(current)

        # Look for a remaining sibling that shares the same non-None worksheet name.
        # The position is found in the sibling list itself so that the entry that is
        # popped is always the matching one.
        pair_idx = None
        if current["worksheet_name"] is not None:
            pair_idx = next(
                (
                    idx
                    for idx, tx in enumerate(siblings)
                    if tx["worksheet_name"] == current["worksheet_name"]
                ),
                None,
            )

        if pair_idx is not None:
            # pop out the matching entry and find which is 'accepted'
            name_pair = siblings.pop(pair_idx)
            if current["taxon_status"] == "accepted":
                as_name = _format_name(name_pair)
                as_status = name_pair["taxon_status"]
            else:
                as_name = canon_name
                as_status = current["taxon_status"]
                canon_name = _format_name(name_pair)

            if html:
                output += [
                    _indent(depth),
                    canon_name,
                    " (as ",
                    as_status,
                    ": ",
                    as_name,
                    ")",
                    tags.br(),
                ]
            else:
                output.write(
                    f"{_indent(depth)} {canon_name} (as {as_status}: {as_name})\n"
                )
        elif html:
            output += [_indent(depth), canon_name, tags.br()]
        else:
            output.write(f"{_indent(depth)} {canon_name}\n")

        # Is this taxon a parent for other taxa - if so add the first child to the top
        # of the stack, otherwise start looking for a next taxon to push onto the
        # stack. If there is none at the top, pop and look down.
        children = grouped.get(current["taxon_id"])
        if children:
            stack.append((children[0], children[1:]))
        else:
            while stack:
                _, remaining = stack.pop()
                if remaining:
                    stack.append((remaining[0], remaining[1:]))
                    break

    if html:
        return output

    return output.getvalue()

`ZenodoFunctionResponseType = tuple[dict, Optional[str]]` `module-attribute`

Function return value

The functions interacting with Zenodo all return a common format of tuple of length 2:

A dictionary containing the response content. For responses that do not generate a response content but just indicate success via HTTP status codes, an empty dictionary is returned. An empty dictionary is also returned when the function results in an error.
An error message on failure or None on success

So, for example:

({'key': 'value'}, None)
({}, 'Something went wrong')

The expected use pattern is then:

response, error = zenodo_function(args)

The zenodo submodule

create_deposit(concept_id=None, resources=None)

get_deposit(deposit_id, resources=None)

upload_metadata(metadata, zenodo, resources=None)

update_published_metadata(zenodo, resources=None)

upload_file(metadata, filepath, zenodo_filename=None, progress_bar=True, resources=None)

discard_deposit(metadata, resources=None)

publish_deposit(zenodo, resources=None)

delete_file(metadata, filename, resources=None)

dataset_description(dataset_metadata, zenodo_metadata, render=True, extra=None, resources=None)

table_description(tab)

generate_inspire_xml(dataset_metadata, zenodo_metadata, resources, lineage_statement=None)

download_ris_data(resources=None, ris_file=None)

sync_local_dir(datadir, xlsx_only=True, replace_modified=False, resources=None)

taxon_index_to_text(taxa, html=False, indent_width=4, auth='GBIF')

ZenodoFunctionResponseType = tuple[dict, Optional[str]] module-attribute

The `zenodo` submodule

`create_deposit(concept_id=None, resources=None)`

`get_deposit(deposit_id, resources=None)`

`upload_metadata(metadata, zenodo, resources=None)`

`update_published_metadata(zenodo, resources=None)`

`upload_file(metadata, filepath, zenodo_filename=None, progress_bar=True, resources=None)`

`discard_deposit(metadata, resources=None)`

`publish_deposit(zenodo, resources=None)`

`delete_file(metadata, filename, resources=None)`

`dataset_description(dataset_metadata, zenodo_metadata, render=True, extra=None, resources=None)`

`table_description(tab)`

`generate_inspire_xml(dataset_metadata, zenodo_metadata, resources, lineage_statement=None)`

`download_ris_data(resources=None, ris_file=None)`

`sync_local_dir(datadir, xlsx_only=True, replace_modified=False, resources=None)`

`taxon_index_to_text(taxa, html=False, indent_width=4, auth='GBIF')`

`ZenodoFunctionResponseType = tuple[dict, Optional[str]]` `module-attribute`