Source code for quilt3distribute.bin.quilt3_distribute_dataset

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import logging
import sys
import traceback
from pathlib import Path

from quilt3distribute import Dataset

###############################################################################

# Configure the root logger once for the whole script: INFO and above, with
# each record prefixed by its level, originating line number and timestamp.
logging.basicConfig(
    format='[%(levelname)4s:%(lineno)4s %(asctime)s] %(message)s',
    level=logging.INFO,
)
log = logging.getLogger()

###############################################################################


class Args(argparse.Namespace):
    """Command-line arguments for ``quilt3_distribute_dataset``.

    Instantiating the class parses the process command line immediately and
    stores each parsed value as an attribute on the instance itself.

    Positional arguments:
        dataset_path (Path): csv dataset to distribute.
        dataset_name (str): name for the dataset.
        package_owner (str): owner attached to the dataset name.
        readme_path (Path): markdown readme for the dataset.
        push_uri (str): S3 bucket URI to push to.

    Optional arguments (all default to ``None`` unless noted):
        message: message attached to the released dataset version.
        usage_doc_or_link: filepath or URL with usage details.
        license_doc_or_link: filepath or URL with license details.
        metadata_columns: list of column names used for metadata attachment.
        path_columns: list of column names that contain filepaths.
        debug: hidden flag, ``False`` unless ``--debug`` is given.
    """

    def __init__(self):
        super().__init__()
        self.__parse()

    def __parse(self):
        """Build the argument parser and parse into this namespace."""
        p = argparse.ArgumentParser(
            prog='quilt3_distribute_dataset',
            description=('A wrapper around quilt3 package distribution to make it even easier. '
                         'As default, this will attempt to do basic data cleaning tasks '
                         'and additionally, attempts to guess which file should be sent '
                         'out with the package. Lastly, will parse the provided README for '
                         'any referenced files and will package them up as well so that '
                         'the README will be properly rendered on the quiltdata catalog.'))

        # Required positional arguments
        p.add_argument('dataset_path', action='store', type=Path,
                       help='Filepath to a csv dataset to distribute.')
        p.add_argument('dataset_name', action='store',
                       help=('A name for the dataset. May only include lowercase alphanumeric, '
                             'underscore, and hyphen characters.'))
        p.add_argument('package_owner', action='store',
                       help='The name of the dataset owner. This will be attached to the name. Example: "aics"')
        p.add_argument('readme_path', action='store', type=Path,
                       help='Filepath to a markdown readme for the dataset.')
        p.add_argument('push_uri', action='store',
                       help='The S3 bucket URI you want to push to. Talk to your Quilt admin for details and support.')

        # Optional arguments
        p.add_argument('-m', '--message', action='store', dest='message', default=None,
                       help='A message to attach to the built/ released dataset version.')
        p.add_argument('-u', '--usage-doc', action='store', dest='usage_doc_or_link', default=None,
                       help=('Filepath or URL for dataset usage details/ instructions. '
                             'If your README already includes usage details, this can be ignored.'))
        p.add_argument('-l', '--license', action='store', dest='license_doc_or_link', default=None,
                       help=('Filepath or URL for dataset license details. '
                             'If your README already includes license details, this can be ignored.'))
        # The usage examples below name the flags that are actually registered
        # (-c and -f) so that copying them from --help works.
        p.add_argument('-c', '--metadata-columns', action='store', nargs='+', dest='metadata_columns', default=None,
                       help=('List of columns to use for metadata attachment. '
                             'The values in each row for the columns provided will be attached as metadata, '
                             'meaning, users will be able to search and filter the files sent using that metadata. '
                             'Example: "quilt3_distribute_dataset ... -c drug_name structure_name ..."'))
        p.add_argument('-f', '--fc', '--file-columns', action='store', nargs='+', dest='path_columns', default=None,
                       help=('List of columns that contains filepaths to be sent out in the package. '
                             'Example: '
                             '"quilt3_distribute_dataset ... -f fov_read_path structure_segmentation_path ..."'))

        # Hidden from --help; enables traceback logging in main()
        p.add_argument('--debug', action='store_true', dest='debug', help=argparse.SUPPRESS)

        # Parse straight into this instance so values become attributes of self
        p.parse_args(namespace=self)

###############################################################################
def main():
    """Parse the command line, build the dataset and distribute it.

    Creates a ``Dataset`` from the positional arguments, applies whichever
    optional settings were supplied (usage doc, license, metadata columns,
    path columns), then calls ``distribute`` and logs the package name and
    top hash.

    Any ``Exception`` raised along the way is logged and the process exits
    with status 1. The full traceback is logged only when ``--debug`` was
    passed.
    """
    # Track the debug flag separately: if Args() itself raises, ``args`` is
    # never bound and reading ``args.debug`` in the handler would raise a
    # second error that hides the original one.
    debug = False
    try:
        args = Args()
        debug = args.debug

        # Create dataset
        ds = Dataset(
            dataset=args.dataset_path,
            name=args.dataset_name,
            package_owner=args.package_owner,
            readme_path=args.readme_path
        )

        # Handle optional provided
        if args.usage_doc_or_link:
            ds.add_usage_doc(args.usage_doc_or_link)
        if args.license_doc_or_link:
            ds.add_license(args.license_doc_or_link)
        if args.metadata_columns:
            ds.set_metadata_columns(args.metadata_columns)
        if args.path_columns:
            ds.set_path_columns(args.path_columns)

        # Distribute
        pkg = ds.distribute(push_uri=args.push_uri, message=args.message)
        log.info(
            f"Completed distribution. "
            f"Package [name: '{args.package_owner}/{args.dataset_name}', version: {pkg.top_hash}]"
        )

    except Exception as e:
        log.error("=============================================")
        if debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)

###############################################################################
# Allow caller to directly run this module (usually in development scenarios)

if __name__ == '__main__':
    main()