1414logger = structlog .get_logger ()
1515
1616
def validate_path(ctx, param, value):
    """Validate that the submitted path ends with a trailing slash.

    Has the ``(ctx, param, value)`` signature of a click option callback.

    Args:
        ctx: Unused by this function.
        param: Unused by this function.
        value: The submitted path string.

    Returns:
        ``value`` unchanged when it ends with ``/``.

    Raises:
        click.BadParameter: If ``value`` is empty, missing, or does not end
            with ``/``.
    """
    # A truthiness check plus endswith() replaces value[-1]: indexing an
    # empty string raised IndexError (and None raised TypeError) instead of
    # the usage error below.
    if value and value.endswith('/'):
        return value
    raise click.BadParameter('Include / at the end of the path.')
23+
24+
1725@click .group (chain = True )
1826@click .option ('--url' , envvar = 'DSPACE_URL' , required = True ,)
1927@click .option ('-e' , '--email' , envvar = 'TEST_EMAIL' , required = True ,
@@ -51,11 +59,13 @@ def main(ctx, url, email, password):
5159
5260@main .command ()
5361@click .option ('-m' , '--metadata-csv' , required = True ,
54- type = click .Path (exists = True ),
55- help = 'The full path to the CSV file of metadata for the items.' )
56- @click .option ('--field-map' , required = True , type = click .Path (exists = True ),
57- help = 'Path to JSON field mapping file' )
58- @click .option ('-d' , '--directory' , required = True ,
62+ type = click .Path (exists = True , file_okay = True , dir_okay = False ),
63+ help = 'The path to the CSV file of metadata for the items.' )
64+ @click .option ('--field-map' , required = True ,
65+ type = click .Path (exists = True , file_okay = True , dir_okay = False ),
66+ help = 'The path to JSON field mapping file.' )
67+ @click .option ('-d' , '--content-directory' , required = True ,
68+ type = click .Path (exists = True , dir_okay = True , file_okay = False ),
5969 help = 'The full path to the content, either a directory of files '
6070 'or a URL for the storage location.' )
6171@click .option ('-t' , '--file-type' ,
@@ -67,11 +77,11 @@ def main(ctx, url, email, password):
6777 help = 'The handle of the collection to which items are being '
6878 'added.' , default = None )
6979@click .pass_context
70- def additems (ctx , metadata_csv , field_map , directory , file_type , ingest_report ,
71- collection_handle ):
80+ def additems (ctx , metadata_csv , field_map , content_directory , file_type ,
81+ ingest_report , collection_handle ):
7282 """Adds items to a specified collection from a metadata CSV, a field
7383 mapping file, and a directory of files. May be run in conjunction with the
74- newcollection CLI commands ."""
84+ newcollection CLI command ."""
7585 client = ctx .obj ['client' ]
7686 start_time = ctx .obj ['start_time' ]
7787 if 'collection_uuid' not in ctx .obj and collection_handle is None :
@@ -87,7 +97,7 @@ def additems(ctx, metadata_csv, field_map, directory, file_type, ingest_report,
8797 mapping = json .load (jsonfile )
8898 collection = Collection .from_csv (metadata , mapping )
8999 for item in collection .items :
90- item .bitstreams_from_directory (directory , file_type )
100+ item .bitstreams_from_directory (content_directory , file_type )
91101 collection .uuid = collection_uuid
92102 items = collection .post_items (client )
93103 if ingest_report :
@@ -114,20 +124,38 @@ def newcollection(ctx, community_handle, collection_name):
114124 ctx .obj ['collection_uuid' ] = collection_uuid
115125
116126
117- # @main.command()
118- # @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
119- # help='The path of the CSV file of metadata.')
120- # @click.option('-o', '--output_path', prompt='Enter the output path',
121- # default='', help='The path of the output files, include '
122- # '/ at the end of the path')
123- # @click.option('-f', '--file_path', prompt='Enter the path',
124- # help='The path of the content, a URL or local drive path.'
125- # 'Include / at the end of a local drive path.')
126- # @click.option('-t', '--file_type', prompt='Enter the file type',
127- # help='The file type to be uploaded.')
128- # def reconcile(metadata_csv, file_path, file_type, output_path):
129- # workflows.reconcile_files_and_metadata(metadata_csv, output_path,
130- # file_path, file_type)
@main.command()
@click.option('-m', '--metadata-csv', required=True,
              type=click.Path(exists=True, file_okay=True, dir_okay=False),
              help='The path of the CSV file of metadata.')
@click.option('-o', '--output-directory',
              type=click.Path(exists=True, file_okay=False),
              default=f'{os.getcwd()}/', callback=validate_path,
              help='The path of the output files, include / at the end of the '
                   'path.')
@click.option('-d', '--content-directory', required=True,
              help='The full path to the content, either a directory of files '
                   'or a URL for the storage location.')
@click.option('-t', '--file-type',
              help='The file type to be uploaded, if limited to one file '
                   'type.', default='*')
def reconcile(metadata_csv, output_directory, content_directory, file_type):
    """Runs a reconciliation of the specified files and metadata that produces
    reports of files with no metadata, metadata with no files, metadata
    matched to files, and an updated version of the metadata CSV with only
    the records that have matching files."""
    # NOTE: the docstring above is kept verbatim because click uses it as the
    # command's help text.

    # Collect the identifiers found on each side, then match them both ways.
    found_files = helpers.create_file_list(content_directory, file_type)
    listed_ids = helpers.create_metadata_id_list(metadata_csv)
    matched_metadata = helpers.match_metadata_to_files(found_files,
                                                       listed_ids)
    matched_files = helpers.match_files_to_metadata(found_files, listed_ids)

    # Whatever was not matched on a side is reported as an orphan.
    orphan_metadata = set(listed_ids).difference(matched_metadata)
    orphan_files = set(found_files).difference(matched_files)

    # Report names are appended directly to output_directory, which is why
    # the option requires a trailing '/'.
    reports = (
        (orphan_files, 'no_metadata'),
        (orphan_metadata, 'no_files'),
        (matched_metadata, 'metadata_matches'),
    )
    for rows, stem in reports:
        helpers.create_csv_from_list(rows, f'{output_directory}{stem}')

    helpers.update_metadata_csv(metadata_csv, output_directory,
                                matched_metadata)
131159
132160
133161if __name__ == '__main__' :
0 commit comments