Commit 52fb952

Merge pull request #285 from Smithsonian/issue/270/setting-up-logging
Setting up logging
2 parents edaeec4 + d399358 commit 52fb952

18 files changed: +309 -30 lines


pyproject.toml

Lines changed: 1 addition & 0 deletions

@@ -37,6 +37,7 @@ layup-convert = "layup_cmdline.convert:main"
 layup-comet = "layup_cmdline.comet:main"
 layup-demo = "layup_cmdline.demo:main"
 layup-unpack = "layup_cmdline.unpack:main"
+layup-log = "layup_cmdline.log:main"
 
 [project.urls]
 "Source Code" = "https://github.com/Smithsonian/layup"

src/layup/comet.py

Lines changed: 1 addition & 1 deletion

@@ -181,4 +181,4 @@ def comet_cli(
     else:
         write_csv(comet_data, output_file)
 
-    print(f"Data has been written to {output_file}")
+    logger.info(f"Data has been written to {output_file}")

src/layup/convert.py

Lines changed: 1 addition & 1 deletion

@@ -505,4 +505,4 @@ def convert_cli(
     else:
         write_csv(converted_data, output_file)
 
-    print(f"Data has been written to {output_file}")
+    logger.info(f"Data has been written to {output_file}")

src/layup/log.py

Lines changed: 72 additions & 0 deletions

@@ -0,0 +1,72 @@
+"""This module is meant to be an example of how logging works in Layup.
+It has a corresponding layup_cmdline verb, `log`, as well."""
+
+# These imports are needed only for the demo
+import os
+import numpy as np
+from layup.utilities.data_processing_utilities import process_data, process_data_by_id
+
+# NOTE - The following is the "configuration" required to log from this module.
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def _apply_log(data, primary_id_column_name=None):
+    """Simple function that is executed in parallel across all processes."""
+    logger.debug(f"In `_apply_log` with data = {data}")
+    return data
+
+
+def log_by_chunk(samples: int, num_workers: int):
+    """This example function will generate a random dataset with length `samples`.
+    The example dataset is then passed to the multiprocessing function that will
+    distribute the data evenly across all available CPU workers."""
+
+    logger.info("In `log_by_chunk` function.")
+
+    # Create random data
+    data = np.random.rand(samples)
+
+    # Parallelize the call to `_apply_log`
+    return process_data(data, num_workers, _apply_log)
+
+
+def log_by_id(samples: int, num_workers: int):
+    """This example function will generate a random dataset with length `samples`.
+    The data will have an `id` column that will take a value between 0 and 4 and
+    a `value` column with a random value between 0 and 1. The example dataset is
+    then passed to the multiprocessing function that will group data by id and then
+    distribute the subsets of data evenly across all available CPU workers."""
+    logger.info("In `log_by_id` function.")
+
+    # Create a random recarray
+    primary_id_column_name = "id"
+    dtype = [(primary_id_column_name, "i4"), ("value", "f8")]
+    id = np.random.randint(0, 4, samples)
+    value = np.random.rand(samples)
+    data = np.rec.fromarrays([id, value], dtype=dtype)
+
+    # Parallelize the call to `_apply_log`
+    return process_data_by_id(data, num_workers, _apply_log, primary_id_column_name)
+
+
+def log_cli():
+    """This function is called by `src/layup_cmdline/log.py`. Here it demonstrates
+    the use of a few different logging levels and calls the two example functions
+    that will trigger the two modes of multiprocessing currently implemented in
+    Layup."""
+
+    logger.info(f"In `log_cli` function.")
+
+    samples = 25
+    num_workers = os.cpu_count()
+
+    result = log_by_chunk(samples, num_workers)
+    logger.debug(f"Processed data with this length: {len(result)}")
+
+    result = log_by_id(samples, num_workers)
+    logger.debug(f"Processed data with this length: {len(result)}")
+
+    # This message will be sent to the terminal, .log, and .err files
+    logger.error("I think you're great.")

src/layup/orbitfit.py

Lines changed: 1 addition & 1 deletion

@@ -879,7 +879,7 @@ def orbitfit_cli(
     else:
         write_csv(fit_orbits, output_file)
 
-    print(f"Data has been written to {output_file}")
+    logger.info(f"Data has been written to {output_file}")
 
 
 def _is_valid_data(data):

src/layup/predict.py

Lines changed: 3 additions & 1 deletion

@@ -1,3 +1,4 @@
+import logging
 import os
 from argparse import Namespace
 from pathlib import Path
@@ -21,6 +22,8 @@
 from layup.utilities.file_io import CSVDataReader
 from layup.utilities.file_io.file_output import write_csv
 
+logger = logging.getLogger(__name__)
+
 # The list of required input column names. Note: This should not include the
 # primary id column name.
 REQUIRED_INPUT_COLUMN_NAMES = [
@@ -242,4 +245,3 @@ def predict_cli(
 
     if len(predictions) > 0:
         write_csv(predictions, output_file)
-    pass

src/layup/unpack.py

Lines changed: 1 addition & 1 deletion

@@ -275,4 +275,4 @@ def unpack_cli(
     else:
         write_csv(res_unpacked, output_file)
 
-    print(f"Data has been written to {output_file}")
+    logger.info(f"Data has been written to {output_file}")

src/layup/utilities/file_access_utils.py

Lines changed: 8 additions & 2 deletions

@@ -1,8 +1,12 @@
+import logging
 import os
 import sys
 from pathlib import Path
 
 
+logger = logging.getLogger(__name__)
+
+
 def find_file_or_exit(arg_fn, argname):
     """Checks to see if a file given by a filename exists. If it doesn't,
     this fails gracefully and exits to the command line.
@@ -25,7 +29,8 @@ def find_file_or_exit(arg_fn, argname):
     if os.path.exists(arg_fn):
         return arg_fn
     else:
-        sys.exit("ERROR: filename {} supplied for {} argument does not exist.".format(arg_fn, argname))
+        logger.error(f"Filename {arg_fn} supplied for {argname} argument does not exist.")
+        sys.exit(f"ERROR: filename {arg_fn} supplied for {argname} argument does not exist.")
 
 
 def find_directory_or_exit(arg_fn, argname):
@@ -50,4 +55,5 @@ def find_directory_or_exit(arg_fn, argname):
     file_path = file_path.parent.resolve()
 
     if not file_path.is_dir():
-        sys.exit("ERROR: filepath {} supplied for {} argument does not exist.".format(arg_fn, argname))
+        logger.error(f"Filepath {arg_fn} supplied for {argname} argument does not exist.")
+        sys.exit(f"ERROR: filepath {arg_fn} supplied for {argname} argument does not exist.")

src/layup/utilities/file_io/CSVReader.py

Lines changed: 3 additions & 3 deletions

@@ -28,6 +28,8 @@
 # CAUTION! - Avoid adding a character that would exclude the column header line.
 PRE_HEADER_COMMENT_AND_EXCLUDE_STRINGS = ("#", "!")
 
+logger = logging.getLogger(__name__)
+
 
 class CSVDataReader(ObjectDataReader):
     """A class to read in object data files stored as CSV or whitespace
@@ -58,7 +60,6 @@ def __init__(self, filename, sep="csv", **kwargs):
         self.filename = filename
 
         if sep not in VALID_FILE_FORMATS:
-            logger = logging.getLogger(__name__)
             logger.error(f"ERROR: Unrecognized delimiter ({sep})")
             sys.exit(f"ERROR: Unrecognized delimiter ({sep})")
         self.sep = sep
@@ -124,7 +125,7 @@ def get_row_count(self):
 
     def _validate_header_line(self):
         """Read and validate the header line (first line of the file)"""
-        logger = logging.getLogger(__name__)
+
         with open(self.filename) as fh:
             for i, line in enumerate(fh):
                 # If the line starts with a comment character, increment the pre-header line count
@@ -163,7 +164,6 @@ def _check_header_line(self, header_line):
         header_line : str
             The proposed header line.
         """
-        logger = logging.getLogger(__name__)
 
         # This is a bit ugly, but splitting the header in this way, means that we
         # can generally define the value separators at the top of the file, _and_
src/layup/utilities/layup_logging.py

Lines changed: 132 additions & 0 deletions

@@ -0,0 +1,132 @@
+import logging
+import sys
+
+from datetime import datetime
+from pathlib import Path
+
+
+class LayupLogger:
+    """This logger configures the root-level logger for Layup to emit messages
+    to potentially three locations: 1) STDERR, 2) layup-<datetime>.log, and
+    3) layup-<datetime>.err, depending on the log level. See the `_prepare_logger`
+    method for details about which levels are sent to which handlers.
+
+    LayupLogger is generally intended to be used in one of two ways: either
+    instantiated within the `execute()` function in one of the layup_cmdline verbs,
+    or as a context manager when calling the API directly.
+
+    Example 1 - LayupLogger in a command line verb
+    (See layup_cmdline/log.py for a working example)
+    ```
+    def execute():
+        from layup.utilities.layup_logging import LayupLogger
+
+        layup_logger = LayupLogger()
+
+        # Create a child logger. NOTE that the name starts with "layup.<blah>".
+        # Failure to specify a name with that form could result in lost logs.
+        logger = layup_logger.get_logger("layup.log_cmdline")
+
+        logger.info("Sending a log message.")  # Use the logger
+    ```
+
+    Example 2 - LayupLogger as a context manager
+    This would likely be the usage within a Jupyter notebook
+    ```
+    from layup.utilities.layup_logging import LayupLogger
+
+    with LayupLogger() as layup_logger:
+        # Create a child logger. NOTE that the name starts with "layup.<blah>".
+        # Failure to specify a name with that form could result in lost logs.
+        logger = layup_logger.get_logger("layup.interactive")
+
+        logger.info("Sending a log message from a notebook.")
+    ```
+    """
+
+    def __init__(self, log_directory="."):
+        self._prepare_logger(log_directory)
+
+    def get_logger(self, name):
+        """Convenience function to return a logger under the top-level logger.
+        This is identical to calling `logging.getLogger(name)` directly.
+
+        Parameters
+        ----------
+        name : str
+            The name to use when emitting messages using this logger.
+
+        Returns
+        -------
+        Logger
+            The logger to use to emit messages.
+        """
+        return logging.getLogger(name)
+
+    def __enter__(self):
+        """Entry point for using LayupLogger as a context manager.
+
+        Returns
+        -------
+        self
+            An instance of the LayupLogger object
+        """
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Called when the context manager exits. Currently a no-op, reserved
+        for any future handler or queue-thread cleanup.
+        """
+        pass
+
+    def _prepare_logger(self, log_directory="."):
+        """Setup for the primary logger.
+
+        Parameters
+        ----------
+        log_directory : str, optional
+            The directory to place the log files, by default "."
+
+        Returns
+        -------
+        Logger
+            The top-level logger.
+        """
+
+        logger = logging.getLogger("layup")
+
+        # This logger handles all messages >= DEBUG
+        logger.setLevel(logging.DEBUG)
+
+        # The format of the log messages
+        formatter = logging.Formatter("%(asctime)s - %(name)s - %(process)d - %(levelname)s - %(message)s")
+
+        # Console handler - all messages >= INFO will be recorded to STDERR
+        console_handler = logging.StreamHandler(sys.stderr)
+        console_handler.setFormatter(formatter)
+        console_handler.setLevel(logging.INFO)
+
+        # Configure log files
+        log_location = Path(log_directory)
+        timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+        log_file_base_name = f"layup-{timestamp}"
+        log_file_info = log_location / f"{log_file_base_name}.log"
+        log_file_error = log_location / f"{log_file_base_name}.err"
+
+        # File handler that will record all messages >= DEBUG
+        file_handler_info = logging.FileHandler(log_file_info)
+        file_handler_info.setFormatter(formatter)
+        file_handler_info.setLevel(logging.DEBUG)
+
+        # File handler that will record all messages >= ERROR
+        file_handler_error = logging.FileHandler(log_file_error)
+        file_handler_error.setFormatter(formatter)
+        file_handler_error.setLevel(logging.ERROR)
+
+        # Add the handlers to the logger
+        logger.addHandler(file_handler_info)
+        logger.addHandler(file_handler_error)
+        logger.addHandler(console_handler)
+
+        # Return the top-level logger
+        return logger
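
Because every handler hangs off the logger named "layup", the module-level `logging.getLogger(__name__)` calls added throughout this commit (in comet.py, convert.py, predict.py, and so on) reach those handlers through normal propagation. A short sketch of that behavior, using made-up logger names.

```
import logging

from layup.utilities.layup_logging import LayupLogger

LayupLogger()  # attaches the console/.log/.err handlers to the "layup" logger

inside = logging.getLogger("layup.orbitfit")    # what __name__ resolves to inside the package
outside = logging.getLogger("somewhere.else")   # not under the "layup" namespace

inside.info("Propagates up to the 'layup' handlers.")  # reaches STDERR and the .log file
outside.info("No layup handler sees this record.")     # dropped unless configured separately
```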
