Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pip install dowel
## Usage
```python
import dowel
from dowel import logger, tabular
from dowel import logger

logger.add_output(dowel.StdOutput())
logger.add_output(dowel.TensorBoardOutput('tensorboard_logdir'))
Expand All @@ -26,9 +26,8 @@ for i in range(1000):
logger.push_prefix('itr {}'.format(i))
logger.log('Running training step')

tabular.record('itr', i)
tabular.record('loss', 100.0 / (2 + i))
logger.log(tabular)
logger.logkv('itr', i)
logger.logkv('loss', 100.0 / (2 + i))

logger.pop_prefix()
logger.dump_all()
Expand Down
7 changes: 3 additions & 4 deletions examples/log_progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import time

import dowel
from dowel import logger, tabular
from dowel import logger

logger.add_output(dowel.StdOutput())
logger.add_output(dowel.CsvOutput('progress.csv'))
Expand All @@ -22,9 +22,8 @@

time.sleep(0.01) # Tensorboard doesn't like output to be too fast.

tabular.record('itr', i)
tabular.record('loss', 100.0 / (2 + i))
logger.log(tabular)
logger.logkv('itr', i)
logger.logkv('loss', 100.0 / (2 + i))

logger.pop_prefix()
logger.dump_all()
Expand Down
6 changes: 2 additions & 4 deletions src/dowel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
from dowel.histogram import Histogram
from dowel.logger import Logger, LoggerWarning, LogOutput
from dowel.simple_outputs import StdOutput, TextOutput
from dowel.tabular_input import TabularInput
from dowel.tabular import Tabular
from dowel.csv_output import CsvOutput # noqa: I100
from dowel.tensor_board_output import TensorBoardOutput

logger = Logger()
tabular = TabularInput()

__all__ = [
'Histogram',
Expand All @@ -20,8 +19,7 @@
'TextOutput',
'LogOutput',
'LoggerWarning',
'TabularInput',
'Tabular',
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is now a private API, so it should not be in __all__, which is only for things people should be importing from your package.

'TensorBoardOutput',
'logger',
'tabular',
]
84 changes: 46 additions & 38 deletions src/dowel/csv_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,66 @@
import csv
import warnings

from dowel import TabularInput
import numpy as np

from dowel.simple_outputs import FileOutput
from dowel.tabular import Tabular
from dowel.utils import colorize


class CsvOutput(FileOutput):
"""CSV file output for logger.

:param file_name: The file this output should log to.
:param keys_accepted: Regex for which keys this output should accept.
"""

def __init__(self, file_name):
super().__init__(file_name)
def __init__(self, file_name, keys_accepted=r'^\S+$'):
super().__init__(file_name, keys_accepted=keys_accepted)
self._writer = None
self._fieldnames = None
self._warned_once = set()
self._disable_warnings = False
self.tabular = Tabular()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_private, please


@property
def types_accepted(self):
"""Accept TabularInput objects only."""
return (TabularInput, )

def record(self, data, prefix=''):
"""Log tabular data to CSV."""
if isinstance(data, TabularInput):
to_csv = data.as_primitive_dict

if not to_csv.keys() and not self._writer:
return

if not self._writer:
self._fieldnames = set(to_csv.keys())
self._writer = csv.DictWriter(
self._log_file,
fieldnames=self._fieldnames,
extrasaction='ignore')
self._writer.writeheader()

if to_csv.keys() != self._fieldnames:
self._warn('Inconsistent TabularInput keys detected. '
'CsvOutput keys: {}. '
'TabularInput keys: {}. '
'Did you change key sets after your first '
'logger.log(TabularInput)?'.format(
set(self._fieldnames), set(to_csv.keys())))

self._writer.writerow(to_csv)

for k in to_csv.keys():
data.mark(k)
else:
raise ValueError('Unacceptable type.')
"""Accept str and scalar objects."""
return (str, ) + np.ScalarType
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just make this a tuple rather than using addition?


def record(self, key, value, prefix=''):
"""Log data to a csv file."""
self.tabular.record(key, value)

def dump(self, step=None):
"""Flush data to log file."""
if self.tabular.empty:
return

to_csv = self.tabular.as_primitive_dict

if not to_csv.keys() and not self._writer:
return

if not self._writer:
self._fieldnames = set(to_csv.keys())
self._writer = csv.DictWriter(self._log_file,
fieldnames=self._fieldnames,
extrasaction='ignore')
self._writer.writeheader()

if to_csv.keys() != self._fieldnames:
self._warn('Inconsistent Tabular keys detected. '
'CsvOutput keys: {}. '
'Tabular keys: {}. '
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your user now has no idea what a Tabular is, so this message needs to be updated.

'Did you change key sets after your first '
'logger.log(Tabular)?'.format(set(self._fieldnames),
set(to_csv.keys())))

self._writer.writerow(to_csv)

self._log_file.flush()
self.tabular.clear()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should clear the table between calls to dump, because it allows us to provide a value even if someone doesn't update it. Basically, if KV pairs are not all updated at the same rate it's okay, and we don't need to output an error.


def _warn(self, msg):
"""Warns the user using warnings.warn.
Expand All @@ -63,8 +70,9 @@ def _warn(self, msg):
is the one printed.
"""
if not self._disable_warnings and msg not in self._warned_once:
warnings.warn(
colorize(msg, 'yellow'), CsvOutputWarning, stacklevel=3)
warnings.warn(colorize(msg, 'yellow'),
CsvOutputWarning,
stacklevel=3)
self._warned_once.add(msg)
return msg

Expand Down
114 changes: 65 additions & 49 deletions src/dowel/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,18 @@

The logger has 4 major steps:

1. Inputs, such as a simple string or something more complicated like
TabularInput, are passed to the log() method of an instantiated Logger.
1. Inputs, such as a simple string or something more complicated like
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make sure this giant comment renders nicely in Sphinx. Here's what it looks like now:
https://dowel.readthedocs.io/en/latest/_apidoc/dowel.html#module-dowel.logger

Perhaps you can actually move this content (+example) to the title page of the documentation. If you do that, it will probably render fine (markdown is supported for pages but not docstrings).

Anyway, to render the docs just do

cd docs
make html
xdg-open _build/html/index.html  # if on macOS, use open instead

a distribution, are passed to the log() or logkv() method of an
instantiated Logger.

2. The Logger class checks for any outputs that have been added to it, and
calls the record() method of any outputs that accept the type of input.
2. The Logger class checks for any outputs that have been added to it, and
calls the record() method of any outputs that accept the type of input.

3. The output (a subclass of LogOutput) receives the input via its record()
method and handles it in whatever way is expected.
3. The output (a subclass of LogOutput) receives the input via its record()
method and handles it in whatever way is expected.

4. (only in some cases) The dump method is used to dump the output to file.
It is necessary for some LogOutput subclasses, like TensorBoardOutput.
4. (only in some cases) The dump method is used to dump the output to file
and to log any key-value pairs that have been stored.


# Here's a demonstration of dowel:
Expand Down Expand Up @@ -61,8 +62,8 @@

# And another output.

from dowel import CsvOutput
logger.add_output(CsvOutput('log_folder/table.csv'))
from dowel import TensorBoardOutput
logger.add_output(TensorBoardOutput('log_folder/tensorboard'))

+---------+
+------>StdOutput|
Expand All @@ -72,13 +73,16 @@
|logger+------>TextOutput|
+------+ +----------+
|
| +---------+
+------>CsvOutput|
+---------+
| +-----------------+
+------>TensorBoardOutput|
+-----------------+

# The logger will record anything passed to logger.log to all outputs that
# accept its type.


# Now let's try logging a string again.

logger.log('test')

+---------+
Expand All @@ -89,38 +93,36 @@
|logger+---'test'--->TextOutput|
+------+ +----------+
|
| +---------+
+-----!!----->CsvOutput|
+---------+
| +-----------------+
+-----!!----->TensorBoardOutput|
+-----------------+

# !! Note that the logger knows not to send CsvOutput the string 'test'
# Similarly, more complex objects like tf.tensor won't be sent to (for
# !! Note that the logger knows not to send 'test' to TensorBoardOutput.
# Similarly, more complex objects like tf.Graph won't be sent to (for
# example) TextOutput.
# This behavior is defined in each output's types_accepted property

# Here's a more complex example.
# TabularInput, instantiated for you as the tabular, can log key/value pairs.
# We can log key-value pairs using logger.logkv

from dowel import tabular
tabular.record('key', 72)
tabular.record('foo', 'bar')
logger.log(tabular)
logger.logkv('key', 72)
logger.logkv('foo', 'bar')
logger.dump_all()

+---------+
+---tabular--->StdOutput|
| +---------+
+---------+
+------>StdOutput|
| +---------+
|
+------+ +----------+
|logger+---tabular--->TextOutput|
+------+ +----------+
+------+ +----------+
|logger+------>TextOutput|
+------+ +----------+
|
| +---------+
+---tabular--->CsvOutput|
+---------+
| +---------+
+------>CsvOutput|
+---------+

# Note that LogOutputs which consume TabularInputs must call
# TabularInput.mark() on each key they log. This helps the logger detect when
# tabular data is not logged.
# Note that the key-value pairs are saved in each output until we call
# dump_all().

# Console Output:
--- ---
Expand All @@ -133,29 +135,37 @@
"""
import abc
import contextlib
import re
import warnings

from dowel.utils import colorize


class LogOutput(abc.ABC):
"""Abstract class for Logger Outputs."""
"""Abstract class for Logger Outputs.

@property
def types_accepted(self):
"""Pass these types to this logger output.
:param keys_accepted: Regex for which keys this output should accept.
"""

The types in this tuple will be accepted by this output.
def __init__(self, keys_accepted=r'^$'):
self._keys_accepted = keys_accepted

:return: A tuple containing all valid input types.
"""
@property
def types_accepted(self):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should probably be an abc.abstractmethod rather than returning anything. that way, if a user tries to write a new LogOutput and forgets to implement this method, they will get an error message. Right now, their LogOutput will just reject all logs silently because the base class says so.

"""Returns a tuple containing all valid input value types."""
return ()

@property
def keys_accepted(self):
"""Returns a regex string matching keys to be sent to this output."""
return self._keys_accepted

@abc.abstractmethod
def record(self, data, prefix=''):
def record(self, key, value, prefix=''):
"""Pass logger data to this output.

:param data: The data to be logged by the output.
:param key: The key to be logged by the output.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please use google style docstrings

:param value: The value to be logged by the output.
:param prefix: A prefix placed before a log entry in text outputs.
"""
pass
Expand Down Expand Up @@ -186,7 +196,7 @@ def __init__(self):
self._warned_once = set()
self._disable_warnings = False

def log(self, data):
def logkv(self, key, value):
"""Magic method that takes in all different types of input.

This method is the main API for the logger. Any data to be logged goes
Expand All @@ -195,24 +205,30 @@ def log(self, data):
Any data sent to this method is sent to all outputs that accept its
type (defined in the types_accepted property).

:param data: Data to be logged. This can be any type specified in the
:param key: Key to be logged. This must be a string.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please update these docstrings to use Google style

:param value: Value to be logged. This can be any type specified in the
types_accepted property of any of the logger outputs.
"""
if not self._outputs:
self._warn('No outputs have been added to the logger.')

at_least_one_logged = False
for output in self._outputs:
if isinstance(data, output.types_accepted):
output.record(data, prefix=self._prefix_str)
if isinstance(value, output.types_accepted) and re.match(
output.keys_accepted, key):
output.record(key, value, prefix=self._prefix_str)
at_least_one_logged = True

if not at_least_one_logged:
warning = (
'Log data of type {} was not accepted by any output'.format(
type(data).__name__))
type(value).__name__))
self._warn(warning)

def log(self, value):
"""Log just a value without a key."""
self.logkv('', value)

def add_output(self, output):
"""Add a new output to the logger.

Expand Down
Loading