Commit 9eca6ab3 authored by Ming Ding

add tensorboard log of customized logs

parent 98a5247a
-# 2021.10.29
+# 2021.10.29 v0.1
 1. change `mixins` from `ModuleList` to `ModuleDict`
 2. return tokens and mems in `fill_sequence`, and mems becomes a tensor.
 3. `CachedAutoRegressiveMixin`
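With `mixins` stored as a `ModuleDict`, a mixin such as `CachedAutoRegressiveMixin` is registered and fetched by name rather than by list position. A minimal sketch of the access-pattern change, using an illustrative key and a stand-in module rather than the library's real registry:

```python
import torch

# Stand-in for a real mixin such as CachedAutoRegressiveMixin;
# the key name below is illustrative, not the library's actual one.
mixins = torch.nn.ModuleDict({
    'cached-autoregressive': torch.nn.Identity(),
})

# ModuleDict: retrieve by name; submodules still register normally,
# so .parameters(), .to(device), etc. see them.
mixin = mixins['cached-autoregressive']

# The old ModuleList version forced positional access:
#   mixins = torch.nn.ModuleList([CachedAutoRegressiveMixin()])
#   mixin = mixins[0]
```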
@@ -28,5 +28,8 @@ for the older framework, you also need:
 old['module']['transformer.word_embeddings.weight'] = old['module']['word_embeddings.weight']
 del old['module']['word_embeddings.weight']
 ```
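Run end to end, that migration snippet might look like the following sketch; the paths are placeholders, and it assumes the checkpoint is a dict with a `'module'` key, as the snippet itself implies:

```python
import torch

# Hypothetical paths; substitute your real checkpoint files.
old = torch.load('old_checkpoint.pt', map_location='cpu')

# Rename the embedding key to the layout the newer framework expects.
old['module']['transformer.word_embeddings.weight'] = old['module']['word_embeddings.weight']
del old['module']['word_embeddings.weight']

torch.save(old, 'converted_checkpoint.pt')
```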
+# 2021.11.5 v0.1.2
+1. Add `generation.autoregressive_sampling.evaluate_perplexity`.
+2. Fix RuntimeError when skipping NaN loss.
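Item 2 refers to the usual guard that drops an optimizer step when the loss is not finite; a generic sketch of that pattern (not the library's exact fix):

```python
import torch

def safe_step(model, optimizer, batch):
    """Skip the update when the loss is NaN instead of corrupting the weights."""
    loss = model(batch)  # assumes the model returns a scalar loss
    if torch.isnan(loss).any():
        optimizer.zero_grad()  # discard stale gradients and skip this step
        return None
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()
```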
@@ -141,8 +141,9 @@ class SelfAttention(torch.nn.Module):
 class MLP(torch.nn.Module):
     def __init__(self, hidden_size, output_dropout_prob, init_method,
-                 output_layer_init_method=None, hooks={}):
+                 output_layer_init_method=None, layer_id=None, hooks={}):
         super(MLP, self).__init__()
+        self.layer_id = layer_id
         # Set output layer initialization if not provided.
         if output_layer_init_method is None:
             output_layer_init_method = init_method
@@ -225,6 +226,7 @@ class BaseTransformerLayer(torch.nn.Module):
             output_dropout_prob,
             init_method,
             output_layer_init_method=output_layer_init_method,
+            layer_id=layer_id,
             hooks=hooks
         )
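Plumbing `layer_id` through to `MLP` means a forward hook can tell which transformer layer invoked it. The sketch below illustrates the idea only; the hook name and signature here are assumptions, not the library's exact interface:

```python
import torch

class MLP(torch.nn.Module):
    def __init__(self, hidden_size, layer_id=None, hooks={}):
        super().__init__()
        self.layer_id = layer_id   # which transformer layer owns this MLP
        self.hooks = hooks
        self.dense = torch.nn.Linear(hidden_size, hidden_size)

    def forward(self, x):
        if 'mlp_forward' in self.hooks:   # hook name is illustrative
            # The hook can now specialize its behavior per layer.
            return self.hooks['mlp_forward'](x, layer_id=self.layer_id)
        return self.dense(x)
```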
@@ -467,6 +467,8 @@ def report_iteration_metrics(summary_writer, optimizer, lr, loss, elapsed_time,
     summary_writer.add_scalar(f'Train/lr', lr, step)
     summary_writer.add_scalar(f'Train/train_loss', loss, step)
     summary_writer.add_scalar(f'Train/elapsed_time', elapsed_time, step)
+    for key in avg_metrics:
+        summary_writer.add_scalar('Train/'+key, avg_metrics[key], step)
 
 def report_evaluate_metrics(summary_writer, prefix, loss, ppl, step):
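The added loop writes every entry of `avg_metrics` under the `Train/` namespace, next to the built-in lr, train_loss, and elapsed_time scalars. A usage sketch, assuming `avg_metrics` is a plain `{name: value}` dict supplied by the caller (the diff adds the loop but does not show where the dict comes from):

```python
from torch.utils.tensorboard import SummaryWriter

summary_writer = SummaryWriter(log_dir='runs/demo')  # hypothetical log directory

# Customized metrics accumulated elsewhere in the training loop.
avg_metrics = {'acc': 0.83, 'aux_loss': 0.12}
step = 100

for key in avg_metrics:
    # Each custom metric appears in TensorBoard under Train/<key>.
    summary_writer.add_scalar('Train/' + key, avg_metrics[key], step)

summary_writer.close()
```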