-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathexperiments.conf
More file actions
executable file
·144 lines (132 loc) · 3.08 KB
/
experiments.conf
File metadata and controls
executable file
·144 lines (132 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Word embeddings.
# Each embedding block declares a pretrained vector file: its path,
# vector dimensionality (`size`), on-disk format, and whether tokens
# should be lowercased before lookup.
# Full (unfiltered) GloVe 840B-token vectors, 300 dimensions.
glove_300d {
path = glove.840B.300d.txt
size = 300
format = txt
# NOTE(review): presumably controls lowercasing of lookup tokens — confirm in the loader.
lowercase = false
}
# GloVe 300d vectors filtered to the task vocabulary (smaller file,
# same dimensionality as glove_300d). Used during training; the
# unfiltered file is used for final evaluation (see `final`).
glove_300d_filtered {
path = glove.840B.300d.txt.filtered
size = 300
format = txt
lowercase = false
}
# 50-dimensional Turian embeddings, combined with GloVe in the
# English configurations (see `best` and `final`).
turian_50d {
path = turian.50d.txt
size = 50
format = txt
lowercase = false
}
# Vocabulary-filtered word2vec vectors, 300 dimensions.
# Used for Chinese training (see `best_cn`).
word2vec_300d_filtered {
path = word2vec.300d.txt.filtered
size = 300
format = txt
lowercase = false
}
# Full (unfiltered) word2vec vectors, 300 dimensions.
# Used for Chinese evaluation (see `final_cn`).
word2vec_300d {
path = word2vec.300d.txt
size = 300
format = txt
lowercase = false
}
# Compute clusters.
# Each cluster block names a parameter-server host:port and a list of
# worker host:ports, plus one `gpus` entry per worker.
# Shared cluster: 1 parameter server, 15 workers (note n06 is absent
# from the worker list).
nlp {
addresses {
ps = [nlp2:2222]
worker = [n01:2222, n02:2222, n03:2222, n04:2222, n05:2222, n07:2222, n08:2222, n09:2222, n10:2222, n11:2222, n12:2222, n13:2222, n14:2222, n15:2222, n16:2222]
}
# NOTE(review): -1 presumably means CPU-only for that worker — confirm
# against the training code that reads this list.
gpus = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
}
# Single-machine cluster: parameter server and two workers all on
# localhost, on distinct ports. This is the cluster referenced by
# `best` below.
appositive {
addresses {
ps = [localhost:2222]
worker = [localhost:2223, localhost:2224]
}
# NOTE(review): presumably GPU device ids, one per worker — confirm.
gpus = [0, 1]
}
# Main configuration.
# Base experiment that every other experiment below extends via
# HOCON inheritance (`name = ${best} { overrides }`).
best {
# Computation limits.
max_antecedents = 250
max_training_sentences = 50
mention_ratio = 0.4
# Model hyperparameters.
# Character-CNN filter widths and number of filters per width.
filter_widths = [3, 4, 5]
filter_size = 50
char_embedding_size = 8
char_vocab_path = "char_vocab.english.txt"
# Concatenation of filtered GloVe (300d) and Turian (50d) vectors;
# see the embedding blocks at the top of this file.
embeddings = [${glove_300d_filtered}, ${turian_50d}]
lstm_size = 200
ffnn_size = 150
ffnn_depth = 2
feature_size = 20
max_mention_width = 10
use_metadata = true
use_features = true
model_heads = true
# Learning hyperparameters.
max_gradient_norm = 5.0
lexical_dropout_rate = 0.5
dropout_rate = 0.2
optimizer = adam
learning_rate = 0.001
decay_rate = 0.999
decay_frequency = 100
# Other.
train_path = train.english.jsonlines
eval_path = dev.english.jsonlines
conll_eval_path = dev.english.v4_auto_conll
# NOTE(review): presumably OntoNotes genre codes (broadcast
# conversation, broadcast news, etc.) — confirm against the data.
genres = [bc, bn, mz, nw, pt, tc, wb]
# Frequencies are in steps: evaluate every 1000, report every 100.
eval_frequency = 1000
report_frequency = 100
log_root = logs
# Runs on the local two-worker cluster defined above.
cluster = ${appositive}
}
# Multiple full models for ensembling.
# Five identical copies of `best`; each name gets its own log/checkpoint
# directory under `log_root`, so they train as independent ensemble members.
best0 = ${best}
best1 = ${best}
best2 = ${best}
best3 = ${best}
best4 = ${best}
# Ablations.
# Each ablation inherits everything from `best` and overrides exactly
# the setting under study.
# GloVe-only: drop the Turian embeddings.
glove = ${best} {
embeddings = [${glove_300d_filtered}]
}
# Turian-only: drop the GloVe embeddings.
turian = ${best} {
embeddings = [${turian_50d}]
}
# No character CNN.
# NOTE(review): -1 presumably disables character embeddings entirely —
# confirm how the model code interprets a negative size.
nochar = ${best} {
char_embedding_size = -1
}
# No document metadata features.
nometa = ${best} {
use_metadata = false
}
# No head-finding attention over mention words.
noheads = ${best} {
model_heads = false
}
# No discrete features.
nofeatures = ${best} {
use_features = false
}
# Chinese variant of `best`: swaps in word2vec embeddings, the Chinese
# character vocabulary, and the Chinese train/dev data. All model and
# learning hyperparameters are inherited unchanged.
best_cn = ${best} {
embeddings = [${word2vec_300d_filtered}]
char_vocab_path = "char_vocab.chinese.txt"
train_path = train.chinese.jsonlines
eval_path = dev.chinese.jsonlines
conll_eval_path = dev.chinese.v4_auto_conll
}
# For evaluation. Do not use for training (i.e. only for decoder.py, ensembler.py, visualize.py and demo.py). Rename `best0` directory to `final`.
# Differs from `best` by using the UNFILTERED GloVe vectors and the
# test split with gold CoNLL annotations.
final = ${best} {
embeddings = [${glove_300d}, ${turian_50d}]
eval_path = test.english.jsonlines
conll_eval_path = test.english.v4_gold_conll
}
# NOTE(review): identical to `final` above except for the name — likely
# kept as a separate alias so English evaluation parallels `best_cn` /
# `final_cn` naming; confirm before consolidating.
best_en = ${best} {
embeddings = [${glove_300d}, ${turian_50d}]
eval_path = test.english.jsonlines
conll_eval_path = test.english.v4_gold_conll
}
# Chinese evaluation config: extends `best_cn` (so Chinese vocab and
# training paths are inherited) but evaluates with the unfiltered
# word2vec vectors on the gold test split. Evaluation-only, like `final`.
final_cn = ${best_cn} {
embeddings = [${word2vec_300d}]
eval_path = test.chinese.jsonlines
conll_eval_path = test.chinese.v4_gold_conll
}