1
- # -*- coding: utf-8 -*-
2
- from __future__ import absolute_import
3
1
import re
4
2
import fractions
5
3
from collections import namedtuple
6
4
7
# Per-label evaluation record parsed from one iteration of the training log:
# three integer counts (match, model, ref) followed by precision, recall and
# f1, which are floats or None when the log prints asterisks instead of
# numbers.
LabelScore = namedtuple("LabelScore", "match model ref precision recall f1")
8
6
9
7
10
- class TrainLogParser (object ):
11
-
8
+ class TrainLogParser :
12
9
def __init__ (self ):
13
10
self .state = None
14
11
self .featgen_percent = - 2
@@ -26,31 +23,31 @@ def feed(self, line):
26
23
# if line != '\n':
27
24
self .log .append (line )
28
25
if self .state is None :
29
- self .state = ' STARTING'
26
+ self .state = " STARTING"
30
27
self .handle_STARTING (line )
31
- self .events .append ((' start' , 0 , len (self .log )))
32
- return ' start'
28
+ self .events .append ((" start" , 0 , len (self .log )))
29
+ return " start"
33
30
event = getattr (self , "handle_" + self .state )(line )
34
31
if event is not None :
35
32
start , end = self .events [- 1 ][2 ], len (self .log )
36
- if event in (' prepared' , ' optimization_end' ):
33
+ if event in (" prepared" , " optimization_end" ):
37
34
end -= 1
38
35
self .events .append ((event , start , end ))
39
36
return event
40
37
41
38
@property
def last_log(self):
    """Return the raw log text covered by the most recent event.

    Each entry of ``self.events`` is an ``(event, start, end)`` tuple whose
    ``start``/``end`` are indexes into ``self.log``; the lines in that
    half-open range are concatenated and returned.

    Returns an empty string when no event has been recorded yet, instead
    of raising ``IndexError``.
    """
    if not self.events:
        return ""
    _event, start, end = self.events[-1]
    return "".join(self.log[start:end])
45
42
46
43
def handle_STARTING(self, line):
    """Process a log line while the parser is in the ``STARTING`` state.

    Moves the parser to the ``FEATGEN`` state once a line beginning with
    "Feature generation" is seen. Never emits an event (always returns
    ``None``).
    """
    # State names must match the ``handle_<STATE>`` method names exactly,
    # because the state string is used to look the handler up by name.
    if line.startswith("Feature generation"):
        self.state = "FEATGEN"
49
46
50
47
def handle_FEATGEN (self , line ):
51
48
if line in "0123456789.10" :
52
49
self .featgen_percent += 2
53
- return ' featgen_progress'
50
+ return " featgen_progress"
54
51
55
52
m = re .match (r"Number of features: (\d+)" , line )
56
53
if m :
@@ -59,29 +56,29 @@ def handle_FEATGEN(self, line):
59
56
60
57
if self ._seconds (line ) is not None :
61
58
self .featgen_seconds = self ._seconds (line )
62
- self .state = ' AFTER_FEATGEN'
63
- return ' featgen_end'
59
+ self .state = " AFTER_FEATGEN"
60
+ return " featgen_end"
64
61
65
62
def handle_AFTER_FEATGEN(self, line):
    """Process a log line seen after feature generation has finished.

    Returns:
        ``"prepared"`` when ``line`` is an iteration header: the parser
        switches to the ``ITERATION`` state and the header is forwarded to
        ``handle_ITERATION`` so the new iteration record is opened.
        ``"prepare_error"`` when the line contains "terminated with error":
        the parser switches to the ``AFTER_ITERATION`` state.
        ``None`` for any other line.
    """
    if self._iteration_head(line) is not None:
        self.state = "ITERATION"
        # The header line itself belongs to the first iteration.
        self.handle_ITERATION(line)
        return "prepared"

    if "terminated with error" in line:
        self.state = "AFTER_ITERATION"
        return "prepare_error"
74
71
75
72
def handle_ITERATION (self , line ):
76
73
if self ._iteration_head (line ) is not None :
77
74
self .last_iteration = {
78
- ' num' : self ._iteration_head (line ),
79
- ' scores' : {},
75
+ " num" : self ._iteration_head (line ),
76
+ " scores" : {},
80
77
}
81
78
self .iterations .append (self .last_iteration )
82
- elif line == ' \n ' :
83
- self .state = ' AFTER_ITERATION'
84
- return ' iteration'
79
+ elif line == " \n " :
80
+ self .state = " AFTER_ITERATION"
81
+ return " iteration"
85
82
86
83
def add_re (key , pattern , typ ):
87
84
m = re .match (pattern , line )
@@ -96,71 +93,81 @@ def add_re(key, pattern, typ):
96
93
add_re ("linesearch_step" , r"Line search step: (\d+\.\d+)" , float )
97
94
add_re ("time" , r"Seconds required for this iteration: (\d+\.\d+)" , float )
98
95
99
- m = re .match (r"Macro-average precision, recall, F1: \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)" , line )
96
+ m = re .match (
97
+ r"Macro-average precision, recall, F1: \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)" ,
98
+ line ,
99
+ )
100
100
if m :
101
- self .last_iteration [' avg_precision' ] = float (m .group (1 ))
102
- self .last_iteration [' avg_recall' ] = float (m .group (2 ))
103
- self .last_iteration [' avg_f1' ] = float (m .group (3 ))
101
+ self .last_iteration [" avg_precision" ] = float (m .group (1 ))
102
+ self .last_iteration [" avg_recall" ] = float (m .group (2 ))
103
+ self .last_iteration [" avg_f1" ] = float (m .group (3 ))
104
104
105
105
m = re .match (r"Item accuracy: (\d+) / (\d+)" , line )
106
106
if m :
107
107
acc = fractions .Fraction (int (m .group (1 )), int (m .group (2 )))
108
- self .last_iteration [' item_accuracy' ] = acc
109
- self .last_iteration [' item_accuracy_float' ] = float (acc )
108
+ self .last_iteration [" item_accuracy" ] = acc
109
+ self .last_iteration [" item_accuracy_float" ] = float (acc )
110
110
111
111
m = re .match (r"Instance accuracy: (\d+) / (\d+)" , line )
112
112
if m :
113
113
acc = fractions .Fraction (int (m .group (1 )), int (m .group (2 )))
114
- self .last_iteration [' instance_accuracy' ] = acc
115
- self .last_iteration [' instance_accuracy_float' ] = float (acc )
114
+ self .last_iteration [" instance_accuracy" ] = acc
115
+ self .last_iteration [" instance_accuracy_float" ] = float (acc )
116
116
117
- m = re .match (r"\s{4}(.+): \((\d+), (\d+), (\d+)\) \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)" , line )
117
+ m = re .match (
118
+ r"\s{4}(.+): \((\d+), (\d+), (\d+)\) \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)" ,
119
+ line ,
120
+ )
118
121
if m :
119
- self .last_iteration ['scores' ][m .group (1 )] = LabelScore (** {
120
- 'match' : int (m .group (2 )),
121
- 'model' : int (m .group (3 )),
122
- 'ref' : int (m .group (4 )),
123
- 'precision' : float (m .group (5 )),
124
- 'recall' : float (m .group (6 )),
125
- 'f1' : float (m .group (7 )),
126
- })
122
+ self .last_iteration ["scores" ][m .group (1 )] = LabelScore (
123
+ ** {
124
+ "match" : int (m .group (2 )),
125
+ "model" : int (m .group (3 )),
126
+ "ref" : int (m .group (4 )),
127
+ "precision" : float (m .group (5 )),
128
+ "recall" : float (m .group (6 )),
129
+ "f1" : float (m .group (7 )),
130
+ }
131
+ )
127
132
128
133
m = re .match (r"\s{4}(.+): \(0, 0, 0\) \(\*{6}, \*{6}, \*{6}\)" , line )
129
134
if m :
130
- self .last_iteration ['scores' ][m .group (1 )] = LabelScore (** {
131
- 'match' : 0 ,
132
- 'model' : 0 ,
133
- 'ref' : 0 ,
134
- 'precision' : None ,
135
- 'recall' : None ,
136
- 'f1' : None ,
137
- })
135
+ self .last_iteration ["scores" ][m .group (1 )] = LabelScore (
136
+ ** {
137
+ "match" : 0 ,
138
+ "model" : 0 ,
139
+ "ref" : 0 ,
140
+ "precision" : None ,
141
+ "recall" : None ,
142
+ "f1" : None ,
143
+ }
144
+ )
138
145
139
146
def handle_AFTER_ITERATION(self, line):
    """Process a log line seen between iterations or after the last one.

    * An iteration header switches the parser back to the ``ITERATION``
      state and delegates to ``handle_ITERATION``, returning whatever it
      returns.
    * A "Total seconds required for training: X.Y" line stores the value
      as a float in ``self.training_seconds``.
    * A line starting with "Storing the model" switches the parser to the
      ``STORING`` state and returns ``"optimization_end"``.

    Returns ``None`` when the line produces no event.
    """
    if self._iteration_head(line) is not None:
        self.state = "ITERATION"
        return self.handle_ITERATION(line)

    m = re.match(r"Total seconds required for training: (\d+\.\d+)", line)
    if m:
        self.training_seconds = float(m.group(1))

    if line.startswith("Storing the model"):
        self.state = "STORING"
        return "optimization_end"
151
158
152
159
def handle_STORING(self, line):
    """Process a log line while the model is being stored.

    Returns ``"end"`` when ``line`` is a bare newline. Otherwise, if the
    line carries a "Seconds required" value, that value is saved in
    ``self.storing_seconds`` and ``None`` is returned.
    """
    if line == "\n":
        return "end"

    # Compare against None rather than relying on truthiness: a reported
    # duration of 0.0 seconds is a valid value and must still be recorded.
    seconds = self._seconds(line)
    if seconds is not None:
        self.storing_seconds = seconds
157
164
158
165
def _iteration_head (self , line ):
159
- m = re .match (r' \*{5} (?:Iteration|Epoch) #(\d+) \*{5}\n' , line )
166
+ m = re .match (r" \*{5} (?:Iteration|Epoch) #(\d+) \*{5}\n" , line )
160
167
if m :
161
168
return int (m .group (1 ))
162
169
163
170
def _seconds (self , line ):
164
- m = re .match (r' Seconds required: (\d+\.\d+)' , line )
171
+ m = re .match (r" Seconds required: (\d+\.\d+)" , line )
165
172
if m :
166
173
return float (m .group (1 ))
0 commit comments