Skip to content

Commit e6d1d0c

Browse files
authored
Merge branch 'master' into fix-gcc14-build
2 parents df2857f + c6d25ad commit e6d1d0c

18 files changed

+964
-27491
lines changed

.flake8

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[flake8]
2+
max-line-length=160
3+
extend-ignore = E203

.github/workflows/build_and_upload.yml

+2-8
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,9 @@ jobs:
2020
with:
2121
submodules: true
2222
- name: "Build wheels"
23-
uses: "pypa/cibuildwheel@v2.16.2"
23+
uses: "pypa/cibuildwheel@v2.21.1"
2424
env:
2525
CIBW_SKIP: "pp*" # FIXME
26-
CIBW_BEFORE_BUILD: "pip install -U cython && ./update_cpp.sh"
27-
CIBW_BEFORE_BUILD_WINDOWS: "pip install -U cython && update_cpp.sh"
2826
CIBW_TEST_REQUIRES: "pytest"
2927
CIBW_TEST_COMMAND: "pytest {project}/tests --doctest-modules"
3028
- uses: "actions/upload-artifact@v3"
@@ -38,10 +36,6 @@ jobs:
3836
- uses: "actions/checkout@v4"
3937
with:
4038
submodules: true
41-
- name: "Install dependencies"
42-
run: "python -m pip install --upgrade cython"
43-
- name: "Rebuild CPP files using Cython"
44-
run: "./update_cpp.sh"
4539
- name: "Build source distribution"
4640
run: "pipx run build --sdist"
4741
- uses: "actions/upload-artifact@v3"
@@ -56,7 +50,7 @@ jobs:
5650
- build_wheels
5751
- make_sdist
5852
steps:
59-
- uses: "actions/download-artifact@v3"
53+
- uses: "actions/download-artifact@v4.1.7"
6054
with:
6155
name: artifact
6256
path: dist

.github/workflows/tests.yml

-4
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,6 @@ jobs:
3333
python -m site
3434
python -m pip install --upgrade pip setuptools wheel
3535
python -m pip install --upgrade virtualenv tox tox-gh-actions
36-
python -m pip install --upgrade cython
37-
38-
- name: "Rebuild CPP files using Cython"
39-
run: "./update_cpp.sh"
4036
4137
- name: "Run tox targets for ${{ matrix.python-version }}"
4238
run: "python -m tox"

README.rst

-8
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,6 @@ Contributing
5050

5151
Feel free to submit ideas, bug reports, pull requests or regular patches.
5252

53-
In order to run tests, install Cython_ (> 0.24.1) and tox_, then type
54-
55-
::
56-
57-
./update_cpp.sh; tox
58-
59-
from the source checkout.
60-
6153
Please don't commit generated cpp files in the same commit as other files.
6254

6355
.. _Cython: http://cython.org/

pycrfsuite/__init__.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
1-
from __future__ import absolute_import
21
from ._pycrfsuite import *

pycrfsuite/_dumpparser.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import
31
import re
42

53

6-
class ParsedDump(object):
4+
class ParsedDump:
75
"""
86
CRFsuite model parameters. Objects of this type are returned by
97
:meth:`pycrfsuite.Tagger.info()` method.
@@ -27,6 +25,7 @@ class ParsedDump(object):
2725
``{name: internal_id}`` dict with known attributes
2826
2927
"""
28+
3029
def __init__(self):
3130
self.header = {}
3231
self.labels = {}
@@ -35,7 +34,7 @@ def __init__(self):
3534
self.state_features = {}
3635

3736

38-
class CRFsuiteDumpParser(object):
37+
class CRFsuiteDumpParser:
3938
"""
4039
A hack: parser for `crfsuite dump` results.
4140
@@ -49,17 +48,19 @@ def __init__(self):
4948

5049
def feed(self, line):
5150
# Strip initial ws and line terminator, but allow for ws at the end of feature names.
52-
line = line.lstrip().rstrip('\r\n')
51+
line = line.lstrip().rstrip("\r\n")
5352
if not line:
5453
return
5554

56-
m = re.match(r"(FILEHEADER|LABELS|ATTRIBUTES|TRANSITIONS|STATE_FEATURES) = {", line)
55+
m = re.match(
56+
r"(FILEHEADER|LABELS|ATTRIBUTES|TRANSITIONS|STATE_FEATURES) = {", line
57+
)
5758
if m:
5859
self.state = m.group(1)
59-
elif line == '}':
60+
elif line == "}":
6061
self.state = None
6162
else:
62-
getattr(self, 'parse_%s' % self.state)(line)
63+
getattr(self, "parse_%s" % self.state)(line)
6364

6465
def parse_FILEHEADER(self, line):
6566
m = re.match(r"(\w+): (.*)", line)

pycrfsuite/_logparser.py

+65-58
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import
31
import re
42
import fractions
53
from collections import namedtuple
64

7-
LabelScore = namedtuple('LabelScore', 'match model ref precision recall f1')
5+
LabelScore = namedtuple("LabelScore", "match model ref precision recall f1")
86

97

10-
class TrainLogParser(object):
11-
8+
class TrainLogParser:
129
def __init__(self):
1310
self.state = None
1411
self.featgen_percent = -2
@@ -26,31 +23,31 @@ def feed(self, line):
2623
# if line != '\n':
2724
self.log.append(line)
2825
if self.state is None:
29-
self.state = 'STARTING'
26+
self.state = "STARTING"
3027
self.handle_STARTING(line)
31-
self.events.append(('start', 0, len(self.log)))
32-
return 'start'
28+
self.events.append(("start", 0, len(self.log)))
29+
return "start"
3330
event = getattr(self, "handle_" + self.state)(line)
3431
if event is not None:
3532
start, end = self.events[-1][2], len(self.log)
36-
if event in ('prepared', 'optimization_end'):
33+
if event in ("prepared", "optimization_end"):
3734
end -= 1
3835
self.events.append((event, start, end))
3936
return event
4037

4138
@property
4239
def last_log(self):
4340
event, start, end = self.events[-1]
44-
return ''.join(self.log[start:end])
41+
return "".join(self.log[start:end])
4542

4643
def handle_STARTING(self, line):
47-
if line.startswith('Feature generation'):
48-
self.state = 'FEATGEN'
44+
if line.startswith("Feature generation"):
45+
self.state = "FEATGEN"
4946

5047
def handle_FEATGEN(self, line):
5148
if line in "0123456789.10":
5249
self.featgen_percent += 2
53-
return 'featgen_progress'
50+
return "featgen_progress"
5451

5552
m = re.match(r"Number of features: (\d+)", line)
5653
if m:
@@ -59,29 +56,29 @@ def handle_FEATGEN(self, line):
5956

6057
if self._seconds(line) is not None:
6158
self.featgen_seconds = self._seconds(line)
62-
self.state = 'AFTER_FEATGEN'
63-
return 'featgen_end'
59+
self.state = "AFTER_FEATGEN"
60+
return "featgen_end"
6461

6562
def handle_AFTER_FEATGEN(self, line):
6663
if self._iteration_head(line) is not None:
67-
self.state = 'ITERATION'
64+
self.state = "ITERATION"
6865
self.handle_ITERATION(line)
69-
return 'prepared'
66+
return "prepared"
7067

71-
if 'terminated with error' in line:
72-
self.state = 'AFTER_ITERATION'
73-
return 'prepare_error'
68+
if "terminated with error" in line:
69+
self.state = "AFTER_ITERATION"
70+
return "prepare_error"
7471

7572
def handle_ITERATION(self, line):
7673
if self._iteration_head(line) is not None:
7774
self.last_iteration = {
78-
'num': self._iteration_head(line),
79-
'scores': {},
75+
"num": self._iteration_head(line),
76+
"scores": {},
8077
}
8178
self.iterations.append(self.last_iteration)
82-
elif line == '\n':
83-
self.state = 'AFTER_ITERATION'
84-
return 'iteration'
79+
elif line == "\n":
80+
self.state = "AFTER_ITERATION"
81+
return "iteration"
8582

8683
def add_re(key, pattern, typ):
8784
m = re.match(pattern, line)
@@ -96,71 +93,81 @@ def add_re(key, pattern, typ):
9693
add_re("linesearch_step", r"Line search step: (\d+\.\d+)", float)
9794
add_re("time", r"Seconds required for this iteration: (\d+\.\d+)", float)
9895

99-
m = re.match(r"Macro-average precision, recall, F1: \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)", line)
96+
m = re.match(
97+
r"Macro-average precision, recall, F1: \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)",
98+
line,
99+
)
100100
if m:
101-
self.last_iteration['avg_precision'] = float(m.group(1))
102-
self.last_iteration['avg_recall'] = float(m.group(2))
103-
self.last_iteration['avg_f1'] = float(m.group(3))
101+
self.last_iteration["avg_precision"] = float(m.group(1))
102+
self.last_iteration["avg_recall"] = float(m.group(2))
103+
self.last_iteration["avg_f1"] = float(m.group(3))
104104

105105
m = re.match(r"Item accuracy: (\d+) / (\d+)", line)
106106
if m:
107107
acc = fractions.Fraction(int(m.group(1)), int(m.group(2)))
108-
self.last_iteration['item_accuracy'] = acc
109-
self.last_iteration['item_accuracy_float'] = float(acc)
108+
self.last_iteration["item_accuracy"] = acc
109+
self.last_iteration["item_accuracy_float"] = float(acc)
110110

111111
m = re.match(r"Instance accuracy: (\d+) / (\d+)", line)
112112
if m:
113113
acc = fractions.Fraction(int(m.group(1)), int(m.group(2)))
114-
self.last_iteration['instance_accuracy'] = acc
115-
self.last_iteration['instance_accuracy_float'] = float(acc)
114+
self.last_iteration["instance_accuracy"] = acc
115+
self.last_iteration["instance_accuracy_float"] = float(acc)
116116

117-
m = re.match(r"\s{4}(.+): \((\d+), (\d+), (\d+)\) \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)", line)
117+
m = re.match(
118+
r"\s{4}(.+): \((\d+), (\d+), (\d+)\) \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)",
119+
line,
120+
)
118121
if m:
119-
self.last_iteration['scores'][m.group(1)] = LabelScore(**{
120-
'match': int(m.group(2)),
121-
'model': int(m.group(3)),
122-
'ref': int(m.group(4)),
123-
'precision': float(m.group(5)),
124-
'recall': float(m.group(6)),
125-
'f1': float(m.group(7)),
126-
})
122+
self.last_iteration["scores"][m.group(1)] = LabelScore(
123+
**{
124+
"match": int(m.group(2)),
125+
"model": int(m.group(3)),
126+
"ref": int(m.group(4)),
127+
"precision": float(m.group(5)),
128+
"recall": float(m.group(6)),
129+
"f1": float(m.group(7)),
130+
}
131+
)
127132

128133
m = re.match(r"\s{4}(.+): \(0, 0, 0\) \(\*{6}, \*{6}, \*{6}\)", line)
129134
if m:
130-
self.last_iteration['scores'][m.group(1)] = LabelScore(**{
131-
'match': 0,
132-
'model': 0,
133-
'ref': 0,
134-
'precision': None,
135-
'recall': None,
136-
'f1': None,
137-
})
135+
self.last_iteration["scores"][m.group(1)] = LabelScore(
136+
**{
137+
"match": 0,
138+
"model": 0,
139+
"ref": 0,
140+
"precision": None,
141+
"recall": None,
142+
"f1": None,
143+
}
144+
)
138145

139146
def handle_AFTER_ITERATION(self, line):
140147
if self._iteration_head(line) is not None:
141-
self.state = 'ITERATION'
148+
self.state = "ITERATION"
142149
return self.handle_ITERATION(line)
143150

144151
m = re.match(r"Total seconds required for training: (\d+\.\d+)", line)
145152
if m:
146153
self.training_seconds = float(m.group(1))
147154

148-
if line.startswith('Storing the model'):
149-
self.state = 'STORING'
150-
return 'optimization_end'
155+
if line.startswith("Storing the model"):
156+
self.state = "STORING"
157+
return "optimization_end"
151158

152159
def handle_STORING(self, line):
153-
if line == '\n':
154-
return 'end'
160+
if line == "\n":
161+
return "end"
155162
elif self._seconds(line):
156163
self.storing_seconds = self._seconds(line)
157164

158165
def _iteration_head(self, line):
159-
m = re.match(r'\*{5} (?:Iteration|Epoch) #(\d+) \*{5}\n', line)
166+
m = re.match(r"\*{5} (?:Iteration|Epoch) #(\d+) \*{5}\n", line)
160167
if m:
161168
return int(m.group(1))
162169

163170
def _seconds(self, line):
164-
m = re.match(r'Seconds required: (\d+\.\d+)', line)
171+
m = re.match(r"Seconds required: (\d+\.\d+)", line)
165172
if m:
166173
return float(m.group(1))

0 commit comments

Comments
 (0)