2
2
import sparse
3
3
import os
4
4
5
# TnsFileLoader loads a tensor stored in .tns format.
class TnsFileLoader:
    """Reads a sparse tensor from a text file in .tns coordinate format.

    Each data line holds the 1-based coordinates of one entry followed by
    the entry's value, separated by whitespace.
    """

    def __init__(self):
        pass

    def load(self, path):
        """Load the tensor stored at `path`.

        Returns a `(dims, coordinates, values)` tuple:
          * `dims` is the inferred size of every dimension, i.e. the
            largest 0-based coordinate seen in that dimension plus one.
          * `coordinates` is a list with one list of 0-based coordinates
            per entry.
          * `values` is the list of entry values, parsed as floats.

        A file without any data lines yields `([], [], [])`.

        Raises ValueError if a data line contains a field that cannot be
        parsed as a number.
        """
        coordinates = []
        values = []
        dims = []
        with open(path, 'r') as f:
            for line in f:
                # Split on any run of whitespace so that tabs, repeated
                # spaces and the trailing newline never produce empty or
                # malformed fields.
                data = line.split()
                # Skip blank lines and comment lines. Some downloaded
                # files carry '%' header comments; '#' is presumably the
                # comment marker of the .tns format itself -- neither can
                # start a valid data line, so skipping them is safe.
                if not data or data[0].startswith(('#', '%')):
                    continue
                # The file stores 1-based coordinates; convert to 0-based.
                coords = [int(coord) - 1 for coord in data[:-1]]
                # TODO (rohany): What if we want this to be an integer?
                value = float(data[-1])
                # The first data line fixes the order of the tensor.
                if not values:
                    dims = [0] * len(coords)
                # Grow each dimension to cover the largest coordinate seen.
                for i, coord in enumerate(coords):
                    dims[i] = max(dims[i], coord + 1)
                coordinates.append(coords)
                values.append(value)
        return dims, coordinates, values
40
29
41
# TnsFileDumper dumps a dictionary of coordinates to values
# into a coordinate list tensor file.
class TnsFileDumper:
    """Writes a coordinate -> value mapping out as a .tns text file."""

    def __init__(self):
        pass

    def dump_dict_to_file(self, shape, data, path):
        """Write every entry of `data` to `path`, one entry per line.

        `data` maps a sequence of 0-based coordinates to a value. Each
        output line lists the coordinates shifted to 1-based indexing,
        followed by the value, separated by single spaces. `shape` is
        accepted as part of the interface but is not written to the file.
        """
        # Order the rows so that repeated dumps of the same data produce
        # identical files.
        rows = [list(coords) + [value] for coords, value in data.items()]
        rows.sort()
        with open(path, 'w+') as out:
            for *indices, value in rows:
                # Coordinates are stored 1-based on disk.
                fields = [str(index + 1) for index in indices]
                fields.append(str(value))
                out.write(" ".join(fields))
                out.write("\n")
59
45
60
46
# ScipySparseTensorLoader loads a sparse tensor from a file into a
61
47
# scipy.sparse CSR matrix.
62
48
class ScipySparseTensorLoader :
63
49
def __init__ (self , format ):
64
- self .loader = CoordinateListFileLoader ()
50
+ self .loader = TnsFileLoader ()
65
51
self .format = format
66
52
67
53
def load (self , path ):
@@ -84,7 +70,7 @@ def load(self, path):
84
70
# a pydata.sparse tensor.
85
71
class PydataSparseTensorLoader :
86
72
def __init__ (self ):
87
- self .loader = CoordinateListFileLoader ()
73
+ self .loader = TnsFileLoader ()
88
74
89
75
def load (self , path ):
90
76
dims , coords , values = self .loader .load (path )
@@ -99,13 +85,13 @@ def load(self, path):
99
85
# a random tensor parameterized by the chosen shape and sparsity.
100
86
# The key itself is formatted by the dimensions, followed by the
101
87
# sparsity. For example, a 250 by 250 tensor with sparsity 0.01
102
- # would have a key of 250x250-0.01.tensor .
88
+ # would have a key of 250x250-0.01.tns .
103
89
def construct_random_tensor_key(shape, sparsity):
    """Return the file path under which a random tensor is cached.

    The file name is the dimensions of `shape` joined by "x", then a
    dash, the sparsity and the ".tns" extension. It is placed inside the
    directory named by the TACO_RANDOM_TENSOR_PATH environment variable.

    Raises KeyError if TACO_RANDOM_TENSOR_PATH is not set.
    """
    # Look up the directory first so that a missing environment variable
    # fails immediately.
    tensor_dir = os.environ['TACO_RANDOM_TENSOR_PATH']
    joined_dims = "x".join(map(str, shape))
    return os.path.join(tensor_dir, "{}-{}.tns".format(joined_dims, sparsity))
110
96
111
97
# RandomPydataSparseTensorLoader should be used to generate
@@ -126,14 +112,15 @@ def random(self, shape, sparsity):
126
112
# dump it to the output file, then return it.
127
113
result = sparse .random (shape , density = sparsity )
128
114
dok = sparse .DOK (result )
129
- CoordinateListFileDumper ().dump_dict_to_file (shape , dok .data , key )
115
+ TnsFileDumper ().dump_dict_to_file (shape , dok .data , key )
130
116
return result
131
117
132
118
# RandomScipySparseTensorLoader is the same as RandomPydataSparseTensorLoader
133
119
# but for scipy.sparse tensors.
134
120
class RandomScipySparseTensorLoader :
135
121
def __init__ (self , format ):
136
122
self .loader = ScipySparseTensorLoader (format )
123
+ self .format = format
137
124
138
125
def random (self , shape , sparsity ):
139
126
assert (len (shape ) == 2 )
@@ -143,7 +130,7 @@ def random(self, shape, sparsity):
143
130
return self .loader .load (key )
144
131
else :
145
132
# Otherwise, create and then dump a tensor.
146
- result = scipy .sparse .random (shape [0 ], shape [1 ], density = sparsity , format = 'csr' )
133
+ result = scipy .sparse .random (shape [0 ], shape [1 ], density = sparsity , format = self . format )
147
134
dok = scipy .sparse .dok_matrix (result )
148
- CoordinateListFileDumper ().dump_dict_to_file (shape , dict (dok .items ()), key )
135
+ TnsFileDumper ().dump_dict_to_file (shape , dict (dok .items ()), key )
149
136
return result
0 commit comments