1
1
#! /usr/bin/env python3
2
2
3
- # These are the settings
4
- FILE = "big.txt"
5
- FILE_URL = "https://norvig.com/big.txt"
6
- WINDOW_SIZE = 100
7
- LENGTH = 1024
3
+ # https://norvig.com/big.txt
8
4
9
5
import sys
10
6
import random
11
- import os
7
+ import argparse
12
8
13
9
def tokenize_file (f , window_size ):
14
10
allzeros = "\0 " * window_size
@@ -37,7 +33,7 @@ def build_index(token_stream):
37
33
38
34
def generate_text (index , window_size ):
39
35
allzeros = "\0 " * window_size
40
- token = random .choice (index )
36
+ token = random .choice (tuple ( index . keys ()) )
41
37
while True :
42
38
nc = random .choice (index .get (token , "\0 " ))
43
39
if nc != "\0 " :
@@ -47,13 +43,17 @@ def generate_text(index, window_size):
47
43
return
48
44
49
45
def main():
    """Parse the command line, index the input text and print generated text.

    The input is read from the file named on the command line, or from
    standard input when the name is ``-``. The index is built using a sliding
    window of ``--windowsize``, and at most ``--length`` generated items are
    written to standard output without a trailing newline (the generator may
    stop earlier on its own).
    """
    parser = argparse.ArgumentParser(prog="dissociated")
    parser.add_argument(
        "-w", "--windowsize", type=int, default=10, metavar="SIZE",
        help="Length of sliding window to scan the input with.",
    )
    parser.add_argument(
        "-n", "--length", type=int, default=1024, metavar="LENGTH",
        help="Number of output bytes.",
    )
    parser.add_argument(
        "file", type=str,
        help="File to read from or - for stdin.",
    )
    opts = parser.parse_args()

    if opts.file == "-":
        # stdin is owned by the interpreter, so it is read but never closed.
        index = build_index(tokenize_file(sys.stdin, opts.windowsize))
    else:
        with open(opts.file) as f:
            index = build_index(tokenize_file(f, opts.windowsize))

    # range() goes first so that zip stops as soon as the requested length is
    # reached, without pulling one extra, discarded item from the generator.
    generated = generate_text(index, opts.windowsize)
    for _, ch in zip(range(opts.length), generated):
        # Let stdout buffering batch the writes instead of flushing per item.
        print(ch, end="")
    sys.stdout.flush()
58
58
59
59
# Run the command-line interface only when executed as a script, so that
# importing this module does not parse sys.argv or write any output.
if __name__ == "__main__":
    main()
0 commit comments