Zero-coder
diff --git a/exp/exp_main.py
+1-2 b/exp/exp_main.py
+1-2
diff --git a/layers/AutoCorrelation.py
+1-75 b/layers/AutoCorrelation.py
+1-75
diff --git a/layers/FourierCorrelation.py
+18-48 b/layers/FourierCorrelation.py
+18-48
@@ -1,7 +1,6 @@
 from data_provider.data_factory import data_provider
 from exp.exp_basic import Exp_Basic
-from models import FEDformer, Informer, Autoformer, Transformer # Logformer, Reformer,Transformer_sin,Autoformer_sin
-# from models.reformer_pytorch.reformer_pytorch import Reformer
+from models import FEDformer, Autoformer, Informer, Transformer
 from utils.tools import EarlyStopping, adjust_learning_rate, visual
 from utils.metrics import metric
 
 
@@ -1,13 +1,8 @@
 import time
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-import matplotlib.pyplot as plt
 import numpy as np
 import math
-from math import sqrt
-import os
-# from pytorch_wavelets import DWTForward, DWTInverse, DWT1DForward, DWT1DInverse
 from torch.nn.functional import interpolate
 
 
@@ -38,36 +33,6 @@ def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1,
         self.dropout = nn.Dropout(attention_dropout)
         self.agg = None
         self.use_wavelet = configs.wavelet
-        # if self.use_wavelet:
-        #     J = 3
-        #     self.dwt1d = DWT1DForward(J=J, wave='db4')
-        #     self.dwt1div = DWT1DInverse(wave='db4')
-        #     self.j_list = [1, 2, 4, 8, 8]
-        #     print('DWTCorrelation used, J={}, j_list={}'.format(J, self.j_list))
-
-    # @decor_time
-    def time_delay_agg_mzq(self, values, corr):
-        head = values.shape[1]
-        channel = values.shape[2]
-        length = values.shape[3]
-        S = length
-        #  # else:
-        values = values.transpose(2, 3)
-        corr = corr.transpose(2, 3)
-        top_k = int(round(self.factor * np.log(S)))
-        # Rk = Rk.real
-        # if version == 3:
-        # V.size = [B, S, h]
-        # S = V.shape[1]
-        V_broad = torch.cat((values, values), dim=-2)  # size=[B, H, 2*S, h]
-        V_rolled = V_broad.unfold(-2, S, 1)  # size=[B, H, S+1, h, S]
-        # Rk.size = [B, S, h]
-        Rk_kthsmallest = torch.kthvalue(corr, k=S - top_k, dim=-2, keepdim=True)  # size=[B, H, 1, h]
-        mask = corr > torch.repeat_interleave(Rk_kthsmallest[0], repeats=S, dim=-2)
-        corr = torch.softmax(corr * mask, dim=-1)  # size = [B, H, S, h]
-        output = torch.einsum('beshi,besh->beih', V_rolled[:, :, 1:, :], corr)  # .transpose(1, 2)
-        # [B, H, S+1, h, S] * [B, H, S, h]
-        return output.transpose(2, 3)  # size=[batch, seq_len, h_dim]
 
     # @decor_time
     def time_delay_agg_training(self, values, corr):
@@ -166,8 +131,6 @@ def forward(self, queries, keys, values, attn_mask):
                 keys = keys.reshape([B, L, -1])
                 Ql, Qh_list = self.dwt1d(queries.transpose(1, 2))  # [B, H*D, L]
                 Kl, Kh_list = self.dwt1d(keys.transpose(1, 2))
-                # n = queries.shape[1]
-                # B = queries.shape[0]
                 qs = [queries.transpose(1, 2)] + Qh_list + [Ql]  # [B, H*D, L]
                 ks = [keys.transpose(1, 2)] + Kh_list + [Kl]
                 q_list = []
@@ -186,16 +149,11 @@ def forward(self, queries, keys, values, attn_mask):
 
             # time delay agg
             if self.training:
-                # if self.agg == 'thuml':
                 V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)  # [B, L, H, E], [B, H, E, L] -> [B, L, H, E]
-                # elif self.agg == 'mzq':
-                #     V = self.time_delay_agg_mzq(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
             else:
                 V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
-
         else:
             V_list = []
-            j_list = self.j_list
             queries = queries.reshape([B, L, -1])
             keys = keys.reshape([B, L, -1])
             values = values.reshape([B, L, -1])
@@ -261,36 +219,4 @@ def forward(self, queries, keys, values, attn_mask):
         )
 
         out = out.view(B, L, -1)
-        return self.out_projection(out), attn
-
-
-if __name__ == '__main__':
-    class Configs(object):
-        wavelet = 2
-
-    configs = Configs()
-    B = 3
-    H = 2
-    S = 240
-    d = 16
-    x = torch.randn([B, S, H, d])
-    model1 = AutoCorrelation(configs=configs)
-    model1.training = 1
-    model1.factor = 3
-    # model1.agg = 'thuml'
-    #
-    # model2 = AutoCorrelation()
-    # model2.training = 1
-    # model2.factor = 3
-    # model2.agg = 'mzq'
-    out1 = model1.forward(x, x, x, 1)
-    # out2 = model2.forward(x, x, x, 1)
-    # diff = out1[0] - out2[0]
-
-    # for S in 96, 480, 2400:
-    #     print('========{}========='.format(S))
-    #     x = torch.randn([B, S, H, d])
-    #     for i in range(0, 3):
-    #         out1 = model1.forward(x, x, x, 1)
-    #         out2 = model2.forward(x, x, x, 1)
-    a = 1
+        return self.out_projection(out), attn
@@ -5,12 +5,14 @@
 import numpy as np
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from torch.nn.parameter import Parameter
-from utils.masking import LocalMask
 
 
 def get_frequency_modes(seq_len, modes=64, mode_select_method='random'):
+    """
+    Select which frequency-domain modes to keep:
+    'random' samples the modes at random;
+    any other value keeps the lowest modes.
+    """
     modes = min(modes, seq_len//2)
     if mode_select_method == 'random':
         index = list(range(0, seq_len // 2))
@@ -28,8 +30,10 @@ def __init__(self, in_channels, out_channels, seq_len, modes=0, mode_select_meth
         super(FourierBlock, self).__init__()
         print('fourier enhanced block used!')
         """
-        1D Fourier layer. It does FFT, linear transform, and Inverse FFT.    
+        1D Fourier block. It performs representation learning in the frequency domain:
+        FFT, linear transform, and inverse FFT.
         """
+        # get modes on frequency domain
         self.index = get_frequency_modes(seq_len, modes=modes, mode_select_method=mode_select_method)
         print('modes={}, index={}'.format(modes, self.index))
 
@@ -44,63 +48,32 @@ def compl_mul1d(self, input, weights):
 
     def forward(self, q, k, v, mask):
         # size = [B, L, H, E]
-        k = k
-        v = v
-        mask = mask
         B, L, H, E = q.shape
         x = q.permute(0, 2, 3, 1)
-        # batchsize = B
-        # Compute Fourier coeffcients up to factor of e^(- something constant)
+        # Compute Fourier coefficients
         x_ft = torch.fft.rfft(x, dim=-1)
-        #out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat)
-#         if len(self.index)==0:
-#             out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat)
-#         else:
-#             out_ft = torch.zeros(B, H, E, len(self.index), device=x.device, dtype=torch.cfloat)
+        # Perform Fourier neural operations
         out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat)
-
-        # Multiply relevant Fourier modes
-        # 取guided modes的版本
-        # print('x shape',x.shape)
-        # print('out_ft shape',out_ft.shape)
-        # print('x_ft shape',x_ft.shape)
-        # print('weight shape',self.weights1.shape)
-        # print('self index',self.index)
         for wi, i in enumerate(self.index):
             out_ft[:, :, :, wi] = self.compl_mul1d(x_ft[:, :, :, i], self.weights1[:, :, :, wi])
-
-        # 取topk的modes版本
-        # topk = torch.topk(torch.sum(x_ft, dim=[0, 1, 2]).abs(), dim=-1, k=self.modes1)
-        # energy = (topk[0]**2).sum()
-        # energy90 = 0
-        # for index, j in enumerate(topk[0]):
-        #     energy90 += j**2
-        #     if energy90 >= energy * 0.9:
-        #         break
-        # for i in topk[1][:index]:
-        #     out_ft[:, :, :, i] = self.compl_mul1d(x_ft[:, :, :, i], self.weights1[:, :, :, i])
-
-        # Return to physical space
+        # Return to time domain
         x = torch.fft.irfft(out_ft, n=x.size(-1))
-        #max_len = min(720,x.size(-1))
-        #x = torch.fft.irfft(out_ft, n=max_len)
-        # size = [B, L, H, E]
         return (x, None)
 
 
-# ########## Cross Fourier Former ####################
+# ########## Fourier Cross Former ####################
 class FourierCrossAttention(nn.Module):
     def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random',
                  activation='tanh', policy=0):
         super(FourierCrossAttention, self).__init__()
         print(' fourier enhanced cross attention used!')
-
         """
-        1D Fourier layer. It does FFT, linear transform, and Inverse FFT.    
+        1D Fourier cross-attention layer. It applies FFT, attention in the frequency domain, a linear transform, and inverse FFT.
         """
         self.activation = activation
         self.in_channels = in_channels
         self.out_channels = out_channels
+        # get modes for queries and keys (& values) on frequency domain
         self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method)
         self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method)
 
@@ -118,23 +91,22 @@ def compl_mul1d(self, input, weights):
 
     def forward(self, q, k, v, mask):
         # size = [B, L, H, E]
-        mask = mask
         B, L, H, E = q.shape
-        xq = q.permute(0, 2, 3, 1) # size = [B, H, E, L]
+        xq = q.permute(0, 2, 3, 1)  # size = [B, H, E, L]
         xk = k.permute(0, 2, 3, 1)
         xv = v.permute(0, 2, 3, 1)
 
-        # Compute Fourier coeffcients up to factor of e^(- something constant)
+        # Compute Fourier coefficients
         xq_ft_ = torch.zeros(B, H, E, len(self.index_q)+1, device=xq.device, dtype=torch.cfloat)
         xq_ft = torch.fft.rfft(xq, dim=-1)
         for i, j in enumerate(self.index_q):
             xq_ft_[:, :, :, i] = xq_ft[:, :, :, j]
-
         xk_ft_ = torch.zeros(B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat)
         xk_ft = torch.fft.rfft(xk, dim=-1)
         for i, j in enumerate(self.index_kv):
             xk_ft_[:, :, :, i] = xk_ft[:, :, :, j]
 
+        # perform attention mechanism on frequency domain
         xqk_ft = (torch.einsum("bhex,bhey->bhxy", xq_ft_, xk_ft_))
         if self.activation == 'tanh':
             xqk_ft = xqk_ft.tanh()
@@ -143,15 +115,13 @@ def forward(self, q, k, v, mask):
             xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
         else:
             raise Exception('{} actiation function is not implemented'.format(self.activation))
-
         xqkv_ft = torch.einsum("bhxy,bhey->bhex", xqk_ft, xk_ft_)
         xqkvw = torch.einsum("bhex,heox->bhox", xqkv_ft, self.weights1)
         out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
         for i, j in enumerate(self.index_q):
             out_ft[:, :, :, j] = xqkvw[:, :, :, i]
+        # Return to time domain
         out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1))
-        # size = [B, L, H, E]
-
         return (out, None)