/**
* vec *= mat
* 3 times
* ex) ei_vec *= weight.transpose();
*/
cublasSgemv(handle, CUBLAS_OP_T, //cublasHandle_t handle, cublasOperation_t trans,
SIZE, SIZE, //int m, int n,
&one, //const float *alpha,
cu_reset_in_mat, SIZE, //const float *A(m x n), int lda,
cu_reset, 1, //const float *x, int incx,
&one, //const float *beta,
cu_reset, 1); //float *y, int incy)
cublasSgemv(handle, CUBLAS_OP_T, //cublasHandle_t handle, cublasOperation_t trans,
SIZE, SIZE, //int m, int n,
&one, //const float *alpha,
cu_update_in_mat, SIZE, //const float *A(m x n), int lda,
cu_update, 1, //const float *x, int incx,
&one, //const float *beta,
cu_update, 1); //float *y, int incy)
cublasSgemv(handle, CUBLAS_OP_T, //cublasHandle_t handle, cublasOperation_t trans,
SIZE, SIZE, //int m, int n,
&one, //const float *alpha,
cu_quasihidden_in_mat, SIZE, //const float *A(m x n), int lda,
cu_quasihidden, 1, //const float *x, int incx,
&one, //const float *beta,
cu_quasihidden, 1); //float *y, int incy)