常用blas函数-创新互联

Y=alpha * X +beta*Y 常用blas函数
// Single-precision scaled update: Y = alpha * X + beta * Y.
// Forwards to cblas_saxpby, walking both X and Y with a stride of 1.
template <>
void caffe_cpu_axpby(const int N, const float alpha, const float* X,
                     const float beta, float* Y) {
  const int unit_stride = 1;
  cblas_saxpby(N, alpha, X, unit_stride, beta, Y, unit_stride);
}

// Double-precision scaled update: Y = alpha * X + beta * Y.
// Forwards to cblas_daxpby, walking both X and Y with a stride of 1.
template <>
void caffe_cpu_axpby(const int N, const double alpha, const double* X,
                     const double beta, double* Y) {
  const int unit_stride = 1;
  cblas_daxpby(N, alpha, X, unit_stride, beta, Y, unit_stride);
}

cblas_dscal(N, beta, Y, incY);            // Y = beta * Y
cblas_daxpy(N, alpha, X, incX, Y, incY);  // Y = (alpha * X) + Y

创新互联专注于成都网站设计、成都网站建设、网页设计、网站制作、网站开发。公司秉持“客户至上,用心服务”的宗旨,从客户的利益和观点出发,让客户在网络营销中找到自己的驻足之地。尊重和关怀每一位客户,用严谨的态度对待客户,用专业的服务创造价值,成为客户值得信赖的朋友,为客户解除后顾之忧。

Y=alpha * X + Y

// Single-precision accumulate: Y = alpha * X + Y.
// Forwards to cblas_saxpy with a stride of 1 for both vectors.
template <>
void caffe_axpy(const int N, const float alpha, const float* X,
                float* Y) {
  const int unit_stride = 1;
  cblas_saxpy(N, alpha, X, unit_stride, Y, unit_stride);
}

// Double-precision accumulate: Y = alpha * X + Y.
// Forwards to cblas_daxpy with a stride of 1 for both vectors.
template <>
void caffe_axpy(const int N, const double alpha, const double* X,
                double* Y) {
  const int unit_stride = 1;
  cblas_daxpy(N, alpha, X, unit_stride, Y, unit_stride);
}
// Element-wise binary operations declared through DEFINE_VSL_BINARY_FUNC.
// Each invocation passes an operation name plus the per-element expression
// that combines inputs a[i] and b[i] into output y[i].
// NOTE(review): the macro definition is not visible here; presumably it
// generates the vs<Name>/vd<Name> functions (e.g. the vsAdd/vdAdd called by
// caffe_add) -- confirm against the macro definition.
DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]);
DEFINE_VSL_BINARY_FUNC(Sub, y[i]= a[i] - b[i]);
DEFINE_VSL_BINARY_FUNC(Mul, y[i]= a[i] * b[i]);
DEFINE_VSL_BINARY_FUNC(Div, y[i]= a[i] / b[i]);


// Single-precision addition of two length-n arrays: forwards a and b to
// vsAdd, which writes its result into y.
template <>
void caffe_add(const int n, const float* a, const float* b, float* y) {
  vsAdd(n, a, b, y);
}

// Double-precision addition of two length-n arrays: forwards a and b to
// vdAdd, which writes its result into y.
template <>
void caffe_add(const int n, const double* a, const double* b, double* y) {
  vdAdd(n, a, b, y);
}

Y = X (vector copy)

// Single-precision vector copy (Y = X) of N elements.
// Forwards to cblas_scopy with a stride of 1 on source and destination.
template <>
void caffe_copy(const int N, const float* X, float* Y) {
  const int unit_stride = 1;
  cblas_scopy(N, X, unit_stride, Y, unit_stride);
}

// Double-precision vector copy (Y = X) of N elements.
// Forwards to cblas_dcopy with a stride of 1 on source and destination.
template <>
void caffe_copy(const int N, const double* X, double* Y) {
  const int unit_stride = 1;
  cblas_dcopy(N, X, unit_stride, Y, unit_stride);
}

// Single-precision vector copy through cuBLAS: calls cublasScopy on the
// handle returned by Caffe::cublas_handle(), with a stride of 1 on both
// vectors, and passes the call's status to CUBLAS_CHECK.
template <>
void caffe_gpu_copy(const int N, const float* X, float* Y) {
  const int unit_stride = 1;
  CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), N, X, unit_stride, Y,
                           unit_stride));
}

// Double-precision vector copy through cuBLAS: calls cublasDcopy on the
// handle returned by Caffe::cublas_handle(), with a stride of 1 on both
// vectors, and passes the call's status to CUBLAS_CHECK.
template <>
void caffe_gpu_copy(const int N, const double* X, double* Y) {
  const int unit_stride = 1;
  CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), N, X, unit_stride, Y,
                           unit_stride));
}

Computes alpha*x*y' + A.

cblas_sger
Multiplies vector X by the transpose of vector Y, then adds matrix A (single precision).

Multiplies vector X by the transpose of vector Y, then adds matrix A (single precision).
// Single-precision rank-1 update: computes alpha * x * y' + A.
// A is the only non-const pointer argument; each vector and the matrix is
// listed next to its own stride / leading-dimension parameter.
void cblas_sger(const enum CBLAS_ORDER Order,
                const int M, const int N,
                const float alpha,
                const float *X, const int incX,
                const float *Y, const int incY,
                float *A, const int lda);

Y (vector) ← αAX + βY

This function multiplies A * X (after transposing A, if needed) and multiplies the resulting vector by alpha.
It then multiplies vector Y by beta and stores the sum of these two products in vector Y.

// Single-precision matrix-vector product: y <- alpha * op(A) * x + beta * y,
// where TransA selects whether A is transposed.
// A is handed to cblas_sgemv as a row-major matrix with leading dimension N;
// x and y are both walked with a stride of 1.
template <>
void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M,
                    const int N, const float alpha, const float* A,
                    const float* x, const float beta, float* y) {
  const int lda = N;
  const int unit_stride = 1;
  cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, lda, x, unit_stride,
              beta, y, unit_stride);
}

C(matrix)←αAB + βC

template
void gpu_multmat(T* A, T* B, T* C, int M,int K,int N){
const T alpha = 1,beta=0;
     caffe_gpu_gemm(CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C);
}
// Single-precision GEMM wrapper: C <- alpha * op(A) * op(B) + beta * C,
// with every matrix handed to cblas_sgemm as row-major.
template <>
void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA,
                    const CBLAS_TRANSPOSE TransB, const int M, const int N,
                    const int K, const float alpha, const float* A,
                    const float* B, const float beta, float* C) {
  // The leading dimension passed for A and B depends on whether each
  // operand is transposed.
  int lda = M;
  if (TransA == CblasNoTrans) {
    lda = K;
  }
  int ldb = K;
  if (TransB == CblasNoTrans) {
    ldb = N;
  }
  // C is passed with leading dimension N.
  const int ldc = N;
  cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, ldb,
              beta, C, ldc);
}
A=M*N  B=M*K
C=A'*B N M K

template void cpu_multTmat(T* A, T* B, T* C, int M,int K,int N){ const T alpha = 1,beta=0; caffe_cpu_gemm(CblasTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C); // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, A, M, B, K, beta, C, M);}
A=M*N B=N*K
C=A*B   M N K

template void cpu_multmat(T* A, T* B, T* C, int M,int K,int N){ const T alpha = 1,beta=0; caffe_cpu_gemm(CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C); // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, A, M, B, K, beta, C, M);}

当前标题:常用blas函数-创新互联
分享路径:http://scyanting.com/article/djhscj.html