博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
caffe中各种cblas的函数使用总结
阅读量:4574 次
发布时间:2019-06-08

本文共 4182 字,大约阅读时间需要 13 分钟。

转来的,来自:http://www.cnblogs.com/huashiyiqike/p/3886670.html

 

总结得很赞,转到这里,留一下笔记。感觉cblas的函数名字很好记,试着去找过源代码,但源代码是Fortran写的,我当时写过的那些Fortran程序早忘记了。

 

 

 

 

Y=alpha * X +beta*Y 

 

 

Y=alpha * X +beta*Y template <>void caffe_cpu_axpby
(const int N, const float alpha, const float* X, const float beta, float* Y) { cblas_saxpby(N, alpha, X, 1, beta, Y, 1);}template <>void caffe_cpu_axpby
(const int N, const double alpha, const double* X, const double beta, double* Y) { cblas_daxpby(N, alpha, X, 1, beta, Y, 1);} cblas_dscal(N, beta, Y, incY); Y=Y*beta cblas_daxpy(N, alpha, X, incX, Y, incY); Y= (alpha * X) + Y)

  

 

 

Y=alpha * X + Y 

 

template <>void caffe_axpy
(const int N, const float alpha, const float* X, float* Y) { cblas_saxpy(N, alpha, X, 1, Y, 1); }template <>void caffe_axpy
(const int N, const double alpha, const double* X, double* Y) { cblas_daxpy(N, alpha, X, 1, Y, 1); }

  

DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]);DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]);DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]);DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]);template <>void caffe_add
(const int n, const float* a, const float* b,float* y) {vsAdd(n, a, b, y);}template <>void caffe_add
(const int n, const double* a, const double* b,double* y) {vdAdd(n, a, b, y);}

  

y=x;

 

template <>void caffe_copy
(const int N, const float* X, float* Y) { cblas_scopy(N, X, 1, Y, 1);}template <>void caffe_copy
(const int N, const double* X, double* Y) { cblas_dcopy(N, X, 1, Y, 1);}template <>void caffe_gpu_copy
(const int N, const float* X, float* Y) { CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), N, X, 1, Y, 1));}template <>void caffe_gpu_copy
(const int N, const double* X, double* Y) { CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), N, X, 1, Y, 1));}

  Computes alpha*x*y' + A.

cblas_sger: Multiplies vector X by the transpose of vector Y, then adds matrix A (single precision).
void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda);

 

Y(vetor)←αAX + βYThis function multiplies A * X (after transposing A, if needed) and multiplies the resulting matrix by alpha.It then multiplies vector Y by beta. It stores the sum of these two products in vector Y.template <>void caffe_cpu_gemv
(const CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float* A, const float* x, const float beta, float* y) { cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);}

  

 C(matrix)←αAB + βC

 

template
void gpu_multmat(T* A, T* B, T* C, int M,int K,int N){ const T alpha = 1,beta=0; caffe_gpu_gemm(CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C);}template<>void caffe_cpu_gemm
(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float* A, const float* B, const float beta, float* C) { int lda = (TransA == CblasNoTrans) ? K : M; int ldb = (TransB == CblasNoTrans) ? N : K; cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, N);}

  

A=M*N  B=M*KC=A'*B   N M Ktemplate
void cpu_multTmat(T* A, T* B, T* C, int M,int K,int N){ const T alpha = 1,beta=0; caffe_cpu_gemm(CblasTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C); // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, A, M, B, K, beta, C, M);}A=M*N B=N*KC=A*B M N Ktemplate
void cpu_multmat(T* A, T* B, T* C, int M,int K,int N){ const T alpha = 1,beta=0; caffe_cpu_gemm(CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C); // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, A, M, B, K, beta, C, M);}

  

 

转载于:https://www.cnblogs.com/jianyingzhou/p/4444728.html

你可能感兴趣的文章
mysql 数据库路径
查看>>
web服务器负载均衡部署及实现
查看>>
13.JOIN
查看>>
省市县三级联动
查看>>
多IP地址--笔记
查看>>
react native开发日记
查看>>
Virtual Dom是什么
查看>>
阶乘之和
查看>>
Unable to instantiate receiver xxx.receiver.NetworkReceiver异常
查看>>
C++调用C#类库函数
查看>>
vs2013编译项目去掉warning信息
查看>>
ASP.NET MVC html help
查看>>
C# 时间比较方法DateTime.Compare
查看>>
三级菜单小练习
查看>>
vim 插件管理
查看>>
Guid函数
查看>>
java的加减乘除问题
查看>>
Android 系统常用的权限
查看>>
类型参数约束 : Controller where T : class,new()
查看>>
N卡控制面板把physx设置为cpu
查看>>