Commit 52065cd6 authored by solomon

Reconnected the ScaLAPACK back-end to the proper gemm, as it used to be, and made this gemm offload to the GPU.

parent e8071959
......@@ -32,7 +32,7 @@ ${libdir}/libctf.a: interface/ctf_world.o \
interface/ctf_sparse_tensor.o \
interface/ctf_flop_counter.o \
ctr_comm/seq_tsr.o \
ctr_seq/offload.o \
shared/offload.o \
shared/util.o \
shared/timer.o \
shared/memcontrol.o \
......@@ -40,9 +40,9 @@ ${libdir}/libctf.a: interface/ctf_world.o \
dist_tensor/distribution.o \
dist_tensor/cyclopstf.o
ctr_seq/offload.o: ctr_seq/offload.h ctr_seq/offload.cxx $(_DEPENDENCIES)
shared/offload.o: shared/offload.h shared/offload.cxx $(_DEPENDENCIES)
@mkdir -p $(DEPDIR)
$(OFFLOAD_CXX) -c ctr_seq/offload.cxx -o ctr_seq/offload.o
$(OFFLOAD_CXX) -c shared/offload.cxx -o shared/offload.o
#INCLUDES += -I${top_dir}/src/ctr_comm -I${top_dir}/src/ctr_seq -I${top_dir}/src/dist_tensor -I${top_dir}/src/util -I${top_dir}/src/interface
......@@ -403,10 +403,12 @@ int main(int argc, char **argv) {
startTime = MPI_Wtime();
for (iter=0; iter < num_iter; iter++){
//seq_square_matmul(mat_A, mat_B, mat_C, blockDim, 0);
TAU_FSTART(ctf_pgemm_bench);
myctf->pgemm('T','N', m, n, k, ALPHA,
mat_A, 1, 1, desc_a,
mat_B, 1, 1, desc_b, BETA,
mat_C, 1, 1, desc_c);
TAU_FSTOP(ctf_pgemm_bench);
// myctf->pgemm('T', 'N', ALPHA, tid_A, tid_B, BETA, tid_C);
if (iter == 0)
ans_verify = mat_C[2];
......
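[Editorial note] The benchmark above times CTF's ScaLAPACK-style pgemm, i.e. C := alpha*op(A)*op(B) + beta*C on block-cyclically distributed matrices; the (1, 1, desc_X) triples are the usual global row/column offsets plus array descriptor. For reference, a sketch of how such a descriptor is laid out, following the standard ScaLAPACK descinit field convention (the benchmark's actual setup code is outside this diff, and the helper name is hypothetical):

    // Fill a ScaLAPACK array descriptor for an m x n matrix distributed in
    // blockDim x blockDim blocks over the BLACS context ictxt (sketch).
    void fill_desc(int * desc, int m, int n, int blockDim, int ictxt, int lld){
      desc[0] = 1;        // DTYPE_: dense matrix
      desc[1] = ictxt;    // CTXT_ : BLACS context handle
      desc[2] = m;        // M_    : global rows
      desc[3] = n;        // N_    : global cols
      desc[4] = blockDim; // MB_   : row block size
      desc[5] = blockDim; // NB_   : column block size
      desc[6] = 0;        // RSRC_ : process row owning the first row
      desc[7] = 0;        // CSRC_ : process column owning the first column
      desc[8] = lld;      // LLD_  : local leading dimension
    }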
......@@ -6,7 +6,7 @@
#include "../shared/util.h"
#include <limits.h>
#include "sym_seq_shared.hxx"
#include "offload.h"
#include "../shared/offload.h"
/**
......
......@@ -557,7 +557,7 @@ int tCTF<dtype>::contract(CTF_ctr_type_t const * type,
dtype const alpha,
dtype const beta){
fseq_tsr_ctr<dtype> fs;
fs.func_ptr=sym_seq_ctr_ref<dtype>;
fs.func_ptr=NULL;//sym_seq_ctr_ref<dtype>;
return contract(type, fs, alpha, beta);
}
......@@ -675,7 +675,7 @@ int tCTF<dtype>::contract(CTF_ctr_type_t const * type,
dt->print_ctr(type,alpha,beta);
#endif
fseq_tsr_ctr<dtype> fs;
fs.func_ptr=sym_seq_ctr_ref<dtype>;
fs.func_ptr=NULL;//sym_seq_ctr_ref<dtype>;
int ret = dt->home_contract(type, fs, felm, alpha, beta);
#if DEBUG >= 1
if (dt->get_global_comm().rank == 0)
......
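[Editorial note] The two hunks above stop binding the reference kernel sym_seq_ctr_ref eagerly: contract() now passes func_ptr == NULL down as a sentinel meaning "no user override", and the dist_tensor hunks below substitute the reference kernel only where a sequential kernel is actually required, so the folded path can dispatch to the offloaded GEMM instead. A minimal sketch of that sentinel-with-fallback pattern (all names here are hypothetical, not CTF's):

    #include <cstddef>

    typedef int (*seq_ctr_fn)(double alpha, double const * A,
                              double const * B, double beta, double * C);

    // Stand-in for the reference contraction kernel.
    int ref_kernel(double alpha, double const * A, double const * B,
                   double beta, double * C){
      *C = alpha * (*A) * (*B) + beta * (*C);
      return 0;
    }

    int dispatch(seq_ctr_fn user_fn, bool foldable){
      if (user_fn == NULL && foldable){
        // No override and the contraction folds to a GEMM:
        // take the fast (possibly offloaded) path.
        return 0; // offloaded gemm would run here
      }
      // Otherwise fall back to the reference kernel.
      seq_ctr_fn fn = (user_fn == NULL) ? &ref_kernel : user_fn;
      double a = 2.0, b = 3.0, c = 1.0;
      return fn(1.0, &a, &b, 0.0, &c);
    }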
......@@ -165,7 +165,6 @@ struct fseq_elm_sum {
template<typename dtype>
struct fseq_tsr_ctr {
/* Function signature for sub-tensor contraction recursive call */
int (*func_ptr) ( dtype const alpha,
dtype const * A,
......
......@@ -2931,8 +2931,11 @@ int dist_tensor<dtype>::
assert(stat == DIST_TENSOR_SUCCESS);
#endif
/* Check if the current tensor mappings can be contracted on */
fseq_tsr_ctr<dtype> fftsr=ftsr;
if (ftsr.func_ptr == NULL)
fftsr.func_ptr = &sym_seq_ctr_ref<dtype>;
#if REDIST
stat = map_tensors(type, ftsr, felm, alpha, beta, &ctrf);
stat = map_tensors(type, fftsr, felm, alpha, beta, &ctrf);
if (stat == DIST_TENSOR_ERROR) {
printf("Failed to map tensors to physical grid\n");
return DIST_TENSOR_ERROR;
......@@ -2940,7 +2943,7 @@ int dist_tensor<dtype>::
#else
if (check_contraction_mapping(type) == 0) {
/* remap if necessary */
stat = map_tensors(type, ftsr, felm, alpha, beta, &ctrf);
stat = map_tensors(type, fftsr, felm, alpha, beta, &ctrf);
if (stat == DIST_TENSOR_ERROR) {
printf("Failed to map tensors to physical grid\n");
return DIST_TENSOR_ERROR;
......@@ -2954,7 +2957,7 @@ int dist_tensor<dtype>::
print_map(stdout, type->tid_B);
print_map(stdout, type->tid_C);
#endif
ctrf = construct_contraction(type, ftsr, felm, alpha, beta);
ctrf = construct_contraction(type, fftsr, felm, alpha, beta);
if (global_comm.rank == 0){
uint64_t memuse = ctrf->mem_rec();
VPRINTF(1,"Contraction does not require redistribution, will use %E bytes per processor out of %E available memory and take an estimated %lf sec\n",
......@@ -2964,7 +2967,9 @@ int dist_tensor<dtype>::
#endif
LIBT_ASSERT(check_contraction_mapping(type));
#if FOLD_TSR
if (felm.func_ptr == NULL && can_fold(type)){
if (felm.func_ptr == NULL &&
ftsr.func_ptr == NULL && //sym_seq_ctr_ref<dtype> &&
can_fold(type)){
iparam prm;
TAU_FSTART(map_fold);
stat = map_fold(type, &prm);
......@@ -2974,9 +2979,9 @@ int dist_tensor<dtype>::
}
if (stat == DIST_TENSOR_SUCCESS){
delete ctrf;
ctrf = construct_contraction(type, ftsr, felm, alpha, beta, 2, &prm);
ctrf = construct_contraction(type, fftsr, felm, alpha, beta, 2, &prm);
}
}
}
#endif
#if DEBUG >=2
if (get_global_comm().rank == 0)
......
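[Editorial note] The widened guard above makes folding conditional on both functors being unset: the contraction is rewritten as a single GEMM (which the offload path can then execute) only when the user supplied neither an elementwise functor nor a sequential contraction kernel. Compactly, the condition is:

    // Fold (and hence offload) only when no user kernels are installed.
    bool may_fold = (felm.func_ptr == NULL)   // no elementwise override
                 && (ftsr.func_ptr == NULL)   // no sequential-kernel override
                 && can_fold(type);           // index structure maps to a GEMM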
/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
#include "dist_tensor_internal.h"
#include "../shared/offload.h"
#if (defined BGP || defined BGQ)
#define BLACS_GRIDINFO blacs_gridinfo
......@@ -16,25 +17,22 @@ inline
void BLACS_GRIDINFO(int *, int *, int *, int *, int *) { assert(0); }
#endif
template<typename dtype, int is_herm_A, int is_herm_B>
int gemm_ctr( dtype const alpha,
dtype const * A,
int gemm_ctr( dtype const alpha,
dtype const * A,
int const ndim_A,
int const * edge_len_A,
int const * lda_A,
int const * sym_A,
int const * idx_map_A,
dtype const * B,
dtype const * B,
int const ndim_B,
int const * edge_len_B,
int const * lda_B,
int const * sym_B,
int const * idx_map_B,
dtype const beta,
dtype * C,
dtype const beta,
dtype * C,
int const ndim_C,
int const * edge_len_C,
int const * lda_C,
......@@ -79,13 +77,36 @@ int gemm_ctr( dtype const alpha,
LIBT_ASSERT(n==edge_len_C[1]);
la_C = m;
#ifdef OFFLOAD
TAU_FSTART(offload_alloc);
offload_ptr<dtype> ptr_A(m*k);
offload_ptr<dtype> ptr_B(k*n);
offload_ptr<dtype> ptr_C(m*n);
TAU_FSTOP(offload_alloc);
TAU_FSTART(offload_upload);
ptr_A.upload(A);
ptr_B.upload(B);
ptr_C.upload(C);
TAU_FSTOP(offload_upload);
TAU_FSTART(offload_gemm);
TAU_FSTART(dgemm);
offload_gemm<dtype>(ta, tb, m, n, k, alpha,
ptr_A, la_A,
ptr_B, la_B, beta,
ptr_C, la_C);
TAU_FSTOP(dgemm);
TAU_FSTART(offload_download);
ptr_C.download(C);
TAU_FSTOP(offload_download);
#else
TAU_FSTART(dgemm);
cxgemm(ta, tb, m, n, k, alpha, A, la_A, B, la_B, beta, C, la_C);
TAU_FSTOP(dgemm);
#endif
return 0;
}
/*
#define DECLARE_GEMM_CTR(type, herm_A, herm_B) \
template \
......
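[Editorial note] The new OFFLOAD branch above wraps the GEMM in an allocate / upload / compute / download cycle on the device, with TAU timers around each phase. For illustration, a minimal sketch of what an offload_ptr / offload_gemm pair could look like on a CUDA + cuBLAS back-end, restricted to double precision; this is a hedged sketch of the pattern, not the repository's shared/offload.cxx:

    #include <cublas_v2.h>
    #include <cuda_runtime.h>

    // Device-resident buffer with host<->device copy helpers (sketch).
    template <typename dtype>
    struct offload_ptr {
      dtype * dev;
      size_t  n;
      explicit offload_ptr(size_t n_) : n(n_) {
        cudaMalloc((void**)&dev, n * sizeof(dtype));
      }
      ~offload_ptr(){ cudaFree(dev); }
      void upload(dtype const * host){   // host -> device
        cudaMemcpy(dev, host, n * sizeof(dtype), cudaMemcpyHostToDevice);
      }
      void download(dtype * host){       // device -> host
        cudaMemcpy(host, dev, n * sizeof(dtype), cudaMemcpyDeviceToHost);
      }
    };

    // Double-precision GEMM on device buffers; a real implementation would
    // dispatch on dtype and reuse a cached cuBLAS handle.
    void offload_gemm_d(char ta, char tb, int m, int n, int k, double alpha,
                        offload_ptr<double> const & A, int lda,
                        offload_ptr<double> const & B, int ldb, double beta,
                        offload_ptr<double> & C, int ldc){
      cublasHandle_t h;
      cublasCreate(&h);
      cublasOperation_t opA = (ta=='T'||ta=='t') ? CUBLAS_OP_T : CUBLAS_OP_N;
      cublasOperation_t opB = (tb=='T'||tb=='t') ? CUBLAS_OP_T : CUBLAS_OP_N;
      cublasDgemm(h, opA, opB, m, n, k, &alpha,
                  A.dev, lda, B.dev, ldb, &beta, C.dev, ldc);
      cublasDestroy(h);
    }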
File moved: ctr_seq/offload.cxx → shared/offload.cxx
File moved: ctr_seq/offload.h → shared/offload.h