Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
cc4s
Cyclops Tensor Framework
Commits
52065cd6
Commit
52065cd6
authored
11 years ago
by
solomon
Browse files
Options
Download
Email Patches
Plain Diff
Connected scalapack back-end to proper gemm as it used to be and made this gemm offload to GPU.
parent
e8071959
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
49 additions
and
22 deletions
+49
-22
src/Makefile
src/Makefile
+3
-3
src/bench/nonsq_pgemm_bench.cxx
src/bench/nonsq_pgemm_bench.cxx
+2
-0
src/ctr_seq/sym_seq_ctr_inner.hxx
src/ctr_seq/sym_seq_ctr_inner.hxx
+1
-1
src/dist_tensor/cyclopstf.cxx
src/dist_tensor/cyclopstf.cxx
+2
-2
src/dist_tensor/cyclopstf.hpp
src/dist_tensor/cyclopstf.hpp
+0
-1
src/dist_tensor/dist_tensor_op.cxx
src/dist_tensor/dist_tensor_op.cxx
+11
-6
src/dist_tensor/scala_backend.cxx
src/dist_tensor/scala_backend.cxx
+30
-9
src/shared/offload.cxx
src/shared/offload.cxx
+0
-0
src/shared/offload.h
src/shared/offload.h
+0
-0
No files found.
src/Makefile
View file @
52065cd6
...
...
@@ -32,7 +32,7 @@ ${libdir}/libctf.a: interface/ctf_world.o \
interface/ctf_sparse_tensor.o
\
interface/ctf_flop_counter.o
\
ctr_comm/seq_tsr.o
\
ctr_seq
/offload.o
\
shared
/offload.o
\
shared/util.o
\
shared/timer.o
\
shared/memcontrol.o
\
...
...
@@ -40,9 +40,9 @@ ${libdir}/libctf.a: interface/ctf_world.o \
dist_tensor/distribution.o
\
dist_tensor/cyclopstf.o
ctr_seq
/offload.o
:
ctr_seq
/offload.h
ctr_seq
/offload.cxx $(_DEPENDENCIES)
shared
/offload.o
:
shared
/offload.h
shared
/offload.cxx $(_DEPENDENCIES)
@
mkdir
-p
$(DEPDIR)
$(OFFLOAD_CXX)
-c
ctr_seq
/offload.cxx
-o
ctr_seq
/offload.o
$(OFFLOAD_CXX)
-c
shared
/offload.cxx
-o
shared
/offload.o
#INCLUDES += -I${top_dir}/src/ctr_comm -I${top_dir}/src/ctr_seq -I${top_dir}/src/dist_tensor -I${top_dir}/src/util -I${top_dir}/src/interface
This diff is collapsed.
Click to expand it.
src/bench/nonsq_pgemm_bench.cxx
View file @
52065cd6
...
...
@@ -403,10 +403,12 @@ int main(int argc, char **argv) {
startTime
=
MPI_Wtime
();
for
(
iter
=
0
;
iter
<
num_iter
;
iter
++
){
//seq_square_matmul(mat_A, mat_B, mat_C, blockDim, 0);
TAU_FSTART
(
ctf_pgemm_bench
);
myctf
->
pgemm
(
'T'
,
'N'
,
m
,
n
,
k
,
ALPHA
,
mat_A
,
1
,
1
,
desc_a
,
mat_B
,
1
,
1
,
desc_b
,
BETA
,
mat_C
,
1
,
1
,
desc_c
);
TAU_FSTOP
(
ctf_pgemm_bench
);
// myctf->pgemm('T', 'N', ALPHA, tid_A, tid_B, BETA, tid_C);
if
(
iter
==
0
)
ans_verify
=
mat_C
[
2
];
...
...
This diff is collapsed.
Click to expand it.
src/ctr_seq/sym_seq_ctr_inner.hxx
View file @
52065cd6
...
...
@@ -6,7 +6,7 @@
#include "../shared/util.h"
#include <limits.h>
#include "sym_seq_shared.hxx"
#include "offload.h"
#include "
../shared/
offload.h"
/**
...
...
This diff is collapsed.
Click to expand it.
src/dist_tensor/cyclopstf.cxx
View file @
52065cd6
...
...
@@ -557,7 +557,7 @@ int tCTF<dtype>::contract(CTF_ctr_type_t const * type,
dtype
const
alpha
,
dtype
const
beta
){
fseq_tsr_ctr
<
dtype
>
fs
;
fs
.
func_ptr
=
sym_seq_ctr_ref
<
dtype
>
;
fs
.
func_ptr
=
NULL
;
//
sym_seq_ctr_ref<dtype>;
return
contract
(
type
,
fs
,
alpha
,
beta
);
}
...
...
@@ -675,7 +675,7 @@ int tCTF<dtype>::contract(CTF_ctr_type_t const * type,
dt
->
print_ctr
(
type
,
alpha
,
beta
);
#endif
fseq_tsr_ctr
<
dtype
>
fs
;
fs
.
func_ptr
=
sym_seq_ctr_ref
<
dtype
>
;
fs
.
func_ptr
=
NULL
;
//
sym_seq_ctr_ref<dtype>;
int
ret
=
dt
->
home_contract
(
type
,
fs
,
felm
,
alpha
,
beta
);
#if DEBUG >= 1
if
(
dt
->
get_global_comm
().
rank
==
0
)
...
...
This diff is collapsed.
Click to expand it.
src/dist_tensor/cyclopstf.hpp
View file @
52065cd6
...
...
@@ -165,7 +165,6 @@ struct fseq_elm_sum {
template
<
typename
dtype
>
struct
fseq_tsr_ctr
{
/* Function signature for sub-tensor contraction recursive call */
int
(
*
func_ptr
)
(
dtype
const
alpha
,
dtype
const
*
A
,
...
...
This diff is collapsed.
Click to expand it.
src/dist_tensor/dist_tensor_op.cxx
View file @
52065cd6
...
...
@@ -2931,8 +2931,11 @@ int dist_tensor<dtype>::
assert
(
stat
==
DIST_TENSOR_SUCCESS
);
#endif
/* Check if the current tensor mappings can be contracted on */
fseq_tsr_ctr
<
dtype
>
fftsr
=
ftsr
;
if
(
ftsr
.
func_ptr
==
NULL
)
fftsr
.
func_ptr
=
&
sym_seq_ctr_ref
<
dtype
>
;
#if REDIST
stat
=
map_tensors
(
type
,
ftsr
,
felm
,
alpha
,
beta
,
&
ctrf
);
stat
=
map_tensors
(
type
,
f
ftsr
,
felm
,
alpha
,
beta
,
&
ctrf
);
if
(
stat
==
DIST_TENSOR_ERROR
)
{
printf
(
"Failed to map tensors to physical grid
\n
"
);
return
DIST_TENSOR_ERROR
;
...
...
@@ -2940,7 +2943,7 @@ int dist_tensor<dtype>::
#else
if
(
check_contraction_mapping
(
type
)
==
0
)
{
/* remap if necessary */
stat
=
map_tensors
(
type
,
ftsr
,
felm
,
alpha
,
beta
,
&
ctrf
);
stat
=
map_tensors
(
type
,
f
ftsr
,
felm
,
alpha
,
beta
,
&
ctrf
);
if
(
stat
==
DIST_TENSOR_ERROR
)
{
printf
(
"Failed to map tensors to physical grid
\n
"
);
return
DIST_TENSOR_ERROR
;
...
...
@@ -2954,7 +2957,7 @@ int dist_tensor<dtype>::
print_map
(
stdout
,
type
->
tid_B
);
print_map
(
stdout
,
type
->
tid_C
);
#endif
ctrf
=
construct_contraction
(
type
,
ftsr
,
felm
,
alpha
,
beta
);
ctrf
=
construct_contraction
(
type
,
f
ftsr
,
felm
,
alpha
,
beta
);
if
(
global_comm
.
rank
==
0
){
uint64_t
memuse
=
ctrf
->
mem_rec
();
VPRINTF
(
1
,
"Contraction does not require redistribution, will use %E bytes per processor out of %E available memory and take an estimated of %lf sec
\n
"
,
...
...
@@ -2964,7 +2967,9 @@ int dist_tensor<dtype>::
#endif
LIBT_ASSERT
(
check_contraction_mapping
(
type
));
#if FOLD_TSR
if
(
felm
.
func_ptr
==
NULL
&&
can_fold
(
type
)){
if
(
felm
.
func_ptr
==
NULL
&&
ftsr
.
func_ptr
==
NULL
&&
//sym_seq_ctr_ref<dtype> &&
can_fold
(
type
)){
iparam
prm
;
TAU_FSTART
(
map_fold
);
stat
=
map_fold
(
type
,
&
prm
);
...
...
@@ -2974,9 +2979,9 @@ int dist_tensor<dtype>::
}
if
(
stat
==
DIST_TENSOR_SUCCESS
){
delete
ctrf
;
ctrf
=
construct_contraction
(
type
,
ftsr
,
felm
,
alpha
,
beta
,
2
,
&
prm
);
ctrf
=
construct_contraction
(
type
,
f
ftsr
,
felm
,
alpha
,
beta
,
2
,
&
prm
);
}
}
}
#endif
#if DEBUG >=2
if
(
get_global_comm
().
rank
==
0
)
...
...
This diff is collapsed.
Click to expand it.
src/dist_tensor/scala_backend.cxx
View file @
52065cd6
/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
#include "dist_tensor_internal.h"
#include "../shared/offload.h"
#if (defined BGP || defined BGQ)
#define BLACS_GRIDINFO blacs_gridinfo
...
...
@@ -16,25 +17,22 @@ inline
void
BLACS_GRIDINFO
(
int
*
,
int
*
,
int
*
,
int
*
,
int
*
)
{
assert
(
0
);
}
#endif
template
<
typename
dtype
,
int
is_herm_A
,
int
is_herm_B
>
int
gemm_ctr
(
dtype
const
alpha
,
dtype
const
*
A
,
int
gemm_ctr
(
dtype
const
alpha
,
dtype
const
*
A
,
int
const
ndim_A
,
int
const
*
edge_len_A
,
int
const
*
lda_A
,
int
const
*
sym_A
,
int
const
*
idx_map_A
,
dtype
const
*
B
,
dtype
const
*
B
,
int
const
ndim_B
,
int
const
*
edge_len_B
,
int
const
*
lda_B
,
int
const
*
sym_B
,
int
const
*
idx_map_B
,
dtype
const
beta
,
dtype
*
C
,
dtype
const
beta
,
dtype
*
C
,
int
const
ndim_C
,
int
const
*
edge_len_C
,
int
const
*
lda_C
,
...
...
@@ -79,13 +77,36 @@ int gemm_ctr( dtype const alpha,
LIBT_ASSERT
(
n
==
edge_len_C
[
1
]);
la_C
=
m
;
#ifdef OFFLOAD
TAU_FSTART
(
offload_alloc
);
offload_ptr
<
dtype
>
ptr_A
(
m
*
k
);
offload_ptr
<
dtype
>
ptr_B
(
k
*
n
);
offload_ptr
<
dtype
>
ptr_C
(
m
*
n
);
TAU_FSTOP
(
offload_alloc
);
TAU_FSTART
(
offload_upload
);
ptr_A
.
upload
(
A
);
ptr_B
.
upload
(
B
);
ptr_C
.
upload
(
C
);
TAU_FSTOP
(
offload_upload
);
TAU_FSTART
(
offload_gemm
);
TAU_FSTART
(
dgemm
);
offload_gemm
<
dtype
>
(
ta
,
tb
,
m
,
n
,
k
,
alpha
,
ptr_A
,
la_A
,
ptr_B
,
la_B
,
beta
,
ptr_C
,
la_C
);
TAU_FSTOP
(
dgemm
);
TAU_FSTART
(
offload_download
);
ptr_C
.
download
(
C
);
TAU_FSTOP
(
offload_download
);
#else
TAU_FSTART
(
dgemm
);
cxgemm
(
ta
,
tb
,
m
,
n
,
k
,
alpha
,
A
,
la_A
,
B
,
la_B
,
beta
,
C
,
la_C
);
TAU_FSTOP
(
dgemm
);
#endif
return
0
;
}
/*
#define DECLARE_GEMM_CTR(type, herm_A, herm_B) \
template \
...
...
This diff is collapsed.
Click to expand it.
src/
ctr_seq
/offload.cxx
→
src/
shared
/offload.cxx
View file @
52065cd6
File moved
This diff is collapsed.
Click to expand it.
src/
ctr_seq
/offload.h
→
src/
shared
/offload.h
View file @
52065cd6
File moved
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment