Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
cc4s
Cyclops Tensor Framework
Commits
fea513c7
Commit
fea513c7
authored
9 years ago
by
Edgar Solomonik
Browse files
Options
Download
Email Patches
Plain Diff
Got rid of compiler warnings, added performance models for upload/download/offload_gemm
parent
df4a209f
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
65 additions
and
31 deletions
+65
-31
src/contraction/contraction.cxx
src/contraction/contraction.cxx
+11
-19
src/contraction/ctr_2d_general.cxx
src/contraction/ctr_2d_general.cxx
+1
-1
src/contraction/ctr_offload.cxx
src/contraction/ctr_offload.cxx
+3
-3
src/contraction/ctr_tsr.cxx
src/contraction/ctr_tsr.cxx
+11
-4
src/shared/init_models.cxx
src/shared/init_models.cxx
+3
-0
src/shared/init_models.h
src/shared/init_models.h
+3
-0
src/shared/offload.cu
src/shared/offload.cu
+23
-0
src/shared/offload.h
src/shared/offload.h
+6
-0
src/summation/summation.cxx
src/summation/summation.cxx
+2
-2
src/tensor/algstrct.cxx
src/tensor/algstrct.cxx
+1
-1
src/tensor/untyped_tensor.cxx
src/tensor/untyped_tensor.cxx
+1
-1
No files found.
src/contraction/contraction.cxx
View file @
fea513c7
...
...
@@ -1843,7 +1843,7 @@ namespace CTF_int {
// printf("new_order[%d/%d] = %d, new_lens[%d] = %d\n", i, topo->order, new_order[i], new_order[i], new_lens[new_order[i]]);
}
topology
*
new_topo
=
NULL
;
for
(
int
i
=
0
;
i
<
A
->
wrld
->
topovec
.
size
();
i
++
){
for
(
int
i
=
0
;
i
<
(
int
)
A
->
wrld
->
topovec
.
size
();
i
++
){
if
(
A
->
wrld
->
topovec
[
i
]
->
order
==
topo
->
order
){
bool
has_same_len
=
true
;
for
(
int
j
=
0
;
j
<
topo
->
order
;
j
++
){
...
...
@@ -2586,7 +2586,7 @@ namespace CTF_int {
&
num_tot
,
&
idx_arr
);
cdealloc
(
idx_arr
);
int64_t
tot_num_choices
=
0
;
for
(
int
i
=
0
;
i
<
wrld
->
topovec
.
size
();
i
++
){
for
(
int
i
=
0
;
i
<
(
int
)
wrld
->
topovec
.
size
();
i
++
){
// tot_num_choices += pow(num_choices,(int)wrld->topovec[i]->order);
tot_num_choices
+=
get_num_map_variants
(
wrld
->
topovec
[
i
]);
}
...
...
@@ -2594,7 +2594,7 @@ namespace CTF_int {
int64_t
choice_offset
=
0
;
int64_t
max_memuse
=
proc_bytes_available
();
TAU_FSTOP
(
init_select_ctr_map
);
for
(
int
i
=
0
;
i
<
wrld
->
topovec
.
size
();
i
++
){
for
(
int
i
=
0
;
i
<
(
int
)
wrld
->
topovec
.
size
();
i
++
){
// int tnum_choices = pow(num_choices,(int) wrld->topovec[i]->order);
int
tnum_choices
=
get_num_map_variants
(
wrld
->
topovec
[
i
]);
...
...
@@ -2767,10 +2767,6 @@ namespace CTF_int {
int
contraction
::
map
(
ctr
**
ctrf
,
bool
do_remap
){
int
ret
,
j
,
need_remap
,
d
;
int64_t
memuse
;
//, bmemuse;
double
best_time
;
int
btopo
;
//int * idx_arr, * idx_ctr, * idx_no_ctr, * idx_extra, * idx_weigh;
int
*
old_phase_A
,
*
old_phase_B
,
*
old_phase_C
;
topology
*
old_topo_A
,
*
old_topo_B
,
*
old_topo_C
;
distribution
*
dA
,
*
dB
,
*
dC
;
...
...
@@ -2831,12 +2827,9 @@ namespace CTF_int {
old_phase_C
[
j
]
=
C
->
edge_map
[
j
].
calc_phase
();
}
//}
btopo
=
-
1
;
best_time
=
DBL_MAX
;
//bmemuse = UINT64_MAX;
int
ttopo
,
ttopo_sel
,
ttopo_exh
;
double
gbest_time
,
gbest_time_sel
,
gbest_time_exh
;
double
gbest_time_sel
,
gbest_time_exh
;
TAU_FSTART
(
get_best_sel_map
);
get_best_sel_map
(
dA
,
dB
,
dC
,
old_topo_A
,
old_topo_B
,
old_topo_C
,
old_map_A
,
old_map_B
,
old_map_C
,
ttopo_sel
,
gbest_time_sel
);
...
...
@@ -2850,10 +2843,8 @@ namespace CTF_int {
TAU_FSTOP
(
get_best_exh_map
);
}
if
(
gbest_time_sel
<=
gbest_time_exh
){
gbest_time
=
gbest_time_sel
;
ttopo
=
ttopo_sel
;
}
else
{
gbest_time
=
gbest_time_exh
;
ttopo
=
ttopo_exh
;
}
...
...
@@ -2914,7 +2905,7 @@ namespace CTF_int {
int64_t
choice_offset
=
0
;
int
i
=
0
;
int64_t
old_off
;
for
(
i
=
0
;
i
<
wrld
->
topovec
.
size
();
i
++
){
for
(
i
=
0
;
i
<
(
int
)
wrld
->
topovec
.
size
();
i
++
){
//int tnum_choices = pow(num_choices,(int) wrld->topovec[i]->order);
int
tnum_choices
=
get_num_map_variants
(
wrld
->
topovec
[
i
]);
old_off
=
choice_offset
;
...
...
@@ -2962,10 +2953,10 @@ namespace CTF_int {
#endif
//FIXME: adhoc?
memuse
=
MAX
((
int64_t
)(
*
ctrf
)
->
mem_rec
(),
(
int64_t
)(
A
->
size
*
A
->
sr
->
el_size
+
B
->
size
*
B
->
sr
->
el_size
+
C
->
size
*
C
->
sr
->
el_size
)
*
3
);
/*
memuse = MAX((int64_t)(*ctrf)->mem_rec(), (int64_t)(A->size*A->sr->el_size+B->size*B->sr->el_size+C->size*C->sr->el_size)*3);
if (global_comm.rank == 0)
VPRINTF(1,"Contraction will use %E bytes per processor out of %E available memory and take an estimated of %lf sec\n",
(
double
)
memuse
,(
double
)
proc_bytes_available
(),
gbest_time
);
(double)memuse,(double)proc_bytes_available(),gbest_time);
*/
if
(
A
->
is_cyclic
==
0
&&
B
->
is_cyclic
==
0
&&
...
...
@@ -4039,7 +4030,9 @@ namespace CTF_int {
#if DEBUG >=2
ctrf
->
print
();
#endif
#if VERBOSE >= 1
double
dtt
=
MPI_Wtime
();
#endif
#ifdef DEBUG
if
(
global_comm
.
rank
==
0
){
//DPRINTF(1,"[%d] performing contraction\n",
...
...
@@ -4823,9 +4816,8 @@ namespace CTF_int {
}
void
contraction
::
print
(){
// int j,ex_A, ex_B,ex_C;
int
i
,
max
;
max
=
A
->
order
+
B
->
order
+
C
->
order
;
int
i
;
//max = A->order+B->order+C->order;
CommData
global_comm
=
A
->
wrld
->
cdt
;
MPI_Barrier
(
global_comm
.
cm
);
if
(
global_comm
.
rank
==
0
){
...
...
This diff is collapsed.
Click to expand it.
src/contraction/ctr_2d_general.cxx
View file @
fea513c7
...
...
@@ -267,7 +267,7 @@ namespace CTF_int {
void
ctr_2d_general
::
run
(
char
*
A
,
char
*
B
,
char
*
C
){
int
owner_A
,
owner_B
,
owner_C
,
ret
;
int64_t
ib
;
char
*
buf_A
,
*
buf_B
,
*
buf_C
,
*
buf_aux
;
char
*
buf_A
,
*
buf_B
,
*
buf_C
;
char
*
op_A
,
*
op_B
,
*
op_C
;
int
rank_A
,
rank_B
,
rank_C
;
int64_t
b_A
,
b_B
,
b_C
,
s_A
,
s_B
,
s_C
,
aux_size
;
...
...
This diff is collapsed.
Click to expand it.
src/contraction/ctr_offload.cxx
View file @
fea513c7
...
...
@@ -64,9 +64,9 @@ namespace CTF_int {
/**
 * \brief estimates the host<->device transfer time of this offload step
 *
 * Each tensor's transfer is costed with the fitted upload/download models
 * (estimate_upload_time / estimate_download_time) only. The transfer cost
 * must not also be added through the fixed COST_OFFLOADBW bandwidth
 * constant, otherwise every transfer is counted twice.
 *
 * \param[in] nlyr not used by this estimate
 * \return sum of the modelled transfer times for A, B (uploads) and
 *         C (downloads)
 */
double ctr_offload::est_time_fp(int nlyr){
  double tot_time = 0.0;
  // one modelled transfer of size_X*el_size, scaled by (total_iter/phase_X)
  // NOTE(review): the quotient is integer division if both operands are
  // integers -- confirm this truncation is intended
  tot_time += estimate_upload_time(size_A*sr_A->el_size)*(total_iter/upload_phase_A);
  tot_time += estimate_upload_time(size_B*sr_B->el_size)*(total_iter/upload_phase_B);
  tot_time += estimate_download_time(size_C*sr_C->el_size)*(total_iter/download_phase_C);
  return tot_time;
}
...
...
This diff is collapsed.
Click to expand it.
src/contraction/ctr_tsr.cxx
View file @
fea513c7
...
...
@@ -386,6 +386,7 @@ namespace CTF_int {
// Three-coefficient linear models used to estimate seq_tsr_ctr execution
// time. Each is constructed from its *_init array and a name string.
// est_time() in this file selects _cst when is_custom is set, _off or _inr
// when is_inner is set (depending on inner_params.offload), and _ref
// otherwise.
LinModel<3> seq_tsr_ctr_mdl_cst(seq_tsr_ctr_mdl_cst_init,"seq_tsr_ctr_mdl_cst");
LinModel<3> seq_tsr_ctr_mdl_ref(seq_tsr_ctr_mdl_ref_init,"seq_tsr_ctr_mdl_ref");
LinModel<3> seq_tsr_ctr_mdl_inr(seq_tsr_ctr_mdl_inr_init,"seq_tsr_ctr_mdl_inr");
// model for inner kernels run with inner_params.offload set
LinModel<3> seq_tsr_ctr_mdl_off(seq_tsr_ctr_mdl_off_init,"seq_tsr_ctr_mdl_off");
uint64_t
seq_tsr_ctr
::
est_membw
(){
uint64_t
size_A
=
sy_packed_size
(
order_A
,
edge_len_A
,
sym_A
)
*
sr_A
->
el_size
;
...
...
@@ -430,9 +431,12 @@ namespace CTF_int {
// printf("time estimate is %lf\n", seq_tsr_ctr_mdl.est_time(ps));
if
(
is_custom
)
return
seq_tsr_ctr_mdl_cst
.
est_time
(
ps
);
else
if
(
is_inner
)
return
seq_tsr_ctr_mdl_inr
.
est_time
(
ps
);
else
else
if
(
is_inner
){
if
(
inner_params
.
offload
)
return
seq_tsr_ctr_mdl_off
.
est_time
(
ps
);
else
return
seq_tsr_ctr_mdl_inr
.
est_time
(
ps
);
}
else
return
seq_tsr_ctr_mdl_ref
.
est_time
(
ps
);
}
...
...
@@ -497,7 +501,10 @@ namespace CTF_int {
double
exe_time
=
MPI_Wtime
()
-
st_time
;
// printf("exe_time = %E est_time = %E abs_err = %e rel_err = %lf\n", exe_time,est_time,fabs(exe_time-est_time),fabs(exe_time-est_time)/exe_time);
double
tps
[]
=
{
exe_time
,
1.0
,
(
double
)
est_membw
(),
est_fp
()};
seq_tsr_ctr_mdl_inr
.
observe
(
tps
);
if
(
inner_params
.
offload
)
seq_tsr_ctr_mdl_off
.
observe
(
tps
);
else
seq_tsr_ctr_mdl_inr
.
observe
(
tps
);
// seq_tsr_ctr_mdl_inr.print_param_guess();
}
else
{
double
st_time
=
MPI_Wtime
();
...
...
This diff is collapsed.
Click to expand it.
src/shared/init_models.cxx
View file @
fea513c7
...
...
@@ -2,6 +2,7 @@ namespace CTF_int{
double
seq_tsr_ctr_mdl_cst_init
[]
=
{
3.8587E-13
,
5.0935E-09
,
4.2446E-10
};
double
seq_tsr_ctr_mdl_ref_init
[]
=
{
1.5382E-16
,
9.2293E-10
,
7.6911E-11
};
double
seq_tsr_ctr_mdl_inr_init
[]
=
{
3.2489E-04
,
1.2692E-10
,
1.0686E-10
};
double
seq_tsr_ctr_mdl_off_init
[]
=
{
2.5413E-04
,
1.5889E-10
,
9.6735E-12
};
double
long_contig_transp_mdl_init
[]
=
{
-
1.8639E-03
,
1.8467E-08
};
double
shrt_contig_transp_mdl_init
[]
=
{
5.5768E-03
,
1.7220E-08
};
double
non_contig_transp_mdl_init
[]
=
{
-
4.8961E-04
,
1.0225E-08
};
...
...
@@ -11,5 +12,7 @@ namespace CTF_int{
double
bcast_mdl_init
[]
=
{
2.0139E-04
,
-
4.1326E-05
,
2.8453E-10
,
2.4672E-10
};
double
dgtog_res_mdl_init
[]
=
{
1.1994E-04
,
3.1005E-04
,
9.6259E-10
};
double
blres_mdl_init
[]
=
{
1.4728E-05
,
1.7673E-10
};
double
upload_mdl_init
[]
=
{
1.4604E-04
,
7.3783E-10
};
double
download_mdl_init
[]
=
{
5.7942E-04
,
6.0787E-10
};
}
This diff is collapsed.
Click to expand it.
src/shared/init_models.h
View file @
fea513c7
...
...
@@ -13,6 +13,9 @@ namespace CTF_int{
extern
double
seq_tsr_ctr_mdl_cst_init
[];
extern
double
seq_tsr_ctr_mdl_ref_init
[];
extern
double
seq_tsr_ctr_mdl_inr_init
[];
extern
double
seq_tsr_ctr_mdl_off_init
[];
extern
double
upload_mdl_init
[];
extern
double
download_mdl_init
[];
}
#endif
This diff is collapsed.
Click to expand it.
src/shared/offload.cu
View file @
fea513c7
...
...
@@ -13,6 +13,7 @@
#include "offload.h"
#include "../tensor/algstrct.h"
#include "../interface/timer.h"
namespace
CTF_int
{
volatile
static
int64_t
int64_t_max
=
INT64_MAX
;
...
...
@@ -65,20 +66,42 @@ namespace CTF_int{
cudaError_t
err
=
cudaFree
(
dev_ptr
);
assert
(
err
==
cudaSuccess
);
}
// Two-parameter linear models of host<->device copy time. upload() and
// download() feed them observations of {elapsed time, 1.0, size*el_size};
// estimate_upload_time() / estimate_download_time() query them.
LinModel<2> upload_mdl(upload_mdl_init,"upload_mdl");
LinModel<2> download_mdl(download_mdl_init,"download_mdl");
/**
 * \brief predicts the time of a device-to-host copy using download_mdl
 * \param[in] size transfer size, in the same units download() records
 *                 (size*sr->el_size)
 * \return value of download_mdl.est_time for the parameters {1.0, size}
 */
double estimate_download_time(int64_t size){
  double mdl_params[2];
  mdl_params[0] = 1.0;           // constant term
  mdl_params[1] = (double)size;  // size-dependent term
  return download_mdl.est_time(mdl_params);
}
/**
 * \brief predicts the time of a host-to-device copy using upload_mdl
 * \param[in] size transfer size, in the same units upload() records
 *                 (size*sr->el_size)
 * \return value of upload_mdl.est_time for the parameters {1.0, size}
 */
double estimate_upload_time(int64_t size){
  double mdl_params[2];
  mdl_params[0] = 1.0;           // constant term
  mdl_params[1] = (double)size;  // size-dependent term
  return upload_mdl.est_time(mdl_params);
}
/**
 * \brief copies the device buffer into host memory and records the copy
 *        time as an observation for download_mdl
 * \param[out] host_ptr destination of the size*sr->el_size byte copy
 */
void offload_ptr::download(char * host_ptr){
  assert(initialized);
  TAU_FSTART(cuda_download);
  double t_begin = MPI_Wtime();
  cudaError_t err = cudaMemcpy(host_ptr, dev_ptr, size*sr->el_size,
                               cudaMemcpyDeviceToHost);
  double elapsed = MPI_Wtime()-t_begin;
  // observation layout: {measured time, constant term, transfer size}
  double sample[3];
  sample[0] = elapsed;
  sample[1] = 1.0;
  sample[2] = (double)size*sr->el_size;
  download_mdl.observe(sample);
  TAU_FSTOP(cuda_download);
  assert(err == cudaSuccess);
}
/**
 * \brief copies host memory into the device buffer and records the copy
 *        time as an observation for upload_mdl
 * \param[in] host_ptr source of the size*sr->el_size byte copy
 */
void offload_ptr::upload(char const * host_ptr){
  TAU_FSTART(cuda_upload);
  double t_begin = MPI_Wtime();
  cudaError_t err = cudaMemcpy(dev_ptr, host_ptr, size*sr->el_size,
                               cudaMemcpyHostToDevice);
  double elapsed = MPI_Wtime()-t_begin;
  // observation layout: {measured time, constant term, transfer size}
  double sample[3];
  sample[0] = elapsed;
  sample[1] = 1.0;
  sample[2] = (double)size*sr->el_size;
  upload_mdl.observe(sample);
  TAU_FSTOP(cuda_upload);
  assert(err == cudaSuccess);
}
...
...
This diff is collapsed.
Click to expand it.
src/shared/offload.h
View file @
fea513c7
...
...
@@ -12,6 +12,12 @@ namespace CTF_int{
void
offload_init
();
/** \brief exit offloading, e.g. destroy cublas */
void
offload_exit
();
/**
 * \brief estimate time it takes to download
 * \param[in] size amount of data to transfer; callers in
 *                 ctr_offload::est_time_fp pass size*el_size
 */
double estimate_download_time(int64_t size);

/**
 * \brief estimate time it takes to upload
 * \param[in] size amount of data to transfer; callers in
 *                 ctr_offload::est_time_fp pass size*el_size
 */
double estimate_upload_time(int64_t size);
class
offload_ptr
{
public:
...
...
This diff is collapsed.
Click to expand it.
src/summation/summation.cxx
View file @
fea513c7
...
...
@@ -2263,8 +2263,8 @@ namespace CTF_int {
}
void
summation
::
print
(){
int
i
,
j
,
max
,
ex_A
,
ex_B
;
max
=
A
->
order
+
B
->
order
;
int
i
;
//
max = A->order+B->order;
CommData
global_comm
=
A
->
wrld
->
cdt
;
MPI_Barrier
(
global_comm
.
cm
);
...
...
This diff is collapsed.
Click to expand it.
src/tensor/algstrct.cxx
View file @
fea513c7
...
...
@@ -639,7 +639,7 @@ namespace CTF_int {
swap_buffer
+
ptr_pairs
[
i
].
idx
*
(
sizeof
(
int64_t
)
+
sr
->
el_size
),
sizeof
(
int64_t
)
+
sr
->
el_size
);
}
break
;
break
;
//compiler warning here seems to be gcc bug
}
}
...
...
This diff is collapsed.
Click to expand it.
src/tensor/untyped_tensor.cxx
View file @
fea513c7
...
...
@@ -20,7 +20,7 @@ using namespace CTF;
namespace
CTF_int
{
static
const
char
*
SY_strings
[
4
]
=
{
"NS"
,
"SY"
,
"AS"
,
"SH"
};
//
static const char * SY_strings[4] = {"NS", "SY", "AS", "SH"};
Idx_Tensor
tensor
::
operator
[](
const
char
*
idx_map_
){
Idx_Tensor
idxtsr
(
this
,
idx_map_
);
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment