Commit d68f4f9d authored by solomon
Browse files

Allowed more nvirt, added VERBOSITY flag for better info reporting

......@@ -61,20 +61,20 @@ class Integrals {
aa = CTF_Vector(nv,dw_);
ii = CTF_Vector(no,dw_);
ab = CTF_Matrix(nv,nv,AS,dw_,"V",1);
ai = CTF_Matrix(nv,no,NS,dw_,"V",1);
ia = CTF_Matrix(no,nv,NS,dw_,"V",1);
ij = CTF_Matrix(no,no,AS,dw_,"V",1);
abcd = CTF_Tensor(4,vvvv,shapeASAS,dw_,"V",1);
abci = CTF_Tensor(4,vvvo,shapeASNS,dw_,"V",1);
aibc = CTF_Tensor(4,vovv,shapeNSAS,dw_,"V",1);
aibj = CTF_Tensor(4,vovo,shapeNSNS,dw_,"V",1);
abij = CTF_Tensor(4,vvoo,shapeASAS,dw_,"V",1);
ijab = CTF_Tensor(4,oovv,shapeASAS,dw_,"V",1);
aijk = CTF_Tensor(4,vooo,shapeNSAS,dw_,"V",1);
ijak = CTF_Tensor(4,oovo,shapeASNS,dw_,"V",1);
ijkl = CTF_Tensor(4,oooo,shapeASAS,dw_,"V",1);
ab = CTF_Matrix(nv,nv,AS,dw_,"Vab",1);
ai = CTF_Matrix(nv,no,NS,dw_,"Vai",1);
ia = CTF_Matrix(no,nv,NS,dw_,"Via",1);
ij = CTF_Matrix(no,no,AS,dw_,"Vij",1);
abcd = CTF_Tensor(4,vvvv,shapeASAS,dw_,"Vabcd",1);
abci = CTF_Tensor(4,vvvo,shapeASNS,dw_,"Vabci",1);
aibc = CTF_Tensor(4,vovv,shapeNSAS,dw_,"Vaibc",1);
aibj = CTF_Tensor(4,vovo,shapeNSNS,dw_,"Vaibj",1);
abij = CTF_Tensor(4,vvoo,shapeASAS,dw_,"Vabij",1);
ijab = CTF_Tensor(4,oovv,shapeASAS,dw_,"Vijab",1);
aijk = CTF_Tensor(4,vooo,shapeNSAS,dw_,"Vaijk",1);
ijak = CTF_Tensor(4,oovo,shapeASNS,dw_,"Vijak",1);
ijkl = CTF_Tensor(4,oooo,shapeASAS,dw_,"Vijkl",1);
}
void fill_rand(){
......@@ -148,9 +148,9 @@ class Amplitudes {
int shapeASAS[] = {AS,NS,AS,NS};
int vvoo[] = {nv,nv,no,no};
ai = CTF_Matrix(nv,no,NS,dw_,"T",1);
ai = CTF_Matrix(nv,no,NS,dw_,"Tai",1);
abij = CTF_Tensor(4,vvoo,shapeASAS,dw_,"T",1);
abij = CTF_Tensor(4,vvoo,shapeASAS,dw_,"Tabij",1);
}
tCTF_Idx_Tensor<double> operator[](char const * idx_map_){
......
......@@ -111,7 +111,7 @@ int tCTF<dtype>::init(MPI_Comm const global_context,
#ifdef USE_OMP
if (rank == 0)
DPRINTF(1,"CTF running with %d threads\n",omp_get_max_threads());
VPRINTF(1,"Running with %d threads\n",omp_get_max_threads());
#endif
mst_size = getenv("CTF_MST_SIZE");
......@@ -119,12 +119,11 @@ int tCTF<dtype>::init(MPI_Comm const global_context,
if (mst_size == NULL && stack_size == NULL){
#ifdef USE_MST
if (rank == 0)
DPRINTF(1,"Creating CTF stack of size "PRId64"\n",1000*(long_int)1E6);
VPRINTF(1,"Creating stack of size "PRId64"\n",1000*(long_int)1E6);
CTF_mst_create(1000*(long_int)1E6);
#else
if (rank == 0){
DPRINTF(1,"Running CTF without stack, define CTF_STACK_SIZE ");
DPRINTF(1,"environment variable to activate stack\n");
VPRINTF(1,"Running without stack, define CTF_STACK_SIZE environment variable to activate stack\n");
}
#endif
} else {
......@@ -134,7 +133,7 @@ int tCTF<dtype>::init(MPI_Comm const global_context,
if (stack_size != NULL)
imst_size = MAX(imst_size,strtoull(stack_size,NULL,0));
if (rank == 0)
DPRINTF(1,"Creating CTF stack of size "PRIu64" due to CTF_STACK_SIZE enviroment variable\n",
VPRINTF(1,"Creating stack of size "PRIu64" due to CTF_STACK_SIZE enviroment variable\n",
imst_size);
CTF_mst_create(imst_size);
}
......@@ -142,14 +141,14 @@ int tCTF<dtype>::init(MPI_Comm const global_context,
if (mem_size != NULL){
uint64_t imem_size = strtoull(mem_size,NULL,0);
if (rank == 0)
DPRINTF(1,"CTF memory size set to "PRIu64" by CTF_MEMORY_SIZE environment variable\n",
VPRINTF(1,"Memory size set to "PRIu64" by CTF_MEMORY_SIZE environment variable\n",
imem_size);
CTF_set_mem_size(imem_size);
}
ppn = getenv("CTF_PPN");
if (ppn != NULL){
if (rank == 0)
DPRINTF(1,"CTF assuming %d processes per node due to CTF_PPN environment variable\n",
VPRINTF(1,"Assuming %d processes per node due to CTF_PPN environment variable\n",
atoi(ppn));
LIBT_ASSERT(atoi(ppn)>=1);
CTF_set_memcap(.75/atof(ppn));
......@@ -554,14 +553,25 @@ int tCTF<dtype>::contract(CTF_ctr_type_t const * type,
sprintf(cname+strlen(cname),"%d",type->idx_map_B[i]);
}
sprintf(cname+strlen(cname),"]");
double dtt;
if (dt->get_global_comm()->rank == 0){
dtt = MPI_Wtime();
VPRINTF(1,"Starting %s\n",cname);
}
CTF_Timer tctr(cname);
tctr.start();
ret = dt->home_contract(type, func_ptr, felm, alpha, beta, map_inner);
tctr.stop();
if (dt->get_global_comm()->rank == 0){
VPRINTF(1,"Ended %s in %lf seconds\n",cname,MPI_Wtime()-dtt); }
} else
ret = dt->home_contract(type, func_ptr, felm, alpha, beta, map_inner);
if ((*dt->get_tensors())[type->tid_A]->profile &&
(*dt->get_tensors())[type->tid_B]->profile &&
(*dt->get_tensors())[type->tid_C]->profile){
}
#if DEBUG >= 1
if (dt->get_global_comm()->rank == 0)
printf("End head contraction :\n");
......
......@@ -47,6 +47,7 @@ enum CTF_OP { CTF_OP_SUM, CTF_OP_SUMABS,
typedef int64_t long_int;
typedef long_int key;
static const char * SY_strings[4] = {"NS", "SY", "AS", "SH"};
template<typename dtype>
struct tkv_pair {
......@@ -80,7 +81,7 @@ inline bool comp_tkv_pair(tkv_pair<dtype> i,tkv_pair<dtype> j) {
#define INNER_MAP 0
#define FOLD_TSR 1
#define PERFORM_DESYM 1
#define ALLOW_NVIRT 8
#define ALLOW_NVIRT 32
#define DIAG_RESCALE
#define USE_SYM_SUM
#define HOME_CONTRACT
......
......@@ -1573,6 +1573,8 @@ void dist_tensor<dtype>::desymmetrize(int const sym_tid,
strcpy(spf,"desymmetrize_");
strcat(spf,tsr_sym->name);
CTF_Timer t_pf(spf);
if (global_comm->rank == 0)
VPRINTF(1,"Desymmetrizing %s\n", tsr_sym->name);
t_pf.start();
}
......@@ -1716,6 +1718,16 @@ void dist_tensor<dtype>::symmetrize(int const sym_tid, int const nonsym_tid){
tsr_sym = tensors[sym_tid];
tsr_nonsym = tensors[nonsym_tid];
if (tsr_sym->profile) {
char spf[80];
strcpy(spf,"symmetrize_");
strcat(spf,tsr_nonsym->name);
CTF_Timer t_pf(spf);
if (global_comm->rank == 0)
VPRINTF(1,"Symmetrizing %s\n", tsr_nonsym->name);
t_pf.start();
}
sym_dim = -1;
is = -1;
......@@ -1828,6 +1840,14 @@ idx_map_B, fss, fselm);
CTF_free(idx_map_A);
CTF_free(idx_map_B);
if (tsr_sym->profile) {
char spf[80];
strcpy(spf,"symmetrize_");
strcat(spf,tsr_sym->name);
CTF_Timer t_pf(spf);
t_pf.stop();
}
TAU_FSTOP(symmetrize);
}
......
......@@ -125,17 +125,13 @@ int dist_tensor<dtype>::initialize(CommData_t * cdt_global,
/* FIXME: Sorting will fuck up dimensional ordering */
// std::sort(srt_dim_len, srt_dim_len + ndim);
#if DEBUG >= 1
if (cdt_global->rank == 0)
printf("Setting up initial torus topology:\n");
#endif
VPRINTF(1,"Setting up initial torus physical topology P:\n");
stride = 1, cut = 0;
for (i=0; i<ndim; i++){
LIBT_ASSERT(dim_len[i] != 1);
#if DEBUG >= 1
if (cdt_global->rank == 0)
printf("dim[%d] = %d:\n",i,srt_dim_len[i]);
#endif
VPRINTF(1,"P[%d] = %d\n",i,srt_dim_len[i]);
phys_comm[i] = (CommData_t*)CTF_alloc(sizeof(CommData_t));
SETUP_SUB_COMM(cdt_global, phys_comm[i],
......@@ -181,6 +177,10 @@ void dist_tensor<dtype>::set_phys_comm(CommData_t ** cdt, int const ndim){
lda = 1;
/* Figure out the lda of each dimension communicator */
for (i=0; i<ndim; i++){
#if DEBUG >= 1
if (global_comm->rank == 0)
printf("Added topo %d dim[%d] = %d:\n",(int)topovec.size(),i,cdt[i]->np);
#endif
LIBT_ASSERT(cdt[i]->np != 1);
new_topo.lda[i] = lda;
lda = lda*cdt[i]->np;
......@@ -1970,54 +1970,12 @@ int dist_tensor<dtype>::print_map(FILE * stream,
mapping * map;
tsr = tensors[tid];
if (all)
COMM_BARRIER(global_comm);
if (/*tsr->is_mapped &&*/ (!all || global_comm->rank == 0)){
printf("Tensor %d of dimension %d is mapped to a ", tid, tsr->ndim);
if (is_inner){
for (i=0; i<inner_topovec[tsr->itopo].ndim-1; i++){
printf("%d-by-", inner_topovec[tsr->itopo].dim_comm[i]->np);
}
if (inner_topovec[tsr->itopo].ndim > 0)
printf("%d inner topology.\n", inner_topovec[tsr->itopo].dim_comm[i]->np);
} else {
for (i=0; i<topovec[tsr->itopo].ndim-1; i++){
printf("%d-by-", topovec[tsr->itopo].dim_comm[i]->np);
}
if (topovec[tsr->itopo].ndim > 0)
printf("%d topology.\n", topovec[tsr->itopo].dim_comm[i]->np);
}
for (i=0; i<tsr->ndim; i++){
switch (tsr->edge_map[i].type){
case NOT_MAPPED:
printf("Dimension %d of length %d and symmetry %d is not mapped\n",i,tsr->edge_len[i],tsr->sym[i]);
break;
case PHYSICAL_MAP:
printf("Dimension %d of length %d and symmetry %d is mapped to physical dimension %d with phase %d\n",
i,tsr->edge_len[i],tsr->sym[i],tsr->edge_map[i].cdt,tsr->edge_map[i].np);
map = &tsr->edge_map[i];
while (map->has_child){
map = map->child;
if (map->type == VIRTUAL_MAP)
printf("\tDimension %d also has a virtualized child of phase %d\n", i, map->np);
else
printf("\tDimension %d also has a physical child mapped to physical dimension %d with phase %d\n",
i, map->cdt, map->np);
}
break;
case VIRTUAL_MAP:
printf("Dimension %d of length %d and symmetry %d is mapped virtually with phase %d\n",
i,tsr->edge_len[i],tsr->sym[i],tsr->edge_map[i].np);
break;
}
}
if (!all || global_comm->rank == 0){
tsr->print_map(stdout);
}
if (all)
COMM_BARRIER(global_comm);
return DIST_TENSOR_SUCCESS;
}
/**
......@@ -2383,7 +2341,7 @@ void dist_tensor<dtype>::contract_mst(){
#include "tensor_object.cxx"
#include "dist_tensor_map.cxx"
#include "dist_tensor_op.cxx"
#include "dist_tensor_inner.cxx"
......
......@@ -52,7 +52,8 @@ struct topology {
template<typename dtype>
struct tensor {
class tensor {
public:
int ndim;
int * edge_len;
int is_padded;
......@@ -85,6 +86,8 @@ struct tensor {
int has_home;
char const * name;
int profile;
void print_map(FILE * stream) const;
};
......
......@@ -651,13 +651,6 @@ int remap_tensor(int const tid,
dtype * shuffled_data_corr;
#endif
if (tsr->profile) {
char spf[80];
strcpy(spf,"redistribute_");
strcat(spf,tsr->name);
CTF_Timer t_pf(spf);
t_pf.start();
}
CTF_alloc_ptr(sizeof(int)*tsr->ndim, (void**)&new_phase);
CTF_alloc_ptr(sizeof(int)*tsr->ndim, (void**)&new_rank);
......@@ -695,11 +688,20 @@ int remap_tensor(int const tid,
tsr->is_home = 0;
}
#endif
#if DEBUG >= 1
if (global_comm->rank == 0){
printf("Remapping tensor %d with virtualization factor of %d\n",tid,new_nvirt);
}
if (tsr->profile) {
char spf[80];
strcpy(spf,"redistribute_");
strcat(spf,tsr->name);
if (global_comm->rank == 0){
if (can_block_shuffle) VPRINTF(1,"Remapping tensor %s via block_reshuffle\n",tsr->name);
else VPRINTF(1,"Remapping tensor %s via cyclic_reshuffle\n",tsr->name);
#if VERBOSE >=1
tsr->print_map(stdout);
#endif
}
CTF_Timer t_pf(spf);
t_pf.start();
}
#if VERIFY_REMAP
padded_reshuffle(tid,
......@@ -726,9 +728,6 @@ int remap_tensor(int const tid,
#endif
if (can_block_shuffle){
if (global_comm->rank == 0) {
DPRINTF(1,"remapping tensor %d via block_reshuffle\n", tid);
}
block_reshuffle( tsr->ndim,
old_phase,
old_size,
......@@ -743,10 +742,6 @@ int remap_tensor(int const tid,
shuffled_data,
global_comm);
} else {
if (global_comm->rank == 0) {
DEBUG_PRINTF("remapping with cyclic reshuffle (was padded = %d)\n",
tsr->is_padded);
}
// CTF_alloc_ptr(sizeof(dtype)*tsr->size, (void**)&shuffled_data);
cyclic_reshuffle(tsr->ndim,
old_size,
......
......@@ -62,6 +62,8 @@ void fold_torus(topology * topo,
/* Reorder the lda, bring j lda to lower lda and adjust other ldas */
color = glb_comm->rank - topo->dim_comm[i]->rank*topo->lda[i]
- topo->dim_comm[j]->rank*topo->lda[j];
if (j<ndim-1)
color = (color%topo->lda[i])+(color/topo->lda[j+1]);
}
np = topo->dim_comm[i]->np*topo->dim_comm[j]->np;
......
......@@ -10,7 +10,7 @@
#include "timer.h"
#include "util.h"
#define MAX_NAME_LENGTH 43
#define MAX_NAME_LENGTH 53
int main_argc = 0;
const char * const * main_argv;
......
......@@ -154,6 +154,20 @@ do { printf("error:%s:%d ",__FILE__,__LINE__); printf(__VA_ARGS__); printf("\n")
do { printf("warning: "); printf(__VA_ARGS__); printf("\n"); } while(0)
#endif
/*
 * VPRINTF(level, fmt, ...): verbosity-gated informational print.
 *
 * When VERBOSE is defined, the message is printed (prefixed with "CTF: ")
 * only if level <= VERBOSE. The format argument must be a string literal,
 * because the prefix is attached by adjacent string-literal concatenation.
 * When VERBOSE is not defined, the macro expands to an empty statement and
 * none of its arguments are evaluated.
 *
 * The space between "CTF: " and __VA_ARGS__ is required: since C++11 a
 * string literal immediately followed by an identifier is lexed as a single
 * user-defined-literal token, which would prevent __VA_ARGS__ from expanding.
 * The level argument is parenthesized so expression arguments compare as a
 * whole against VERBOSE.
 */
#if defined(VERBOSE)
#ifndef VPRINTF
#define VPRINTF(i,...) \
do { if ((i) <= VERBOSE) { \
printf("CTF: " __VA_ARGS__); } \
} while (0)
#endif
#else
#ifndef VPRINTF
#define VPRINTF(...) do { } while (0)
#endif
#endif
#ifdef DEBUG
#ifndef DPRINTF
#define DPRINTF(i,...) \
......@@ -191,16 +205,6 @@ do { printf("warning: "); printf(__VA_ARGS__); printf("\n"); } while(0)
#endif
#endif
#ifdef VERBOSE
#ifndef VERBOSE_PRINTF
#define VERBOSE_PRINTF(...) \
do { LOC; printf(__VA_ARGS__); } while(0)
#endif
#else
#ifndef VERBOSE_PRINTF
#define VERBOSE_PRINTF
#endif
#endif
#ifdef DUMPDEBUG
#ifndef DUMPDEBUG_PRINTF
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment