Commit 2ea09bc8 authored by solomon's avatar solomon
Browse files

some progress on symmetrization and summation code restructuring

parent 3e049698
......@@ -2,7 +2,7 @@ include ../config.mk
ctf: subdirs
SUBDIRS = interface shared tensor scaling summation contraction mapping redistribution test bench studies
SUBDIRS = interface shared tensor symmetry mapping redistribution scaling summation contraction test bench studies
.PHONY: subdirs $(SUBDIRS)
......
......@@ -60,7 +60,6 @@ namespace CTF {
void (*gemm)(char,char,int,int,int,dtype,dtype const*,dtype const*,dtype,dtype*);
void (*axpy)(int,dtype,dtype const*,int,dtype*,int);
void (*scal)(int,dtype,dtype*,int);
public:
/**
* \brief default constructor valid for only certain types:
* bool, int, unsigned int, int64_t, uint64_t,
......
......@@ -571,12 +571,12 @@ namespace CTF_int {
}
int summation::sym_sum_tsr(bool run_diag){
int stat, sidx, i, nst_B, * new_idx_map;
int sidx, i, nst_B, * new_idx_map;
int * map_A, * map_B;
int ** dstack_map_B;
tensor * tnsr_A, * tnsr_B, * new_tsr, ** dstack_tsr_B;
std::vector<summation> perm_types;
std::vector< char * > signs;
std::vector<int> signs;
char const * dbeta;
//#if (DEBUG >= 1 || VERBOSE >= 1)
// print_sum(type,alpha_,beta);
......@@ -625,12 +625,14 @@ namespace CTF_int {
map_B = new_idx_map;
}
summation newsum = summation(*this);
newsum.A = tnsr_A;
newsum.B = tnsr_B;
if (tnsr_A == tnsr_B){
new_tsr = new tensor(tnsr_A);
summation newsum = summation(*this);
newsum.A = new_tsr;
newsum.B = tnsr_B;
newsum.sym_sum_tsr(run_diag);
return newsum.sym_sum_tsr(run_diag);
/*clone_tensor(ntid_A, 1, &new_tid);
new_type = *type;
......@@ -639,6 +641,7 @@ namespace CTF_int {
del_tsr(new_tid);
return stat;*/
}
/* new_type.tid_A = ntid_A;
new_type.tid_B = ntid_B;
new_type.idx_map_A = map_A;
......@@ -700,47 +703,57 @@ namespace CTF_int {
DPRINTF(1,"Performing index desymmetrization\n");
desymmetrize(tnsr_A, unfold_sum->A, 0);
unfold_sum->B = tnsr_B;
unfold_sum.sym_sum_tr(run_diag);/
unfold_sum->sym_sum_tsr(run_diag);
// sym_sum_tsr(alpha, beta, &unfold_type, ftsr, felm, run_diag);
if (ntid_A != unfold_type.tid_A){
if (tnsr_A != unfold_sum->A){
unfold_sum->A->unfold();
ntsr_A->pull_alias(unfold_sum->A);
tnsr_A->pull_alias(unfold_sum->A);
delete unfold_sum->A;
}
} else {
get_sym_perms(&new_type, alpha, perm_types, signs);
//get_sym_perms(&new_type, alpha, perm_types, signs);
get_sym_perms(newsum, perm_types, signs);
if (A->wrld->cdt.rank == 0)
DPRINTF(1,"Performing %d summation permutations\n",
(int)perm_types.size());
dbeta = beta;
char * new_alpha = (char*)malloc(tnsr_B->sr.el_size);
for (i=0; i<(int)perm_types.size(); i++){
sum_tensors(signs[i], dbeta, perm_types[i].tid_A, perm_types[i].tid_B,
perm_types[i].idx_map_A, perm_types[i].idx_map_B, ftsr, felm, run_diag);
dbeta = 1.0;
if (signs[i] == 1)
B->sr.copy(new_alpha, alpha);
else
tnsr_B->sr.addinv(alpha, new_alpha);
perm_types[i].alpha = new_alpha;
perm_types[i].beta = dbeta;
perm_types[i].execute();
/*sum_tensors(new_alpha, dbeta, perm_types[i].tid_A, perm_types[i].tid_B,
perm_types[i].idx_map_A, perm_types[i].idx_map_B, ftsr, felm, run_diag);*/
dbeta = newsum.B->sr.addid;
}
for (i=0; i<(int)perm_types.size(); i++){
/* for (i=0; i<(int)perm_types.size(); i++){
free_type(&perm_types[i]);
}
}*/
perm_types.clear();
signs.clear();
}
CTF_free(unfold_type.idx_map_A);
CTF_free(unfold_type.idx_map_B);
} else {
sum_tensors(alpha, beta, new_type.tid_A, new_type.tid_B, new_type.idx_map_A,
new_type.idx_map_B, ftsr, felm, run_diag);
newsum.sum_tensors(run_diag);
/* sum_tensors(alpha, beta, new_type.tid_A, new_type.tid_B, new_type.idx_map_A,
new_type.idx_map_B, ftsr, felm, run_diag);*/
}
if (ntid_A != type->tid_A) del_tsr(ntid_A);
if (tnsr_A != A) delete tnsr_A;
for (i=nst_B-1; i>=0; i--){
extract_diag(dstack_tid_B[i], dstack_map_B[i], 0, &ntid_B, &new_idx_map);
del_tsr(ntid_B);
ntid_B = dstack_tid_B[i];
// extract_diag(dstack_tid_B[i], dstack_map_B[i], 0, &ntid_B, &new_idx_map);
dstack_tsr_B[i]->extract_diag(dstack_map_B[i], 0, tnsr_B, &new_idx_map);
//del_tsr(ntid_B);
delete tnsr_B;
tnsr_B = dstack_tsr_B[i];
}
ASSERT(ntid_B == type->tid_B);
ASSERT(tnsr_B == B);
CTF_free(map_A);
CTF_free(map_B);
CTF_free(dstack_map_B);
CTF_free(dstack_tid_B);
CTF_free(dstack_tsr_B);
return SUCCESS;
}
......
......@@ -3,6 +3,9 @@
#ifndef __SYM_INDICES_HXX__
#define __SYM_INDICES_HXX__
#include "../interface/common.h"
#include "../interface/tensor.h"
struct index_locator_
{
int sort;
......@@ -476,17 +479,12 @@ template int align_symmetric_indices<char*>(int order_A, char*& idx_A, const int
int order_C, char*& idx_C, const int* sym_C);
template int overcounting_factor<char*>(int order_A, const char*& idx_A, const int* sym_A,
int order_B, const char*& idx_B, const int* sym_B,
int order_C, const char*& idx_C, const int* sym_C);
template int overcounting_factor<char*>(int order_A, const char*& idx_A, const int* sym_A,
int order_B, const char*& idx_B, const int* sym_B,
int order_C, const char*& idx_C, const int* sym_C);
template int overcounting_factor<char*>(int order_A, char * const & idx_A, const int* sym_A,
int order_B, char * const & idx_B, const int* sym_B,
int order_C, char * const & idx_C, const int* sym_C);
template int overcounting_factor<char*>(int order_A, const char*& idx_A, const int* sym_A,
int order_B, const char*& idx_B, const int* sym_B);
template int overcounting_factor<char*>(int order_A, char * const & idx_A, const int* sym_A,
int order_B, char * const & idx_B, const int* sym_B);
#endif
#include "symmetrization.h"
#include "../shared/util.h"
namespace CTF_int {
......@@ -7,7 +8,7 @@ namespace CTF_int {
bool is_C){
int i, is, j, sym_dim, scal_diag, num_sy, num_sy_neg, ctid;
int * idx_map_A, * idx_map_B;
double rev_sign;
int rev_sign;
if (sym_tsr == nonsym_tsr) return;
......@@ -15,14 +16,14 @@ namespace CTF_int {
sym_dim = -1;
is = -1;
rev_sign = 1.0;
rev_sign = 1;
scal_diag = 0;
num_sy=0;
num_sy_neg=0;
for (i=0; i<sym_tsr->order; i++){
if (sym_tsr->sym[i] != nonsym_tsr->sym[i]){
is = i;
if (sym_tsr->sym[i] == AS) rev_sign = -1.0;
if (sym_tsr->sym[i] == AS) rev_sign = -1;
if (sym_tsr->sym[i] == SY){
scal_diag = 1;
}
......@@ -47,7 +48,7 @@ namespace CTF_int {
nonsym_tsr->set_padding();
copy_mapping(sym_tsr->order, sym_tsr->edge_map, nonsym_tsr->edge_map);
nonsym_tsr->is_mapped = 1;
nonsym_tsr->itopo = sym_tsr->itopo;
nonsym_tsr->topo = sym_tsr->topo;
nonsym_tsr->set_padding();
if (sym_dim == -1) {
......@@ -64,13 +65,13 @@ namespace CTF_int {
strcpy(spf,"desymmetrize_");
strcat(spf,sym_tsr->name);
CTF::Timer t_pf(spf);
if (global_comm.rank == 0)
if (sym_tsr->wrld->rank == 0)
VPRINTF(1,"Desymmetrizing %s\n", sym_tsr->name);
t_pf.start();
}
CTF_mst_alloc_ptr(nonsym_tsr->size*nonsym_tsr->sr.el_size, (void**)&nonsym_tsr->data);
nonsym_tsr->sr.set(nonsym_tsr->data, nonsym_tsr->size, nonsym_tsr->sr.addid);
nonsym_tsr->sr.set(nonsym_tsr->data, nonsym_tsr->sr.addid, nonsym_tsr->size);
CTF_alloc_ptr(sym_tsr->order*sizeof(int), (void**)&idx_map_A);
CTF_alloc_ptr(sym_tsr->order*sizeof(int), (void**)&idx_map_B);
......@@ -83,7 +84,7 @@ namespace CTF_int {
if (!is_C){
tensor * ctsr = sym_tsr;
if (scal_diag && num_sy+num_sy_neg==1){
ctsr = new tensor(stsr);
ctsr = new tensor(sym_tsr);
ctsr->sym[is] = SH;
ctsr->zero_out_padding();
ctsr->sym[is] = SY;
......@@ -236,7 +237,7 @@ namespace CTF_int {
}
}
if (sym_dim == -1) {
sym_tsr->itopo = nonsym_tsr->itopo;
sym_tsr->topo = nonsym_tsr->topo;
sym_tsr->is_mapped = 1;
copy_mapping(nonsym_tsr->order, nonsym_tsr->edge_map, sym_tsr->edge_map);
sym_tsr->set_padding();
......
......@@ -3,6 +3,8 @@
#include "assert.h"
#include "../tensor/untyped_tensor.h"
#include "../summation/summation.h"
#include "../contraction/contraction.h"
namespace CTF_int {
/**
......@@ -36,7 +38,7 @@ namespace CTF_int {
int const * sym,
int * nperm,
int ** perm,
double * sign){
double * sign);
/**
* \brief orders the summation indices of one tensor
......@@ -51,8 +53,8 @@ namespace CTF_int {
*/
void order_perm(summation const & sum,
int * idx_arr,
int const off_A,
int const off_B,
int off_A,
int off_B,
int & add_sign,
int & mod);
......@@ -117,8 +119,8 @@ namespace CTF_int {
* \param[out] signs sign of each summation
*/
void get_sym_perms(summation const & sum,
std::vector<summation>& perms,
std::vector<int>& signs);
std::vector<summation>& perms,
std::vector<int>& signs);
/**
* \brief finds all permutations of a contraction
......
......@@ -46,6 +46,8 @@ namespace CTF_int {
public:
/** \brief size of each element of semiring in bytes */
int el_size;
/** \brief true if an additive inverse is provided */
bool is_ring;
/** \brief identity element for addition i.e. 0 */
char * addid;
/** \brief identity element for multiplication i.e. 1 */
......
......@@ -167,6 +167,12 @@ namespace CTF_int {
this->edge_map[i].has_child = 0;
this->edge_map[i].np = 1;
if (this->sym[i] != NS) {
if (this->sym[i] == AS && !sr.is_ring){
if (wrld->rank == 0){
printf("CTF ERROR: It is illegal to define antisymmetric tensor must be defined on a ring, yet no additive inverse was provided for this semiring (see semiring constructor), aborting.\n");
}
ABORT;
}
this->sym_table[(i+1)+i*order] = 1;
this->sym_table[(i+1)*order+i] = 1;
}
......@@ -1211,7 +1217,60 @@ namespace CTF_int {
}
/**
 * \brief sets the padded portion of this tensor's local data to the additive
 *        identity of its semiring
 *
 * Does nothing when the tensor has a zero-length edge or is not mapped.
 * Otherwise the tensor is unfolded, its padding is recomputed, and the
 * per-dimension phase/rank arrays are built from the edge mappings. Processes
 * whose layer index works out to 0 zero only their padding; every other
 * process has its whole local buffer reset.
 *
 * \return SUCCESS in all cases
 */
int tensor::zero_out_padding(){
  int i, num_virt, idx_lyr;
  int64_t np;
  int * virt_phase, * virt_phys_rank, * phys_phase;
  mapping * map;

  TAU_FSTART(zero_out_padding);

  if (this->has_zero_edge_len){
    // Stop the timer on every exit path so start/stop stay balanced.
    TAU_FSTOP(zero_out_padding);
    return SUCCESS;
  }
  this->unfold();
  this->set_padding();

  if (!this->is_mapped){
    TAU_FSTOP(zero_out_padding);
    return SUCCESS;
  } else {
    np = this->size;

    CTF_alloc_ptr(sizeof(int)*this->order, (void**)&virt_phase);
    CTF_alloc_ptr(sizeof(int)*this->order, (void**)&phys_phase);
    CTF_alloc_ptr(sizeof(int)*this->order, (void**)&virt_phys_rank);

    num_virt = 1;
    idx_lyr = wrld->rank;
    for (i=0; i<this->order; i++){
      /* Calculate rank and phase arrays */
      map = this->edge_map + i;
      phys_phase[i] = map->calc_phase();
      virt_phase[i] = phys_phase[i]/map->calc_phys_phase();
      virt_phys_rank[i] = map->calc_phys_rank(topo)*virt_phase[i];
      num_virt = num_virt*virt_phase[i];

      // Remove this dimension's physical-rank contribution from the global
      // rank; what is left after the loop is the layer index.
      // NOTE(review): presumably idx_lyr == 0 selects the first replication
      // layer of the topology -- confirm against the mapping code.
      if (map->type == PHYSICAL_MAP)
        idx_lyr -= topo->lda[map->cdt]
                   *virt_phys_rank[i]/virt_phase[i];
    }
    if (idx_lyr == 0){
      zero_padding(this->order, np, num_virt,
                   this->pad_edge_len, this->sym, this->padding,
                   phys_phase, virt_phase, virt_phys_rank, this->data, sr);
    } else {
      // data holds np elements of sr.el_size bytes each, so reset it through
      // the semiring: every element becomes the additive identity. A raw
      // std::fill over np entries of the buffer would cover only np bytes
      // and would assume the identity is an all-zero bit pattern.
      sr.set(this->data, sr.addid, np);
    }
    CTF_free(virt_phase);
    CTF_free(phys_phase);
    CTF_free(virt_phys_rank);
  }
  TAU_FSTOP(zero_out_padding);

  return SUCCESS;
}
}
......
......@@ -173,6 +173,11 @@ namespace CTF_int {
*/
int set_zero();
/**
* \brief sets padded portion of tensor to zero (this should be maintained internally)
*/
int zero_out_padding();
/**
* \brief displays mapping information
* \param[in] stream output log (e.g. stdout)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment