Commit 8316afe9 authored by solomon's avatar solomon
Browse files

Fixed a bug involving multiple reads/writes to/from the same (or symmetrically...

Fixed a bug involving multiple reads/writes to/from the same (or symmetrically equivalent) index. Added a bool that allows read_all to unpack the tensor (off by default).
parent 94b63d80
......@@ -222,17 +222,17 @@ namespace CTF {
/**
 * \brief collects the tensor data on each process by forwarding to
 *        CTF_int::tensor::allread
 * \param[out] npair number of values in the tensor
 * \param[out] vals pointer to the data of the entire tensor
 * \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
 */
template<typename dtype, bool is_ord>
void Tensor<dtype, is_ord>::read_all(int64_t * npair, dtype ** vals){
void Tensor<dtype, is_ord>::read_all(int64_t * npair, dtype ** vals, bool unpack){
int ret;
// the typed output pointer is handed to the untyped internal layer as char**
ret = CTF_int::tensor::allread(npair, ((char**)vals));
ret = CTF_int::tensor::allread(npair, ((char**)vals), unpack);
// the status code is only checked through assert; it is not returned to the caller
assert(ret == CTF_int::SUCCESS);
}
/**
 * \brief collects the tensor data on each process into a caller-provided
 *        buffer by forwarding to CTF_int::tensor::allread
 * \param[in,out] vals preallocated pointer to the data of the entire tensor
 * \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
 * \return number of values reported by allread
 */
template<typename dtype, bool is_ord>
int64_t Tensor<dtype, is_ord>::read_all(dtype * vals){
int64_t Tensor<dtype, is_ord>::read_all(dtype * vals, bool unpack){
int ret;
int64_t npair;
// the typed buffer is handed to the untyped internal layer as char*
ret = CTF_int::tensor::allread(&npair, (char*)vals);
ret = CTF_int::tensor::allread(&npair, (char*)vals, unpack);
// the status code is only checked through assert; only the count is returned
assert(ret == CTF_int::SUCCESS);
return npair;
}
......
......@@ -619,15 +619,19 @@ namespace CTF {
* \brief collects the entire tensor data on each process (not memory scalable)
* \param[out] npair number of values in the tensor
* \param[out] data pointer to the data of the entire tensor
* \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
*/
void read_all(int64_t * npair,
dtype ** data);
dtype ** data,
bool unpack=false);
/**
* \brief collects the entire tensor data on each process (not memory scalable)
* \param[in,out] preallocated data pointer to the data of the entire tensor
* \param[in,out] data preallocated pointer to the data of the entire tensor
* \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
*/
int64_t read_all(dtype * data);
int64_t read_all(dtype * data, bool unpack=false);
/**
* \brief obtains a small number of the biggest elements of the
......
......@@ -622,21 +622,30 @@ namespace CTF_int {
while (pr_offset < size && pairs[pr_offset].k() == pairs[pr_offset-1].k()){
// printf("found overlapped write of key %ld and value %lf\n", pairs[pr_offset].k, pairs[pr_offset].d);
if (rw == 'r'){
if (alpha == NULL){
pairs[pr_offset].write_val(data + sr->el_size*(buf_offset+i));
} else {
// pairs[pr_offset].d = alpha*data[buf_offset+i]+beta*pairs[pr_offset].d;
char wval[sr->pair_size()];
sr->mul(alpha, data + sr->el_size*(buf_offset+i), wval);
char wval2[sr->pair_size()];
sr->mul(beta, pairs[pr_offset].d(), wval2);
sr->add(wval, wval2, wval);
pairs[pr_offset].write_val(wval);
char wval[sr->pair_size()];
sr->mul(alpha, data + sr->el_size*(buf_offset+i), wval);
char wval2[sr->pair_size()];
sr->mul(beta, pairs[pr_offset].d(), wval2);
sr->add(wval, wval2, wval);
pairs[pr_offset].write_val(wval);
}
} else {
//FIXME: may be problematic if someone writes entries of a symmetric tensor redundantly
if (alpha == NULL){
sr->add(data + (buf_offset+i)*sr->el_size,
pairs[pr_offset].d(),
data + (buf_offset+i)*sr->el_size);
} else {
//data[(int64_t)buf_offset+i] = beta*data[(int64_t)buf_offset+i]+alpha*pairs[pr_offset].d;
char wval[sr->pair_size()];
sr->mul(alpha, pairs[pr_offset].d(), wval);
char wval2[sr->pair_size()];
sr->copy(wval2, data + ((int64_t)buf_offset+i)*sr->el_size);
sr->add(wval, wval2, wval);
sr->copy(data + sr->el_size*(buf_offset+i), wval);
char wval[sr->pair_size()];
sr->mul(alpha, pairs[pr_offset].d(), wval);
sr->add(wval, data + sr->el_size*(buf_offset+i), wval);
sr->copy(data + sr->el_size*(buf_offset+i), wval);
}
}
// printf("rw = %c found overlapped write and set value to %lf\n", rw, data[(int64_t)buf_offset+i]);
pr_offset++;
......
......@@ -1051,7 +1051,7 @@ namespace CTF_int {
}
}
PairIterator tensor::read_all_pairs(int64_t * num_pair){
PairIterator tensor::read_all_pairs(int64_t * num_pair, bool unpack){
int numPes;
int * nXs;
int nval, n, i;
......@@ -1063,7 +1063,29 @@ namespace CTF_int {
*num_pair = 0;
return PairIterator(sr, NULL);
}
//unpack symmetry
if (unpack){
bool is_nonsym=true;
for (int i=0; i<order; i++){
if (sym[i] != NS){
is_nonsym = false;
}
}
if (!is_nonsym){
int sym_A[order];
std::fill(sym_A, sym_A+order, NS);
int idx_A[order];
for (int i=0; i<order; i++){
idx_A[i] = i;
}
tensor tA(sr, order, lens, sym_A, wrld, 1);
tA.is_home = 0;
tA.has_home = 0;
summation st(this, idx_A, sr->mulid(), &tA, idx_A, sr->mulid());
st.execute();
return tA.read_all_pairs(num_pair, false);
}
}
alloc_ptr(numPes*sizeof(int), (void**)&nXs);
alloc_ptr(numPes*sizeof(int), (void**)&pXs);
pXs[0] = 0;
......@@ -1094,8 +1116,9 @@ namespace CTF_int {
}
int tensor::allread(int64_t * num_pair,
char ** all_data){
PairIterator ipr = read_all_pairs(num_pair);
char ** all_data,
bool unpack){
PairIterator ipr = read_all_pairs(num_pair, unpack);
char * ball_data = (char*)alloc(sr->el_size*(*num_pair));
for (int64_t i=0; i<*num_pair; i++){
ipr[i].read_val(ball_data+i*sr->el_size);
......@@ -1106,8 +1129,9 @@ namespace CTF_int {
}
int tensor::allread(int64_t * num_pair,
char * all_data){
PairIterator ipr = read_all_pairs(num_pair);
char * all_data,
bool unpack){
PairIterator ipr = read_all_pairs(num_pair, unpack);
for (int64_t i=0; i<*num_pair; i++){
ipr[i].read_val(all_data+i*sr->el_size);
}
......
......@@ -41,7 +41,7 @@ namespace CTF_int {
* \param[out] num_pair number of values read
* return pair iterator with allocated all pairs read
*/
PairIterator read_all_pairs(int64_t * num_pair);
PairIterator read_all_pairs(int64_t * num_pair, bool unpack);
/**
* \brief copies all tensor data from other
......@@ -290,18 +290,22 @@ namespace CTF_int {
* WARNING: will use an 'unscalable' amount of memory.
* \param[out] num_pair number of values read
* \param[in,out] all_data values read (allocated by library)
* \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
*/
int allread(int64_t * num_pair,
char ** all_data);
char ** all_data,
bool unpack);
/**
* \brief read entire tensor with each processor (in packed layout).
* WARNING: will use an 'unscalable' amount of memory.
* \param[out] num_pair number of values read
* \param[in,out] all_data preallocated buffer that receives the values read
* \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
*/
int allread(int64_t * num_pair,
char * all_data);
char * all_data,
bool unpack);
/**
* \brief cuts out a slice (block) of this tensor = B
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment