Fixed a bug involving multiple reads/writes to/from the same (or symmetrically...

Fixed a bug involving multiple reads/writes to/from the same (or symmetrically equivalent) index. Added a bool that allows read_all to unpack the tensor (off by default).

Fixed a bug involving multiple reads/writes to/from the same (or symmetrically...
Fixed a bug involving multiple reads/writes to/from the same (or symmetrically equivalent) index. Added a bool that allows read_all to unpack the tensor (off by default).
8316afe9 · solomon · 94b63d80 · 8316afe9 · 8316afe9 · 8316afe9
Commit 8316afe9 authored 9 years ago by solomon
5 changed files
--- a/src/interface/tensor.cxx
+++ b/src/interface/tensor.cxx
@@ -222,17 +222,17 @@ namespace CTF {


  template<typename dtype, bool is_ord>
-  void Tensor<dtype, is_ord>::read_all(int64_t * npair, dtype ** vals){
+  void Tensor<dtype, is_ord>::read_all(int64_t * npair, dtype ** vals, bool unpack){
    int ret;
-    ret = CTF_int::tensor::allread(npair, ((char**)vals));
+    ret = CTF_int::tensor::allread(npair, ((char**)vals), unpack);
    assert(ret == CTF_int::SUCCESS);
  }

  template<typename dtype, bool is_ord>
-  int64_t Tensor<dtype, is_ord>::read_all(dtype * vals){
+  int64_t Tensor<dtype, is_ord>::read_all(dtype * vals, bool unpack){
    int ret;
    int64_t npair;
-    ret = CTF_int::tensor::allread(&npair, (char*)vals);
+    ret = CTF_int::tensor::allread(&npair, (char*)vals, unpack);
    assert(ret == CTF_int::SUCCESS);
    return npair;
  }

--- a/src/interface/tensor.h
+++ b/src/interface/tensor.h
@@ -619,15 +619,19 @@ namespace CTF {
       * \brief collects the entire tensor data on each process (not memory scalable)
       * \param[out] npair number of values in the tensor
       * \param[out] data pointer to the data of the entire tensor
+       * \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
       */
      void read_all(int64_t  * npair,
-                    dtype **   data);
+                    dtype **   data,
+                    bool       unpack=false);
      
      /**
       * \brief collects the entire tensor data on each process (not memory scalable)
       * \param[in,out] preallocated data pointer to the data of the entire tensor
+       * \param[in,out] preallocated data pointer to the data of the entire tensor
+       * \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
       */
-      int64_t read_all(dtype * data);
+      int64_t read_all(dtype * data, bool unpack=false);

      /**
       * \brief obtains a small number of the biggest elements of the 

--- a/src/redistribution/sparse_rw.cxx
+++ b/src/redistribution/sparse_rw.cxx
@@ -622,21 +622,30 @@ namespace CTF_int {
              while (pr_offset < size && pairs[pr_offset].k() == pairs[pr_offset-1].k()){
  //              printf("found overlapped write of key %ld and value %lf\n", pairs[pr_offset].k, pairs[pr_offset].d);
                if (rw == 'r'){
+                  if (alpha == NULL){
+                    pairs[pr_offset].write_val(data + sr->el_size*(buf_offset+i));
+                  } else {
 //                  pairs[pr_offset].d = alpha*data[buf_offset+i]+beta*pairs[pr_offset].d;
-                  char wval[sr->pair_size()];
-                  sr->mul(alpha, data + sr->el_size*(buf_offset+i), wval);
-                  char wval2[sr->pair_size()];
-                  sr->mul(beta,  pairs[pr_offset].d(), wval2);
-                  sr->add(wval, wval2, wval);
-                  pairs[pr_offset].write_val(wval);
+                    char wval[sr->pair_size()];
+                    sr->mul(alpha, data + sr->el_size*(buf_offset+i), wval);
+                    char wval2[sr->pair_size()];
+                    sr->mul(beta,  pairs[pr_offset].d(), wval2);
+                    sr->add(wval, wval2, wval);
+                    pairs[pr_offset].write_val(wval);
+                  }
                } else {
+                  //FIXME: may be problematic if someone writes entries of a symmetric tensor redundantly
+                  if (alpha == NULL){
+                    sr->add(data + (buf_offset+i)*sr->el_size, 
+                            pairs[pr_offset].d(),
+                            data + (buf_offset+i)*sr->el_size);
+                  } else {
                  //data[(int64_t)buf_offset+i] = beta*data[(int64_t)buf_offset+i]+alpha*pairs[pr_offset].d;
-                  char wval[sr->pair_size()];
-                  sr->mul(alpha,  pairs[pr_offset].d(), wval);
-                  char wval2[sr->pair_size()];
-                  sr->copy(wval2, data + ((int64_t)buf_offset+i)*sr->el_size);
-                  sr->add(wval, wval2, wval);
-                  sr->copy(data + sr->el_size*(buf_offset+i), wval);
+                    char wval[sr->pair_size()];
+                    sr->mul(alpha,  pairs[pr_offset].d(), wval);
+                    sr->add(wval, data + sr->el_size*(buf_offset+i), wval);
+                    sr->copy(data + sr->el_size*(buf_offset+i), wval);
+                  }
                }
  //              printf("rw = %c found overlapped write and set value to %lf\n", rw, data[(int64_t)buf_offset+i]);
                pr_offset++;

--- a/src/tensor/untyped_tensor.cxx
+++ b/src/tensor/untyped_tensor.cxx
@@ -1051,7 +1051,7 @@ namespace CTF_int {
    }
  }

-  PairIterator tensor::read_all_pairs(int64_t * num_pair){
+  PairIterator tensor::read_all_pairs(int64_t * num_pair, bool unpack){
    int numPes;
    int * nXs;
    int nval, n, i;
@@ -1063,7 +1063,29 @@ namespace CTF_int {
      *num_pair = 0;
      return PairIterator(sr, NULL);
    }
-
+    //unpack symmetry
+    if (unpack){
+      bool is_nonsym=true;
+      for (int i=0; i<order; i++){
+        if (sym[i] != NS){
+          is_nonsym = false;
+        }
+      }
+      if (!is_nonsym){
+        int sym_A[order];
+        std::fill(sym_A, sym_A+order, NS);
+        int idx_A[order];
+        for (int i=0; i<order; i++){
+          idx_A[i] = i;
+        }
+        tensor tA(sr, order, lens, sym_A, wrld, 1);
+        tA.is_home = 0;
+        tA.has_home = 0;
+        summation st(this, idx_A, sr->mulid(), &tA, idx_A, sr->mulid());
+        st.execute();
+        return tA.read_all_pairs(num_pair, false);
+      }
+    }
    alloc_ptr(numPes*sizeof(int), (void**)&nXs);
    alloc_ptr(numPes*sizeof(int), (void**)&pXs);
    pXs[0] = 0;
@@ -1094,8 +1116,9 @@ namespace CTF_int {
  }

  int tensor::allread(int64_t * num_pair,
-                      char **   all_data){
-    PairIterator ipr = read_all_pairs(num_pair);
+                      char **   all_data,
+                      bool      unpack){
+    PairIterator ipr = read_all_pairs(num_pair, unpack);
    char * ball_data = (char*)alloc(sr->el_size*(*num_pair));
    for (int64_t i=0; i<*num_pair; i++){
      ipr[i].read_val(ball_data+i*sr->el_size);
@@ -1106,8 +1129,9 @@ namespace CTF_int {
  }

  int tensor::allread(int64_t * num_pair,
-                      char *    all_data){
-    PairIterator ipr = read_all_pairs(num_pair);
+                      char *    all_data,
+                      bool      unpack){
+    PairIterator ipr = read_all_pairs(num_pair, unpack);
    for (int64_t i=0; i<*num_pair; i++){
      ipr[i].read_val(all_data+i*sr->el_size);
    }

--- a/src/tensor/untyped_tensor.h
+++ b/src/tensor/untyped_tensor.h
@@ -41,7 +41,7 @@ namespace CTF_int {
       * \param[out] num_pair number of values read
       * return pair iterator with allocated all pairs read 
       */
-      PairIterator read_all_pairs(int64_t * num_pair);
+      PairIterator read_all_pairs(int64_t * num_pair, bool unpack);

      /** 
       * \brief copies all tensor data from other
@@ -290,18 +290,22 @@ namespace CTF_int {
       *         WARNING: will use an 'unscalable' amount of memory. 
       * \param[out] num_pair number of values read
       * \param[in,out] mapped_data values read (allocated by library)
+       * \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
       */
      int allread(int64_t * num_pair,
-                  char **   all_data);
+                  char **   all_data,
+                  bool      unpack);

      /**
       * \brief read entire tensor with each processor (in packed layout).
       *         WARNING: will use an 'unscalable' amount of memory. 
       * \param[out] num_pair number of values read
       * \param[in,out] preallocated mapped_data values read
+       * \param[in] unpack if true any symmetric tensor is unpacked, otherwise only unique elements are read
       */
      int allread(int64_t * num_pair,
-                  char *    all_data);
+                  char *    all_data,
+                  bool      unpack);

       /**
       * \brief cuts out a slice (block) of this tensor = B