d0/d53/a00232_source.html

00001 /* This file is part of the ivl C++ library <http://image.ntua.gr/ivl>.
00002    A C++ template library extending syntax towards mathematical notation.
00003
00004    Copyright (C) 2012 Yannis Avrithis <iavr@image.ntua.gr>
00005    Copyright (C) 2012 Kimon Kontosis <kimonas@image.ntua.gr>
00006
00007    ivl is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU Lesser General Public License
00009    version 3 as published by the Free Software Foundation.
00010
00011    Alternatively, you can redistribute it and/or modify it under the terms
00012    of the GNU General Public License version 2 as published by the Free
00013    Software Foundation.
00014
00015    ivl is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00018    See the GNU General Public License for more details.
00019
00020    You should have received a copy of the GNU General Public License
00021    and a copy of the GNU Lesser General Public License along
00022    with ivl. If not, see <http://www.gnu.org/licenses/>. */
00023
00024 #ifndef IVL_CORE_DETAILS_LOOPS_LOOPS_ND
00025 #define IVL_CORE_DETAILS_LOOPS_LOOPS_ND
00026
00027 namespace ivl {
00028
00029 namespace loops {
00030
00031 template <class F, class T, class S, class J, class D>
00032 void loop(array_nd<T, S>& out, const array_nd<J, D>& in)
00033 {
00034         typedef array_nd<T, S> out_t;
00035         typedef const array_nd<J, D> in_t;
00036
00037         int id[2] = {0, 0};
00038         int j = 0;
00039
00040         for(size_t i = 0; i < out.ndims() && j < 2; i++) {
00041                 if(out.size(i) > 1) id[j++] = int(i);
00042         }
00043
00044         loop_base_nd<1, F, out_t, in_t,
00045                 typename out_t::optimal_access_tag,
00046                 typename in_t::optimal_access_tag,
00047                 typename out_t::has_specialized_iter,
00048                 typename in_t::has_specialized_iter>::
00049                         exec(out, in, id[0], id[1]);
00050
00051 }
00052
00053 template <class F, class T, class S, class J, class D>
00054 void loop_ww(array_nd<T, S>& out, array_nd<J, D>& in)
00055 {
00056         typedef array_nd<T, S> out_t;
00057         typedef array_nd<J, D> in_t;
00058
00059         int id[2] = {0, 0};
00060         int j = 0;
00061
00062         for(size_t i = 0; i < out.ndims() && j < 2; i++) {
00063                 if(out.size(i) > 1) id[j++] = int(i);
00064         }
00065
00066         loop_base_nd<1, F, out_t, in_t,
00067                 typename out_t::optimal_access_tag,
00068                 typename in_t::optimal_access_tag,
00069                 typename out_t::has_specialized_iter,
00070                 typename in_t::has_specialized_iter>::
00071                         exec(out, in, id[0], id[1]);
00072
00073 }
00074
00076 template<
00077         class F, class A1, class A2,
00078         class I_OPTIMAL_ACCESS,
00079         class O_ITER_1, class I_ITER_1, class O_ITER_2
00080 >
00081 inline
00082 void loop_base_nd<3, F, A1, A2,
00083         ivl::data::nd_seq_optimal_tag, I_OPTIMAL_ACCESS,
00084         types::t_false, types::t_false,
00085         O_ITER_1, I_ITER_1, O_ITER_2, types::term>::
00086         exec(A1& out, A2& in,
00087                 int inner_dim_1, int inner_dim_2)
00088 {
00089         // Remark: as a contradiction to the coding style, this function is
00090         // way more lengthy than expected. However this is because it was
00091         // decided by kimonas that this is the best and simplest design for that
00092         // particular function which induces complications to improve speed.
00093
00094         // type definitions
00095         typedef A1 out_t;
00096         typedef A2 in_t;
00097
00098         typedef typename out_t::_fast_iterator_nd iter_t;
00099         typedef typename iter_t::iter_nd_border_walker brd_t;
00100
00101         // input iterator
00102         I_ITER_1 it_src = in.begin();
00103
00104         // TODO:
00105         // assert that the dimensions are the same
00106
00107         // quickly proceed for 1d arrays
00108         if(out.length() == 0) return;
00109         if(out.ndims() == 1) {
00110                 O_ITER_1 it = out._fast_begin(0);
00111                 //todo:!!bug. out != in iterator???
00112                 for(O_ITER_1 it_end = out._fast_last(0); it != it_end; ++it) {
00113                         F::elem_op(*it, *it_src);
00114                         ++it_src;
00115                 }
00116                 F::elem_op(*it, *it_src);
00117                 return;
00118         }
00119
00120         // arrays with info about the dimensions
00121         internal::little_arrayling<iter_t> iters(out.ndims());
00122         internal::little_arrayling<brd_t> first_to_last(out.ndims());
00123         internal::little_arrayling<size_t> pos(out.ndims());
00124         internal::little_arrayling<size_t> length(out.ndims());
00125         internal::little_arrayling<size_t> dim_map(out.ndims());
00126
00127         size_t i, j = 0; // j: the number of total dims after filtering
00128         length[1] = 1;
00129
00130         // remove all singleton dims. in case all need removal, keep the last dim.
00131         for(i = 0; i < out.ndims(); i++) {
00132                 if(out.size(i) > 1 || (j == 0 && i == out.ndims() - 1)) {
00133                         iters[j] = out._fast_begin(i);
00134                         length[j] = out.size(i);
00135                         first_to_last[j] = out.first_to_last(j);
00136                         pos[j] = 0;
00137                         dim_map[j] = i;
00138                         j++;
00139                 }
00140         }
00141
00142         // sorting dim0 <-> dim1 for speed improvement.
00143         /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ??
00144         if(j > 1 && out.size(dim_map[1]) > 2 * out.size(dim_map[0])
00145                         && out.size(dim_map[0]) < 100) {
00146                 std::swap(iters[0], iters[1]);
00147                 std::swap(length[0], length[1]);
00148                 std::swap(first_to_last[0], first_to_last[1]);
00149                 std::swap(dim_map[0], dim_map[1]);
00150         }*/
00151
00152         // local variables for the inner dimensions to improve speed.
00153         O_ITER_1 it0 = iters[0];
00154         O_ITER_1 it0_end;
00155         O_ITER_2 it1 = iters[1];
00156         brd_t first_to_last1 = first_to_last[1];
00157         brd_t first_to_last0 = out.first_to_last(dim_map[0]);
00158         size_t length1 = length[1];
00159         size_t pos1 = 0;
00160
00161         // special loop for 1 or 2 dims (after filtering)
00162         if(j <= 2) {
00163                 it0_end = out._fast_last(dim_map[0]);
00164                 for(; it0 != it0_end; ++it0) {
00165                         F::elem_op(*it0, *it_src);
00166                         ++it_src;
00167                 }
00168                 F::elem_op(*it0, *it_src);
00169                 ++it_src;
00170                 //it0_end = out._end(dim_map[0]);
00171                 while(++pos1 < length1) {
00172                         it0 -= first_to_last0;
00173                         it0.inc_along_other_dim_at_begin(it1);
00174
00175                         for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) {
00176                                 F::elem_op(*it0, *it_src);
00177                                 ++it_src;
00178                         }
00179                         F::elem_op(*it0, *it_src);
00180                         ++it_src;
00181                         //it0_end.inc_along(it1);
00182                         // TODO: consider using this approach of it0_end
00183                         // as a possible speed improvement in the generic loop. need bench.
00184                 }
00185                 return;
00186         }
00187
00188         // loop for more than 2 dims.
00189         do {
00190                 //it0 + begin_to_end0; wipe these two strange lines
00191                 //it0 != it0_end;
00192
00193                 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) {
00194                         //*it0;
00195                         //*it_src;
00196                         F::elem_op(*it0, *it_src);
00197                         ++it_src;
00198                 }
00199                 F::elem_op(*it0, *it_src);
00200                 ++it_src;
00201
00202                 it0 -= first_to_last0;
00203                 if(++pos1 < length1) {
00204                         it0.inc_along_other_dim_at_begin(it1);
00205                 }
00206                 else {
00207                         it0 -= first_to_last1;
00208                         pos1 = 0;
00209                         i = 2;
00210                         while(++pos[i] >= length[i]) {
00211                                 pos[i] = 0;
00212                                 it0 -= first_to_last[i];
00213                                 i++;
00214                                 if(i >= j) return; // loop is broken here.
00215                         }
00216                         it0.inc_along_other_dim_at_begin(iters[i]);
00217                 }
00218         } while(1);
00219 }
00220
00221
00223 template<
00224         class F, class A1, class A2,
00225         class O_OPTIMAL_ACCESS,
00226         class O_ITER_1, class I_ITER_1, class I_ITER_2
00227 >
00228 inline
00229 void loop_base_nd<3, F, A1, A2,
00230         O_OPTIMAL_ACCESS, ivl::data::nd_seq_optimal_tag,
00231         types::t_false, types::t_false,
00232         O_ITER_1, I_ITER_1, types::term, I_ITER_2>::
00233         exec(A1& out, A2& in,
00234                 int inner_dim_1, int inner_dim_2)
00235 {
00236         // Remark: as a contradiction to the coding style, this function is
00237         // way more lengthy than expected. However this is because it was
00238         // decided by kimonas that this is the best and simplest design for that
00239         // particular function which induces complications to improve speed.
00240
00241         // type definitions
00242         typedef A1 out_t;
00243         typedef A2 in_t;
00244
00245         typedef typename types::best_fast_iterator_nd<in_t>::type iter_t;
00246         typedef typename iter_t::iter_nd_border_walker brd_t;
00247
00248         // input iterator
00249         O_ITER_1 it_dst = out.begin();
00250
00251         // TODO:
00252         // assert that the dimensions are the same
00253
00254         // quickly proceed for 1d arrays
00255         if(out.length() == 0) return;
00256         // TODO: ????? why is here it_dst and in some other places it_src??
00257         if(in.ndims() == 1) {
00258                 I_ITER_1 it = in._fast_begin(0);
00259                 for(I_ITER_1 it_end = in._fast_last(0); it != it_end; ++it) {
00260                         F::elem_op(*it_dst, *it);
00261                         ++it_dst;
00262                 }
00263                 F::elem_op(*it_dst, *it);
00264                 ++it_dst;
00265                 return;
00266         }
00267
00268         // arrays with info about the dimensions
00269         internal::little_arrayling<iter_t> iters(in.ndims());
00270         internal::little_arrayling<brd_t> first_to_last(in.ndims());
00271         internal::little_arrayling<size_t> pos(in.ndims());
00272         internal::little_arrayling<size_t> length(in.ndims());
00273         internal::little_arrayling<size_t> dim_map(in.ndims());
00274
00275         size_t i, j = 0; // j: the number of total dims after filtering
00276         length[1] = 1;
00277
00278         // remove all singleton dims. in case all need removal, keep the last dim.
00279         for(i = 0; i < in.ndims(); i++) {
00280                 if(in.size(i) > 1 || (j == 0 && i == in.ndims() - 1)) {
00281                         iters[j] = in._fast_begin(i);
00282                         length[j] = in.size(i);
00283                         first_to_last[j] = in.first_to_last(j);
00284                         pos[j] = 0;
00285                         dim_map[j] = i;
00286                         j++;
00287                 }
00288         }
00289
00290         // sorting dim0 <-> dim1 for speed improvement.
00291         // TODO: on all others!!!!!!
00292         /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ??
00293         if(j > 1 && in.size(dim_map[1]) > 2 * in.size(dim_map[0])
00294                         && in.size(dim_map[0]) < 100) {
00295                 std::swap(iters[0], iters[1]);
00296                 std::swap(length[0], length[1]);
00297                 std::swap(first_to_last[0], first_to_last[1]);
00298                 std::swap(dim_map[0], dim_map[1]);
00299         }*/
00300
00301         // local variables for the inner dimensions to improve speed.
00302         I_ITER_1 it0 = iters[0];
00303         I_ITER_1 it0_end;
00304         I_ITER_2 it1 = iters[1];
00305         brd_t first_to_last1 = first_to_last[1];
00306         brd_t first_to_last0 = in.first_to_last(dim_map[0]);
00307         size_t length1 = length[1];
00308         size_t pos1 = 0;
00309
00310         // special loop for 1 or 2 dims (after filtering)
00311         if(j <= 2) {
00312                 it0_end = in._fast_last(dim_map[0]);
00313                 for(; it0 != it0_end; ++it0) {
00314                         F::elem_op(*it_dst, *it0);
00315                         ++it_dst;
00316                 }
00317                 F::elem_op(*it_dst, *it0);
00318                 ++it_dst;
00319                 //it0_end = in._end(dim_map[0]);
00320                 while(++pos1 < length1) {
00321                         it0 -= first_to_last0;
00322                         it0.inc_along_other_dim_at_begin(it1);
00323                         for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) {
00324                                 F::elem_op(*it_dst, *it0);
00325                                 ++it_dst;
00326                         }
00327                         F::elem_op(*it_dst, *it0);
00328                         ++it_dst;
00329                         //it0_end.inc_along(it1);
00330                         // TODO: consider using this approach of it0_end
00331                         // as a possible speed improvement in the generic loop. need bench.
00332                 }
00333                 return;
00334         }
00335
00336         // loop for more than 2 dims.
00337         do {
00338                 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) {
00339                         F::elem_op(*it_dst, *it0);
00340                         ++it_dst;
00341                 }
00342                 F::elem_op(*it_dst, *it0);
00343                 ++it_dst;
00344                 it0 -= first_to_last0;
00345                 if(++pos1 < length1) {
00346                         it0.inc_along_other_dim_at_begin(it1);
00347                 }
00348                 else {
00349                         it0 -= first_to_last1;
00350                         pos1 = 0;
00351                         i = 2;
00352                         while(++pos[i] >= length[i]) {
00353                                 pos[i] = 0;
00354                                 it0 -= first_to_last[i];
00355                                 i++;
00356                                 if(i >= j) return; // loop is broken here.
00357                         }
00358                         it0.inc_along_other_dim_at_begin(iters[i]);
00359                 }
00360         } while(1);
00361 }
00362
00363
00364
00365
00367 template<
00368         class F, class A1, class A2,
00369         class O_ITER_1, class I_ITER_1, class O_ITER_2, class I_ITER_2
00370 >
00371 inline
00372 void loop_base_nd<3, F, A1, A2,
00373         ivl::data::nd_seq_optimal_tag, ivl::data::nd_seq_optimal_tag,
00374         types::t_false, types::t_false,
00375         O_ITER_1, I_ITER_1, O_ITER_2, I_ITER_2>::
00376         exec(A1& out, A2& in,
00377                 int inner_dim_1, int inner_dim_2)
00378 {
00379         // Remark: as a contradiction to the coding style, this function is
00380         // way more lengthy than expected. However this is because it was
00381         // decided by kimonas that this is the best and simplest design for that
00382         // particular function which induces complications to improve speed.
00383
00384         // type definitions
00385         typedef A1 out_t;
00386         typedef A2 in_t;
00387
00388         // Almost everything is doubled, one for src, one for dst
00389
00390         //dst
00391         typedef typename out_t::_fast_iterator_nd iter_t;
00392         typedef typename iter_t::iter_nd_border_walker brd_t;
00393         //src
00394         typedef typename types::best_fast_iterator_nd<in_t>::type siter_t;
00395         typedef typename siter_t::iter_nd_border_walker sbrd_t;
00396
00397         // TODO:
00398         // assert that the dimensions are the same.
00399
00400
00401         // quickly proceed for 1d arrays
00402         if(out.length() == 0) return;
00403         if(out.ndims() == 1) {
00404                 O_ITER_1 it = out._fast_begin(0);
00405                 I_ITER_1 it_src = in._fast_begin(0);
00406                 for(O_ITER_1 it_end = out._fast_last(0); it != it_end; ++it) {
00407                         F::elem_op(*it, *it_src);
00408                         ++it_src;
00409                 }
00410                 F::elem_op(*it, *it_src);
00411                 return;
00412         }
00413
00414         // arrays with info about the dimensions
00415         internal::little_arrayling<iter_t> iters(out.ndims());
00416         internal::little_arrayling<brd_t> first_to_last(out.ndims());
00417         internal::little_arrayling<size_t> pos(out.ndims());
00418         internal::little_arrayling<size_t> length(out.ndims());
00419         internal::little_arrayling<size_t> dim_map(out.ndims());
00420         //src
00421         internal::little_arrayling<siter_t> siters(out.ndims());
00422         internal::little_arrayling<sbrd_t> sfirst_to_last(out.ndims());
00423
00424         size_t i, j = 0; // j: the number of total dims after filtering
00425         length[1] = 1;
00426
00427         // remove all singleton dims. in case all need removal, keep the last dim.
00428         for(i = 0; i < out.ndims(); i++) {
00429                 if(out.size(i) > 1 || (j == 0 && i == out.ndims() - 1)) {
00430                         iters[j] = out._fast_begin(i);
00431                         length[j] = out.size(i);
00432                         first_to_last[j] = out.first_to_last(i);
00433                         pos[j] = 0;
00434                         dim_map[j] = i;
00435                         //src
00436                         siters[j] = in._fast_begin(i);
00437                         sfirst_to_last[j] = in.first_to_last(i);
00438
00439                         j++;
00440                 }
00441         }
00442
00443         // sorting dim0 <-> dim1 for speed improvement.
00444         /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ??
00445         if(j > 1 && out.size(dim_map[1]) > 2 * out.size(dim_map[0])
00446                         && out.size(dim_map[0]) < 100) {
00447                 std::swap(iters[0], iters[1]);
00448                 std::swap(siters[0], siters[1]);
00449                 std::swap(length[0], length[1]);
00450                 std::swap(first_to_last[0], first_to_last[1]);
00451                 std::swap(sfirst_to_last[0], sfirst_to_last[1]);
00452                 std::swap(dim_map[0], dim_map[1]);
00453         }*/
00454
00455         // local variables for the inner dimensions to improve speed.
00456         O_ITER_1 it0 = iters[0];
00457         O_ITER_1 it0_end;
00458         O_ITER_2 it1 = iters[1];
00459         brd_t first_to_last1 = first_to_last[1];
00460         brd_t first_to_last0 = out.first_to_last(dim_map[0]);
00461         size_t length1 = length[1];
00462         size_t pos1 = 0;
00463         //src
00464         I_ITER_1 it_src = siters[0];
00465         I_ITER_2 it_src1 = siters[1];
00466         sbrd_t sfirst_to_last1 = sfirst_to_last[1];
00467         sbrd_t sfirst_to_last0 = in.first_to_last(dim_map[0]);
00468
00469         // special loop for 1 or 2 dims (after filtering)
00470         if(j <= 2) {
00471                 it0_end = out._fast_last(dim_map[0]);
00472                 for(; it0 != it0_end; ++it0) {
00473                         F::elem_op(*it0, *it_src);
00474                         ++it_src;
00475                 }
00476                 F::elem_op(*it0, *it_src);
00477                 //it0_end = out._end(dim_map[0]);
00478                 while(++pos1 < length1) {
00479                         it0 -= first_to_last0;
00480                         it0.inc_along_other_dim_at_begin(it1);
00481
00482                         //src
00483                         it_src -= sfirst_to_last0;
00484                         it_src.inc_along_other_dim_at_begin(it_src1);
00485
00486                         for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) {
00487                                 F::elem_op(*it0, *it_src);
00488                                 ++it_src;
00489                         }
00490                         F::elem_op(*it0, *it_src);
00491                         //it0_end.inc_along(it1);
00492                         // TODO: consider using this approach of it0_end
00493                         // as a possible speed improvement in the generic loop. need bench.
00494                         // New Note: after testing:
00495                         // this todo is bad idea.
00496                         // this it_end approach was finally commented out
00497                         // because it is moving out of the iterator valid locations... !!
00498
00499                 }
00500                 return;
00501         }
00502
00503         // loop for more than 2 dims.
00504         do {
00505                 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) {
00506                         F::elem_op(*it0, *it_src);
00507                         ++it_src;
00508                 }
00509                 F::elem_op(*it0, *it_src);
00510                 it0 -= first_to_last0;
00511                 it_src -= sfirst_to_last0;
00512
00513                 if(++pos1 < length1) {
00514                         it0.inc_along_other_dim_at_begin(it1);
00515                         it_src.inc_along_other_dim_at_begin(it_src1);
00516                 }
00517                 else {
00518                         it0 -= first_to_last1;
00519                         it_src -= sfirst_to_last1;
00520                         pos1 = 0;
00521                         i = 2;
00522                         while(++pos[i] >= length[i]) {
00523                                 pos[i] = 0;
00524                                 it0 -= first_to_last[i];
00525                                 it_src -= sfirst_to_last[i];
00526                                 i++;
00527                                 if(i >= j) return; // loop is broken here.
00528                         }
00529                         it0.inc_along_other_dim_at_begin(iters[i]);
00530                         it_src.inc_along_other_dim_at_begin(siters[i]);
00531                 }
00532         } while(1);
00533 }
00534
00535
00536
00537 }; /*namespace loops*/
00538
00539 }; /*namespace ivl*/
00540
00541 #endif // IVL_CORE_DETAILS_LOOPS_LOOPS_ND