d6/de1/a00233_source.html

00001 /* This file is part of the ivl C++ library <http://image.ntua.gr/ivl>.
00002    A C++ template library extending syntax towards mathematical notation.
00003
00004    Copyright (C) 2012 Yannis Avrithis <iavr@image.ntua.gr>
00005    Copyright (C) 2012 Kimon Kontosis <kimonas@image.ntua.gr>
00006
00007    ivl is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU Lesser General Public License
00009    version 3 as published by the Free Software Foundation.
00010
00011    Alternatively, you can redistribute it and/or modify it under the terms
00012    of the GNU General Public License version 2 as published by the Free
00013    Software Foundation.
00014
00015    ivl is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00018    See the GNU General Public License for more details.
00019
00020    You should have received a copy of the GNU General Public License
00021    and a copy of the GNU Lesser General Public License along
00022    with ivl. If not, see <http://www.gnu.org/licenses/>. */
00023
00024 #ifndef IVL_CORE_DETAILS_LOOPS_LOOPS_ND_WITH_END
00025 #define IVL_CORE_DETAILS_LOOPS_LOOPS_ND_WITH_END
00026
00027 namespace ivl {
00028
00029 namespace loops {
00030
00031 template <class F, class T, class S, class K, class J, class D, class P>
00032 void loop(array_nd<T, S, K>& out, const array_nd<J, D, P>& in)
00033 {
00034         typedef array_nd<T, S, K> out_t;
00035         typedef const array_nd<J, D, P> in_t;
00036
00037         int id[2] = {0, 0};
00038         int j = 0;
00039
00040         for(size_t i = 0; i < out.ndims() && j < 2; i++) {
00041                 if(out.size(i) > 1) id[j++] = int(i);
00042         }
00043
00044         loop_base_nd<1, F, out_t, in_t,
00045                 typename out_t::optimal_access_tag,
00046                 typename in_t::optimal_access_tag,
00047                 typename out_t::has_specialized_iter,
00048                 typename in_t::has_specialized_iter>::
00049                         exec(out, in, id[0], id[1]);
00050
00051 }
00052
00053 template <class F, class T, class S, class K, class J, class D, class P>
00054 void loop_ww(array_nd<T, S, K>& out, array_nd<J, D, P>& in)
00055 {
00056         typedef array_nd<T, S, K> out_t;
00057         typedef array_nd<J, D, P> in_t;
00058
00059         int id[2] = {0, 0};
00060         int j = 0;
00061
00062         for(size_t i = 0; i < out.ndims() && j < 2; i++) {
00063                 if(out.size(i) > 1) id[j++] = int(i);
00064         }
00065
00066         loop_base_nd<1, F, out_t, in_t,
00067                 typename out_t::optimal_access_tag,
00068                 typename in_t::optimal_access_tag,
00069                 typename out_t::has_specialized_iter,
00070                 typename in_t::has_specialized_iter>::
00071                         exec(out, in, id[0], id[1]);
00072
00073 }
00074
00076 template<
00077         class F, class A1, class A2,
00078         class I_OPTIMAL_ACCESS,
00079         class O_ITER_1, class I_ITER_1, class O_ITER_2
00080 >
00081 inline
00082 void loop_base_nd<3, F, A1, A2,
00083         ivl::data::nd_seq_optimal_tag, I_OPTIMAL_ACCESS,
00084         types::t_false, types::t_false,
00085         O_ITER_1, I_ITER_1, O_ITER_2, types::term>::
00086         exec(A1& out, A2& in,
00087                 int inner_dim_1, int inner_dim_2)
00088 {
00089         // Remark: as a contradiction to the coding style, this function is
00090         // way more lengthy than expected. However this is because it was
00091         // decided by kimonas that this is the best and simplest design for that
00092         // particular function which induces complications to improve speed.
00093
00094         // type definitions
00095         typedef A1 out_t;
00096         typedef A2 in_t;
00097
00098         typedef typename out_t::iterator_nd iter_t;
00099         typedef typename iter_t::iter_nd_border_walker brd_t;
00100
00101         // input iterator
00102         I_ITER_1 it_src = in.begin();
00103
00104         // TODO:
00105         // assert that the dimensions are the same
00106
00107         // quickly proceed for 1d arrays
00108         if(out.ndims() == 1) {
00109                 O_ITER_1 it = out._begin(0);
00110                 //todo:!!bug. out != in iterator???
00111                 for(O_ITER_1 it_end = out._end(0); it != it_end; ++it) {
00112                         F::elem_op(*it, *it_src);
00113                         ++it_src;
00114                 }
00115                 return;
00116         }
00117
00118         // arrays with info about the dimensions
00119         internal::little_arrayling<iter_t> iters(out.ndims());
00120         internal::little_arrayling<brd_t> first_to_last(out.ndims());
00121         internal::little_arrayling<size_t> pos(out.ndims());
00122         internal::little_arrayling<size_t> length(out.ndims());
00123         internal::little_arrayling<size_t> dim_map(out.ndims());
00124
00125         size_t i, j = 0; // j: the number of total dims after filtering
00126         length[1] = 1;
00127
00128         // remove all singleton dims. in case all need removal, keep the last dim.
00129         for(i = 0; i < out.ndims(); i++) {
00130                 if(out.size(i) > 1 || (j == 0 && i == out.ndims() - 1)) {
00131                         iters[j] = out._begin(i);
00132                         length[j] = out.size(i);
00133                         first_to_last[j] = out.first_to_last(j);
00134                         pos[j] = 0;
00135                         dim_map[j] = i;
00136                         j++;
00137                 }
00138         }
00139
00140         // sorting dim0 <-> dim1 for speed improvement.
00141         /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ??
00142         if(j > 1 && out.size(dim_map[1]) > 2 * out.size(dim_map[0])
00143                         && out.size(dim_map[0]) < 100) {
00144                 std::swap(iters[0], iters[1]);
00145                 std::swap(length[0], length[1]);
00146                 std::swap(first_to_last[0], first_to_last[1]);
00147                 std::swap(dim_map[0], dim_map[1]);
00148         }*/
00149
00150         // local variables for the inner dimensions to improve speed.
00151         O_ITER_1 it0 = iters[0];
00152         O_ITER_1 it0_end;
00153         O_ITER_2 it1 = iters[1];
00154         brd_t first_to_last1 = first_to_last[1];
00155         brd_t begin_to_end0 = out.begin_to_end(dim_map[0]);
00156         size_t length1 = length[1];
00157         size_t pos1 = 0;
00158
00159         // special loop for 1 or 2 dims (after filtering)
00160         if(j <= 2) {
00161                 it0_end = out._end(dim_map[0]);
00162                 for(; it0 != it0_end; ++it0) {
00163                         F::elem_op(*it0, *it_src);
00164                         ++it_src;
00165                 }
00166                 //it0_end = out._end(dim_map[0]);
00167                 while(++pos1 < length1) {
00168                         it0 -= begin_to_end0;
00169                         it0.inc_along_other_dim_at_begin(it1);
00170
00171                         for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) {
00172                                 F::elem_op(*it0, *it_src);
00173                                 ++it_src;
00174                         }
00175                         //it0_end.inc_along(it1);
00176                         // TODO: consider using this approach of it0_end
00177                         // as a possible speed improvement in the generic loop. need bench.
00178                 }
00179                 return;
00180         }
00181
00182         // loop for more than 2 dims.
00183         do {
00184                 //it0 + begin_to_end0; wipe these two strange lines
00185                 //it0 != it0_end;
00186
00187                 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) {
00188                         *it0;
00189                         *it_src;
00190                         F::elem_op(*it0, *it_src);
00191                         ++it_src;
00192                 }
00193
00194                 it0 -= begin_to_end0;
00195                 if(++pos1 < length1) {
00196                         it0.inc_along_other_dim_at_begin(it1);
00197                 }
00198                 else {
00199                         it0 -= first_to_last1;
00200                         pos1 = 0;
00201                         i = 2;
00202                         while(++pos[i] >= length[i]) {
00203                                 pos[i] = 0;
00204                                 it0 -= first_to_last[i];
00205                                 i++;
00206                                 if(i >= j) return; // loop is broken here.
00207                         }
00208                         it0.inc_along_other_dim_at_begin(iters[i]);
00209                 }
00210         } while(1);
00211 }
00212
00213
00215 template<
00216         class F, class A1, class A2,
00217         class O_OPTIMAL_ACCESS,
00218         class O_ITER_1, class I_ITER_1, class I_ITER_2
00219 >
00220 inline
00221 void loop_base_nd<3, F, A1, A2,
00222         O_OPTIMAL_ACCESS, ivl::data::nd_seq_optimal_tag,
00223         types::t_false, types::t_false,
00224         O_ITER_1, I_ITER_1, types::term, I_ITER_2>::
00225         exec(A1& out, A2& in,
00226                 int inner_dim_1, int inner_dim_2)
00227 {
00228         // Remark: as a contradiction to the coding style, this function is
00229         // way more lengthy than expected. However this is because it was
00230         // decided by kimonas that this is the best and simplest design for that
00231         // particular function which induces complications to improve speed.
00232
00233         // type definitions
00234         typedef A1 out_t;
00235         typedef A2 in_t;
00236
00237         typedef typename in_t::const_iterator_nd iter_t;
00238         typedef typename iter_t::iter_nd_border_walker brd_t;
00239
00240         // input iterator
00241         O_ITER_1 it_dst = out.begin();
00242
00243         // TODO:
00244         // assert that the dimensions are the same
00245
00246         // quickly proceed for 1d arrays
00247         // TODO: ????? why is here it_dst and in some other places it_src??
00248         if(in.ndims() == 1) {
00249                 I_ITER_1 it = in._begin(0);
00250                 for(I_ITER_1 it_end = in._end(0); it != it_end; ++it) {
00251                         F::elem_op(*it_dst, *it);
00252                         ++it_dst;
00253                 }
00254                 return;
00255         }
00256
00257         // arrays with info about the dimensions
00258         internal::little_arrayling<iter_t> iters(in.ndims());
00259         internal::little_arrayling<brd_t> first_to_last(in.ndims());
00260         internal::little_arrayling<size_t> pos(in.ndims());
00261         internal::little_arrayling<size_t> length(in.ndims());
00262         internal::little_arrayling<size_t> dim_map(in.ndims());
00263
00264         size_t i, j = 0; // j: the number of total dims after filtering
00265         length[1] = 1;
00266
00267         // remove all singleton dims. in case all need removal, keep the last dim.
00268         for(i = 0; i < in.ndims(); i++) {
00269                 if(in.size(i) > 1 || (j == 0 && i == in.ndims() - 1)) {
00270                         iters[j] = in._begin(i);
00271                         length[j] = in.size(i);
00272                         first_to_last[j] = in.first_to_last(j);
00273                         pos[j] = 0;
00274                         dim_map[j] = i;
00275                         j++;
00276                 }
00277         }
00278
00279         // sorting dim0 <-> dim1 for speed improvement.
00280         // TODO: on all others!!!!!!
00281         /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ??
00282         if(j > 1 && in.size(dim_map[1]) > 2 * in.size(dim_map[0])
00283                         && in.size(dim_map[0]) < 100) {
00284                 std::swap(iters[0], iters[1]);
00285                 std::swap(length[0], length[1]);
00286                 std::swap(first_to_last[0], first_to_last[1]);
00287                 std::swap(dim_map[0], dim_map[1]);
00288         }*/
00289
00290         // local variables for the inner dimensions to improve speed.
00291         I_ITER_1 it0 = iters[0];
00292         I_ITER_1 it0_end;
00293         I_ITER_2 it1 = iters[1];
00294         brd_t first_to_last1 = first_to_last[1];
00295         brd_t begin_to_end0 = in.begin_to_end(dim_map[0]);
00296         size_t length1 = length[1];
00297         size_t pos1 = 0;
00298
00299         // special loop for 1 or 2 dims (after filtering)
00300         if(j <= 2) {
00301                 it0_end = in._end(dim_map[0]);
00302                 for(; it0 != it0_end; ++it0) {
00303                         F::elem_op(*it_dst, *it0);
00304                         ++it_dst;
00305                 }
00306                 //it0_end = in._end(dim_map[0]);
00307                 while(++pos1 < length1) {
00308                         it0 -= begin_to_end0;
00309                         it0.inc_along_other_dim_at_begin(it1);
00310                         for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) {
00311                                 F::elem_op(*it_dst, *it0);
00312                                 ++it_dst;
00313                         }
00314                         //it0_end.inc_along(it1);
00315                         // TODO: consider using this approach of it0_end
00316                         // as a possible speed improvement in the generic loop. need bench.
00317                 }
00318                 return;
00319         }
00320
00321         // loop for more than 2 dims.
00322         do {
00323                 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) {
00324                         F::elem_op(*it_dst, *it0);
00325                         ++it_dst;
00326                 }
00327                 it0 -= begin_to_end0;
00328                 if(++pos1 < length1) {
00329                         it0.inc_along_other_dim_at_begin(it1);
00330                 }
00331                 else {
00332                         it0 -= first_to_last1;
00333                         pos1 = 0;
00334                         i = 2;
00335                         while(++pos[i] >= length[i]) {
00336                                 pos[i] = 0;
00337                                 it0 -= first_to_last[i];
00338                                 i++;
00339                                 if(i >= j) return; // loop is broken here.
00340                         }
00341                         it0.inc_along_other_dim_at_begin(iters[i]);
00342                 }
00343         } while(1);
00344 }
00345
00346
00347
00348
00350 template<
00351         class F, class A1, class A2,
00352         class O_ITER_1, class I_ITER_1, class O_ITER_2, class I_ITER_2
00353 >
00354 inline
00355 void loop_base_nd<3, F, A1, A2,
00356         ivl::data::nd_seq_optimal_tag, ivl::data::nd_seq_optimal_tag,
00357         types::t_false, types::t_false,
00358         O_ITER_1, I_ITER_1, O_ITER_2, I_ITER_2>::
00359         exec(A1& out, A2& in,
00360                 int inner_dim_1, int inner_dim_2)
00361 {
00362         // Remark: as a contradiction to the coding style, this function is
00363         // way more lengthy than expected. However this is because it was
00364         // decided by kimonas that this is the best and simplest design for that
00365         // particular function which induces complications to improve speed.
00366
00367         // type definitions
00368         typedef A1 out_t;
00369         typedef A2 in_t;
00370
00371         // Almost everything is doubled, one for src, one for dst
00372
00373         //dst
00374         typedef typename out_t::iterator_nd iter_t;
00375         typedef typename iter_t::iter_nd_border_walker brd_t;
00376         //src
00377         typedef typename in_t::const_iterator_nd siter_t;
00378         typedef typename siter_t::iter_nd_border_walker sbrd_t;
00379
00380         // TODO:
00381         // assert that the dimensions are the same.
00382
00383
00384         // quickly proceed for 1d arrays
00385         if(out.ndims() == 1) {
00386                 O_ITER_1 it = out._begin(0);
00387                 I_ITER_1 it_src = in._begin(0);
00388                 for(O_ITER_1 it_end = out._end(0); it != it_end; ++it) {
00389                         F::elem_op(*it, *it_src);
00390                         ++it_src;
00391                 }
00392                 return;
00393         }
00394
00395         // arrays with info about the dimensions
00396         internal::little_arrayling<iter_t> iters(out.ndims());
00397         internal::little_arrayling<brd_t> first_to_last(out.ndims());
00398         internal::little_arrayling<size_t> pos(out.ndims());
00399         internal::little_arrayling<size_t> length(out.ndims());
00400         internal::little_arrayling<size_t> dim_map(out.ndims());
00401         //src
00402         internal::little_arrayling<siter_t> siters(out.ndims());
00403         internal::little_arrayling<sbrd_t> sfirst_to_last(out.ndims());
00404
00405         size_t i, j = 0; // j: the number of total dims after filtering
00406         length[1] = 1;
00407
00408         // remove all singleton dims. in case all need removal, keep the last dim.
00409         for(i = 0; i < out.ndims(); i++) {
00410                 if(out.size(i) > 1 || (j == 0 && i == out.ndims() - 1)) {
00411                         iters[j] = out._begin(i);
00412                         length[j] = out.size(i);
00413                         first_to_last[j] = out.first_to_last(i);
00414                         pos[j] = 0;
00415                         dim_map[j] = i;
00416                         //src
00417                         siters[j] = in._begin(i);
00418                         sfirst_to_last[j] = in.first_to_last(i);
00419
00420                         j++;
00421                 }
00422         }
00423
00424         // sorting dim0 <-> dim1 for speed improvement.
00425         /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ??
00426         if(j > 1 && out.size(dim_map[1]) > 2 * out.size(dim_map[0])
00427                         && out.size(dim_map[0]) < 100) {
00428                 std::swap(iters[0], iters[1]);
00429                 std::swap(siters[0], siters[1]);
00430                 std::swap(length[0], length[1]);
00431                 std::swap(first_to_last[0], first_to_last[1]);
00432                 std::swap(sfirst_to_last[0], sfirst_to_last[1]);
00433                 std::swap(dim_map[0], dim_map[1]);
00434         }*/
00435
00436         // local variables for the inner dimensions to improve speed.
00437         O_ITER_1 it0 = iters[0];
00438         O_ITER_1 it0_end;
00439         O_ITER_2 it1 = iters[1];
00440         brd_t first_to_last1 = first_to_last[1];
00441         brd_t begin_to_end0 = out.begin_to_end(dim_map[0]);
00442         size_t length1 = length[1];
00443         size_t pos1 = 0;
00444         //src
00445         I_ITER_1 it_src = siters[0];
00446         I_ITER_2 it_src1 = siters[1];
00447         sbrd_t sfirst_to_last1 = sfirst_to_last[1];
00448         sbrd_t sbegin_to_end0 = in.begin_to_end(dim_map[0]);
00449
00450         // special loop for 1 or 2 dims (after filtering)
00451         if(j <= 2) {
00452                 it0_end = out._end(dim_map[0]);
00453                 for(; it0 != it0_end; ++it0) {
00454                         F::elem_op(*it0, *it_src);
00455                         ++it_src;
00456                 }
00457                 //it0_end = out._end(dim_map[0]);
00458                 while(++pos1 < length1) {
00459                         it0 -= begin_to_end0;
00460                         it0.inc_along_other_dim_at_begin(it1);
00461
00462                         //src
00463                         it_src -= sbegin_to_end0;
00464                         it_src.inc_along_other_dim_at_begin(it_src1);
00465
00466                         for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) {
00467                                 F::elem_op(*it0, *it_src);
00468                                 ++it_src;
00469                         }
00470                         //it0_end.inc_along(it1);
00471                         // TODO: consider using this approach of it0_end
00472                         // as a possible speed improvement in the generic loop. need bench.
00473                         // New Note: after testing:
00474                         // this todo is bad idea.
00475                         // this it_end approach was finally commented out
00476                         // because it is moving out of the iterator valid locations... !!
00477
00478                 }
00479                 return;
00480         }
00481
00482         // loop for more than 2 dims.
00483         do {
00484                 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) {
00485                         F::elem_op(*it0, *it_src);
00486                         ++it_src;
00487                 }
00488                 it0 -= begin_to_end0;
00489                 it_src -= sbegin_to_end0;
00490
00491                 if(++pos1 < length1) {
00492                         it0.inc_along_other_dim_at_begin(it1);
00493                         it_src.inc_along_other_dim_at_begin(it_src1);
00494                 }
00495                 else {
00496                         it0 -= first_to_last1;
00497                         it_src -= sfirst_to_last1;
00498                         pos1 = 0;
00499                         i = 2;
00500                         while(++pos[i] >= length[i]) {
00501                                 pos[i] = 0;
00502                                 it0 -= first_to_last[i];
00503                                 it_src -= sfirst_to_last[i];
00504                                 i++;
00505                                 if(i >= j) return; // loop is broken here.
00506                         }
00507                         it0.inc_along_other_dim_at_begin(iters[i]);
00508                         it_src.inc_along_other_dim_at_begin(siters[i]);
00509                 }
00510         } while(1);
00511 }
00512
00513
00514
00515 }; /*namespace loops*/
00516
00517 }; /*namespace ivl*/
00518
00519 #endif // IVL_CORE_DETAILS_LOOPS_LOOPS_ND_WITH_END