ivl 679
|
00001 /* This file is part of the ivl C++ library <http://image.ntua.gr/ivl>. 00002 A C++ template library extending syntax towards mathematical notation. 00003 00004 Copyright (C) 2012 Yannis Avrithis <iavr@image.ntua.gr> 00005 Copyright (C) 2012 Kimon Kontosis <kimonas@image.ntua.gr> 00006 00007 ivl is free software; you can redistribute it and/or modify 00008 it under the terms of the GNU Lesser General Public License 00009 version 3 as published by the Free Software Foundation. 00010 00011 Alternatively, you can redistribute it and/or modify it under the terms 00012 of the GNU General Public License version 2 as published by the Free 00013 Software Foundation. 00014 00015 ivl is distributed in the hope that it will be useful, 00016 but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00018 See the GNU General Public License for more details. 00019 00020 You should have received a copy of the GNU General Public License 00021 and a copy of the GNU Lesser General Public License along 00022 with ivl. If not, see <http://www.gnu.org/licenses/>. */ 00023 00024 #ifndef IVL_CORE_DETAILS_LOOPS_LOOPS_ND_WITH_END 00025 #define IVL_CORE_DETAILS_LOOPS_LOOPS_ND_WITH_END 00026 00027 namespace ivl { 00028 00029 namespace loops { 00030 00031 template <class F, class T, class S, class K, class J, class D, class P> 00032 void loop(array_nd<T, S, K>& out, const array_nd<J, D, P>& in) 00033 { 00034 typedef array_nd<T, S, K> out_t; 00035 typedef const array_nd<J, D, P> in_t; 00036 00037 int id[2] = {0, 0}; 00038 int j = 0; 00039 00040 for(size_t i = 0; i < out.ndims() && j < 2; i++) { 00041 if(out.size(i) > 1) id[j++] = int(i); 00042 } 00043 00044 loop_base_nd<1, F, out_t, in_t, 00045 typename out_t::optimal_access_tag, 00046 typename in_t::optimal_access_tag, 00047 typename out_t::has_specialized_iter, 00048 typename in_t::has_specialized_iter>:: 00049 exec(out, in, id[0], id[1]); 00050 00051 } 00052 00053 template <class F, class T, class S, class K, class J, class D, class P> 00054 void loop_ww(array_nd<T, S, K>& out, array_nd<J, D, P>& in) 00055 { 00056 typedef array_nd<T, S, K> out_t; 00057 typedef array_nd<J, D, P> in_t; 00058 00059 int id[2] = {0, 0}; 00060 int j = 0; 00061 00062 for(size_t i = 0; i < out.ndims() && j < 2; i++) { 00063 if(out.size(i) > 1) id[j++] = int(i); 00064 } 00065 00066 loop_base_nd<1, F, out_t, in_t, 00067 typename out_t::optimal_access_tag, 00068 typename in_t::optimal_access_tag, 00069 typename out_t::has_specialized_iter, 00070 typename in_t::has_specialized_iter>:: 00071 exec(out, in, id[0], id[1]); 00072 00073 } 00074 00076 template< 00077 class F, class A1, class A2, 00078 class I_OPTIMAL_ACCESS, 00079 class O_ITER_1, class I_ITER_1, class O_ITER_2 00080 > 00081 inline 00082 void loop_base_nd<3, F, A1, A2, 00083 ivl::data::nd_seq_optimal_tag, I_OPTIMAL_ACCESS, 00084 types::t_false, types::t_false, 00085 O_ITER_1, I_ITER_1, O_ITER_2, types::term>:: 00086 exec(A1& out, A2& in, 00087 int inner_dim_1, int inner_dim_2) 00088 { 00089 // Remark: as a contradiction to the coding style, this function is 00090 // way more lengthy than expected. However this is because it was 00091 // decided by kimonas that this is the best and simplest design for that 00092 // particular function which induces complications to improve speed. 00093 00094 // type definitions 00095 typedef A1 out_t; 00096 typedef A2 in_t; 00097 00098 typedef typename out_t::iterator_nd iter_t; 00099 typedef typename iter_t::iter_nd_border_walker brd_t; 00100 00101 // input iterator 00102 I_ITER_1 it_src = in.begin(); 00103 00104 // TODO: 00105 // assert that the dimensions are the same 00106 00107 // quickly proceed for 1d arrays 00108 if(out.ndims() == 1) { 00109 O_ITER_1 it = out._begin(0); 00110 //todo:!!bug. out != in iterator??? 00111 for(O_ITER_1 it_end = out._end(0); it != it_end; ++it) { 00112 F::elem_op(*it, *it_src); 00113 ++it_src; 00114 } 00115 return; 00116 } 00117 00118 // arrays with info about the dimensions 00119 internal::little_arrayling<iter_t> iters(out.ndims()); 00120 internal::little_arrayling<brd_t> first_to_last(out.ndims()); 00121 internal::little_arrayling<size_t> pos(out.ndims()); 00122 internal::little_arrayling<size_t> length(out.ndims()); 00123 internal::little_arrayling<size_t> dim_map(out.ndims()); 00124 00125 size_t i, j = 0; // j: the number of total dims after filtering 00126 length[1] = 1; 00127 00128 // remove all singleton dims. in case all need removal, keep the last dim. 00129 for(i = 0; i < out.ndims(); i++) { 00130 if(out.size(i) > 1 || (j == 0 && i == out.ndims() - 1)) { 00131 iters[j] = out._begin(i); 00132 length[j] = out.size(i); 00133 first_to_last[j] = out.first_to_last(j); 00134 pos[j] = 0; 00135 dim_map[j] = i; 00136 j++; 00137 } 00138 } 00139 00140 // sorting dim0 <-> dim1 for speed improvement. 00141 /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ?? 00142 if(j > 1 && out.size(dim_map[1]) > 2 * out.size(dim_map[0]) 00143 && out.size(dim_map[0]) < 100) { 00144 std::swap(iters[0], iters[1]); 00145 std::swap(length[0], length[1]); 00146 std::swap(first_to_last[0], first_to_last[1]); 00147 std::swap(dim_map[0], dim_map[1]); 00148 }*/ 00149 00150 // local variables for the inner dimensions to improve speed. 00151 O_ITER_1 it0 = iters[0]; 00152 O_ITER_1 it0_end; 00153 O_ITER_2 it1 = iters[1]; 00154 brd_t first_to_last1 = first_to_last[1]; 00155 brd_t begin_to_end0 = out.begin_to_end(dim_map[0]); 00156 size_t length1 = length[1]; 00157 size_t pos1 = 0; 00158 00159 // special loop for 1 or 2 dims (after filtering) 00160 if(j <= 2) { 00161 it0_end = out._end(dim_map[0]); 00162 for(; it0 != it0_end; ++it0) { 00163 F::elem_op(*it0, *it_src); 00164 ++it_src; 00165 } 00166 //it0_end = out._end(dim_map[0]); 00167 while(++pos1 < length1) { 00168 it0 -= begin_to_end0; 00169 it0.inc_along_other_dim_at_begin(it1); 00170 00171 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) { 00172 F::elem_op(*it0, *it_src); 00173 ++it_src; 00174 } 00175 //it0_end.inc_along(it1); 00176 // TODO: consider using this approach of it0_end 00177 // as a possible speed improvement in the generic loop. need bench. 00178 } 00179 return; 00180 } 00181 00182 // loop for more than 2 dims. 00183 do { 00184 //it0 + begin_to_end0; wipe these two strange lines 00185 //it0 != it0_end; 00186 00187 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) { 00188 *it0; 00189 *it_src; 00190 F::elem_op(*it0, *it_src); 00191 ++it_src; 00192 } 00193 00194 it0 -= begin_to_end0; 00195 if(++pos1 < length1) { 00196 it0.inc_along_other_dim_at_begin(it1); 00197 } 00198 else { 00199 it0 -= first_to_last1; 00200 pos1 = 0; 00201 i = 2; 00202 while(++pos[i] >= length[i]) { 00203 pos[i] = 0; 00204 it0 -= first_to_last[i]; 00205 i++; 00206 if(i >= j) return; // loop is broken here. 00207 } 00208 it0.inc_along_other_dim_at_begin(iters[i]); 00209 } 00210 } while(1); 00211 } 00212 00213 00215 template< 00216 class F, class A1, class A2, 00217 class O_OPTIMAL_ACCESS, 00218 class O_ITER_1, class I_ITER_1, class I_ITER_2 00219 > 00220 inline 00221 void loop_base_nd<3, F, A1, A2, 00222 O_OPTIMAL_ACCESS, ivl::data::nd_seq_optimal_tag, 00223 types::t_false, types::t_false, 00224 O_ITER_1, I_ITER_1, types::term, I_ITER_2>:: 00225 exec(A1& out, A2& in, 00226 int inner_dim_1, int inner_dim_2) 00227 { 00228 // Remark: as a contradiction to the coding style, this function is 00229 // way more lengthy than expected. However this is because it was 00230 // decided by kimonas that this is the best and simplest design for that 00231 // particular function which induces complications to improve speed. 00232 00233 // type definitions 00234 typedef A1 out_t; 00235 typedef A2 in_t; 00236 00237 typedef typename in_t::const_iterator_nd iter_t; 00238 typedef typename iter_t::iter_nd_border_walker brd_t; 00239 00240 // input iterator 00241 O_ITER_1 it_dst = out.begin(); 00242 00243 // TODO: 00244 // assert that the dimensions are the same 00245 00246 // quickly proceed for 1d arrays 00247 // TODO: ????? why is here it_dst and in some other places it_src?? 00248 if(in.ndims() == 1) { 00249 I_ITER_1 it = in._begin(0); 00250 for(I_ITER_1 it_end = in._end(0); it != it_end; ++it) { 00251 F::elem_op(*it_dst, *it); 00252 ++it_dst; 00253 } 00254 return; 00255 } 00256 00257 // arrays with info about the dimensions 00258 internal::little_arrayling<iter_t> iters(in.ndims()); 00259 internal::little_arrayling<brd_t> first_to_last(in.ndims()); 00260 internal::little_arrayling<size_t> pos(in.ndims()); 00261 internal::little_arrayling<size_t> length(in.ndims()); 00262 internal::little_arrayling<size_t> dim_map(in.ndims()); 00263 00264 size_t i, j = 0; // j: the number of total dims after filtering 00265 length[1] = 1; 00266 00267 // remove all singleton dims. in case all need removal, keep the last dim. 00268 for(i = 0; i < in.ndims(); i++) { 00269 if(in.size(i) > 1 || (j == 0 && i == in.ndims() - 1)) { 00270 iters[j] = in._begin(i); 00271 length[j] = in.size(i); 00272 first_to_last[j] = in.first_to_last(j); 00273 pos[j] = 0; 00274 dim_map[j] = i; 00275 j++; 00276 } 00277 } 00278 00279 // sorting dim0 <-> dim1 for speed improvement. 00280 // TODO: on all others!!!!!! 00281 /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ?? 00282 if(j > 1 && in.size(dim_map[1]) > 2 * in.size(dim_map[0]) 00283 && in.size(dim_map[0]) < 100) { 00284 std::swap(iters[0], iters[1]); 00285 std::swap(length[0], length[1]); 00286 std::swap(first_to_last[0], first_to_last[1]); 00287 std::swap(dim_map[0], dim_map[1]); 00288 }*/ 00289 00290 // local variables for the inner dimensions to improve speed. 00291 I_ITER_1 it0 = iters[0]; 00292 I_ITER_1 it0_end; 00293 I_ITER_2 it1 = iters[1]; 00294 brd_t first_to_last1 = first_to_last[1]; 00295 brd_t begin_to_end0 = in.begin_to_end(dim_map[0]); 00296 size_t length1 = length[1]; 00297 size_t pos1 = 0; 00298 00299 // special loop for 1 or 2 dims (after filtering) 00300 if(j <= 2) { 00301 it0_end = in._end(dim_map[0]); 00302 for(; it0 != it0_end; ++it0) { 00303 F::elem_op(*it_dst, *it0); 00304 ++it_dst; 00305 } 00306 //it0_end = in._end(dim_map[0]); 00307 while(++pos1 < length1) { 00308 it0 -= begin_to_end0; 00309 it0.inc_along_other_dim_at_begin(it1); 00310 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) { 00311 F::elem_op(*it_dst, *it0); 00312 ++it_dst; 00313 } 00314 //it0_end.inc_along(it1); 00315 // TODO: consider using this approach of it0_end 00316 // as a possible speed improvement in the generic loop. need bench. 00317 } 00318 return; 00319 } 00320 00321 // loop for more than 2 dims. 00322 do { 00323 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) { 00324 F::elem_op(*it_dst, *it0); 00325 ++it_dst; 00326 } 00327 it0 -= begin_to_end0; 00328 if(++pos1 < length1) { 00329 it0.inc_along_other_dim_at_begin(it1); 00330 } 00331 else { 00332 it0 -= first_to_last1; 00333 pos1 = 0; 00334 i = 2; 00335 while(++pos[i] >= length[i]) { 00336 pos[i] = 0; 00337 it0 -= first_to_last[i]; 00338 i++; 00339 if(i >= j) return; // loop is broken here. 00340 } 00341 it0.inc_along_other_dim_at_begin(iters[i]); 00342 } 00343 } while(1); 00344 } 00345 00346 00347 00348 00350 template< 00351 class F, class A1, class A2, 00352 class O_ITER_1, class I_ITER_1, class O_ITER_2, class I_ITER_2 00353 > 00354 inline 00355 void loop_base_nd<3, F, A1, A2, 00356 ivl::data::nd_seq_optimal_tag, ivl::data::nd_seq_optimal_tag, 00357 types::t_false, types::t_false, 00358 O_ITER_1, I_ITER_1, O_ITER_2, I_ITER_2>:: 00359 exec(A1& out, A2& in, 00360 int inner_dim_1, int inner_dim_2) 00361 { 00362 // Remark: as a contradiction to the coding style, this function is 00363 // way more lengthy than expected. However this is because it was 00364 // decided by kimonas that this is the best and simplest design for that 00365 // particular function which induces complications to improve speed. 00366 00367 // type definitions 00368 typedef A1 out_t; 00369 typedef A2 in_t; 00370 00371 // Almost everything is doubled, one for src, one for dst 00372 00373 //dst 00374 typedef typename out_t::iterator_nd iter_t; 00375 typedef typename iter_t::iter_nd_border_walker brd_t; 00376 //src 00377 typedef typename in_t::const_iterator_nd siter_t; 00378 typedef typename siter_t::iter_nd_border_walker sbrd_t; 00379 00380 // TODO: 00381 // assert that the dimensions are the same. 00382 00383 00384 // quickly proceed for 1d arrays 00385 if(out.ndims() == 1) { 00386 O_ITER_1 it = out._begin(0); 00387 I_ITER_1 it_src = in._begin(0); 00388 for(O_ITER_1 it_end = out._end(0); it != it_end; ++it) { 00389 F::elem_op(*it, *it_src); 00390 ++it_src; 00391 } 00392 return; 00393 } 00394 00395 // arrays with info about the dimensions 00396 internal::little_arrayling<iter_t> iters(out.ndims()); 00397 internal::little_arrayling<brd_t> first_to_last(out.ndims()); 00398 internal::little_arrayling<size_t> pos(out.ndims()); 00399 internal::little_arrayling<size_t> length(out.ndims()); 00400 internal::little_arrayling<size_t> dim_map(out.ndims()); 00401 //src 00402 internal::little_arrayling<siter_t> siters(out.ndims()); 00403 internal::little_arrayling<sbrd_t> sfirst_to_last(out.ndims()); 00404 00405 size_t i, j = 0; // j: the number of total dims after filtering 00406 length[1] = 1; 00407 00408 // remove all singleton dims. in case all need removal, keep the last dim. 00409 for(i = 0; i < out.ndims(); i++) { 00410 if(out.size(i) > 1 || (j == 0 && i == out.ndims() - 1)) { 00411 iters[j] = out._begin(i); 00412 length[j] = out.size(i); 00413 first_to_last[j] = out.first_to_last(i); 00414 pos[j] = 0; 00415 dim_map[j] = i; 00416 //src 00417 siters[j] = in._begin(i); 00418 sfirst_to_last[j] = in.first_to_last(i); 00419 00420 j++; 00421 } 00422 } 00423 00424 // sorting dim0 <-> dim1 for speed improvement. 00425 /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ?? 00426 if(j > 1 && out.size(dim_map[1]) > 2 * out.size(dim_map[0]) 00427 && out.size(dim_map[0]) < 100) { 00428 std::swap(iters[0], iters[1]); 00429 std::swap(siters[0], siters[1]); 00430 std::swap(length[0], length[1]); 00431 std::swap(first_to_last[0], first_to_last[1]); 00432 std::swap(sfirst_to_last[0], sfirst_to_last[1]); 00433 std::swap(dim_map[0], dim_map[1]); 00434 }*/ 00435 00436 // local variables for the inner dimensions to improve speed. 00437 O_ITER_1 it0 = iters[0]; 00438 O_ITER_1 it0_end; 00439 O_ITER_2 it1 = iters[1]; 00440 brd_t first_to_last1 = first_to_last[1]; 00441 brd_t begin_to_end0 = out.begin_to_end(dim_map[0]); 00442 size_t length1 = length[1]; 00443 size_t pos1 = 0; 00444 //src 00445 I_ITER_1 it_src = siters[0]; 00446 I_ITER_2 it_src1 = siters[1]; 00447 sbrd_t sfirst_to_last1 = sfirst_to_last[1]; 00448 sbrd_t sbegin_to_end0 = in.begin_to_end(dim_map[0]); 00449 00450 // special loop for 1 or 2 dims (after filtering) 00451 if(j <= 2) { 00452 it0_end = out._end(dim_map[0]); 00453 for(; it0 != it0_end; ++it0) { 00454 F::elem_op(*it0, *it_src); 00455 ++it_src; 00456 } 00457 //it0_end = out._end(dim_map[0]); 00458 while(++pos1 < length1) { 00459 it0 -= begin_to_end0; 00460 it0.inc_along_other_dim_at_begin(it1); 00461 00462 //src 00463 it_src -= sbegin_to_end0; 00464 it_src.inc_along_other_dim_at_begin(it_src1); 00465 00466 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) { 00467 F::elem_op(*it0, *it_src); 00468 ++it_src; 00469 } 00470 //it0_end.inc_along(it1); 00471 // TODO: consider using this approach of it0_end 00472 // as a possible speed improvement in the generic loop. need bench. 00473 // New Note: after testing: 00474 // this todo is bad idea. 00475 // this it_end approach was finally commented out 00476 // because it is moving out of the iterator valid locations... !! 00477 00478 } 00479 return; 00480 } 00481 00482 // loop for more than 2 dims. 00483 do { 00484 for(it0_end = it0 + begin_to_end0; it0 != it0_end; ++it0) { 00485 F::elem_op(*it0, *it_src); 00486 ++it_src; 00487 } 00488 it0 -= begin_to_end0; 00489 it_src -= sbegin_to_end0; 00490 00491 if(++pos1 < length1) { 00492 it0.inc_along_other_dim_at_begin(it1); 00493 it_src.inc_along_other_dim_at_begin(it_src1); 00494 } 00495 else { 00496 it0 -= first_to_last1; 00497 it_src -= sfirst_to_last1; 00498 pos1 = 0; 00499 i = 2; 00500 while(++pos[i] >= length[i]) { 00501 pos[i] = 0; 00502 it0 -= first_to_last[i]; 00503 it_src -= sfirst_to_last[i]; 00504 i++; 00505 if(i >= j) return; // loop is broken here. 00506 } 00507 it0.inc_along_other_dim_at_begin(iters[i]); 00508 it_src.inc_along_other_dim_at_begin(siters[i]); 00509 } 00510 } while(1); 00511 } 00512 00513 00514 00515 }; /*namespace loops*/ 00516 00517 }; /*namespace ivl*/ 00518 00519 #endif // IVL_CORE_DETAILS_LOOPS_LOOPS_ND_WITH_END