ivl 679
|
00001 /* This file is part of the ivl C++ library <http://image.ntua.gr/ivl>. 00002 A C++ template library extending syntax towards mathematical notation. 00003 00004 Copyright (C) 2012 Yannis Avrithis <iavr@image.ntua.gr> 00005 Copyright (C) 2012 Kimon Kontosis <kimonas@image.ntua.gr> 00006 00007 ivl is free software; you can redistribute it and/or modify 00008 it under the terms of the GNU Lesser General Public License 00009 version 3 as published by the Free Software Foundation. 00010 00011 Alternatively, you can redistribute it and/or modify it under the terms 00012 of the GNU General Public License version 2 as published by the Free 00013 Software Foundation. 00014 00015 ivl is distributed in the hope that it will be useful, 00016 but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00018 See the GNU General Public License for more details. 00019 00020 You should have received a copy of the GNU General Public License 00021 and a copy of the GNU Lesser General Public License along 00022 with ivl. If not, see <http://www.gnu.org/licenses/>. */ 00023 00024 #ifndef IVL_CORE_DETAILS_LOOPS_LOOPS_ND 00025 #define IVL_CORE_DETAILS_LOOPS_LOOPS_ND 00026 00027 namespace ivl { 00028 00029 namespace loops { 00030 00031 template <class F, class T, class S, class J, class D> 00032 void loop(array_nd<T, S>& out, const array_nd<J, D>& in) 00033 { 00034 typedef array_nd<T, S> out_t; 00035 typedef const array_nd<J, D> in_t; 00036 00037 int id[2] = {0, 0}; 00038 int j = 0; 00039 00040 for(size_t i = 0; i < out.ndims() && j < 2; i++) { 00041 if(out.size(i) > 1) id[j++] = int(i); 00042 } 00043 00044 loop_base_nd<1, F, out_t, in_t, 00045 typename out_t::optimal_access_tag, 00046 typename in_t::optimal_access_tag, 00047 typename out_t::has_specialized_iter, 00048 typename in_t::has_specialized_iter>:: 00049 exec(out, in, id[0], id[1]); 00050 00051 } 00052 00053 template <class F, class T, class S, class J, class D> 00054 void loop_ww(array_nd<T, S>& out, array_nd<J, D>& in) 00055 { 00056 typedef array_nd<T, S> out_t; 00057 typedef array_nd<J, D> in_t; 00058 00059 int id[2] = {0, 0}; 00060 int j = 0; 00061 00062 for(size_t i = 0; i < out.ndims() && j < 2; i++) { 00063 if(out.size(i) > 1) id[j++] = int(i); 00064 } 00065 00066 loop_base_nd<1, F, out_t, in_t, 00067 typename out_t::optimal_access_tag, 00068 typename in_t::optimal_access_tag, 00069 typename out_t::has_specialized_iter, 00070 typename in_t::has_specialized_iter>:: 00071 exec(out, in, id[0], id[1]); 00072 00073 } 00074 00076 template< 00077 class F, class A1, class A2, 00078 class I_OPTIMAL_ACCESS, 00079 class O_ITER_1, class I_ITER_1, class O_ITER_2 00080 > 00081 inline 00082 void loop_base_nd<3, F, A1, A2, 00083 ivl::data::nd_seq_optimal_tag, I_OPTIMAL_ACCESS, 00084 types::t_false, types::t_false, 00085 O_ITER_1, I_ITER_1, O_ITER_2, types::term>:: 00086 exec(A1& out, A2& in, 00087 int inner_dim_1, int inner_dim_2) 00088 { 00089 // Remark: as a contradiction to the coding style, this function is 00090 // way more lengthy than expected. However this is because it was 00091 // decided by kimonas that this is the best and simplest design for that 00092 // particular function which induces complications to improve speed. 00093 00094 // type definitions 00095 typedef A1 out_t; 00096 typedef A2 in_t; 00097 00098 typedef typename out_t::_fast_iterator_nd iter_t; 00099 typedef typename iter_t::iter_nd_border_walker brd_t; 00100 00101 // input iterator 00102 I_ITER_1 it_src = in.begin(); 00103 00104 // TODO: 00105 // assert that the dimensions are the same 00106 00107 // quickly proceed for 1d arrays 00108 if(out.length() == 0) return; 00109 if(out.ndims() == 1) { 00110 O_ITER_1 it = out._fast_begin(0); 00111 //todo:!!bug. out != in iterator??? 00112 for(O_ITER_1 it_end = out._fast_last(0); it != it_end; ++it) { 00113 F::elem_op(*it, *it_src); 00114 ++it_src; 00115 } 00116 F::elem_op(*it, *it_src); 00117 return; 00118 } 00119 00120 // arrays with info about the dimensions 00121 internal::little_arrayling<iter_t> iters(out.ndims()); 00122 internal::little_arrayling<brd_t> first_to_last(out.ndims()); 00123 internal::little_arrayling<size_t> pos(out.ndims()); 00124 internal::little_arrayling<size_t> length(out.ndims()); 00125 internal::little_arrayling<size_t> dim_map(out.ndims()); 00126 00127 size_t i, j = 0; // j: the number of total dims after filtering 00128 length[1] = 1; 00129 00130 // remove all singleton dims. in case all need removal, keep the last dim. 00131 for(i = 0; i < out.ndims(); i++) { 00132 if(out.size(i) > 1 || (j == 0 && i == out.ndims() - 1)) { 00133 iters[j] = out._fast_begin(i); 00134 length[j] = out.size(i); 00135 first_to_last[j] = out.first_to_last(j); 00136 pos[j] = 0; 00137 dim_map[j] = i; 00138 j++; 00139 } 00140 } 00141 00142 // sorting dim0 <-> dim1 for speed improvement. 00143 /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ?? 00144 if(j > 1 && out.size(dim_map[1]) > 2 * out.size(dim_map[0]) 00145 && out.size(dim_map[0]) < 100) { 00146 std::swap(iters[0], iters[1]); 00147 std::swap(length[0], length[1]); 00148 std::swap(first_to_last[0], first_to_last[1]); 00149 std::swap(dim_map[0], dim_map[1]); 00150 }*/ 00151 00152 // local variables for the inner dimensions to improve speed. 00153 O_ITER_1 it0 = iters[0]; 00154 O_ITER_1 it0_end; 00155 O_ITER_2 it1 = iters[1]; 00156 brd_t first_to_last1 = first_to_last[1]; 00157 brd_t first_to_last0 = out.first_to_last(dim_map[0]); 00158 size_t length1 = length[1]; 00159 size_t pos1 = 0; 00160 00161 // special loop for 1 or 2 dims (after filtering) 00162 if(j <= 2) { 00163 it0_end = out._fast_last(dim_map[0]); 00164 for(; it0 != it0_end; ++it0) { 00165 F::elem_op(*it0, *it_src); 00166 ++it_src; 00167 } 00168 F::elem_op(*it0, *it_src); 00169 ++it_src; 00170 //it0_end = out._end(dim_map[0]); 00171 while(++pos1 < length1) { 00172 it0 -= first_to_last0; 00173 it0.inc_along_other_dim_at_begin(it1); 00174 00175 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) { 00176 F::elem_op(*it0, *it_src); 00177 ++it_src; 00178 } 00179 F::elem_op(*it0, *it_src); 00180 ++it_src; 00181 //it0_end.inc_along(it1); 00182 // TODO: consider using this approach of it0_end 00183 // as a possible speed improvement in the generic loop. need bench. 00184 } 00185 return; 00186 } 00187 00188 // loop for more than 2 dims. 00189 do { 00190 //it0 + begin_to_end0; wipe these two strange lines 00191 //it0 != it0_end; 00192 00193 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) { 00194 //*it0; 00195 //*it_src; 00196 F::elem_op(*it0, *it_src); 00197 ++it_src; 00198 } 00199 F::elem_op(*it0, *it_src); 00200 ++it_src; 00201 00202 it0 -= first_to_last0; 00203 if(++pos1 < length1) { 00204 it0.inc_along_other_dim_at_begin(it1); 00205 } 00206 else { 00207 it0 -= first_to_last1; 00208 pos1 = 0; 00209 i = 2; 00210 while(++pos[i] >= length[i]) { 00211 pos[i] = 0; 00212 it0 -= first_to_last[i]; 00213 i++; 00214 if(i >= j) return; // loop is broken here. 00215 } 00216 it0.inc_along_other_dim_at_begin(iters[i]); 00217 } 00218 } while(1); 00219 } 00220 00221 00223 template< 00224 class F, class A1, class A2, 00225 class O_OPTIMAL_ACCESS, 00226 class O_ITER_1, class I_ITER_1, class I_ITER_2 00227 > 00228 inline 00229 void loop_base_nd<3, F, A1, A2, 00230 O_OPTIMAL_ACCESS, ivl::data::nd_seq_optimal_tag, 00231 types::t_false, types::t_false, 00232 O_ITER_1, I_ITER_1, types::term, I_ITER_2>:: 00233 exec(A1& out, A2& in, 00234 int inner_dim_1, int inner_dim_2) 00235 { 00236 // Remark: as a contradiction to the coding style, this function is 00237 // way more lengthy than expected. However this is because it was 00238 // decided by kimonas that this is the best and simplest design for that 00239 // particular function which induces complications to improve speed. 00240 00241 // type definitions 00242 typedef A1 out_t; 00243 typedef A2 in_t; 00244 00245 typedef typename types::best_fast_iterator_nd<in_t>::type iter_t; 00246 typedef typename iter_t::iter_nd_border_walker brd_t; 00247 00248 // input iterator 00249 O_ITER_1 it_dst = out.begin(); 00250 00251 // TODO: 00252 // assert that the dimensions are the same 00253 00254 // quickly proceed for 1d arrays 00255 if(out.length() == 0) return; 00256 // TODO: ????? why is here it_dst and in some other places it_src?? 00257 if(in.ndims() == 1) { 00258 I_ITER_1 it = in._fast_begin(0); 00259 for(I_ITER_1 it_end = in._fast_last(0); it != it_end; ++it) { 00260 F::elem_op(*it_dst, *it); 00261 ++it_dst; 00262 } 00263 F::elem_op(*it_dst, *it); 00264 ++it_dst; 00265 return; 00266 } 00267 00268 // arrays with info about the dimensions 00269 internal::little_arrayling<iter_t> iters(in.ndims()); 00270 internal::little_arrayling<brd_t> first_to_last(in.ndims()); 00271 internal::little_arrayling<size_t> pos(in.ndims()); 00272 internal::little_arrayling<size_t> length(in.ndims()); 00273 internal::little_arrayling<size_t> dim_map(in.ndims()); 00274 00275 size_t i, j = 0; // j: the number of total dims after filtering 00276 length[1] = 1; 00277 00278 // remove all singleton dims. in case all need removal, keep the last dim. 00279 for(i = 0; i < in.ndims(); i++) { 00280 if(in.size(i) > 1 || (j == 0 && i == in.ndims() - 1)) { 00281 iters[j] = in._fast_begin(i); 00282 length[j] = in.size(i); 00283 first_to_last[j] = in.first_to_last(j); 00284 pos[j] = 0; 00285 dim_map[j] = i; 00286 j++; 00287 } 00288 } 00289 00290 // sorting dim0 <-> dim1 for speed improvement. 00291 // TODO: on all others!!!!!! 00292 /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ?? 00293 if(j > 1 && in.size(dim_map[1]) > 2 * in.size(dim_map[0]) 00294 && in.size(dim_map[0]) < 100) { 00295 std::swap(iters[0], iters[1]); 00296 std::swap(length[0], length[1]); 00297 std::swap(first_to_last[0], first_to_last[1]); 00298 std::swap(dim_map[0], dim_map[1]); 00299 }*/ 00300 00301 // local variables for the inner dimensions to improve speed. 00302 I_ITER_1 it0 = iters[0]; 00303 I_ITER_1 it0_end; 00304 I_ITER_2 it1 = iters[1]; 00305 brd_t first_to_last1 = first_to_last[1]; 00306 brd_t first_to_last0 = in.first_to_last(dim_map[0]); 00307 size_t length1 = length[1]; 00308 size_t pos1 = 0; 00309 00310 // special loop for 1 or 2 dims (after filtering) 00311 if(j <= 2) { 00312 it0_end = in._fast_last(dim_map[0]); 00313 for(; it0 != it0_end; ++it0) { 00314 F::elem_op(*it_dst, *it0); 00315 ++it_dst; 00316 } 00317 F::elem_op(*it_dst, *it0); 00318 ++it_dst; 00319 //it0_end = in._end(dim_map[0]); 00320 while(++pos1 < length1) { 00321 it0 -= first_to_last0; 00322 it0.inc_along_other_dim_at_begin(it1); 00323 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) { 00324 F::elem_op(*it_dst, *it0); 00325 ++it_dst; 00326 } 00327 F::elem_op(*it_dst, *it0); 00328 ++it_dst; 00329 //it0_end.inc_along(it1); 00330 // TODO: consider using this approach of it0_end 00331 // as a possible speed improvement in the generic loop. need bench. 00332 } 00333 return; 00334 } 00335 00336 // loop for more than 2 dims. 00337 do { 00338 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) { 00339 F::elem_op(*it_dst, *it0); 00340 ++it_dst; 00341 } 00342 F::elem_op(*it_dst, *it0); 00343 ++it_dst; 00344 it0 -= first_to_last0; 00345 if(++pos1 < length1) { 00346 it0.inc_along_other_dim_at_begin(it1); 00347 } 00348 else { 00349 it0 -= first_to_last1; 00350 pos1 = 0; 00351 i = 2; 00352 while(++pos[i] >= length[i]) { 00353 pos[i] = 0; 00354 it0 -= first_to_last[i]; 00355 i++; 00356 if(i >= j) return; // loop is broken here. 00357 } 00358 it0.inc_along_other_dim_at_begin(iters[i]); 00359 } 00360 } while(1); 00361 } 00362 00363 00364 00365 00367 template< 00368 class F, class A1, class A2, 00369 class O_ITER_1, class I_ITER_1, class O_ITER_2, class I_ITER_2 00370 > 00371 inline 00372 void loop_base_nd<3, F, A1, A2, 00373 ivl::data::nd_seq_optimal_tag, ivl::data::nd_seq_optimal_tag, 00374 types::t_false, types::t_false, 00375 O_ITER_1, I_ITER_1, O_ITER_2, I_ITER_2>:: 00376 exec(A1& out, A2& in, 00377 int inner_dim_1, int inner_dim_2) 00378 { 00379 // Remark: as a contradiction to the coding style, this function is 00380 // way more lengthy than expected. However this is because it was 00381 // decided by kimonas that this is the best and simplest design for that 00382 // particular function which induces complications to improve speed. 00383 00384 // type definitions 00385 typedef A1 out_t; 00386 typedef A2 in_t; 00387 00388 // Almost everything is doubled, one for src, one for dst 00389 00390 //dst 00391 typedef typename out_t::_fast_iterator_nd iter_t; 00392 typedef typename iter_t::iter_nd_border_walker brd_t; 00393 //src 00394 typedef typename types::best_fast_iterator_nd<in_t>::type siter_t; 00395 typedef typename siter_t::iter_nd_border_walker sbrd_t; 00396 00397 // TODO: 00398 // assert that the dimensions are the same. 00399 00400 00401 // quickly proceed for 1d arrays 00402 if(out.length() == 0) return; 00403 if(out.ndims() == 1) { 00404 O_ITER_1 it = out._fast_begin(0); 00405 I_ITER_1 it_src = in._fast_begin(0); 00406 for(O_ITER_1 it_end = out._fast_last(0); it != it_end; ++it) { 00407 F::elem_op(*it, *it_src); 00408 ++it_src; 00409 } 00410 F::elem_op(*it, *it_src); 00411 return; 00412 } 00413 00414 // arrays with info about the dimensions 00415 internal::little_arrayling<iter_t> iters(out.ndims()); 00416 internal::little_arrayling<brd_t> first_to_last(out.ndims()); 00417 internal::little_arrayling<size_t> pos(out.ndims()); 00418 internal::little_arrayling<size_t> length(out.ndims()); 00419 internal::little_arrayling<size_t> dim_map(out.ndims()); 00420 //src 00421 internal::little_arrayling<siter_t> siters(out.ndims()); 00422 internal::little_arrayling<sbrd_t> sfirst_to_last(out.ndims()); 00423 00424 size_t i, j = 0; // j: the number of total dims after filtering 00425 length[1] = 1; 00426 00427 // remove all singleton dims. in case all need removal, keep the last dim. 00428 for(i = 0; i < out.ndims(); i++) { 00429 if(out.size(i) > 1 || (j == 0 && i == out.ndims() - 1)) { 00430 iters[j] = out._fast_begin(i); 00431 length[j] = out.size(i); 00432 first_to_last[j] = out.first_to_last(i); 00433 pos[j] = 0; 00434 dim_map[j] = i; 00435 //src 00436 siters[j] = in._fast_begin(i); 00437 sfirst_to_last[j] = in.first_to_last(i); 00438 00439 j++; 00440 } 00441 } 00442 00443 // sorting dim0 <-> dim1 for speed improvement. 00444 /* IS THIS POSSIBLE??? and how do i recognize to use ITER_1 or ITER_0 ?? 00445 if(j > 1 && out.size(dim_map[1]) > 2 * out.size(dim_map[0]) 00446 && out.size(dim_map[0]) < 100) { 00447 std::swap(iters[0], iters[1]); 00448 std::swap(siters[0], siters[1]); 00449 std::swap(length[0], length[1]); 00450 std::swap(first_to_last[0], first_to_last[1]); 00451 std::swap(sfirst_to_last[0], sfirst_to_last[1]); 00452 std::swap(dim_map[0], dim_map[1]); 00453 }*/ 00454 00455 // local variables for the inner dimensions to improve speed. 00456 O_ITER_1 it0 = iters[0]; 00457 O_ITER_1 it0_end; 00458 O_ITER_2 it1 = iters[1]; 00459 brd_t first_to_last1 = first_to_last[1]; 00460 brd_t first_to_last0 = out.first_to_last(dim_map[0]); 00461 size_t length1 = length[1]; 00462 size_t pos1 = 0; 00463 //src 00464 I_ITER_1 it_src = siters[0]; 00465 I_ITER_2 it_src1 = siters[1]; 00466 sbrd_t sfirst_to_last1 = sfirst_to_last[1]; 00467 sbrd_t sfirst_to_last0 = in.first_to_last(dim_map[0]); 00468 00469 // special loop for 1 or 2 dims (after filtering) 00470 if(j <= 2) { 00471 it0_end = out._fast_last(dim_map[0]); 00472 for(; it0 != it0_end; ++it0) { 00473 F::elem_op(*it0, *it_src); 00474 ++it_src; 00475 } 00476 F::elem_op(*it0, *it_src); 00477 //it0_end = out._end(dim_map[0]); 00478 while(++pos1 < length1) { 00479 it0 -= first_to_last0; 00480 it0.inc_along_other_dim_at_begin(it1); 00481 00482 //src 00483 it_src -= sfirst_to_last0; 00484 it_src.inc_along_other_dim_at_begin(it_src1); 00485 00486 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) { 00487 F::elem_op(*it0, *it_src); 00488 ++it_src; 00489 } 00490 F::elem_op(*it0, *it_src); 00491 //it0_end.inc_along(it1); 00492 // TODO: consider using this approach of it0_end 00493 // as a possible speed improvement in the generic loop. need bench. 00494 // New Note: after testing: 00495 // this todo is bad idea. 00496 // this it_end approach was finally commented out 00497 // because it is moving out of the iterator valid locations... !! 00498 00499 } 00500 return; 00501 } 00502 00503 // loop for more than 2 dims. 00504 do { 00505 for(it0_end = it0 + first_to_last0; it0 != it0_end; ++it0) { 00506 F::elem_op(*it0, *it_src); 00507 ++it_src; 00508 } 00509 F::elem_op(*it0, *it_src); 00510 it0 -= first_to_last0; 00511 it_src -= sfirst_to_last0; 00512 00513 if(++pos1 < length1) { 00514 it0.inc_along_other_dim_at_begin(it1); 00515 it_src.inc_along_other_dim_at_begin(it_src1); 00516 } 00517 else { 00518 it0 -= first_to_last1; 00519 it_src -= sfirst_to_last1; 00520 pos1 = 0; 00521 i = 2; 00522 while(++pos[i] >= length[i]) { 00523 pos[i] = 0; 00524 it0 -= first_to_last[i]; 00525 it_src -= sfirst_to_last[i]; 00526 i++; 00527 if(i >= j) return; // loop is broken here. 00528 } 00529 it0.inc_along_other_dim_at_begin(iters[i]); 00530 it_src.inc_along_other_dim_at_begin(siters[i]); 00531 } 00532 } while(1); 00533 } 00534 00535 00536 00537 }; /*namespace loops*/ 00538 00539 }; /*namespace ivl*/ 00540 00541 #endif // IVL_CORE_DETAILS_LOOPS_LOOPS_ND