@@ -16,45 +16,63 @@ void _initialize_temps(T *data_d, int nx, int ny) {
16
16
17
17
// Reports a failed CUDA runtime call on stderr.
//   status: value returned by the runtime call being checked.
//   stage:  short label naming the step, included in the message.
// Returns true when status is cudaSuccess, false otherwise.
static bool cuda_call_succeeded(cudaError_t status, const char *stage) {
  if (status == cudaSuccess) {
    return true;
  }
  std::cerr << "CUDA error during " << stage << ": "
            << cudaGetErrorString(status) << std::endl;
  return false;
}

// Driver that runs the naive, shared_global and shared_only heat_diffusion
// variants, each on its own pair of nx * ny float device buffers that are
// initialised by _initialize_temps.
// Returns 0 on success and 1 as soon as a kernel launch or a device
// synchronisation reports an error.
int main() {
  const int nx = 2000, ny = 2000;

  // Threads per block for the _initialize_temps launches.
  const int init_threads = 32;
  // Integer ceiling division: exact for every int nx, unlike rounding
  // through float. NBLK * init_threads may exceed nx.
  // NOTE(review): assumes _initialize_temps bounds-checks its index and
  // covers all ny rows from a 1D launch over nx — confirm against the kernel.
  const int NBLK = (nx + init_threads - 1) / init_threads;
  const int iter = 100;

  // naive
  thrust::device_vector<float> data_d_old(nx * ny, 0);

  _initialize_temps<<<NBLK, init_threads>>>(
      thrust::raw_pointer_cast(data_d_old.data()), nx, ny);
  // A kernel launch returns nothing; configuration errors are only visible
  // through cudaGetLastError().
  if (!cuda_call_succeeded(cudaGetLastError(), "naive: _initialize_temps launch")) {
    return 1;
  }

  thrust::device_vector<float> data_d_new = data_d_old;

  if (!cuda_call_succeeded(cudaDeviceSynchronize(), "naive: initialisation")) {
    return 1;
  }
  // NOTE(review): the template argument 16 is presumably a block/tile size —
  // confirm against naive::heat_diffusion.
  naive::heat_diffusion<float, 16>(thrust::raw_pointer_cast(data_d_old.data()),
                                   thrust::raw_pointer_cast(data_d_new.data()),
                                   nx, ny, iter);
  // Synchronise so that errors raised while the device work runs are reported.
  if (!cuda_call_succeeded(cudaDeviceSynchronize(), "naive: heat_diffusion")) {
    return 1;
  }
  // Printing device vector
  // std::cout << "\n Naive: \n";
  // thrust::copy(data_d_new.begin(), data_d_new.end(),
  //              std::ostream_iterator<float>(std::cout, " "));

  // shared_global
  thrust::device_vector<float> data_d_old_sg(nx * ny, 0);

  _initialize_temps<<<NBLK, init_threads>>>(
      thrust::raw_pointer_cast(data_d_old_sg.data()), nx, ny);
  if (!cuda_call_succeeded(cudaGetLastError(),
                           "shared_global: _initialize_temps launch")) {
    return 1;
  }

  thrust::device_vector<float> data_d_new_sg = data_d_old_sg;

  if (!cuda_call_succeeded(cudaDeviceSynchronize(), "shared_global: initialisation")) {
    return 1;
  }
  shared_global::heat_diffusion<float, 16>(
      thrust::raw_pointer_cast(data_d_old_sg.data()),
      thrust::raw_pointer_cast(data_d_new_sg.data()),
      nx, ny, iter);
  if (!cuda_call_succeeded(cudaDeviceSynchronize(), "shared_global: heat_diffusion")) {
    return 1;
  }
  // Printing device vector
  // std::cout << "\n Shared & Global: \n";
  // thrust::copy(data_d_new_sg.begin(), data_d_new_sg.end(),
  //              std::ostream_iterator<float>(std::cout, " "));

  // shared_only
  thrust::device_vector<float> data_d_old_so(nx * ny, 0);

  _initialize_temps<<<NBLK, init_threads>>>(
      thrust::raw_pointer_cast(data_d_old_so.data()), nx, ny);
  if (!cuda_call_succeeded(cudaGetLastError(),
                           "shared_only: _initialize_temps launch")) {
    return 1;
  }

  thrust::device_vector<float> data_d_new_so = data_d_old_so;

  if (!cuda_call_succeeded(cudaDeviceSynchronize(), "shared_only: initialisation")) {
    return 1;
  }
  shared_only::heat_diffusion<float, 16>(
      thrust::raw_pointer_cast(data_d_old_so.data()),
      thrust::raw_pointer_cast(data_d_new_so.data()),
      nx, ny, iter);
  if (!cuda_call_succeeded(cudaDeviceSynchronize(), "shared_only: heat_diffusion")) {
    return 1;
  }
  // Printing device vector
  // std::cout << "\n Shared Only: \n";
  // thrust::copy(data_d_new_so.begin(), data_d_new_so.end(),
  //              std::ostream_iterator<float>(std::cout, " "));

  return 0;
}
0 commit comments