% Conference paper. The abstract is kept in `abstract` (ignored by standard
% styles) rather than `note`, which styles print verbatim in the reference list.
@inproceedings{oai:uec.repo.nii.ac.jp:00001920,
  author    = {Ohmura, Junichi and Egashira, Akira and Satoh, Shunji and Miyoshi, Takefumi and Irie, Hidetsugu and Yoshinaga, Tsutomu},
  title     = {{Multi-GPU} Acceleration of Optical Flow Computation in Visual Functional Simulation},
  booktitle = {Second International Conference on Networking and Computing},
  pages     = {228--234},
  publisher = {IEEE},
  month     = sep,
  year      = {2011},
  abstract  = {Numerical simulation for visual processing of the human brain is one of time-consuming applications. This paper shows acceleration techniques for a simulation program of the visual processing. We parallelize convolution calculations, which are core operations, which the simulation program requests, on a GPU-accelerated PC cluster. Our implementation includes three improvement points. Firstly, we consider efficient data mapping onto global and shared memories of the GPU. Secondly, multiple convolutions for the same input data are computed by each node's GPU, referred to as package execution. Finally, an input 2-dimensional image is divided into regions and convolutions for these regions are executed in parallel utilizing MPI (Message Passing Interface). Our experimental results show a linear speedup up to 12 nodes in the PC cluster for the convolution program. We also show the effects of the package execution and reduced communication on NVIDIA Tesla C1060 and C2070, respectively.},
}