2016-01-21 40 views
-4

我想在OpenMP(2核心)中实现以下顺序代码以进行并行计算。openmp的代码实现

我应该做哪些修改以使用OpenMP运行?如果你帮助我,我将不胜感激。在此先感谢

 program POTENTIAL 
     parameter (imx=201, jmx=201) 
     common /pars/ imax,jmax,kmax,kout 
     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx) 
     common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx) 
     data rl2,rl2allow /1.,1.E-7/ 
c..Read the input data, generate the grid data and initialize the solution 

     call INIT 

c..Start the iterative solution loop 

     k = 0 
     DO WHILE (k .lt. kmax .and. rl2 .gt. rl2allow) 
     k = k + 1 
     call BC 
c..Point iterative solutions 

     call SOR 
c..Line iterative solution 

c  call SLOR 
c..Update phi_k array and evaluate the residual 

     rl2=0. 
     do j = 2,jmax-1 
     do i = 2,imax-1 
      rl2 = rl2 + (phi_kp1(i,j) - phi_k(i,j))**2 
      phi_k(i,j) = phi_kp1(i,j) 
     enddo 
     enddo 
     rl2 = SQRT(rl2/((imax-2)*(jmax-2))) 
     print*, '[email protected]=',k,rl2 
c..Output intermediate solutions 

     if(mod(k,kout).eq.0 .and. k.ne.kmax) call IO(k) 
     ENDDO 
     call IO(k) 

     stop 
     end  

     subroutine SOR 

     parameter (imx=201, jmx=201) 
     common /pars/ imax,jmax,kmax,kout 
     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx) 
     common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx) 

     beta2 = (dr/dth)**2 
c..Solve for phi^k+1 

     do i = 2,imax-1 
     c1 = beta2/r(i)**2 
     c2 = 0.5*dr/r(i) 
     c3 = 0.5/(1.+c1) 
     do j = 2,jmax-1 
c..Implement the point Jacobi, Gauss-Seidel and SOR methods 

     phi_kp1(i,j) = c3*((1.-c2)*phi_k(i-1,j) + (1.+c2)*phi_k(i+1,j) 
    >      + c1*(phi_k(i,j-1) + phi_k(i,j+1))) 
     enddo 
     enddo 
     return 
     end 



     subroutine INIT 
     parameter (imx=201, jmx=201) 
     common /pars/ imax,jmax,kmax,kout 
     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx) 
     common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx) 
     data r1,r2/1.0,5.0/, imax,jmax/101,91/ 

c..Read input parameters like imax, jmax 

     print*, 'Enter kmax and kout :' 
     read(*,*) kmax, kout 

     pi = 4.*ATAN(1.) 
     dr = (r2-r1)/float(imax-1) 
     dth = pi/float(jmax-1) 
c..Grid generation 

     do j=1,jmax 
     do i=1,imax 
     r(i) = r1 + dr*(i-1) 
     th(j) = dth*(j-1) 
     x(i,j) = r(i)*COS(th(j)) 
     y(i,j) = r(i)*SIN(th(j)) 
c...initialize the phi arrays 

     phi_k(i,j) = r(i)*COS(th(j)) 
     phi_kp1(i,j) = 0. 
     enddo 
     enddo 
     call BC 
     call IO(1) 

     return 
     end 

c------------------------------------------------------------------- 

     subroutine BC 
     parameter (imx=201, jmx=201) 
     common /pars/ imax,jmax,kmax,kout 
     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx) 
     common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx) 

     do i = 1,imax 
     phi_k(i,1)  = phi_k(i,2) 
     phi_kp1(i,1) = phi_k(i,2) 
     phi_k(i,jmax) = phi_k(i,jmax-1) 
     phi_kp1(i,jmax) = phi_k(i,jmax-1) 
     enddo 
     do j = 1,jmax 
     phi_k(1,j)  = phi_k(2,j) 
     phi_kp1(1,j) = phi_k(2,j) 
     phi_k(imax,j) = r(imax)*COS(th(j)) 
     phi_kp1(imax,j) = phi_k(imax,j) 
     enddo 

     return 
     end 

c------------------------------------------------------------------- 

     subroutine VELOCITY(vx,vy) 
     parameter (imx=201, jmx=201) 
     common /pars/ imax,jmax,kmax,kout 
     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx) 
     common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx) 
     dimension vx(imx,jmx),vy(imx,jmx) 
     do j = 1,jmax 
     do i = 1,imax 
     if(i .eq. 1) then 
      vr = (phi_k(2,j)-phi_k(1,j))/dr 
     elseif(i .eq. imax) then 
      vr = (phi_k(imax,j)-phi_k(imax-1,j))/dr 
     else 
      vr = 0.5*(phi_k(i+1,j)-phi_k(i-1,j))/dr 
     endif 
     if(j .eq. 1) then 
      vth = (phi_k(i,2)-phi_k(i,1))/(dth*r(i)) 
     elseif(j .eq. jmax) then 
      vth = (phi_k(i,jmax)-phi_k(i,jmax-1))/(dth*r(i)) 
     else 
      vth = 0.5*(phi_k(i,j+1)-phi_k(i,j-1))/(dth*r(i)) 
     endif 
     vx(i,j) = vr*COS(th(j)) - vth*SIN(th(j)) 
     vy(i,j) = vr*SIN(th(j)) + vth*COS(th(j)) 
     enddo 
     enddo 
     return 
     end 

c------------------------------------------------------------------- 

     subroutine IO(k) 
c..Output solution in tecplot format 

     parameter (imx=201, jmx=201) 
     common /pars/ imax,jmax,kmax,kout 
     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx) 
     common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx) 
     dimension vx(imx,jmx),vy(imx,jmx) 
     character fname*32,string*8,ext*5 

     call VELOCITY(vx,vy) 
     write(string,'(f8.5)') float(k)/100000 
     read(string,'(3x,a5)') ext 
     fname = 'vars-'//ext//'.tec' 
     open(1,file=fname,form='formatted') 
     write(1,*) ' variables="x","y","phi","u","v" ' 
     write(1,*) ' zone i=',imax, ', j=',jmax 
     do j = 1,jmax 
     do i = 1,imax 
     write(1,*) x(i,j),y(i,j),phi_k(i,j),vx(i,j),vy(i,j) 
     enddo 
     enddo 
     close(1) 

     return 
     end 

c------------------------------------------------------------------- 
     SUBROUTINE THOMAS(mx,il,iu,A,B,C,R) 
c............................................................ 
c Solution of a tridiagonal system of n equations of the form 
c A(i)*x(i-1) + B(i)*x(i) + C(i)*x(i+1) = R(i) for i=il,iu 
c the solution X(i) is stored in F(i) 
c A(il-1) and C(iu+1) are not used. 
c A,B,C,R are arrays to be provided by the user 
c............................................................ 

     dimension a(mx),b(mx),c(mx),r(mx),x(mx) 
     x(il)=c(il)/b(il) 
     r(il)=r(il)/b(il) 
     do i=il+1,iu 
     z=1./(b(i)-a(i)*x(i-1)) 
     x(i)=c(i)*z 
     r(i)=(r(i)-a(i)*r(i-1))*z 
     enddo 
     do i=iu-1,il,-1 
     r(i)=r(i)-x(i)*r(i+1) 
     enddo 
     return 
     end 
+4

我有这种感觉,我已经看到[同样的问题](http://stackoverflow.com/q/34912742/5239503)之前...... – Gilles

回答

1

看你的代码的重复部分(14-37行),我们观察到,有两个调用(BC和SOR),在循环中执行代码。这些是放置OpenMP指令的潜在地方。在这两个例程中,循环都没有公开阻止并行执行的数据依赖性,因此您可以应用以下转换。

对于BC例程(下面),您可以使用两次!$ OMP PARALLEL DO在可用线程中分配两个循环的工作(每个循环一个指令)。

 subroutine BC                     
     parameter (imx=201, jmx=201)                 
     common /pars/ imax,jmax,kmax,kout                
     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)          
     common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)             

!$OMP PARALLEL DO SHARED(IMAX,PHI_K,PHI_KP1)               
     do i = 1,imax                     
     phi_k(i,1)  = phi_k(i,2)                 
     phi_kp1(i,1) = phi_k(i,2)                 
     phi_k(i,jmax) = phi_k(i,jmax-1)               
     phi_kp1(i,jmax) = phi_k(i,jmax-1)               
     enddo                       
!$OMP END PARALLEL DO                     

!$OMP PARALLEL DO SHARED(JMAX,PHI_K,PHI_KP1)               
     do j = 1,jmax                     
     phi_k(1,j)  = phi_k(2,j)                 
     phi_kp1(1,j) = phi_k(2,j)                 
     phi_k(imax,j) = r(imax)*COS(th(j))               
     phi_kp1(imax,j) = phi_k(imax,j)                
     enddo                       
!$OMP END PARALLEL DO                     

     return                       
     end                        

与SOR类似,您可以在线程之间分配外循环的工作。在这种情况下,每个线程都需要一个时间变量(c1,c2,c3)和迭代变量(j)的私有副本。

 subroutine SOR                     

     parameter (imx=201, jmx=201)                 
     common /pars/ imax,jmax,kmax,kout                
     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)          
     common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)             

     beta2 = (dr/dth)**2                    
c..Solve for phi^k+1                     

C$OMP PARALLEL DO PRIVATE(c1,c2,c3,j) SHARED(IMAX,JMAX,PHI_KP1,PHI_K)         
     do i = 2,imax-1                     
     c1 = beta2/r(i)**2                    
     c2 = 0.5*dr/r(i)                    
     c3 = 0.5/(1.+c1)                    
     do j = 2,jmax-1                     
c..Implement the point Jacobi, Gauss-Seidel and SOR methods           

     phi_kp1(i,j) = c3*((1.-c2)*phi_k(i-1,j) + (1.+c2)*phi_k(i+1,j)         
    >      + c1*(phi_k(i,j-1) + phi_k(i,j+1)))          
     enddo                       
     enddo                       
C$OMP END PARALLEL DO                     

     return                       
     end                        

您可能想要了解其他代码并查看进一步的并行化机会。您可以使用探查器(gprof,TAU,Vampir,Paraver)来确定应用程序热点并将并行化集中在那里。