@@ -165,6 +165,126 @@ exit:
165165 ret void
166166}
167167
; Loop under test: for iv = 0, 1, ... the body loads a float from
; src[iv * scale], adds %k, and stores the result to dst[iv]. The exit test
; (iv.next == N) sits at the bottom of the loop, so the body runs once before
; N is compared.
;
; The assertions for both check prefixes (APPLE and OTHER) expect runtime
; unrolling by 2: the main loop holds two copies of the body and steps iv by 2,
; and a single epilogue copy with no back-edge handles the leftover iteration
; when N is odd (XTRAITER = N & 1). When N == 1 the main loop is skipped.
; NOTE(review): the APPLE and OTHER expectations appear identical here; the RUN
; lines that select each prefix are not visible in this excerpt - confirm.
168+ define void @load_op_store_loop (ptr %src , ptr %dst , i64 %N , i64 %scale , float %k ) {
169+ ; APPLE-LABEL: define void @load_op_store_loop(
170+ ; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
171+ ; APPLE-NEXT: [[ENTRY:.*]]:
172+ ; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
173+ ; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
174+ ; APPLE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
175+ ; APPLE-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
176+ ; APPLE: [[ENTRY_NEW]]:
177+ ; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
178+ ; APPLE-NEXT: br label %[[LOOP:.*]]
179+ ; APPLE: [[LOOP]]:
180+ ; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
181+ ; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
182+ ; APPLE-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
183+ ; APPLE-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
184+ ; APPLE-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
185+ ; APPLE-NEXT: [[O:%.*]] = fadd float [[L]], [[K]]
186+ ; APPLE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
187+ ; APPLE-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
188+ ; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
189+ ; APPLE-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
190+ ; APPLE-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
191+ ; APPLE-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
192+ ; APPLE-NEXT: [[O_1:%.*]] = fadd float [[L_1]], [[K]]
193+ ; APPLE-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
194+ ; APPLE-NEXT: store float [[O_1]], ptr [[GEP_DST_1]], align 4
195+ ; APPLE-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
196+ ; APPLE-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
197+ ; APPLE-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
198+ ; APPLE-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
199+ ; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
200+ ; APPLE-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
201+ ; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
202+ ; APPLE: [[EXIT_UNR_LCSSA]]:
203+ ; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
204+ ; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
205+ ; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
206+ ; APPLE: [[LOOP_EPIL_PREHEADER]]:
207+ ; APPLE-NEXT: br label %[[LOOP_EPIL:.*]]
208+ ; APPLE: [[LOOP_EPIL]]:
209+ ; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
210+ ; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
211+ ; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
212+ ; APPLE-NEXT: [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]]
213+ ; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]]
214+ ; APPLE-NEXT: store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4
215+ ; APPLE-NEXT: br label %[[EXIT]]
216+ ; APPLE: [[EXIT]]:
217+ ; APPLE-NEXT: ret void
218+ ;
219+ ; OTHER-LABEL: define void @load_op_store_loop(
220+ ; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
221+ ; OTHER-NEXT: [[ENTRY:.*]]:
222+ ; OTHER-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
223+ ; OTHER-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
224+ ; OTHER-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
225+ ; OTHER-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
226+ ; OTHER: [[ENTRY_NEW]]:
227+ ; OTHER-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
228+ ; OTHER-NEXT: br label %[[LOOP:.*]]
229+ ; OTHER: [[LOOP]]:
230+ ; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
231+ ; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
232+ ; OTHER-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
233+ ; OTHER-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
234+ ; OTHER-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
235+ ; OTHER-NEXT: [[O:%.*]] = fadd float [[L]], [[K]]
236+ ; OTHER-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
237+ ; OTHER-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
238+ ; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
239+ ; OTHER-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
240+ ; OTHER-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
241+ ; OTHER-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
242+ ; OTHER-NEXT: [[O_1:%.*]] = fadd float [[L_1]], [[K]]
243+ ; OTHER-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
244+ ; OTHER-NEXT: store float [[O_1]], ptr [[GEP_DST_1]], align 4
245+ ; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
246+ ; OTHER-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
247+ ; OTHER-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
248+ ; OTHER-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
249+ ; OTHER: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
250+ ; OTHER-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
251+ ; OTHER-NEXT: br label %[[EXIT_UNR_LCSSA]]
252+ ; OTHER: [[EXIT_UNR_LCSSA]]:
253+ ; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
254+ ; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
255+ ; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
256+ ; OTHER: [[LOOP_EPIL_PREHEADER]]:
257+ ; OTHER-NEXT: br label %[[LOOP_EPIL:.*]]
258+ ; OTHER: [[LOOP_EPIL]]:
259+ ; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
260+ ; OTHER-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
261+ ; OTHER-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
262+ ; OTHER-NEXT: [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]]
263+ ; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]]
264+ ; OTHER-NEXT: store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4
265+ ; OTHER-NEXT: br label %[[EXIT]]
266+ ; OTHER: [[EXIT]]:
267+ ; OTHER-NEXT: ret void
268+ ;
269+ entry:
270+ br label %loop
271+
272+ loop:
273+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
; The source index is iv * scale, where %scale is a function argument.
274+ %scaled.iv = mul nuw nsw i64 %iv , %scale
275+ %gep.src = getelementptr inbounds float , ptr %src , i64 %scaled.iv
276+ %l = load float , ptr %gep.src , align 4
277+ %o = fadd float %l , %k
; The destination is indexed by iv directly, without the scale factor.
278+ %gep.dst = getelementptr inbounds float , ptr %dst , i64 %iv
279+ store float %o , ptr %gep.dst , align 4
280+ %iv.next = add nuw nsw i64 %iv , 1
; Leave the loop once the incremented induction variable equals N.
281+ %ec = icmp eq i64 %iv.next , %N
282+ br i1 %ec , label %exit , label %loop
283+
284+ exit:
285+ ret void
286+ }
287+
; External constant arrays: @A is [9 x i8] (align 1); @B and @C are [8 x i32]
; (align 4). Their users are not shown in this part of the file.
168288@A = external constant [9 x i8 ], align 1
169289@B = external constant [8 x i32 ], align 4
170290@C = external constant [8 x i32 ], align 4
0 commit comments