// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefixes=CHECK %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-DEBUG %s

// expected-no-diagnostics

// TODO: Teach the update script to check new functions too.

#ifndef HEADER
#define HEADER

// CHECK-LABEL: @_Z14parallel_for_0v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK:       omp_parallel:
// CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z14parallel_for_0v..omp_par)
// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
// CHECK:       omp.par.outlined.exit:
// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK:       omp.par.exit.split:
// CHECK-NEXT:    ret void
//
// CHECK-DEBUG-LABEL: @_Z14parallel_for_0v(
// CHECK-DEBUG-NEXT:  entry:
// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]), !dbg [[DBG13:![0-9]+]]
// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK-DEBUG:       omp_parallel:
// CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z14parallel_for_0v..omp_par), !dbg [[DBG14:![0-9]+]]
// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
// CHECK-DEBUG:       omp.par.outlined.exit:
// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK-DEBUG:       omp.par.exit.split:
// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG18:![0-9]+]]
//
void parallel_for_0(void) {
#pragma omp parallel
  {
#pragma omp for
    for (int i = 0; i < 100; ++i) {
    }
  }
}

// CHECK-LABEL: @_Z14parallel_for_1Pfid(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[STRUCTARG17:%.*]] = alloca { ptr, ptr, ptr }, align 8
// CHECK-NEXT:    [[R_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
// CHECK-NEXT:    store ptr [[R:%.*]], ptr [[R_ADDR]], align 8
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK:       omp_parallel:
// CHECK-NEXT:    [[GEP_A_ADDR18:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 0
// CHECK-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR18]], align 8
// CHECK-NEXT:    [[GEP_B_ADDR19:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 1
// CHECK-NEXT:    store ptr [[B_ADDR]], ptr [[GEP_B_ADDR19]], align 8
// CHECK-NEXT:    [[GEP_R_ADDR20:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 2
// CHECK-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8
// CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]])
// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
// CHECK:       omp.par.outlined.exit16:
// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK:       omp.par.exit.split:
// CHECK-NEXT:    ret void
//
// CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid(
// CHECK-DEBUG-NEXT:  entry:
// CHECK-DEBUG-NEXT:    [[STRUCTARG17:%.*]] = alloca { ptr, ptr, ptr }, align 8
// CHECK-DEBUG-NEXT:    [[R_ADDR:%.*]] = alloca ptr, align 8
// CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
// CHECK-DEBUG-NEXT:    store ptr [[R:%.*]], ptr [[R_ADDR]], align 8
// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]]
// CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]]
// CHECK-DEBUG-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]]
// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG78:![0-9]+]]
// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK-DEBUG:       omp_parallel:
// CHECK-DEBUG-NEXT:    [[GEP_A_ADDR18:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 0
// CHECK-DEBUG-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR18]], align 8
// CHECK-DEBUG-NEXT:    [[GEP_B_ADDR19:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 1
// CHECK-DEBUG-NEXT:    store ptr [[B_ADDR]], ptr [[GEP_B_ADDR19]], align 8
// CHECK-DEBUG-NEXT:    [[GEP_R_ADDR20:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 2
// CHECK-DEBUG-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8
// CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]]
// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
// CHECK-DEBUG:       omp.par.outlined.exit16:
// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK-DEBUG:       omp.par.exit.split:
// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG81:![0-9]+]]
//
void parallel_for_1(float *r, int a, double b) {
#pragma omp parallel
  {
#pragma omp parallel
    {
#pragma omp for
      for (int i = 0; i < 100; ++i) {
        *r = a + b;
      }
    }
  }
}

// CHECK-LABEL: @_Z14parallel_for_2Pfid(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8
// CHECK-NEXT:    [[R_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
// CHECK-NEXT:    [[I185:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8
// CHECK-NEXT:    [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4
// CHECK-NEXT:    [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[P_LASTITER203:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[P_LOWERBOUND204:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[P_UPPERBOUND205:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[P_STRIDE206:%.*]] = alloca i32, align 4
// CHECK-NEXT:    store ptr [[R:%.*]], ptr [[R_ADDR]], align 8
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK:       omp_parallel:
// CHECK-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
// CHECK-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
// CHECK-NEXT:    [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
// CHECK-NEXT:    store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8
// CHECK-NEXT:    [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
// CHECK-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8
// CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]])
// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
// CHECK:       omp.par.outlined.exit184:
// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK:       omp.par.exit.split:
// CHECK-NEXT:    store i32 0, ptr [[I185]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0
// CHECK-NEXT:    store ptr [[I185]], ptr [[TMP0]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I185]], align 4
// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP1]], align 4
// CHECK-NEXT:    call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]])
// CHECK-NEXT:    [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4
// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]]
// CHECK:       omp_loop.preheader190:
// CHECK-NEXT:    store i32 0, ptr [[P_LOWERBOUND204]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1
// CHECK-NEXT:    store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4
// CHECK-NEXT:    store i32 1, ptr [[P_STRIDE206]], align 4
// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0)
// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4
// CHECK-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]]
// CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1
// CHECK-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]]
// CHECK:       omp_loop.header191:
// CHECK-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ]
// CHECK-NEXT:    br label [[OMP_LOOP_COND192:%.*]]
// CHECK:       omp_loop.cond192:
// CHECK-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]]
// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]]
// CHECK:       omp_loop.body193:
// CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]]
// CHECK-NEXT:    call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]])
// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT:    [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double
// CHECK-NEXT:    [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8
// CHECK-NEXT:    [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]]
// CHECK-NEXT:    [[CONV202:%.*]] = fptrunc double [[ADD201]] to float
// CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8
// CHECK-NEXT:    store float [[CONV202]], ptr [[TMP11]], align 4
// CHECK-NEXT:    br label [[OMP_LOOP_INC194]]
// CHECK:       omp_loop.inc194:
// CHECK-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1
// CHECK-NEXT:    br label [[OMP_LOOP_HEADER191]]
// CHECK:       omp_loop.exit195:
// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]])
// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]])
// CHECK-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]]
// CHECK:       omp_loop.after196:
// CHECK-NEXT:    ret void
//
// CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid(
// CHECK-DEBUG-NEXT:  entry:
// CHECK-DEBUG-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8
// CHECK-DEBUG-NEXT:    [[R_ADDR:%.*]] = alloca ptr, align 8
// CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
// CHECK-DEBUG-NEXT:    [[I185:%.*]] = alloca i32, align 4
// CHECK-DEBUG-NEXT:    [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8
// CHECK-DEBUG-NEXT:    [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4
// CHECK-DEBUG-NEXT:    [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4
// CHECK-DEBUG-NEXT:    [[P_LASTITER203:%.*]] = alloca i32, align 4
// CHECK-DEBUG-NEXT:    [[P_LOWERBOUND204:%.*]] = alloca i32, align 4
// CHECK-DEBUG-NEXT:    [[P_UPPERBOUND205:%.*]] = alloca i32, align 4
// CHECK-DEBUG-NEXT:    [[P_STRIDE206:%.*]] = alloca i32, align 4
// CHECK-DEBUG-NEXT:    store ptr [[R:%.*]], ptr [[R_ADDR]], align 8
// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]]
// CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]]
// CHECK-DEBUG-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]]
// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]]
// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK-DEBUG:       omp_parallel:
// CHECK-DEBUG-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
// CHECK-DEBUG-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
// CHECK-DEBUG-NEXT:    [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
// CHECK-DEBUG-NEXT:    store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8
// CHECK-DEBUG-NEXT:    [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
// CHECK-DEBUG-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8
// CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]]
// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
// CHECK-DEBUG:       omp.par.outlined.exit184:
// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK-DEBUG:       omp.par.exit.split:
// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata ptr [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]]
// CHECK-DEBUG-NEXT:    store i32 0, ptr [[I185]], align 4, !dbg [[DBG147]]
// CHECK-DEBUG-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]]
// CHECK-DEBUG-NEXT:    store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG149:![0-9]+]]
// CHECK-DEBUG-NEXT:    store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]]
// CHECK-DEBUG:       omp_loop.preheader190:
// CHECK-DEBUG-NEXT:    store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]]
// CHECK-DEBUG:       omp_loop.header191:
// CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]]
// CHECK-DEBUG:       omp_loop.cond192:
// CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]]
// CHECK-DEBUG:       omp_loop.body193:
// CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]]
// CHECK-DEBUG-NEXT:    call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]]
// CHECK-DEBUG-NEXT:    [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]]
// CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG150]]
// CHECK-DEBUG-NEXT:    [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]]
// CHECK-DEBUG-NEXT:    [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]]
// CHECK-DEBUG-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]]
// CHECK-DEBUG-NEXT:    store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]]
// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC194]], !dbg [[DBG148]]
// CHECK-DEBUG:       omp_loop.inc194:
// CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]]
// CHECK-DEBUG:       omp_loop.exit195:
// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]]
// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]]
// CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]]
// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]]
// CHECK-DEBUG:       omp_loop.after196:
// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG155:![0-9]+]]
//
void parallel_for_2(float *r, int a, double b) {
#pragma omp parallel
  {
#pragma omp for
    for (int i = 0; i < 100; ++i)
      *r = a + b;
#pragma omp parallel
    {
#pragma omp for
      for (int i = 0; i < 100; ++i)
        *r = a + b;
#pragma omp parallel
      {
#pragma omp for
        for (int i = 0; i < 100; ++i)
          *r = a + b;
      }
#pragma omp for
      for (int i = 0; i < 100; ++i)
        *r = a + b;
#pragma omp parallel
      {
#pragma omp for
        for (int i = 0; i < 100; ++i)
          *r = a + b;
      }
#pragma omp for
      for (int i = 0; i < 100; ++i)
        *r = a + b;
    }
#pragma omp for
    for (int i = 0; i < 100; ++i)
      *r = a + b;
  }
#pragma omp for
  for (int i = 0; i < 100; ++i)
    *r = a + b;
}

#endif
