// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.


//
// #Overview
//
// GC automatically manages memory allocated by managed code.
// The design doc for GC can be found at docs/design/coreclr/botr/garbage-collection.md
//
// This file includes both the code for GC and the allocator. The most common
// case for a GC to be triggered is from the allocator code. See
// code:#try_allocate_more_space where it calls GarbageCollectGeneration.
//
// Entry points for the allocator are GCHeap::Alloc* which are called by the
// allocation helpers in gcscan.cpp
//

#include "common.h"
#include "gcenv.h"

#include "gc.h"
#include "gcscan.h"
#include "gcdesc.h"
#include "softwarewritewatch.h"
#include "handletable.h"
#include "handletable.inl"
#include "gcenv.inl"
#include "gceventstatus.h"
#include <minipal/memorybarrierprocesswide.h>

// If FEATURE_INTERPRETER is set, always enable the GC side of FEATURE_CONSERVATIVE_GC
#ifdef FEATURE_INTERPRETER
#ifndef FEATURE_CONSERVATIVE_GC
#define FEATURE_CONSERVATIVE_GC
#endif
#endif // FEATURE_INTERPRETER

#ifdef __INTELLISENSE__
#if defined(FEATURE_SVR_GC)

#define SERVER_GC 1

#else // defined(FEATURE_SVR_GC)

#ifdef SERVER_GC
#undef SERVER_GC
#endif

#endif // defined(FEATURE_SVR_GC)
#endif // __INTELLISENSE__

#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
#include "vxsort/do_vxsort.h"
#define USE_VXSORT
#else
#define USE_INTROSORT
#endif // TARGET_AMD64 || TARGET_ARM64
#include "introsort.h"

#ifdef SERVER_GC
namespace SVR {
#else // SERVER_GC
namespace WKS {
#endif // SERVER_GC

#include "gcimpl.h"
#include "gcpriv.h"

#ifdef DACCESS_COMPILE
#error this source file should not be compiled with DACCESS_COMPILE!
#endif //DACCESS_COMPILE

// Minimal linear congruential pseudo-random number generator; only meant for testing.
class gc_rand
{
public:
    // Generator state: the value produced by the most recent get_rand() step.
    static uint64_t x;

    // Advances the generator by one step and returns the new state, which is
    // masked down to 31 bits.
    static uint64_t get_rand()
    {
        const uint64_t advanced = (314159269 * x) + 278281;
        x = advanced & 0x7FFFFFFF;
        return x;
    }

    // Returns a pseudo-random number in the range 0 .. r-1 by scaling the next
    // 31-bit sample by r and discarding the low 31 bits of the product.
    static uint64_t get_rand(uint64_t r)
    {
        const uint64_t sample = get_rand();
        return ((sample * r) >> 31);
    }
};

uint64_t gc_rand::x = 0;

#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE)
BOOL bgc_heap_walk_for_etw_p = FALSE;
#endif //BACKGROUND_GC && FEATURE_EVENT_TRACE

#define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0))
#define commit_min_th (16*OS_PAGE_SIZE)

#define MIN_SOH_CROSS_GEN_REFS (400)
#define MIN_LOH_CROSS_GEN_REFS (800)

#ifdef SERVER_GC
#define partial_size_th 100
#define num_partial_refs 64
#else //SERVER_GC
#define partial_size_th 100
#define num_partial_refs 32
#endif //SERVER_GC

#define demotion_plug_len_th (6*1024*1024)

#ifdef USE_REGIONS
// If the survived / region_size is 90+%, we don't compact this region.
#define sip_surv_ratio_th (90)
// If the survived due to cards from old generations / region_size is 90+%,
// we don't compact this region, also we immediately promote it to gen2.
#define sip_old_card_surv_ratio_th (90)
#endif //USE_REGIONS

#ifdef HOST_64BIT
#define MARK_STACK_INITIAL_LENGTH 1024
#else
#define MARK_STACK_INITIAL_LENGTH 128
#endif // HOST_64BIT

#define LOH_PIN_QUEUE_LENGTH 100
#define LOH_PIN_DECAY 10

#define UOH_ALLOCATION_RETRY_MAX_COUNT 2

#define MAX_YP_SPIN_COUNT_UNIT 32768

uint32_t yp_spin_count_unit = 0;
uint32_t original_spin_count_unit = 0;
size_t loh_size_threshold = LARGE_OBJECT_SIZE;

#ifdef GC_CONFIG_DRIVEN
int compact_ratio = 0;
#endif //GC_CONFIG_DRIVEN

#ifdef FEATURE_SVR_GC
bool g_built_with_svr_gc = true;
#else
bool g_built_with_svr_gc = false;
#endif // FEATURE_SVR_GC

#if defined(BUILDENV_DEBUG)
uint8_t g_build_variant = 0;
#elif defined(BUILDENV_CHECKED)
uint8_t g_build_variant = 1;
#else
uint8_t g_build_variant = 2;
#endif //BUILDENV_DEBUG

VOLATILE(int32_t) g_no_gc_lock = -1;

#ifdef TRACE_GC
// Display names for allocation states; only compiled when TRACE_GC is defined.
// NOTE(review): presumably indexed by the numeric value of the allocation
// state enum declared elsewhere, so the order here would have to match that
// enum - confirm before reordering or inserting entries.
const char * const allocation_state_str[] = {
    "start",
    "can_allocate",
    "cant_allocate",
    "retry_allocate",
    "try_fit",
    "try_fit_new_seg",
    "try_fit_after_cg",
    "try_fit_after_bgc",
    "try_free_full_seg_in_bgc",
    "try_free_after_bgc",
    "try_seg_end",
    "acquire_seg",
    "acquire_seg_after_cg",
    "acquire_seg_after_bgc",
    "check_and_wait_for_bgc",
    "trigger_full_compact_gc",
    "trigger_ephemeral_gc",
    "trigger_2nd_ephemeral_gc",
    "check_retry_seg"
};

// Display names for the states in which the more-space lock (msl) is taken;
// only compiled when TRACE_GC is defined.
// NOTE(review): presumably indexed by the corresponding enum declared
// elsewhere - confirm the order matches before editing.
const char * const msl_take_state_str[] = {
    "get_large_seg",
    "bgc_loh_sweep",
    "wait_bgc",
    "block_gc",
    "clr_mem",
    "clr_large_mem",
    "t_eph_gc",
    "t_full_gc",
    "alloc_small",
    "alloc_large",
    "alloc_small_cant",
    "alloc_large_cant",
    "try_alloc",
    "try_budget"
};
#endif //TRACE_GC


// Keep this in sync with the definition of gc_reason
#if (defined(DT_LOG) || defined(TRACE_GC))
// Display names for gc_reason values, used for logging when DT_LOG or
// TRACE_GC is defined. Entry i is meant to describe the gc_reason whose
// numeric value is i.
// NOTE(review): is_induced() in this file tests reason_lowmemory_blocking and
// reason_induced_aggressive, neither of which has an identically named entry
// in this table - confirm that the table still covers every gc_reason value
// and that each name sits at the right index.
static const char* const str_gc_reasons[] =
{
    "alloc_soh",
    "induced",
    "lowmem",
    "empty",
    "alloc_loh",
    "oos_soh",
    "oos_loh",
    "induced_noforce",
    "gcstress",
    "induced_lowmem",
    "induced_compacting",
    "lowmemory_host",
    "pm_full_gc",
    "lowmemory_host_blocking"
};

// Display names for GC pause modes, used for logging when DT_LOG or TRACE_GC
// is defined.
// NOTE(review): presumably indexed by the pause-mode enum declared elsewhere -
// confirm the order matches.
static const char* const str_gc_pause_modes[] =
{
    "batch",
    "interactive",
    "low_latency",
    "sustained_low_latency",
    "no_gc"
};

// Display names for the kinds of GC roots, used for logging when DT_LOG or
// TRACE_GC is defined.
// NOTE(review): presumably indexed by a root-kind enum declared elsewhere -
// confirm the order matches.
static const char* const str_root_kinds[] = {
    "Stack",
    "FinalizeQueue",
    "Handles",
    "OlderGen",
    "SizedRef",
    "Overflow",
    "DependentHandles",
    "NewFQ",
    "Steal",
    "BGC"
};
#endif //DT_LOG || TRACE_GC

// Returns nonzero when `reason` is one of the reasons this file classifies as
// induced: any of the reason_induced* values or any of the reason_lowmemory*
// values (blocking or not, host-originated or not).
inline
BOOL is_induced (gc_reason reason)
{
    const bool explicitly_induced =
        (reason == reason_induced) ||
        (reason == reason_induced_noforce) ||
        (reason == reason_induced_compacting) ||
        (reason == reason_induced_aggressive);

    const bool low_memory =
        (reason == reason_lowmemory) ||
        (reason == reason_lowmemory_blocking) ||
        (reason == reason_lowmemory_host) ||
        (reason == reason_lowmemory_host_blocking);

    return (explicitly_induced || low_memory);
}

// Returns nonzero when `reason` is one of the induced reasons that this file
// classifies as blocking: reason_induced, reason_induced_compacting,
// reason_induced_aggressive, or one of the *_blocking low-memory reasons.
inline
BOOL is_induced_blocking (gc_reason reason)
{
    if ((reason == reason_induced) ||
        (reason == reason_induced_compacting) ||
        (reason == reason_induced_aggressive))
    {
        return true;
    }

    return ((reason == reason_lowmemory_blocking) ||
            (reason == reason_lowmemory_host_blocking));
}

// Maps a generation number to the object heap it belongs to: the three SOH
// generations map to soh, loh_generation to loh and poh_generation to poh.
// Any other value asserts and yields gc_oh_num::unknown.
gc_oh_num gen_to_oh(int gen)
{
    switch (gen)
    {
        // All small-object-heap generations share one answer.
        case soh_gen0:
        case soh_gen1:
        case soh_gen2:
            return gc_oh_num::soh;

        case loh_generation:
            return gc_oh_num::loh;

        case poh_generation:
            return gc_oh_num::poh;

        default:
            assert(false);
            return gc_oh_num::unknown;
    }
}

uint64_t qpf;
double qpf_ms;
double qpf_us;

// Returns the current value of GCToOSInterface::QueryPerformanceCounter(),
// converted to an unsigned 64-bit tick count. No scaling is applied here.
uint64_t RawGetHighPrecisionTimeStamp()
{
    const uint64_t ticks = static_cast<uint64_t>(GCToOSInterface::QueryPerformanceCounter());
    return ticks;
}

#ifdef BGC_SERVO_TUNING
bool gc_heap::bgc_tuning::enable_fl_tuning = false;
uint32_t gc_heap::bgc_tuning::memory_load_goal = 0;
uint32_t gc_heap::bgc_tuning::memory_load_goal_slack = 0;
uint64_t gc_heap::bgc_tuning::available_memory_goal = 0;
bool gc_heap::bgc_tuning::panic_activated_p = false;
double gc_heap::bgc_tuning::accu_error_panic = 0.0;
double gc_heap::bgc_tuning::above_goal_kp = 0.0;
double gc_heap::bgc_tuning::above_goal_ki = 0.0;
bool gc_heap::bgc_tuning::enable_kd = false;
bool gc_heap::bgc_tuning::enable_ki = false;
bool gc_heap::bgc_tuning::enable_smooth = false;
bool gc_heap::bgc_tuning::enable_tbh = false;
bool gc_heap::bgc_tuning::enable_ff = false;
bool gc_heap::bgc_tuning::enable_gradual_d = false;
double gc_heap::bgc_tuning::above_goal_kd = 0.0;
double gc_heap::bgc_tuning::above_goal_ff = 0.0;
double gc_heap::bgc_tuning::num_gen1s_smooth_factor = 0.0;
double gc_heap::bgc_tuning::ml_kp = 0.0;
double gc_heap::bgc_tuning::ml_ki = 0.0;
double gc_heap::bgc_tuning::accu_error = 0.0;

bool gc_heap::bgc_tuning::fl_tuning_triggered = false;

size_t gc_heap::bgc_tuning::num_bgcs_since_tuning_trigger = 0;

bool gc_heap::bgc_tuning::next_bgc_p = false;

size_t gc_heap::bgc_tuning::gen1_index_last_bgc_end;
size_t gc_heap::bgc_tuning::gen1_index_last_bgc_start;
size_t gc_heap::bgc_tuning::gen1_index_last_bgc_sweep;
size_t gc_heap::bgc_tuning::actual_num_gen1s_to_trigger;

gc_heap::bgc_tuning::tuning_calculation gc_heap::bgc_tuning::gen_calc[2];
gc_heap::bgc_tuning::tuning_stats gc_heap::bgc_tuning::gen_stats[2];
gc_heap::bgc_tuning::bgc_size_data gc_heap::bgc_tuning::current_bgc_end_data[2];

size_t gc_heap::bgc_tuning::last_stepping_bgc_count = 0;
uint32_t gc_heap::bgc_tuning::last_stepping_mem_load = 0;
uint32_t gc_heap::bgc_tuning::stepping_interval = 0;
bool gc_heap::bgc_tuning::use_stepping_trigger_p = true;
double gc_heap::bgc_tuning::gen2_ratio_correction = 0.0;
double gc_heap::bgc_tuning::ratio_correction_step = 0.0;

int gc_heap::saved_bgc_tuning_reason = -1;
#endif //BGC_SERVO_TUNING

inline
size_t round_up_power2 (size_t size)
{
    // Get the 0-based index of the most-significant bit in size-1.
    // If the call failed (because size-1 is zero), size must be 1,
    // so return 1 (because 1 rounds up to itself).
    DWORD highest_set_bit_index;
    if (0 ==
#ifdef HOST_64BIT
        BitScanReverse64(
#else
        BitScanReverse(
#endif
            &highest_set_bit_index, size - 1)) { return 1; }

    // The size == 0 case (which would have overflowed to SIZE_MAX when decremented)
    // is handled below by relying on the fact that highest_set_bit_index is the maximum value
    // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that
    // number of bits shifts in zeros from the right, resulting in an output of zero.
    return static_cast<size_t>(2) << highest_set_bit_index;
}

inline
size_t round_down_power2 (size_t size)
{
    // Get the 0-based index of the most-significant bit in size.
    // If the call failed, size must be zero so return zero.
    DWORD highest_set_bit_index;
    if (0 ==
#ifdef HOST_64BIT
        BitScanReverse64(
#else
        BitScanReverse(
#endif
            &highest_set_bit_index, size)) { return 0; }

    // Left-shift 1 by highest_set_bit_index to get back a value containing only
    // the most-significant set bit of size, i.e. size rounded down
    // to the next power-of-two value.
    return static_cast<size_t>(1) << highest_set_bit_index;
}

// Get the 0-based index of the most-significant bit in the value.
// Returns -1 if the input value is zero (i.e. has no set bits).
inline
int index_of_highest_set_bit (size_t value)
{
    // Get the 0-based index of the most-significant bit in the value.
    // If the call failed (because value is zero), return -1.
    DWORD highest_set_bit_index;
    return (0 ==
#ifdef HOST_64BIT
        BitScanReverse64(
#else
        BitScanReverse(
#endif
            &highest_set_bit_index, value)) ? -1 : static_cast<int>(highest_set_bit_index);
}

// Returns the index of the highest set bit of power2 relative to
// MIN_INDEX_POWER2. Anything below MIN_INDEX_POWER2 (including power2 == 0,
// whose bit index is -1) is clamped to 0.
inline
int relative_index_power2_plug (size_t power2)
{
    const int msb = index_of_highest_set_bit (power2);
    assert (msb <= MAX_INDEX_POWER2);

    if (msb < MIN_INDEX_POWER2)
    {
        return 0;
    }

    return (msb - MIN_INDEX_POWER2);
}

// Returns the index of the highest set bit of power2 relative to
// MIN_INDEX_POWER2, or -1 when that bit index is below MIN_INDEX_POWER2
// (which includes power2 == 0).
inline
int relative_index_power2_free_space (size_t power2)
{
    const int msb = index_of_highest_set_bit (power2);
    assert (msb <= MAX_INDEX_POWER2);

    if (msb < MIN_INDEX_POWER2)
    {
        return -1;
    }

    return (msb - MIN_INDEX_POWER2);
}

// Converts a byte count to megabytes using decimal units (divides by
// 1000 * 1000). The count is first narrowed to float, the divisions are done
// in double, and the result is narrowed back to float.
inline
float mb (size_t num)
{
    const float bytes = static_cast<float>(num);
    const double scaled = bytes / 1000.0 / 1000.0;
    return static_cast<float>(scaled);
}

// Converts a byte count to whole gibibytes (1024 * 1024 * 1024 bytes each),
// discarding any remainder.
inline
size_t gib (size_t num)
{
    const size_t bytes_per_gib = static_cast<size_t>(1024) * 1024 * 1024;
    return (num / bytes_per_gib);
}

#ifdef BACKGROUND_GC
uint32_t bgc_alloc_spin_count = 140;
uint32_t bgc_alloc_spin = 2;

// The following 2 ratios dictate how UOH allocations that happen during a BGC should be handled. Because
// UOH is not collected till the very end of a BGC, by default we don't want to allow UOH to grow too large
// during a BGC. So if we only increase the size by 10%, we will allow to allocate normally. But if it's
// too much (ie, > bgc_uoh_inc_ratio_alloc_wait), we will make the allocation wait till the BGC is done.
//
// This means threads that allocate heavily on UOH may be paused during a BGC. If you're willing to accept
// larger UOH sizes in exchange for fewer pauses, you can use the UOHWaitBGCSizeIncPercent config to increase
// the wait ratio. Likewise, set it to use a smaller ratio if you observe that UOH grows too large during
// BGCs.
float bgc_uoh_inc_ratio_alloc_normal = 0.1f;
// This ratio is 2x for regions because regions could start with a much smaller size since a lot of
// memory could be in the free pool.
#ifdef USE_REGIONS
float bgc_uoh_inc_ratio_alloc_wait = 2.0f;
#else
float bgc_uoh_inc_ratio_alloc_wait = 1.0f;
#endif //USE_REGIONS

// Stores value into place through Interlocked::Exchange; the value that was
// previously held is discarded.
inline
void c_write (uint32_t& place, uint32_t value)
{
    uint32_t* const target = &place;
    Interlocked::Exchange (target, value);
}

// If every heap's gen2 or gen3 size is less than this threshold we will do a blocking GC.
const size_t bgc_min_per_heap = 4*1024*1024;

int gc_heap::gchist_index = 0;
gc_mechanisms_store gc_heap::gchist[max_history_count];

#ifndef MULTIPLE_HEAPS
VOLATILE(bgc_state) gc_heap::current_bgc_state = bgc_not_in_process;
int gc_heap::gchist_index_per_heap = 0;
gc_heap::gc_history gc_heap::gchist_per_heap[max_history_count];
#endif //MULTIPLE_HEAPS
#endif //BACKGROUND_GC

// Records a snapshot of this heap's state for the current GC into the
// gchist_per_heap ring buffer and advances the write index, wrapping at
// max_history_count. Compiles to an empty function unless both GC_HISTORY and
// BACKGROUND_GC are defined.
void gc_heap::add_to_history_per_heap()
{
#if defined(GC_HISTORY) && defined(BACKGROUND_GC)
    gc_history* entry = &gchist_per_heap[gchist_index_per_heap];

    // Which GC this is and the BGC state it observed.
    entry->gc_index = settings.gc_index;
    entry->current_bgc_state = current_bgc_state;

    // Timing of the gen0 dynamic data; efficiency is promoted bytes per unit
    // of elapsed time (or just the promoted bytes when no time elapsed).
    size_t gc_elapsed = dd_gc_elapsed_time (dynamic_data_of (0));
    entry->gc_time_ms = (uint32_t)(gc_elapsed / 1000);
    entry->gc_efficiency = (gc_elapsed ? (total_promoted_bytes / gc_elapsed) : total_promoted_bytes);

#ifndef USE_REGIONS
    // Ephemeral range boundaries only exist in the segments configuration.
    entry->eph_low = generation_allocation_start (generation_of (max_generation - 1));
    entry->gen0_start = generation_allocation_start (generation_of (0));
    entry->eph_high = heap_segment_allocated (ephemeral_heap_segment);
#endif //!USE_REGIONS

    // Address ranges: saved BGC range (BACKGROUND_GC is guaranteed by the
    // enclosing #if), this heap's range and the global range.
    entry->bgc_lowest = background_saved_lowest_address;
    entry->bgc_highest = background_saved_highest_address;
    entry->fgc_lowest = lowest_address;
    entry->fgc_highest = highest_address;
    entry->g_lowest = g_gc_lowest_address;
    entry->g_highest = g_gc_highest_address;

    // Advance the ring buffer position.
    if (++gchist_index_per_heap == max_history_count)
    {
        gchist_index_per_heap = 0;
    }
#endif //GC_HISTORY && BACKGROUND_GC
}

// Stores the current GC settings into the gchist ring buffer and advances the
// write index, wrapping at max_history_count. Compiles to an empty function
// unless both GC_HISTORY and BACKGROUND_GC are defined.
void gc_heap::add_to_history()
{
#if defined(GC_HISTORY) && defined(BACKGROUND_GC)
    gc_mechanisms_store* slot = &gchist[gchist_index];
    slot->store (&settings);

    // Advance the ring buffer position.
    if (++gchist_index == max_history_count)
    {
        gchist_index = 0;
    }
#endif //GC_HISTORY && BACKGROUND_GC
}

#ifdef GC_CONFIG_DRIVEN

BOOL   gc_config_log_on = FALSE;
FILE* gc_config_log = NULL;

// we keep this much in a buffer and only flush when the buffer is full
#define gc_config_log_buffer_size (1*1024) // TEMP
uint8_t* gc_config_log_buffer = 0;
size_t gc_config_log_buffer_offset = 0;

// Formats one config log message and appends it, preceded by a newline, to the
// in-memory gc_config_log_buffer. The buffer is written to gc_config_log and
// flushed only when the next message would not fit.
//
// Since we log so little for config we keep the whole history. It is only ever
// logged by one thread, so there is no synchronization (note the static
// scratch buffer below).
//
// fmt/args: printf-style format string and its already-started argument list.
// A message longer than the scratch buffer is truncated; the assert flags that
// in checked builds, and release builds log the truncated text.
void log_va_msg_config(const char *fmt, va_list args)
{
    const int BUFFERSIZE = 256;
    static char rgchBuffer[BUFFERSIZE];
    char *  pBuffer  = &rgchBuffer[0];

    pBuffer[0] = '\n';
    int buffer_start = 1;
    const int capacity = BUFFERSIZE - buffer_start;

    // Start from an empty string so a formatter that fails without writing
    // anything cannot leave the previous message in place.
    pBuffer[buffer_start] = '\0';

    int msg_len = _vsnprintf_s (&pBuffer[buffer_start], capacity, _TRUNCATE, fmt, args );
    assert (msg_len != -1);

    // On truncation the formatter returns -1 (or, for vsnprintf-style
    // implementations, the length the full message would have had). In both
    // cases the number of bytes actually available is whatever was stored in
    // the scratch buffer, so measure that instead of trusting the return value.
    // Without this the message was dropped (length 0) or memcpy below read
    // past the end of rgchBuffer.
    if ((msg_len < 0) || (msg_len >= capacity))
    {
        msg_len = (int)strlen (&pBuffer[buffer_start]);
    }
    msg_len += buffer_start;

    // Not enough room left: write out what has accumulated and start over.
    if ((gc_config_log_buffer_offset + msg_len) > gc_config_log_buffer_size)
    {
        fwrite(gc_config_log_buffer, gc_config_log_buffer_offset, 1, gc_config_log);
        fflush(gc_config_log);
        gc_config_log_buffer_offset = 0;
    }

    memcpy (gc_config_log_buffer + gc_config_log_buffer_offset, pBuffer, msg_len);
    gc_config_log_buffer_offset += msg_len;
}

// printf-style entry point for config logging. Does nothing unless config
// logging is switched on (gc_config_log_on) and the log file is open;
// otherwise forwards the format string and arguments to log_va_msg_config.
void GCLogConfig (const char *fmt, ... )
{
    if (gc_config_log_on && (gc_config_log != NULL))
    {
        va_list     args;
        va_start( args, fmt );
        log_va_msg_config (fmt, args);
        // Every va_start must be paired with va_end before returning.
        va_end( args );
    }
}
#endif // GC_CONFIG_DRIVEN

// Shutdown hook. Its only work is to flush the GC trace log, and only in
// builds that define TRACE_GC and SIMPLE_DPRINTF and are not built as a
// standalone GC; in every other configuration this function is empty.
void GCHeap::Shutdown()
{
#if defined(TRACE_GC) && defined(SIMPLE_DPRINTF) && !defined(BUILD_AS_STANDALONE)
    // Standalone GC is excluded because on Windows the log file handle has
    // already been closed in DllMain by the time we get here.
    flush_gc_log (true);
#endif //TRACE_GC && SIMPLE_DPRINTF && !BUILD_AS_STANDALONE
}

#ifdef SYNCHRONIZATION_STATS
// Number of GCs we have done since we last logged.
static unsigned int         gc_count_during_log;
 // In ms. This is how often we print out stats.
static const unsigned int   log_interval = 5000;
// Time (in ms) when we start a new log interval.
static uint64_t             log_start_tick;
static unsigned int         gc_lock_contended;
static int64_t              log_start_hires;
// Cycles accumulated in SuspendEE during log_interval.
static uint64_t             suspend_ee_during_log;
// Cycles accumulated in RestartEE during log_interval.
static uint64_t             restart_ee_during_log;
static uint64_t             gc_during_log;
#endif //SYNCHRONIZATION_STATS

// Called once per GC to account for it in the synchronization statistics.
// The first GC of a logging interval (counter still zero) also resets all
// accumulated counters and records the interval's start timestamps.
// Empty unless SYNCHRONIZATION_STATS is defined.
void
init_sync_log_stats()
{
#ifdef SYNCHRONIZATION_STATS
    const bool starting_new_interval = (gc_count_during_log == 0);
    if (starting_new_interval)
    {
        gc_heap::init_sync_stats();

        // Clear everything accumulated during the previous interval.
        suspend_ee_during_log = 0;
        restart_ee_during_log = 0;
        gc_during_log = 0;
        gc_lock_contended = 0;

        // Remember when this interval began, in both time bases.
        log_start_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
        log_start_hires = GCToOSInterface::QueryPerformanceCounter();
    }

    ++gc_count_during_log;
#endif //SYNCHRONIZATION_STATS
}

// Prints the synchronization statistics gathered since the start of the
// current logging interval once more than log_interval ms have elapsed, then
// resets the GC counter so that the next init_sync_log_stats() call starts a
// new interval. Empty unless SYNCHRONIZATION_STATS is defined.
void
process_sync_log_stats()
{
#ifdef SYNCHRONIZATION_STATS

    uint64_t log_elapsed = GCToOSInterface::GetLowPrecisionTimeStamp() - log_start_tick;

    // The averages below divide by gc_count_during_log, so an interval in
    // which no GC has been recorded is skipped rather than dividing by zero.
    if ((log_elapsed > log_interval) && (gc_count_during_log != 0))
    {
        uint64_t total = GCToOSInterface::QueryPerformanceCounter() - log_start_hires;
        // Print out the cycles we spent on average in each suspend and restart.
        // All integer arguments are unsigned, so the conversions are %u.
        printf("\n_________________________________________________________________________________\n"
            "Past %u(s): #%3u GCs; Total gc_lock contended: %8u; GC: %12u\n"
            "SuspendEE: %8u; RestartEE: %8u GC %.3f%%\n",
            log_interval / 1000,
            gc_count_during_log,
            gc_lock_contended,
            (unsigned int)(gc_during_log / gc_count_during_log),
            (unsigned int)(suspend_ee_during_log / gc_count_during_log),
            (unsigned int)(restart_ee_during_log / gc_count_during_log),
            (double)(100.0f * gc_during_log / total));
        gc_heap::print_sync_stats(gc_count_during_log);

        // Zero tells init_sync_log_stats() to start a fresh interval.
        gc_count_during_log = 0;
    }
#endif //SYNCHRONIZATION_STATS
}

#ifdef MULTIPLE_HEAPS
uint32_t g_num_active_processors = 0;

// Identifies the point in a GC at which threads are joining; the value is
// passed as the join id to the t_join methods and reported in join events.
//
// Note that when a join is no longer used we still keep the values here because
// tooling already recognized them as having the meaning they were assigned originally.
// It doesn't break tooling if we stop using them but does if we assign a new meaning
// to them.
enum gc_join_stage
{
    gc_join_init_cpu_mapping = 0,
    gc_join_done = 1,
    gc_join_generation_determined = 2,
    gc_join_begin_mark_phase = 3,
    gc_join_scan_dependent_handles = 4,
    gc_join_rescan_dependent_handles = 5,
    gc_join_scan_sizedref_done = 6,
    gc_join_null_dead_short_weak = 7,
    gc_join_scan_finalization = 8,
    gc_join_null_dead_long_weak = 9,
    gc_join_null_dead_syncblk = 10,
    gc_join_decide_on_compaction = 11,
    gc_join_rearrange_segs_compaction = 12,
    gc_join_adjust_handle_age_compact = 13,
    gc_join_adjust_handle_age_sweep = 14,
    gc_join_begin_relocate_phase = 15,
    gc_join_relocate_phase_done = 16,
    gc_join_verify_objects_done = 17,
    gc_join_start_bgc = 18,
    gc_join_restart_ee = 19,
    gc_join_concurrent_overflow = 20,
    gc_join_suspend_ee = 21,
    gc_join_bgc_after_ephemeral = 22,
    gc_join_allow_fgc = 23,
    gc_join_bgc_sweep = 24,
    gc_join_suspend_ee_verify = 25,
    gc_join_restart_ee_verify = 26,
    gc_join_set_state_free = 27,
    gc_r_join_update_card_bundle = 28,
    gc_join_after_absorb = 29,
    gc_join_verify_copy_table = 30,
    gc_join_after_reset = 31,
    gc_join_after_ephemeral_sweep = 32,
    gc_join_after_profiler_heap_walk = 33,
    gc_join_minimal_gc = 34,
    gc_join_after_commit_soh_no_gc = 35,
    gc_join_expand_loh_no_gc = 36,
    gc_join_final_no_gc = 37,
    // No longer in use but do not remove, see comments for this enum.
    gc_join_disable_software_write_watch = 38,
    gc_join_merge_temp_fl = 39,
    gc_join_bridge_processing = 40,
    // Number of join stages; sizes the per-stage statistics arrays in t_join
    // (JOIN_STATS builds). Must stay last and grow when a stage is added.
    gc_join_max = 41
};

// Tags a t_join instance with the kind of GC threads that use it. It is
// supplied to t_join::init and, in the code visible here, only shows up in the
// join log messages.
enum gc_join_flavor
{
    join_flavor_server_gc = 0,
    join_flavor_bgc = 1
};

// Index into join_structure::joined_event of the event used by the reverse
// join (t_join::r_join / r_restart / r_init). Indices 0 and 1 are selected by
// the current lock color.
#define first_thread_arrived 2
#pragma warning(push)
#pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads
// Shared state of one join (barrier) used by t_join. The members are grouped
// onto separate cache lines (HS_CACHE_LINE_SIZE alignment) according to how
// often they are written.
struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure
{
    // Shared non volatile keep on separate line to prevent eviction
    // Number of threads that take part in each join.
    int n_threads;

    // Keep polling/wait structures on separate line write once per join
    DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE)
    GCEvent joined_event[3]; // the last event in the array is only used for first_thread_arrived.
    // Selects which of joined_event[0] / joined_event[1] waiters of the
    // current join block on; flipped by t_join::restart.
    Volatile<int> lock_color;
    // Set by t_join::r_restart once the first thread of a reverse join has
    // finished its work; cleared by t_join::r_init.
    VOLATILE(BOOL) wait_done;
    // TRUE from the moment the last thread arrives in t_join::join until
    // t_join::restart is called.
    VOLATILE(BOOL) joined_p;

    // Keep volatile counted locks on separate cache line write many per join
    DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE)
    // Count of threads that have not yet arrived at the current join;
    // decremented by every arriving thread and reset to n_threads on restart.
    VOLATILE(int) join_lock;
    // Holds n_threads until the first thread arrives at a reverse join, which
    // swaps it to 0; reset by t_join::r_init.
    VOLATILE(int) r_join_lock;

};
#pragma warning(pop)

// Kind of join activity reported through t_join::fire_event.
enum join_type
{
    // Reported by the last thread to arrive at a join.
    type_last_join = 0,
    // Reported by a thread that has to wait in join() or r_join().
    type_join = 1,
    // Reported by restart() and r_restart().
    type_restart = 2,
    // Reported by the first thread to arrive at a reverse join.
    type_first_r_join = 3,
    // NOTE(review): not used by the t_join code in this part of the file;
    // r_join() waiters currently report type_join.
    type_r_join = 4
};

// Marks whether a join event reported through t_join::fire_event is the
// beginning or the end of the reported activity.
enum join_time
{
    time_start = 0,
    time_end = 1
};

// Placeholder values passed as the heap argument of t_join::fire_event by the
// restart paths, which do not receive a gc_heap to take a heap number from.
enum join_heap_index
{
    // Used by t_join::restart.
    join_heap_restart = 100,
    // Used by t_join::r_restart.
    join_heap_r_restart = 200
};

class t_join
{
    join_structure join_struct;

    int id;
    gc_join_flavor flavor;

#ifdef JOIN_STATS
    uint64_t start[MAX_SUPPORTED_CPUS], end[MAX_SUPPORTED_CPUS], start_seq;
    // remember join id and last thread to arrive so restart can use these
    int thd;
    // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval
    uint64_t start_tick;
    // counters for joins, in 1000's of clock cycles
    uint64_t elapsed_total[gc_join_max], wake_total[gc_join_max], seq_loss_total[gc_join_max], par_loss_total[gc_join_max], in_join_total[gc_join_max];
#endif //JOIN_STATS

public:
    BOOL init (int n_th, gc_join_flavor f)
    {
        dprintf (JOIN_LOG, ("Initializing join structure"));
        join_struct.n_threads = n_th;
        join_struct.lock_color = 0;
        for (int i = 0; i < 3; i++)
        {
            if (!join_struct.joined_event[i].IsValid())
            {
                join_struct.joined_p = FALSE;
                dprintf (JOIN_LOG, ("Creating join event %d", i));
                // TODO - changing this to a non OS event
                // because this is also used by BGC threads which are
                // managed threads and WaitEx does not allow you to wait
                // for an OS event on a managed thread.
                // But we are not sure if this plays well in the hosting
                // environment.
                //join_struct.joined_event[i].CreateOSManualEventNoThrow(FALSE);
                if (!join_struct.joined_event[i].CreateManualEventNoThrow(FALSE))
                    return FALSE;
            }
        }
        join_struct.join_lock = join_struct.n_threads;
        join_struct.r_join_lock = join_struct.n_threads;
        join_struct.wait_done = FALSE;
        flavor = f;

#ifdef JOIN_STATS
        start_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
#endif //JOIN_STATS

        return TRUE;
    }

    void update_n_threads(int n_th)
    {
        join_struct.n_threads = n_th;
        join_struct.join_lock = n_th;
        join_struct.r_join_lock = n_th;
    }

    int get_num_threads()
    {
        return join_struct.n_threads;
    }

    // This is for instrumentation only.
    int get_join_lock()
    {
        return VolatileLoadWithoutBarrier (&join_struct.join_lock);
    }

    void destroy ()
    {
        dprintf (JOIN_LOG, ("Destroying join structure"));
        for (int i = 0; i < 3; i++)
        {
            if (join_struct.joined_event[i].IsValid())
                join_struct.joined_event[i].CloseEvent();
        }
    }

    inline void fire_event (int heap, join_time time, join_type type, int join_id)
    {
        FIRE_EVENT(GCJoin_V2, heap, time, type, join_id);
    }

    void join (gc_heap* gch, int join_id)
    {
#ifdef JOIN_STATS
        // parallel execution ends here
        end[gch->heap_number] = get_ts();
#endif //JOIN_STATS

        assert (!join_struct.joined_p);
        int color = join_struct.lock_color.LoadWithoutBarrier();

        if (Interlocked::Decrement(&join_struct.join_lock) != 0)
        {
            dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d",
                flavor, join_id, (int32_t)(join_struct.join_lock)));

            fire_event (gch->heap_number, time_start, type_join, join_id);

            //busy wait around the color
            if (color == join_struct.lock_color.LoadWithoutBarrier())
            {
respin:
                int spin_count = 128 * yp_spin_count_unit;
                for (int j = 0; j < spin_count; j++)
                {
                    if (color != join_struct.lock_color.LoadWithoutBarrier())
                    {
                        break;
                    }
                    YieldProcessor();           // indicate to the processor that we are spinning
                }

                // we've spun, and if color still hasn't changed, fall into hard wait
                if (color == join_struct.lock_color.LoadWithoutBarrier())
                {
                    dprintf (JOIN_LOG, ("join%d(%d): Join() hard wait on reset event %d, join_lock is now %d",
                        flavor, join_id, color, (int32_t)(join_struct.join_lock)));

                    uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE);

                    if (dwJoinWait != WAIT_OBJECT_0)
                    {
                        STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait);
                        FATAL_GC_ERROR ();
                    }
                }

                // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent()
                if (color == join_struct.lock_color.LoadWithoutBarrier())
                {
                    dprintf (9999, ("---h%d %d j%d %d - respin!!! (c:%d-%d)",
                        gch->heap_number, join_id, join_struct.n_threads, color, join_struct.lock_color.LoadWithoutBarrier()));
                    goto respin;
                }

                dprintf (JOIN_LOG, ("join%d(%d): Join() done, join_lock is %d",
                    flavor, join_id, (int32_t)(join_struct.join_lock)));
            }

            fire_event (gch->heap_number, time_end, type_join, join_id);

#ifdef JOIN_STATS
            // parallel execution starts here
            start[gch->heap_number] = get_ts();
            Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number]));
#endif //JOIN_STATS
        }
        else
        {
            fire_event (gch->heap_number, time_start, type_last_join, join_id);

            join_struct.joined_p = TRUE;
            dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id));
            join_struct.joined_event[!color].Reset();
            id = join_id;
#ifdef JOIN_STATS
            // remember the join id, the last thread arriving, the start of the sequential phase,
            // and keep track of the cycles spent waiting in the join
            thd = gch->heap_number;
            start_seq = get_ts();
            Interlocked::ExchangeAdd(&in_join_total[join_id], (start_seq - end[gch->heap_number]));
#endif //JOIN_STATS
        }
    }

    // Reverse join - first thread gets here does the work; other threads will only proceed
    // after the work is done.
    // Note that you cannot call this twice in a row on the same thread. Plus there's no
    // need to call it twice in row - you should just merge the work.
    BOOL r_join (gc_heap* gch, int join_id)
    {

        if (join_struct.n_threads == 1)
        {
            return TRUE;
        }

        if (Interlocked::CompareExchange(&join_struct.r_join_lock, 0, join_struct.n_threads) == 0)
        {
            fire_event (gch->heap_number, time_start, type_join, join_id);

            dprintf (JOIN_LOG, ("r_join() Waiting..."));

            //busy wait around the color
respin:
            int spin_count = 256 * yp_spin_count_unit;
            for (int j = 0; j < spin_count; j++)
            {
                if (join_struct.wait_done)
                {
                    break;
                }
                YieldProcessor();           // indicate to the processor that we are spinning
            }

            // we've spun, and if color still hasn't changed, fall into hard wait
            if (!join_struct.wait_done)
            {
                dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived));
                uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE);
                if (dwJoinWait != WAIT_OBJECT_0)
                {
                    STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait);
                    FATAL_GC_ERROR ();
                }
            }

            // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent()
            if (!join_struct.wait_done)
            {
                goto respin;
            }

            dprintf (JOIN_LOG, ("r_join() done"));

            fire_event (gch->heap_number, time_end, type_join, join_id);

            return FALSE;
        }
        else
        {
            fire_event (gch->heap_number, time_start, type_first_r_join, join_id);
            return TRUE;
        }
    }

#ifdef JOIN_STATS
    uint64_t get_ts()
    {
        return GCToOSInterface::QueryPerformanceCounter();
    }

    void start_ts (gc_heap* gch)
    {
        // parallel execution ends here
        start[gch->heap_number] = get_ts();
    }
#endif //JOIN_STATS

    void restart()
    {
#ifdef JOIN_STATS
        uint64_t elapsed_seq = get_ts() - start_seq;
        uint64_t max = 0, sum = 0, wake = 0;
        uint64_t min_ts = start[0];
        for (int i = 1; i < join_struct.n_threads; i++)
        {
            if(min_ts > start[i]) min_ts = start[i];
        }

        for (int i = 0; i < join_struct.n_threads; i++)
        {
            uint64_t wake_delay = start[i] - min_ts;
            uint64_t elapsed = end[i] - start[i];
            if (max < elapsed)
                max = elapsed;
            sum += elapsed;
            wake += wake_delay;
        }
        uint64_t seq_loss = (join_struct.n_threads - 1)*elapsed_seq;
        uint64_t par_loss = join_struct.n_threads*max - sum;
        double efficiency = 0.0;
        if (max > 0)
            efficiency = sum*100.0/(join_struct.n_threads*max);

        const double ts_scale = 1e-6;

        // enable this printf to get statistics on each individual join as it occurs
        //printf("join #%3d  seq_loss = %5g   par_loss = %5g  efficiency = %3.0f%%\n", join_id, ts_scale*seq_loss, ts_scale*par_loss, efficiency);

        elapsed_total[id] += sum;
        wake_total[id] += wake;
        seq_loss_total[id] += seq_loss;
        par_loss_total[id] += par_loss;

        // every 10 seconds, print a summary of the time spent in each type of join
        if (GCToOSInterface::GetLowPrecisionTimeStamp() - start_tick > 10*1000)
        {
            printf("**** summary *****\n");
            for (int i = 0; i < 16; i++)
            {
                printf("join #%3d  elapsed_total = %8g wake_loss = %8g seq_loss = %8g  par_loss = %8g  in_join_total = %8g\n",
                   i,
                   ts_scale*elapsed_total[i],
                   ts_scale*wake_total[i],
                   ts_scale*seq_loss_total[i],
                   ts_scale*par_loss_total[i],
                   ts_scale*in_join_total[i]);
                elapsed_total[i] = wake_total[i] = seq_loss_total[i] = par_loss_total[i] = in_join_total[i] = 0;
            }
            start_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
        }
#endif //JOIN_STATS

        fire_event (join_heap_restart, time_start, type_restart, -1);
        assert (join_struct.joined_p);
        join_struct.joined_p = FALSE;
        join_struct.join_lock = join_struct.n_threads;
        dprintf (JOIN_LOG, ("join%d(%d): Restarting from join: join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock)));
        int color = join_struct.lock_color.LoadWithoutBarrier();
        join_struct.lock_color = !color;
        join_struct.joined_event[color].Set();

        fire_event (join_heap_restart, time_end, type_restart, -1);

#ifdef JOIN_STATS
        start[thd] = get_ts();
#endif //JOIN_STATS
    }

    // Returns the current value of join_struct.joined_p.
    // join_struct.join_lock is read only for the JOIN_LOG trace line.
    BOOL joined()
    {
        dprintf (JOIN_LOG, ("join%d(%d): joined, join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock)));
        return join_struct.joined_p;
    }

    // Marks the r-join wait as done and signals the first_thread_arrived event,
    // bracketed by join_heap_r_restart start/end events.
    // Does nothing when the join has exactly one thread.
    void r_restart()
    {
        if (join_struct.n_threads == 1)
        {
            return;
        }

        fire_event (join_heap_r_restart, time_start, type_restart, -1);

        // Publish completion before waking anyone blocked on the event.
        join_struct.wait_done = TRUE;
        join_struct.joined_event[first_thread_arrived].Set();

        fire_event (join_heap_r_restart, time_end, type_restart, -1);
    }

    // Re-arms the r-join state: r_join_lock is reset to the thread count,
    // wait_done is cleared and the first_thread_arrived event is reset.
    // Does nothing when the join has exactly one thread.
    void r_init()
    {
        if (join_struct.n_threads == 1)
        {
            return;
        }

        join_struct.r_join_lock = join_struct.n_threads;
        join_struct.wait_done = FALSE;
        join_struct.joined_event[first_thread_arrived].Reset();
    }
};

t_join gc_t_join;

#ifdef BACKGROUND_GC
t_join bgc_t_join;
#endif //BACKGROUND_GC

#endif //MULTIPLE_HEAPS

// spin_and_switch (count_to_spin, expr)
//
// Polls `expr` up to count_to_spin times, issuing YieldProcessor() between
// polls, and stops early as soon as `expr` is true. If `expr` is still false
// afterwards, yields the thread once via GCToOSInterface::YieldThread(0).
// This is a single bounded attempt: it does NOT loop until `expr` holds, so
// callers re-check their condition (the users in this file `goto retry`).
//
// NOTE(review): `expr` is evaluated multiple times and the expansion declares
// a local named `j`, so `expr` must be side-effect tolerant and must not
// reference a variable called `j`. `count_to_spin` is used unparenthesized.
#define spin_and_switch(count_to_spin, expr) \
{ \
    for (int j = 0; j < count_to_spin; j++) \
    { \
        if (expr) \
        { \
            break;\
        } \
        YieldProcessor(); \
    } \
    if (!(expr)) \
    { \
        GCToOSInterface::YieldThread(0); \
    } \
}

// spin_and_wait (count_to_spin, expr)
//
// Blocks (by spinning) until `expr` becomes true. Each round polls `expr` up
// to count_to_spin times with YieldProcessor() between polls; if `expr` is
// still false at the end of a round the thread is yielded via
// GCToOSInterface::YieldThread(0) and another round starts.
//
// Both macro arguments are parenthesized at every use so that compound
// arguments (e.g. `a == b`, `x ? y : z`) keep their intended meaning; in
// particular the loop condition is `!(expr)`, not `!expr`.
//
// `expr` is evaluated many times and the expansion declares a local named `j`,
// so `expr` must tolerate re-evaluation and must not reference a variable
// called `j`.
#define spin_and_wait(count_to_spin, expr) \
{ \
    while (!(expr)) \
    { \
        for (int j = 0; j < (count_to_spin); j++) \
        { \
            if (expr) \
            { \
                break; \
            } \
            YieldProcessor (); \
        } \
        if (!(expr)) \
        { \
            GCToOSInterface::YieldThread (0); \
        } \
    } \
}

#ifdef BACKGROUND_GC

#define max_pending_allocs 64

// Mutual exclusion between one "rwp" object (set by bgc_mark_set, cleared by
// bgc_mark_done) and up to max_pending_allocs in-flight UOH allocation
// objects (set by uoh_alloc_set, cleared by uoh_alloc_done*).
//
// The invariant maintained by the set operations is that an object address is
// never simultaneously recorded as rwp_object and in alloc_objects: whichever
// side arrives second spins until the other side releases that address.
//
// needs_checking acts as a small spin lock (0 = free, 1 = held) that is taken
// with an interlocked compare-exchange while the tables are inspected/updated
// by the set operations. The release operations (bgc_mark_done,
// uoh_alloc_done_with_index) simply store null without taking it.
class exclusive_sync
{
    // Object currently claimed through bgc_mark_set, or null.
    VOLATILE(uint8_t*) rwp_object;
    // 0 when free, 1 while a set operation is scanning/updating the tables.
    VOLATILE(int32_t) needs_checking;

    // Number of YieldProcessor polls passed to spin_and_switch; set in init().
    int spin_count;

    // Pads the fields above out to HS_CACHE_LINE_SIZE bytes so alloc_objects
    // starts after them.
    uint8_t cache_separator[HS_CACHE_LINE_SIZE - (sizeof (spin_count) + sizeof (needs_checking) + sizeof (rwp_object))];

    // TODO - perhaps each object should be on its own cache line...
    // Objects currently claimed through uoh_alloc_set; null marks a free slot.
    VOLATILE(uint8_t*) alloc_objects[max_pending_allocs];

    // Returns the index of the first null slot in alloc_objects, or -1 if
    // every slot is occupied.
    int find_free_index ()
    {
        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] == (uint8_t*)0)
            {
                return i;
            }
        }

        return -1;
    }

public:
    // Resets all state: no rwp object, lock free, every alloc slot empty.
    // spin_count is derived from g_num_processors (0 on a single processor).
    void init()
    {
        spin_count = 32 * (g_num_processors - 1);
        rwp_object = 0;
        needs_checking = 0;
        for (int i = 0; i < max_pending_allocs; i++)
        {
            alloc_objects [i] = (uint8_t*)0;
        }
    }

    // Consistency check: raises FATAL_GC_ERROR if any alloc slot is still
    // occupied.
    void check()
    {
        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] != (uint8_t*)0)
            {
                FATAL_GC_ERROR();
            }
        }
    }

    // Claims obj as the rwp object. If obj is currently recorded in
    // alloc_objects, releases the lock, spins (bounded) waiting for that slot
    // to change, and retries from the top. Returns only once rwp_object has
    // been set to obj.
    void bgc_mark_set (uint8_t* obj)
    {
        dprintf (3, ("cm: probing %p", obj));
retry:
        if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0)
        {
            // If we spend too much time scanning all the allocs,
            // consider adding a high water mark and scan up
            // to that; we'll need to interlock in done when
            // we update the high watermark.
            for (int i = 0; i < max_pending_allocs; i++)
            {
                if (obj == alloc_objects[i])
                {
                    // Drop the lock before spinning so the allocator side
                    // is not blocked by us while it finishes with obj.
                    needs_checking = 0;
                    dprintf (3, ("cm: will spin"));
                    spin_and_switch (spin_count, (obj != alloc_objects[i]));
                    goto retry;
                }
            }

            rwp_object = obj;
            needs_checking = 0;
            dprintf (3, ("cm: set %p", obj));
            return;
        }
        else
        {
            // Someone else holds the lock; wait a bounded time and retry.
            spin_and_switch (spin_count, (needs_checking == 0));
            goto retry;
        }
    }

    // Records obj in a free alloc slot and returns that slot's index (the
    // "cookie" to pass to uoh_alloc_done_with_index).
    // Returns -1 without recording anything when gc_heap::cm_in_progress is
    // false. Otherwise retries until obj is not the current rwp_object and a
    // free slot is available.
    int uoh_alloc_set (uint8_t* obj)
    {
        if (!gc_heap::cm_in_progress)
        {
            return -1;
        }

retry:
        dprintf (3, ("uoh alloc: probing %p", obj));

        if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0)
        {
            if (obj == rwp_object)
            {
                // obj is claimed through bgc_mark_set; release the lock and
                // wait for bgc_mark_done (or a different rwp object).
                needs_checking = 0;
                spin_and_switch (spin_count, (obj != rwp_object));
                goto retry;
            }
            else
            {
                int cookie = find_free_index();

                if (cookie != -1)
                {
                    alloc_objects[cookie] = obj;
                    needs_checking = 0;
                    //if (cookie >= 4)
                    //{
                    //    GCToOSInterface::DebugBreak();
                    //}

                    dprintf (3, ("uoh alloc: set %p at %d", obj, cookie));
                    return cookie;
                }
                else
                {
                    // All slots are in use; release the lock and wait for
                    // one to free up.
                    needs_checking = 0;
                    dprintf (3, ("uoh alloc: setting %p will spin to acquire a free index", obj));
                    spin_and_switch (spin_count, (find_free_index () != -1));
                    goto retry;
                }
            }
        }
        else
        {
            dprintf (3, ("uoh alloc: will spin on checking %p", obj));
            spin_and_switch (spin_count, (needs_checking == 0));
            goto retry;
        }
    }

    // Releases the rwp object claimed by bgc_mark_set.
    void bgc_mark_done ()
    {
        dprintf (3, ("cm: release lock on %p", (uint8_t *)rwp_object));
        rwp_object = 0;
    }

    // Frees the alloc slot identified by index (a cookie previously returned
    // by uoh_alloc_set). index must be in [0, max_pending_allocs).
    void uoh_alloc_done_with_index (int index)
    {
        // Validate before touching the array: the trace below indexes
        // alloc_objects with `index`.
        assert ((index >= 0) && (index < max_pending_allocs));
        dprintf (3, ("uoh alloc: release lock on %p based on %d", (uint8_t *)alloc_objects[index], index));
        alloc_objects[index] = (uint8_t*)0;
    }

    // Frees the first alloc slot holding obj. Does nothing when
    // gc_heap::cm_in_progress is false; only logs if obj is not found.
    void uoh_alloc_done (uint8_t* obj)
    {
        if (!gc_heap::cm_in_progress)
        {
            return;
        }

        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] == obj)
            {
                uoh_alloc_done_with_index(i);
                return;
            }
        }
        dprintf (3, ("uoh alloc: could not release lock on %p", obj));
    }
};

#endif //BACKGROUND_GC

void reset_memory (uint8_t* o, size_t sizeo);

#ifdef WRITE_WATCH

#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
static bool virtual_alloc_hardware_write_watch = false;
#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

static bool hardware_write_watch_capability = false;

// Queries the OS layer once for write watch support and, when available,
// latches hardware_write_watch_capability to true. The flag is never reset
// to false here.
void hardware_write_watch_api_supported()
{
    if (!GCToOSInterface::SupportsWriteWatch())
    {
        dprintf (2,("WriteWatch not supported"));
        return;
    }

    hardware_write_watch_capability = true;
    dprintf (2, ("WriteWatch supported"));
}

// Returns the flag set by hardware_write_watch_api_supported(); false until
// that function has run and found write watch support.
inline bool can_use_hardware_write_watch()
{
    return hardware_write_watch_capability;
}

// Whether write watch can be used for the GC heap: always true when
// FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is defined, otherwise it
// depends on hardware write watch being available.
inline bool can_use_write_watch_for_gc_heap()
{
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    return true;
#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    return can_use_hardware_write_watch();
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
}

// Whether write watch can be used for the card table: always true when
// FEATURE_MANUALLY_MANAGED_CARD_BUNDLES is defined, otherwise it depends on
// hardware write watch being available.
inline bool can_use_write_watch_for_card_table()
{
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    return true;
#else
    return can_use_hardware_write_watch();
#endif
}

#else //WRITE_WATCH
#define mem_reserve (MEM_RESERVE)
#endif //WRITE_WATCH

// Slow-path back-off used by the spin-lock acquisition loops in this file.
//
// i is the caller's running attempt counter; it selects between yielding the
// thread and sleeping (see below).
//
// Sequence: switch to preemptive mode, back off (unless a suspension is
// pending), then either switch back to cooperative mode or, if the mode was
// not toggled and a suspension is pending, wait for the GC to complete.
void WaitLongerNoInstru (int i)
{
    // every 8th attempt:
    // NOTE(review): bToggleGC presumably reports whether this call actually
    // switched the thread out of cooperative mode - confirm against
    // GCToEEInterface::EnablePreemptiveGC.
    bool bToggleGC = GCToEEInterface::EnablePreemptiveGC();

    // if we're waiting for gc to finish, we should block immediately
    if (g_fSuspensionPending == 0)
    {
        if  (g_num_processors > 1)
        {
            YieldProcessor();           // indicate to the processor that we are spinning
            // Sleep only when the low 5 bits of i are all zero (i.e. i is a
            // multiple of 32); otherwise just give up the time slice.
            if  (i & 0x01f)
                GCToOSInterface::YieldThread (0);
            else
                GCToOSInterface::Sleep (5);
        }
        else
            GCToOSInterface::Sleep (5);
    }

    // If CLR is hosted, a thread may reach here while it is in preemptive GC mode,
    // or it has no Thread object, in order to force a task to yield, or to trigger a GC.
    // It is important that the thread is going to wait for GC.  Otherwise the thread
    // is in a tight loop.  If the thread has high priority, the perf is going to be very BAD.
    if (bToggleGC)
    {
#ifdef _DEBUG
        // In debug builds, all enter_spin_lock operations go through this code.  If a GC has
        // started, it is important to block until the GC thread calls set_gc_done (since it is
        // guaranteed to have cleared g_TrapReturningThreads by this point).  This avoids livelock
        // conditions which can otherwise occur if threads are allowed to spin in this function
        // (and therefore starve the GC thread) between the point when the GC thread sets the
        // WaitForGC event and the point when the GC thread clears g_TrapReturningThreads.
        if (gc_heap::gc_started)
        {
            gc_heap::wait_for_gc_done();
        }
#endif // _DEBUG
        GCToEEInterface::DisablePreemptiveGC();
    }
    else if (g_fSuspensionPending > 0)
    {
        g_theGCHeap->WaitUntilGCComplete();
    }
}

// Gives up the current time slice while in preemptive mode: enables
// preemptive mode, yields the thread, then restores cooperative mode if
// enable_preemptive() reported that it should be restored.
inline
static void safe_switch_to_thread()
{
    const bool restore_cooperative = gc_heap::enable_preemptive();
    GCToOSInterface::YieldThread(0);
    gc_heap::disable_preemptive(restore_cooperative);
}

// check_msl_status (msg, size)
//
// Expands to an `if` that, when the enclosing scope's `msl_status` equals
// msl_retry_different_heap, logs msg/size and returns a_state_retry_allocate
// from the enclosing function. Requires `msl_status` and `heap_number` to be
// in scope at the point of use.
//
// NOTE(review): the expansion is a bare `if (...) { ... }`, so using it
// directly before an `else` would bind that `else` to the macro's `if`.
#define check_msl_status(msg, size) if (msl_status == msl_retry_different_heap) \
    { \
        dprintf (5555, ("h%d RETRY %s(%Id)", heap_number, msg, size)); \
        return a_state_retry_allocate; \
    }

// Values stored in a spin lock's `lock` word.
// lock_free:           nobody holds the lock; acquisition is a compare-exchange
//                      from lock_free to lock_taken.
// lock_taken:          the lock is held.
// lock_decommissioned: checked by gc_heap::should_move_heap; waiters on a
//                      more-space lock in this state retry on a different heap.
static const int32_t lock_free = -1;
static const int32_t lock_taken = 0;
static const int32_t lock_decommissioned = 1;


// If our heap got decommissioned, we need to try an existing heap.
//inline
// Returns true when msl's lock word is lock_decommissioned, meaning the
// caller should stop waiting on this heap and retry on a different one.
// Always returns false when MULTIPLE_HEAPS is not defined.
bool gc_heap::should_move_heap (GCSpinLock* msl)
{
#ifdef MULTIPLE_HEAPS
    // Sample the lock word exactly once so that the trace line and the
    // returned decision can never disagree if the word changes concurrently.
    const bool decommissioned = (msl->lock == lock_decommissioned);
    if (decommissioned)
    {
        dprintf (5555, ("heap#%d got decommissioned! need to retry", heap_number));
    }
    return decommissioned;
#else //MULTIPLE_HEAPS
    return false;
#endif //MULTIPLE_HEAPS
}

// All the places where we could be stopped because there was a suspension should call should_move_heap to check if we need to return
// so we can try another heap or we can continue the allocation on the same heap.
// Contended path for acquiring a more-space lock.
//
// Waits until msl->lock reads lock_free and then tries to take it with a
// compare-exchange; if that loses a race, the whole wait is repeated.
// Returns msl_entered once the lock is held, or msl_retry_different_heap as
// soon as should_move_heap() reports the lock as decommissioned.
//
// Under DYNAMIC_HEAP_COUNT, the time spent waiting (excluding time spent in
// safe_switch_to_thread / WaitLongerNoInstru) is added to msl->msl_wait_time.
enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl)
{
    do
    {
#ifdef DYNAMIC_HEAP_COUNT
        uint64_t start = GetHighPrecisionTimeStamp();
#endif //DYNAMIC_HEAP_COUNT

        unsigned int i = 0;
        while (VolatileLoad (&msl->lock) != lock_free)
        {
            if (should_move_heap (msl))
            {
                return msl_retry_different_heap;
            }
            // (++i & 7) is zero on every 8th iteration; those iterations, and
            // any iteration while a GC is in progress, take the else branch.
            if ((++i & 7) && !IsGCInProgress ())
            {
                if (g_num_processors > 1)
                {
#ifndef MULTIPLE_HEAPS
                    int spin_count = 32 * yp_spin_count_unit;
#else //!MULTIPLE_HEAPS
                    int spin_count = yp_spin_count_unit;
#endif //!MULTIPLE_HEAPS
                    for (int j = 0; j < spin_count; j++)
                    {
                        if (VolatileLoad (&msl->lock) == lock_free || IsGCInProgress ())
                            break;
                        // give the HT neighbor a chance to run
                        YieldProcessor ();
                    }
                    if (VolatileLoad (&msl->lock) != lock_free && !IsGCInProgress ())
                    {
                        // Subtracting the timestamp before and adding it back
                        // after moves `start` forward by the time spent in the
                        // call, so that time is excluded from (end - start).
#ifdef DYNAMIC_HEAP_COUNT
                        start -= GetHighPrecisionTimeStamp();
#endif //DYNAMIC_HEAP_COUNT
                        safe_switch_to_thread ();
#ifdef DYNAMIC_HEAP_COUNT
                        start += GetHighPrecisionTimeStamp();
#endif //DYNAMIC_HEAP_COUNT
                    }
                }
                else
                {
                    // NOTE(review): unlike the multi-processor branch, this
                    // switch is not excluded from the measured wait time.
                    safe_switch_to_thread ();
                }
            }
            else
            {
                // Same exclusion of the back-off time as above.
#ifdef DYNAMIC_HEAP_COUNT
                start -= GetHighPrecisionTimeStamp();
#endif //DYNAMIC_HEAP_COUNT
                WaitLongerNoInstru (i);
#ifdef DYNAMIC_HEAP_COUNT
                start += GetHighPrecisionTimeStamp();
#endif //DYNAMIC_HEAP_COUNT
            }
        }
#ifdef DYNAMIC_HEAP_COUNT
        uint64_t end = GetHighPrecisionTimeStamp();
        Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start);
        dprintf (3, ("h%d wait for msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time));
#endif //DYNAMIC_HEAP_COUNT
    }
    // The lock looked free; try to take it. If another thread won, wait again.
    while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free);

    return msl_entered;
}

// Acquires a more-space lock. A single compare-exchange handles the
// uncontended case; on failure the work is delegated to
// enter_spin_lock_msl_helper, whose status is returned unchanged.
inline
enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl)
{
    if (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free)
    {
        // Contended: fall back to the spinning/waiting path.
        return enter_spin_lock_msl_helper (msl);
    }

    return msl_entered;
}

//
// We need the following methods to have volatile arguments, so that they can accept
// raw pointers in addition to the results of the & operator on Volatile<T>.
// These methods are never used for the more_space_lock_xxx locks, which is why
// "lock_decommissioned" cannot be observed here.
// Acquires the spin lock at *lock, blocking until it is obtained.
//
// Each failed compare-exchange starts a wait phase that lasts until the lock
// word reads lock_free, after which the acquisition is attempted again.
// Within the wait phase, every 8th iteration (or any iteration while a GC is
// in progress) backs off through WaitLongerNoInstru; the other iterations
// spin briefly on multi-processor machines and then switch threads.
inline
static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock)
{
    while (Interlocked::CompareExchange(lock, lock_taken, lock_free) != lock_free)
    {
        unsigned int attempts = 0;
        while (VolatileLoad(lock) != lock_free)
        {
            // will never be used for more_space_lock_xxx
            assert (VolatileLoad(lock) != lock_decommissioned);

            attempts++;
            if (((attempts & 7) == 0) || IsGCInProgress())
            {
                WaitLongerNoInstru(attempts);
                continue;
            }

            if (!(g_num_processors > 1))
            {
                // Spinning cannot help on a single processor.
                safe_switch_to_thread();
                continue;
            }

#ifndef MULTIPLE_HEAPS
            int spin_limit = 32 * yp_spin_count_unit;
#else //!MULTIPLE_HEAPS
            int spin_limit = yp_spin_count_unit;
#endif //!MULTIPLE_HEAPS
            for (int spin = 0; spin < spin_limit; spin++)
            {
                if  (VolatileLoad(lock) == lock_free || IsGCInProgress())
                {
                    break;
                }
                YieldProcessor();           // indicate to the processor that we are spinning
            }

            if  (VolatileLoad(lock) != lock_free && !IsGCInProgress())
            {
                safe_switch_to_thread();
            }
        }
    }
}

// Makes a single attempt to take the spin lock at *lock.
// Returns TRUE if the lock word was lock_free and is now lock_taken,
// FALSE otherwise. Never blocks.
inline
static BOOL try_enter_spin_lock_noinstru(RAW_KEYWORD(volatile) int32_t* lock)
{
    if (Interlocked::CompareExchange(lock, lock_taken, lock_free) == lock_free)
    {
        return TRUE;
    }

    return FALSE;
}

// Releases the spin lock at *lock by storing lock_free through VolatileStore.
// Does not check that the lock was actually held.
inline
static void leave_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock)
{
    VolatileStore<int32_t>((int32_t*)lock, lock_free);
}

#ifdef _DEBUG

// Debug-build acquire: takes the lock via enter_spin_lock_noinstru, asserts
// that no holder was recorded ((Thread*)-1 is the "no holder" marker written
// by leave_spin_lock), then records the current thread as the holder.
inline
static void enter_spin_lock (GCSpinLock *pSpinLock)
{
    enter_spin_lock_noinstru (&pSpinLock->lock);
    assert (pSpinLock->holding_thread == (Thread*)-1);
    pSpinLock->holding_thread = GCToEEInterface::GetThread();
}

// Debug-build try-acquire: makes one non-blocking attempt to take the lock.
// On success the current thread is recorded as the holder and TRUE is
// returned; on failure nothing is modified and FALSE is returned.
inline
static BOOL try_enter_spin_lock(GCSpinLock *pSpinLock)
{
    if (!try_enter_spin_lock_noinstru(&pSpinLock->lock))
    {
        return FALSE;
    }

    pSpinLock->holding_thread = GCToEEInterface::GetThread();
    return TRUE;
}

// Debug-build release: records whether the releasing thread was created by
// the GC, resets the holder to the "no holder" marker (Thread*)-1, and then
// releases the lock word.
inline
static void leave_spin_lock(GCSpinLock *pSpinLock)
{
    bool gc_thread_p = GCToEEInterface::WasCurrentThreadCreatedByGC();
    pSpinLock->released_by_gc_p = gc_thread_p;
    pSpinLock->holding_thread = (Thread*) -1;
    // Only store when the word is not already lock_free.
    if (pSpinLock->lock != lock_free)
        leave_spin_lock_noinstru(&pSpinLock->lock);
}

// Debug-only: asserts that the current thread is recorded as the holder of
// pSpinLock. The expansion already ends with a semicolon.
#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) \
    _ASSERTE((pSpinLock)->holding_thread == GCToEEInterface::GetThread());

// Debug-only: asserts that the current thread is NOT recorded as the holder
// of pSpinLock. The expansion already ends with a semicolon.
#define ASSERT_NOT_HOLDING_SPIN_LOCK(pSpinLock) \
    _ASSERTE((pSpinLock)->holding_thread != GCToEEInterface::GetThread());

#else //_DEBUG

// In the concurrent version, the Enable/DisablePreemptiveGC is optional because
// the GC thread calls WaitLonger.
// Release-build back-off used by enter_spin_lock.
//
// i is the caller's running attempt counter; it selects between yielding the
// thread and sleeping. When SYNCHRONIZATION_STATS is defined, spin_lock's
// statistics counters are updated along the way.
//
// Sequence: switch to preemptive mode, back off (unless a GC has started),
// wait for the GC to finish if one has started, then switch back to
// cooperative mode.
void WaitLonger (int i
#ifdef SYNCHRONIZATION_STATS
    , GCSpinLock* spin_lock
#endif //SYNCHRONIZATION_STATS
    )
{
#ifdef SYNCHRONIZATION_STATS
    (spin_lock->num_wait_longer)++;
#endif //SYNCHRONIZATION_STATS

    // every 8th attempt:
    bool bToggleGC = GCToEEInterface::EnablePreemptiveGC();
    // NOTE(review): this assert expects EnablePreemptiveGC to always report
    // true here, i.e. presumably callers are always in cooperative mode.
    assert (bToggleGC);

    // if we're waiting for gc to finish, we should block immediately
    if (!gc_heap::gc_started)
    {
#ifdef SYNCHRONIZATION_STATS
        (spin_lock->num_switch_thread_w)++;
#endif //SYNCHRONIZATION_STATS
        if  (g_num_processors > 1)
        {
            YieldProcessor();           // indicate to the processor that we are spinning
            // Sleep only when the low 5 bits of i are all zero (i.e. i is a
            // multiple of 32); otherwise just give up the time slice.
            if  (i & 0x01f)
                GCToOSInterface::YieldThread (0);
            else
                GCToOSInterface::Sleep (5);
        }
        else
            GCToOSInterface::Sleep (5);
    }

    // If CLR is hosted, a thread may reach here while it is in preemptive GC mode,
    // or it has no Thread object, in order to force a task to yield, or to trigger a GC.
    // It is important that the thread is going to wait for GC.  Otherwise the thread
    // is in a tight loop.  If the thread has high priority, the perf is going to be very BAD.
    if (gc_heap::gc_started)
    {
        gc_heap::wait_for_gc_done();
    }

    if (bToggleGC)
    {
#ifdef SYNCHRONIZATION_STATS
        (spin_lock->num_disable_preemptive_w)++;
#endif //SYNCHRONIZATION_STATS
        GCToEEInterface::DisablePreemptiveGC();
    }
}

// Release-build acquire: blocks until spin_lock is obtained.
//
// Each failed compare-exchange starts a wait phase that lasts until the lock
// word reads lock_free, after which the acquisition is attempted again.
// Within the wait phase, every 8th iteration (or any iteration once a GC has
// started) backs off through WaitLonger; the other iterations spin briefly on
// multi-processor machines and then yield in preemptive mode.
inline
static void enter_spin_lock (GCSpinLock* spin_lock)
{
    while (Interlocked::CompareExchange(&spin_lock->lock, lock_taken, lock_free) != lock_free)
    {
        unsigned int attempts = 0;
        while (spin_lock->lock != lock_free)
        {
            assert (spin_lock->lock != lock_decommissioned);

            attempts++;
            if (((attempts & 7) == 0) || gc_heap::gc_started)
            {
                WaitLonger(attempts
#ifdef SYNCHRONIZATION_STATS
                        , spin_lock
#endif //SYNCHRONIZATION_STATS
                    );
                continue;
            }

            if (!(g_num_processors > 1))
            {
                // Single processor: yield directly (no mode switch on this path).
                GCToOSInterface::YieldThread(0);
                continue;
            }

#ifndef MULTIPLE_HEAPS
            int spin_limit = 32 * yp_spin_count_unit;
#else //!MULTIPLE_HEAPS
            int spin_limit = yp_spin_count_unit;
#endif //!MULTIPLE_HEAPS
            for (int spin = 0; spin < spin_limit; spin++)
            {
                if  (spin_lock->lock == lock_free || gc_heap::gc_started)
                {
                    break;
                }
                YieldProcessor();           // indicate to the processor that we are spinning
            }

            if  (spin_lock->lock != lock_free && !gc_heap::gc_started)
            {
#ifdef SYNCHRONIZATION_STATS
                (spin_lock->num_switch_thread)++;
#endif //SYNCHRONIZATION_STATS
                // Enable preemptive mode, yield, then restore the prior mode.
                safe_switch_to_thread();
            }
        }
    }
}

// Release-build try-acquire: one compare-exchange from lock_free to
// lock_taken. Returns nonzero on success, zero if the lock was not free.
// Never blocks.
inline
static BOOL try_enter_spin_lock(GCSpinLock* spin_lock)
{
    return (Interlocked::CompareExchange(&spin_lock->lock, lock_taken, lock_free) == lock_free);
}

// Release-build release: stores lock_free into the lock word. Does not check
// that the lock was actually held.
inline
static void leave_spin_lock (GCSpinLock * spin_lock)
{
    spin_lock->lock = lock_free;
}

#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock)

#endif //_DEBUG

// Thin wrapper over GCToEEInterface::EnablePreemptiveGC(). The returned value
// is meant to be passed to disable_preemptive() to undo the switch.
bool gc_heap::enable_preemptive ()
{
    return GCToEEInterface::EnablePreemptiveGC();
}

// Counterpart of enable_preemptive(): calls
// GCToEEInterface::DisablePreemptiveGC() only when restore_cooperative is
// true; otherwise does nothing.
void gc_heap::disable_preemptive (bool restore_cooperative)
{
    if (!restore_cooperative)
    {
        return;
    }

    GCToEEInterface::DisablePreemptiveGC();
}

typedef void **  PTR_PTR;
// Zero-fills size bytes starting at mem.
// size is asserted to be a multiple of the pointer size, and the pointer size
// is asserted to equal DATA_ALIGNMENT.
inline
void memclr ( uint8_t* mem, size_t size)
{
    dprintf (3, ("MEMCLR: %p, %zd", mem, size));
    assert ((size & (sizeof(PTR_PTR)-1)) == 0);
    assert (sizeof(PTR_PTR) == DATA_ALIGNMENT);
    memset (mem, 0, size);
}

void memcopy (uint8_t* dmem, uint8_t* smem, size_t size)
{
    const size_t sz4ptr = sizeof(PTR_PTR)*4;
    const size_t sz2ptr = sizeof(PTR_PTR)*2;
    const size_t sz1ptr = sizeof(PTR_PTR)*1;

    assert ((size & (sizeof (PTR_PTR)-1)) == 0);
    assert (sizeof(PTR_PTR) == DATA_ALIGNMENT);

    // copy in groups of four pointer sized things at a time
    if (size >= sz4ptr)
    {
        do
        {
            ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0];
            ((PTR_PTR)dmem)[1] = ((PTR_PTR)smem)[1];
            ((PTR_PTR)dmem)[2] = ((PTR_PTR)smem)[2];
            ((PTR_PTR)dmem)[3] = ((PTR_PTR)smem)[3];
            dmem += sz4ptr;
            smem += sz4ptr;
        }
        while ((size -= sz4ptr) >= sz4ptr);
    }

    // still two pointer sized things or more left to copy?
    if (size & sz2ptr)
    {
        ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0];
        ((PTR_PTR)dmem)[1] = ((PTR_PTR)smem)[1];
        dmem += sz2ptr;
        smem += sz2ptr;
    }

    // still one pointer sized thing left to copy?
    if (size & sz1ptr)
    {
        ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0];
    }
}

// Rounds add toward zero to a multiple of pitch.
// pitch must be nonzero.
inline
ptrdiff_t round_down (ptrdiff_t add, int pitch)
{
    // add == (add / pitch) * pitch + (add % pitch), so dropping the remainder
    // yields the same value as dividing and multiplying back.
    const ptrdiff_t remainder = add % pitch;
    return add - remainder;
}

#if defined(FEATURE_STRUCTALIGN) && defined(RESPECT_LARGE_ALIGNMENT)
// FEATURE_STRUCTALIGN allows the compiler to dictate the alignment,
// i.e, if a larger alignment matters or is beneficial, the compiler
// generated info tells us so.  RESPECT_LARGE_ALIGNMENT is just the
// converse - it's a heuristic for the GC to use a larger alignment.
#error FEATURE_STRUCTALIGN should imply !RESPECT_LARGE_ALIGNMENT
#endif

#if defined(FEATURE_STRUCTALIGN) && defined(FEATURE_LOH_COMPACTION)
#error FEATURE_STRUCTALIGN and FEATURE_LOH_COMPACTION are mutually exclusive
#endif

// Tells whether p1 and p2 sit at the same offset within a "large" alignment
// unit, i.e. one of 2 * DATA_ALIGNMENT bytes. When RESPECT_LARGE_ALIGNMENT is
// not defined the answer is always TRUE.
inline
BOOL same_large_alignment_p (uint8_t* p1, uint8_t* p2)
{
#ifdef RESPECT_LARGE_ALIGNMENT
    const size_t large_mask = 2 * DATA_ALIGNMENT - 1;
    const size_t offset1 = (size_t)p1 & large_mask;
    const size_t offset2 = (size_t)p2 & large_mask;
    return (offset1 == offset2);
#else
    UNREFERENCED_PARAMETER(p1);
    UNREFERENCED_PARAMETER(p2);
    return TRUE;
#endif // RESPECT_LARGE_ALIGNMENT
}

// Computes how much padding is needed to flip the large alignment of a plug
// during relocation: DATA_ALIGNMENT when padding is already present,
// otherwise Align (min_obj_size) | DATA_ALIGNMENT.
// Asserts when called in a build without RESPECT_LARGE_ALIGNMENT.
inline
size_t switch_alignment_size (BOOL already_padded_p)
{
#ifndef RESPECT_LARGE_ALIGNMENT
    assert (!"Should not be called");
#endif // RESPECT_LARGE_ALIGNMENT

    if (!already_padded_p)
    {
        return Align (min_obj_size) | DATA_ALIGNMENT;
    }

    return DATA_ALIGNMENT;
}

#ifdef FEATURE_STRUCTALIGN
void set_node_aligninfo (uint8_t *node, int requiredAlignment, ptrdiff_t pad);
void clear_node_aligninfo (uint8_t *node);
#else // FEATURE_STRUCTALIGN
#define node_realigned(node)    (((plug_and_reloc*)(node))[-1].reloc & 1)
void set_node_realigned (uint8_t* node);
void clear_node_realigned(uint8_t* node);
#endif // FEATURE_STRUCTALIGN

// Rounds nbytes up to the next multiple of 8 (or defers to Align when
// FEATURE_STRUCTALIGN is defined).
inline
size_t AlignQword (size_t nbytes)
{
#ifdef FEATURE_STRUCTALIGN
    // This function is used to align everything on the large object
    // heap to an 8-byte boundary, to reduce the number of unaligned
    // accesses to (say) arrays of doubles.  With FEATURE_STRUCTALIGN,
    // the compiler dictates the optimal alignment instead of having
    // a heuristic in the GC.
    return Align (nbytes);
#else // FEATURE_STRUCTALIGN
    const size_t qword_mask = 7;
    return (nbytes + qword_mask) & ~qword_mask;
#endif // FEATURE_STRUCTALIGN
}

// Returns nonzero when none of the bits in ALIGNCONST are set in n.
inline
BOOL Aligned (size_t n)
{
    return (n & ALIGNCONST) == 0;
}

#define OBJECT_ALIGNMENT_OFFSET (sizeof(MethodTable *))

#ifdef FEATURE_STRUCTALIGN
#define MAX_STRUCTALIGN OS_PAGE_SIZE
#else // FEATURE_STRUCTALIGN
#define MAX_STRUCTALIGN 0
#endif // FEATURE_STRUCTALIGN

#ifdef FEATURE_STRUCTALIGN
// Returns the extra adjustment to add to an alignment pad so that the final
// pad is either 0 or at least min_obj_size: requiredAlignment when
// 0 < pad < Align (min_obj_size), otherwise 0.
inline
ptrdiff_t AdjustmentForMinPadSize(ptrdiff_t pad, int requiredAlignment)
{
    // Subtracting DATA_ALIGNMENT and comparing as unsigned folds the two-sided
    // range check into a single comparison: a pad of 0 wraps around to a huge
    // value and therefore falls outside the "too small" range.
    const size_t shifted_pad = (size_t)(pad - DATA_ALIGNMENT);
    const size_t too_small_limit = Align (min_obj_size) - DATA_ALIGNMENT;

    if (shifted_pad >= too_small_limit)
    {
        return 0;
    }

    return requiredAlignment;
}

// Returns the lowest pointer >= origPtr such that (pointer + alignmentOffset)
// is aligned to requiredAlignment, bumped by a further requiredAlignment when
// the resulting pad would be non-zero but smaller than the minimum object
// size (see AdjustmentForMinPadSize).
//
// origPtr must already satisfy ALIGNCONST; requiredAlignment must be a power
// of two between sizeof(void*) and MAX_STRUCTALIGN (all asserted).
inline
uint8_t* StructAlign (uint8_t* origPtr, int requiredAlignment, ptrdiff_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET)
{
    // required alignment must be a power of two
    _ASSERTE(((size_t)origPtr & ALIGNCONST) == 0);
    _ASSERTE(((requiredAlignment - 1) & requiredAlignment) == 0);
    _ASSERTE(requiredAlignment >= sizeof(void *));
    _ASSERTE(requiredAlignment <= MAX_STRUCTALIGN);

    // When this method is invoked for individual objects (i.e., alignmentOffset
    // is just the size of the PostHeader), what needs to be aligned when
    // we're done is the pointer to the payload of the object (which means
    // the actual resulting object pointer is typically not aligned).

    uint8_t* result = (uint8_t*)Align ((size_t)origPtr + alignmentOffset, requiredAlignment-1) - alignmentOffset;
    ptrdiff_t alignpad = result - origPtr;

    return result + AdjustmentForMinPadSize (alignpad, requiredAlignment);
}

// Returns the number of padding bytes StructAlign would insert in front of
// plug for the given alignment and offset.
// NOTE(review): alignmentOffset is size_t here but ptrdiff_t in StructAlign.
inline
ptrdiff_t ComputeStructAlignPad (uint8_t* plug, int requiredAlignment, size_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET)
{
    return StructAlign (plug, requiredAlignment, alignmentOffset) - plug;
}

// Returns nonzero when ptr needs no padding to satisfy requiredAlignment,
// using StructAlign's default alignment offset.
BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment)
{
    return StructAlign (ptr, requiredAlignment) == ptr;
}

// Returns the worst-case number of padding bytes needed to satisfy
// requiredAlignment; 0 when requiredAlignment equals DATA_ALIGNMENT.
inline
ptrdiff_t ComputeMaxStructAlignPad (int requiredAlignment)
{
    if (requiredAlignment == DATA_ALIGNMENT)
        return 0;
    // Since a non-zero alignment padding cannot be less than min_obj_size (so we can fit the
    // alignment padding object), the worst-case alignment padding is correspondingly larger
    // than the required alignment.
    return requiredAlignment + Align (min_obj_size) - DATA_ALIGNMENT;
}

// Large-object variant of ComputeMaxStructAlignPad: returns 0 when
// requiredAlignment does not exceed get_alignment_constant (TRUE) + 1,
// otherwise the worst-case padding including room for a trailing gap filler.
inline
ptrdiff_t ComputeMaxStructAlignPadLarge (int requiredAlignment)
{
    if (requiredAlignment <= get_alignment_constant (TRUE)+1)
        return 0;
    // This is the same as ComputeMaxStructAlignPad, except that in addition to leaving space
    // for padding before the actual object, it also leaves space for filling a gap after the
    // actual object.  This is needed on the large object heap, as the outer allocation functions
    // don't operate on an allocation context (which would have left space for the final gap).
    return requiredAlignment + Align (min_obj_size) * 2 - DATA_ALIGNMENT;
}

// Aligns a fresh allocation at newAlloc to requiredAlignment.
// Any gap between newAlloc and the aligned address is turned into an unused
// array, and acontext->alloc_ptr is moved to just past the aligned object
// (aligned address + Align (size)). Returns the aligned address.
uint8_t* gc_heap::pad_for_alignment (uint8_t* newAlloc, int requiredAlignment, size_t size, alloc_context* acontext)
{
    uint8_t* alignedPtr = StructAlign (newAlloc, requiredAlignment);
    if (alignedPtr != newAlloc) {
        make_unused_array (newAlloc, alignedPtr - newAlloc);
    }
    acontext->alloc_ptr = alignedPtr + Align (size);
    return alignedPtr;
}

// Aligns a fresh large allocation at newAlloc to requiredAlignment.
// The gap in front of the aligned address (if any) becomes an unused array,
// and so does whatever part of the worst-case padding reservation was not
// consumed in front of the object; that second filler is placed right after
// the object, at aligned address + AlignQword (size).
// Returns the aligned address.
uint8_t* gc_heap::pad_for_alignment_large (uint8_t* newAlloc, int requiredAlignment, size_t size)
{
    uint8_t* alignedPtr = StructAlign (newAlloc, requiredAlignment);

    if (alignedPtr != newAlloc)
    {
        make_unused_array (newAlloc, alignedPtr - newAlloc);
    }

    // The aligned address if the full worst-case padding had been used in front.
    uint8_t* worstCaseAligned = newAlloc + ComputeMaxStructAlignPadLarge (requiredAlignment);
    if (alignedPtr < worstCaseAligned)
    {
        make_unused_array (alignedPtr + AlignQword (size), worstCaseAligned - alignedPtr);
    }

    return alignedPtr;
}
#else // FEATURE_STRUCTALIGN
#define ComputeMaxStructAlignPad(requiredAlignment) 0
#define ComputeMaxStructAlignPadLarge(requiredAlignment) 0
#endif // FEATURE_STRUCTALIGN

//CLR_SIZE  is the max amount of bytes from gen0 that is set to 0 in one chunk
#ifdef SERVER_GC
#define CLR_SIZE ((size_t)(8*1024+32))
#else //SERVER_GC
#define CLR_SIZE ((size_t)(8*1024+32))
#endif //SERVER_GC

#define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN)
// When we fit into the free list we need an extra of a min obj
#define END_SPACE_AFTER_GC_FL (END_SPACE_AFTER_GC + Align (min_obj_size))

#if defined(BACKGROUND_GC) && !defined(USE_REGIONS)
#define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE)
#else
#define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE)
#endif //BACKGROUND_GC && !USE_REGIONS

// This is always power of 2.
#ifdef HOST_64BIT
const size_t min_segment_size_hard_limit = 1024*1024*16;
#else //HOST_64BIT
const size_t min_segment_size_hard_limit = 1024*1024*4;
#endif //HOST_64BIT

#ifndef HOST_64BIT
// Max size of heap hard limit (2^31) to be able to be aligned and rounded up on power of 2 and not overflow
const size_t max_heap_hard_limit = (size_t)2 * (size_t)1024 * (size_t)1024 * (size_t)1024;
#endif //!HOST_64BIT

// Rounds add up to the next multiple of min_segment_size_hard_limit
// (which is a power of two).
inline
size_t align_on_segment_hard_limit (size_t add)
{
    const size_t granularity_mask = min_segment_size_hard_limit - 1;
    return (add + granularity_mask) & ~granularity_mask;
}

#ifdef SERVER_GC

#ifdef HOST_64BIT

#define INITIAL_ALLOC ((size_t)((size_t)4*1024*1024*1024))
#define LHEAP_ALLOC   ((size_t)(1024*1024*256))

#else

#define INITIAL_ALLOC ((size_t)(1024*1024*64))
#define LHEAP_ALLOC   ((size_t)(1024*1024*32))

#endif  // HOST_64BIT

#else //SERVER_GC

#ifdef HOST_64BIT

#define INITIAL_ALLOC ((size_t)(1024*1024*256))
#define LHEAP_ALLOC   ((size_t)(1024*1024*128))

#else

#define INITIAL_ALLOC ((size_t)(1024*1024*16))
#define LHEAP_ALLOC   ((size_t)(1024*1024*16))

#endif  // HOST_64BIT

#endif //SERVER_GC

const size_t etw_allocation_tick = 100*1024;

const size_t low_latency_alloc = 256*1024;

const size_t fgn_check_quantum = 2*1024*1024;

#ifdef MH_SC_MARK
const int max_snoop_level = 128;
#endif //MH_SC_MARK

#ifdef CARD_BUNDLE
//threshold of heap size to turn on card bundles.
#define SH_TH_CARD_BUNDLE  (40*1024*1024)
#define MH_TH_CARD_BUNDLE  (180*1024*1024)
#endif //CARD_BUNDLE

// min size to decommit to make the OS call worthwhile
#define MIN_DECOMMIT_SIZE  (100*OS_PAGE_SIZE)

// max size to decommit per millisecond
#define DECOMMIT_SIZE_PER_MILLISECOND (160*1024)

// time in milliseconds between decommit steps
#define DECOMMIT_TIME_STEP_MILLISECONDS (100)

// Rounds add up to the next multiple of OS_PAGE_SIZE (mask-based, so
// OS_PAGE_SIZE is expected to be a power of two).
inline
size_t align_on_page (size_t add)
{
    return ((add + OS_PAGE_SIZE - 1) & ~((size_t)OS_PAGE_SIZE - 1));
}

// Pointer overload: rounds add up to the next OS_PAGE_SIZE boundary.
inline
uint8_t* align_on_page (uint8_t* add)
{
    return (uint8_t*)align_on_page ((size_t) add);
}

// Rounds add down to a multiple of OS_PAGE_SIZE (mask-based, so
// OS_PAGE_SIZE is expected to be a power of two).
inline
size_t align_lower_page (size_t add)
{
    return (add & ~((size_t)OS_PAGE_SIZE - 1));
}

// Pointer overload: rounds add down to an OS_PAGE_SIZE boundary.
inline
uint8_t* align_lower_page (uint8_t* add)
{
    return (uint8_t*)align_lower_page ((size_t)add);
}

// Rounds add down to a multiple of WRITE_WATCH_UNIT_SIZE (mask-based, so
// WRITE_WATCH_UNIT_SIZE is expected to be a power of two).
inline
size_t align_write_watch_lower_page (size_t add)
{
    return (add & ~(WRITE_WATCH_UNIT_SIZE - 1));
}

inline
uint8_t* align_write_watch_lower_page (uint8_t* add)
{
    return (uint8_t*)align_lower_page ((size_t)add);
}

// Returns nonzero when integer has at most one bit set. Note that this means
// 0 is also reported as a power of two.
inline
BOOL power_of_two_p (size_t integer)
{
    // Clearing the lowest set bit leaves zero only if there was at most one.
    const size_t without_lowest_bit = integer & (integer - 1);
    return (without_lowest_bit == 0);
}

// Returns nonzero when integer is odd.
inline
BOOL oddp (size_t integer)
{
    return (integer % 2) == 1;
}

// Population count for 16-bit values: returns how many bits are set in word.
// word must be below 0x10000 (asserted); this is only ever used for WORDs.
size_t logcount (size_t word)
{
    assert (word < 0x10000);

    // Classic pairwise reduction: at each step, adjacent fields of `width`
    // bits are summed into fields twice as wide.
    static const size_t field_masks[] = { 0x5555, 0x3333, 0x0F0F, 0x00FF };

    size_t bits = word;
    for (int step = 0; step < 4; step++)
    {
        const int width = 1 << step;
        const size_t mask = field_masks[step];
        bits = (bits & mask) + ((bits >> width) & mask);
    }
    return bits;
}

void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_bounds_check)
{
    WriteBarrierParameters args = {};
    args.operation = WriteBarrierOp::StompResize;
    args.is_runtime_suspended = is_runtime_suspended;
    args.requires_upper_bounds_check = requires_upper_bounds_check;

    args.card_table = g_gc_card_table;
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    args.card_bundle_table = g_gc_card_bundle_table;
#endif

    args.lowest_address = g_gc_lowest_address;
    args.highest_address = g_gc_highest_address;

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    if (SoftwareWriteWatch::IsEnabledForGCHeap())
    {
        args.write_watch_table = g_gc_sw_ww_table;
    }
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

    GCToEEInterface::StompWriteBarrier(&args);
}

#ifdef USE_REGIONS
// Fills in the region related fields of 'args' according to the GCWriteBarrier config:
//  - server write barrier: nothing is set; args is expected to still be zero initialized.
//  - bytewise region write barrier: region table and shift are set, the bitwise flag stays false.
//  - anything else (default / bitwise): region table and shift are set and the bitwise flag is turned on.
void region_write_barrier_settings (WriteBarrierParameters* args,
                                    gc_heap::region_info* map_region_to_generation_skewed,
                                    uint8_t region_shr)
{
    const auto barrier_kind = GCConfig::GetGCWriteBarrier();

    if (barrier_kind == GCConfig::WRITE_BARRIER_SERVER)
    {
        // server write barrier
        // args should have been zero initialized
        assert (!args->region_use_bitwise_write_barrier);
        assert (args->region_to_generation_table == nullptr);
        assert (args->region_shr == 0);
        return;
    }

    // both region flavors need the region to generation table and the shift
    args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed;
    args->region_shr = region_shr;

    if (barrier_kind == GCConfig::WRITE_BARRIER_REGION_BYTE)
    {
        // bytewise region write barrier
        assert (!args->region_use_bitwise_write_barrier);
    }
    else
    {
        // bitwise region write barrier is the default now
        args->region_use_bitwise_write_barrier = true;
    }
}
#endif //USE_REGIONS

void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high
#ifdef USE_REGIONS
                                   , gc_heap::region_info* map_region_to_generation_skewed
                                   , uint8_t region_shr
#endif //USE_REGIONS
                                   )
{
#ifndef USE_REGIONS
    initGCShadow();
#endif

    WriteBarrierParameters args = {};
    args.operation = WriteBarrierOp::StompEphemeral;
    args.is_runtime_suspended = true;
    args.ephemeral_low = ephemeral_low;
    args.ephemeral_high = ephemeral_high;
#ifdef USE_REGIONS
    region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr);
#endif //USE_REGIONS
    GCToEEInterface::StompWriteBarrier(&args);
}

void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high
#ifdef USE_REGIONS
                                   , gc_heap::region_info* map_region_to_generation_skewed
                                   , uint8_t region_shr
#endif //USE_REGIONS
                                   )
{
    WriteBarrierParameters args = {};
    args.operation = WriteBarrierOp::Initialize;
    args.is_runtime_suspended = true;
    args.requires_upper_bounds_check = false;
    args.card_table = g_gc_card_table;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    args.card_bundle_table = g_gc_card_bundle_table;
#endif

    args.lowest_address = g_gc_lowest_address;
    args.highest_address = g_gc_highest_address;
    args.ephemeral_low = ephemeral_low;
    args.ephemeral_high = ephemeral_high;

#ifdef USE_REGIONS
    region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr);
#endif //USE_REGIONS

    GCToEEInterface::StompWriteBarrier(&args);
}

//extract the low bits [0, bits[ of a uint32_t, i.e. keep only the bit positions below 'bits'
// NOTE(review): the mask is computed from a signed int 1, so bits >= 31 relies on
// compiler specific wraparound behavior.
#define lowbits(wrd, bits) ((wrd) & ((1 << (bits))-1))
//extract the high bits [bits, 32[ of a uint32_t, i.e. clear the bit positions below 'bits'
#define highbits(wrd, bits) ((wrd) & ~((1 << (bits))-1))

// Per latency level, per generation tuning data.
// The outer index is the latency level (relative to latency_level_first), the
// inner index is the generation (gen0, gen1, gen2, loh, poh).
// NOTE(review): the order of the values in each row presumably follows the field
// order of static_data, which is declared elsewhere - confirm against that declaration.
//
// Things we need to manually initialize:
// gen0 min_size - based on cache
// gen0/1 max_size - based on segment size
static static_data static_data_table[latency_level_last - latency_level_first + 1][total_generation_count] =
{
    // latency_level_memory_footprint
    {
        // gen0
        {0, 0, 40000, 0.5f, 9.0f, 20.0f, (1000 * 1000), 1},
        // gen1
        {160*1024, 0, 80000, 0.5f, 2.0f, 7.0f, (10 * 1000 * 1000), 10},
        // gen2
        {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, (100 * 1000 * 1000), 100},
        // loh
        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
        // poh
        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
    },

    // latency_level_balanced
    // Differs from latency_level_memory_footprint in two places only: the gen0
    // fifth/sixth values when MULTIPLE_HEAPS is defined, and the first gen1 value.
    {
        // gen0
        {0, 0, 40000, 0.5f,
#ifdef MULTIPLE_HEAPS
            20.0f, 40.0f,
#else
            9.0f, 20.0f,
#endif //MULTIPLE_HEAPS
            (1000 * 1000), 1},
        // gen1
        {256*1024, 0, 80000, 0.5f, 2.0f, 7.0f, (10 * 1000 * 1000), 10},
        // gen2
        {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, (100 * 1000 * 1000), 100},
        // loh
        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
        // poh
        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}
    },
};

class mark;
class generation;
class heap_segment;
class CObjectHeader;
class dynamic_data;
class l_heap;
class sorted_table;
class c_synchronize;

#ifdef FEATURE_PREMORTEM_FINALIZATION
static
HRESULT AllocateCFinalize(CFinalize **pCFinalize);
#endif // FEATURE_PREMORTEM_FINALIZATION

uint8_t* tree_search (uint8_t* tree, uint8_t* old_address);


// Selects the sort routine behind the _sort name.
// Near the top of this file exactly one of USE_VXSORT / USE_INTROSORT is defined,
// so with those definitions the qsort1 fallback below is not selected.
#ifdef USE_INTROSORT
#define _sort introsort::sort
#elif defined(USE_VXSORT)
// in this case we have do_vxsort which takes an additional range that
// all items to be sorted are contained in
// so do not #define _sort
#else //USE_INTROSORT
// neither USE_INTROSORT nor USE_VXSORT: fall back to qsort1
#define _sort qsort1
void qsort1(uint8_t** low, uint8_t** high, unsigned int depth);
#endif //USE_INTROSORT

void* virtual_alloc (size_t size);
void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node = NUMA_NODE_UNDEFINED);

/* per heap static initialization */
// Out-of-class definitions of gc_heap static data members start here.
// Members of scalar or pointer type that are defined without an initializer
// start out zero, as they have static storage duration.
#if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS)
uint32_t*   gc_heap::mark_array;
#endif //BACKGROUND_GC && !MULTIPLE_HEAPS

// mark list bookkeeping
uint8_t**   gc_heap::g_mark_list;
uint8_t**   gc_heap::g_mark_list_copy;
size_t      gc_heap::mark_list_size;
size_t      gc_heap::g_mark_list_total_size;
bool        gc_heap::mark_list_overflow;
#ifdef USE_REGIONS
uint8_t***  gc_heap::g_mark_list_piece;
size_t      gc_heap::g_mark_list_piece_size;
size_t      gc_heap::g_mark_list_piece_total_size;
#endif //USE_REGIONS

// not a gc_heap member: a namespace scope global
seg_mapping* seg_mapping_table;

#ifdef FEATURE_BASICFREEZE
sorted_table* gc_heap::seg_table;
#endif //FEATURE_BASICFREEZE

// Definitions that depend on whether MULTIPLE_HEAPS is defined.
#ifdef MULTIPLE_HEAPS
GCEvent     gc_heap::ee_suspend_event;
size_t      gc_heap::min_gen0_balance_delta = 0;
size_t      gc_heap::min_balance_threshold = 0;
#endif //MULTIPLE_HEAPS

VOLATILE(BOOL) gc_heap::gc_started;

#ifdef MULTIPLE_HEAPS
GCEvent     gc_heap::gc_start_event;
bool        gc_heap::gc_thread_no_affinitize_p = false;
// not a gc_heap member: a namespace scope global
uintptr_t   process_mask = 0;

int         gc_heap::n_heaps;       // current number of heaps
int         gc_heap::n_max_heaps;   // maximum number of heaps

gc_heap**   gc_heap::g_heaps;

// With multiple heaps g_promoted/g_bpromoted are pointers; in the single heap
// case below they are plain size_t values.
#if !defined(USE_REGIONS) || defined(_DEBUG)
size_t*     gc_heap::g_promoted;
#endif //!USE_REGIONS || _DEBUG

#ifdef MH_SC_MARK
int*        gc_heap::g_mark_stack_busy;
#endif //MH_SC_MARK

#ifdef BACKGROUND_GC
size_t*     gc_heap::g_bpromoted;
#endif //BACKGROUND_GC

BOOL        gc_heap::gradual_decommit_in_progress_p = FALSE;
size_t      gc_heap::max_decommit_step_size = 0;
#else  //MULTIPLE_HEAPS

#if !defined(USE_REGIONS) || defined(_DEBUG)
size_t      gc_heap::g_promoted;
#endif //!USE_REGIONS || _DEBUG

#ifdef BACKGROUND_GC
size_t      gc_heap::g_bpromoted;
#endif //BACKGROUND_GC

// this is just to have fewer #ifdefs in code shared between WKS and SVR
// for filling out ScanContext structs
const int n_heaps = 1;

#endif //MULTIPLE_HEAPS

// bookkeeping layout and commit accounting
size_t      gc_heap::card_table_element_layout[total_bookkeeping_elements + 1];
uint8_t*    gc_heap::bookkeeping_start = nullptr;
#ifdef USE_REGIONS
uint8_t*    gc_heap::bookkeeping_covered_committed = nullptr;
size_t      gc_heap::bookkeeping_sizes[total_bookkeeping_elements];
#endif //USE_REGIONS

size_t      gc_heap::reserved_memory = 0;
size_t      gc_heap::reserved_memory_limit = 0;
BOOL        gc_heap::g_low_memory_status;

// file local, not a gc_heap member
static gc_reason gc_trigger_reason = reason_empty;

gc_latency_level gc_heap::latency_level = latency_level_default;

gc_mechanisms  gc_heap::settings;

gc_history_global gc_heap::gc_data_global;

uint64_t    gc_heap::gc_last_ephemeral_decommit_time = 0;

CLRCriticalSection gc_heap::check_commit_cs;

#ifdef COMMITTED_BYTES_SHADOW
CLRCriticalSection gc_heap::decommit_lock;
#endif //COMMITTED_BYTES_SHADOW

size_t      gc_heap::current_total_committed = 0;

size_t      gc_heap::committed_by_oh[recorded_committed_bucket_counts];

size_t      gc_heap::current_total_committed_bookkeeping = 0;

BOOL        gc_heap::reset_mm_p = TRUE;

// Everything up to the matching #endif only exists when FEATURE_EVENT_TRACE is
// defined; note that this includes the *_from_config values and use_frozen_segments_p.
#ifdef FEATURE_EVENT_TRACE
bool gc_heap::informational_event_enabled_p = false;

uint64_t*   gc_heap::gc_time_info = 0;

#ifdef BACKGROUND_GC
uint64_t*   gc_heap::bgc_time_info = 0;
#endif //BACKGROUND_GC

size_t      gc_heap::physical_memory_from_config = 0;

size_t      gc_heap::gen0_min_budget_from_config = 0;

size_t      gc_heap::gen0_max_budget_from_config = 0;

int         gc_heap::high_mem_percent_from_config = 0;

bool        gc_heap::use_frozen_segments_p = false;

#ifdef FEATURE_LOH_COMPACTION
gc_heap::etw_loh_compact_info* gc_heap::loh_compact_info;
#endif //FEATURE_LOH_COMPACTION
#endif //FEATURE_EVENT_TRACE

// config derived flags and settings
bool        gc_heap::hard_limit_config_p = false;

#if defined(SHORT_PLUGS) && !defined(USE_REGIONS)
double      gc_heap::short_plugs_pad_ratio = 0;
#endif //SHORT_PLUGS && !USE_REGIONS

int         gc_heap::generation_skip_ratio_threshold = 0;
int         gc_heap::conserve_mem_setting = 0;
bool        gc_heap::spin_count_unit_config_p = false;

// timing and last recorded GC info
uint64_t    gc_heap::suspended_start_time = 0;
uint64_t    gc_heap::end_gc_time = 0;
uint64_t    gc_heap::total_suspended_time = 0;
uint64_t    gc_heap::process_start_time = 0;
last_recorded_gc_info gc_heap::last_ephemeral_gc_info;
last_recorded_gc_info gc_heap::last_full_blocking_gc_info;

uint64_t    gc_heap::last_alloc_reset_suspended_end_time = 0;
size_t      gc_heap::max_peak_heap_size = 0;
VOLATILE(size_t) gc_heap::llc_size = 0;

#ifdef BACKGROUND_GC
// two slots of BGC info plus the index/flag that go with them
last_recorded_gc_info gc_heap::last_bgc_info[2];
VOLATILE(bool)        gc_heap::is_last_recorded_bgc = false;
VOLATILE(int)         gc_heap::last_bgc_info_index = 0;
#endif //BACKGROUND_GC

#ifdef DYNAMIC_HEAP_COUNT
size_t      gc_heap::hc_change_cancelled_count_prep = 0;
#ifdef BACKGROUND_GC
// BGC thread creation history and counters
int         gc_heap::bgc_th_creation_hist_index = 0;
gc_heap::bgc_thread_creation_history gc_heap::bgc_th_creation_hist[max_bgc_thread_creation_count];
size_t      gc_heap::bgc_th_count_created = 0;
size_t      gc_heap::bgc_th_count_created_th_existed = 0;
size_t      gc_heap::bgc_th_count_creation_failed = 0;
size_t      gc_heap::bgc_init_gc_index = 0;
VOLATILE(short) gc_heap::bgc_init_n_heaps = 0;
size_t      gc_heap::hc_change_cancelled_count_bgc = 0;
#endif //BACKGROUND_GC
#endif //DYNAMIC_HEAP_COUNT

// memory load thresholds, physical memory info and hard limits
#if defined(HOST_64BIT)
#define MAX_ALLOWED_MEM_LOAD 85

// consider putting this in dynamic data -
// we may want different values for workstation
// and server GC.
// (16 MB)
#define MIN_YOUNGEST_GEN_DESIRED (16*1024*1024)

size_t      gc_heap::youngest_gen_desired_th;
#endif //HOST_64BIT

uint64_t    gc_heap::mem_one_percent = 0;

uint32_t    gc_heap::high_memory_load_th = 0;

uint32_t    gc_heap::m_high_memory_load_th;

uint32_t    gc_heap::v_high_memory_load_th;

uint32_t    gc_heap::almost_high_memory_load_th;

bool        gc_heap::is_restricted_physical_mem;

uint64_t    gc_heap::total_physical_mem = 0;

uint64_t    gc_heap::entry_available_physical_mem = 0;

size_t      gc_heap::heap_hard_limit = 0;

size_t      gc_heap::heap_hard_limit_oh[total_oh_count];

#ifdef USE_REGIONS

size_t      gc_heap::regions_range = 0;

#endif //USE_REGIONS

// not a gc_heap member: a namespace scope global
bool        affinity_config_specified_p = false;

#ifdef USE_REGIONS
// global_region_allocator and initial_regions are namespace scope globals;
// initial_regions points at arrays of [total_generation_count][2] addresses.
region_allocator global_region_allocator;
uint8_t*(*initial_regions)[total_generation_count][2] = nullptr;
size_t      gc_heap::region_count = 0;

gc_heap::region_info* gc_heap::map_region_to_generation = nullptr;
gc_heap::region_info* gc_heap::map_region_to_generation_skewed = nullptr;

#endif //USE_REGIONS

// background GC state that is defined regardless of MULTIPLE_HEAPS
#ifdef BACKGROUND_GC
GCEvent     gc_heap::bgc_start_event;

gc_mechanisms gc_heap::saved_bgc_settings;

gc_history_global gc_heap::bgc_data_global;

GCEvent     gc_heap::background_gc_done_event;

GCEvent     gc_heap::ee_proceed_event;

bool        gc_heap::gc_can_use_concurrent = false;

bool        gc_heap::temp_disable_concurrent_p = false;

uint32_t    gc_heap::cm_in_progress = FALSE;

BOOL        gc_heap::dont_restart_ee_p = FALSE;

BOOL        gc_heap::keep_bgc_threads_p = FALSE;

GCEvent     gc_heap::bgc_threads_sync_event;

BOOL        gc_heap::do_ephemeral_gc_p = FALSE;

BOOL        gc_heap::do_concurrent_p = FALSE;

size_t      gc_heap::ephemeral_fgc_counts[max_generation];

VOLATILE(c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free;

VOLATILE(BOOL) gc_heap::gc_background_running = FALSE;
#endif //BACKGROUND_GC

#ifdef USE_REGIONS
// gc_low/gc_high are defined here only for MULTIPLE_HEAPS; the single heap
// definitions live in the !MULTIPLE_HEAPS section of this file.
#ifdef MULTIPLE_HEAPS
uint8_t*    gc_heap::gc_low;
uint8_t*    gc_heap::gc_high;
#endif //MULTIPLE_HEAPS
VOLATILE(uint8_t*)    gc_heap::ephemeral_low;
VOLATILE(uint8_t*)    gc_heap::ephemeral_high;
#endif //USE_REGIONS

// From here until the matching #endif: definitions that only exist when
// MULTIPLE_HEAPS is not defined.
#ifndef MULTIPLE_HEAPS
#ifdef SPINLOCK_HISTORY
int         gc_heap::spinlock_info_index = 0;
spinlock_info gc_heap::last_spinlock_info[max_saved_spinlock_info];
allocation_state gc_heap::current_uoh_alloc_state = (allocation_state)-1;
#endif //SPINLOCK_HISTORY

uint32_t    gc_heap::fgn_maxgen_percent = 0;
size_t      gc_heap::fgn_last_alloc = 0;

int         gc_heap::generation_skip_ratio = 100;
#ifdef FEATURE_CARD_MARKING_STEALING
VOLATILE(size_t) gc_heap::n_eph_soh = 0;
VOLATILE(size_t) gc_heap::n_gen_soh = 0;
VOLATILE(size_t) gc_heap::n_eph_loh = 0;
VOLATILE(size_t) gc_heap::n_gen_loh = 0;
#endif //FEATURE_CARD_MARKING_STEALING

uint64_t    gc_heap::loh_alloc_since_cg = 0;

BOOL        gc_heap::elevation_requested = FALSE;

BOOL        gc_heap::last_gc_before_oom = FALSE;

BOOL        gc_heap::sufficient_gen0_space_p = FALSE;

BOOL        gc_heap::decide_promote_gen1_pins_p = TRUE;

#ifdef BACKGROUND_GC
uint8_t*    gc_heap::background_saved_lowest_address = 0;
uint8_t*    gc_heap::background_saved_highest_address = 0;
uint8_t*    gc_heap::next_sweep_obj = 0;
uint8_t*    gc_heap::current_sweep_pos = 0;
#ifdef DOUBLY_LINKED_FL
heap_segment* gc_heap::current_sweep_seg = 0;
#endif //DOUBLY_LINKED_FL
exclusive_sync* gc_heap::bgc_alloc_lock;
#endif //BACKGROUND_GC

// OOM and failure history
oom_history gc_heap::oom_info;

int         gc_heap::oomhist_index_per_heap = 0;

oom_history gc_heap::oomhist_per_heap[max_oom_history_count];

fgm_history gc_heap::fgm_result;

size_t      gc_heap::allocated_since_last_gc[total_oh_count];

#ifndef USE_REGIONS
// segments only; with regions ephemeral_low/ephemeral_high are defined as
// VOLATILE(uint8_t*) in the USE_REGIONS section instead
BOOL        gc_heap::ro_segments_in_range = FALSE;
uint8_t*    gc_heap::ephemeral_low;
uint8_t*    gc_heap::ephemeral_high;
BOOL        gc_heap::ephemeral_promotion;
uint8_t*    gc_heap::saved_ephemeral_plan_start[ephemeral_generation_count];
size_t      gc_heap::saved_ephemeral_plan_start_size[ephemeral_generation_count];
#endif //!USE_REGIONS

// heap address range and bookkeeping tables
uint8_t*    gc_heap::lowest_address;

uint8_t*    gc_heap::highest_address;

short*      gc_heap::brick_table;

uint32_t*   gc_heap::card_table;

#ifdef CARD_BUNDLE
uint32_t*   gc_heap::card_bundle_table;
#endif //CARD_BUNDLE

uint8_t*    gc_heap::gc_low = 0;

uint8_t*    gc_heap::gc_high = 0;

#ifndef USE_REGIONS
uint8_t*    gc_heap::demotion_low;

uint8_t*    gc_heap::demotion_high;

uint8_t*    gc_heap::last_gen1_pin_end;
#endif //!USE_REGIONS

gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons;

// allocation accounting
size_t      gc_heap::etw_allocation_running_amount[total_oh_count];

uint64_t    gc_heap::total_alloc_bytes_soh = 0;

uint64_t    gc_heap::total_alloc_bytes_uoh = 0;

int         gc_heap::gc_policy = 0;

uint64_t    gc_heap::allocation_running_time;

size_t      gc_heap::allocation_running_amount;

heap_segment* gc_heap::ephemeral_heap_segment = 0;

// region specific state
#ifdef USE_REGIONS
#ifdef STRESS_REGIONS
OBJECTHANDLE* gc_heap::pinning_handles_for_alloc = 0;
int         gc_heap::ph_index_per_heap = 0;
int         gc_heap::pinning_seg_interval = 2;
size_t      gc_heap::num_gen0_regions = 0;
int         gc_heap::sip_seg_interval = 0;
int         gc_heap::sip_seg_maxgen_interval = 0;
size_t      gc_heap::num_condemned_regions = 0;
#endif //STRESS_REGIONS

region_free_list gc_heap::free_regions[count_free_region_kinds];

int         gc_heap::num_regions_freed_in_sweep = 0;

// one entry per generation 0..max_generation
int         gc_heap::regions_per_gen[max_generation + 1];

int         gc_heap::planned_regions_per_gen[max_generation + 1];

int         gc_heap::sip_maxgen_regions_per_gen[max_generation + 1];

// note: max_generation entries here, not max_generation + 1
heap_segment* gc_heap::reserved_free_regions_sip[max_generation];

int         gc_heap::new_gen0_regions_in_plns = 0;
int         gc_heap::new_regions_in_prr = 0;
int         gc_heap::new_regions_in_threading = 0;

size_t      gc_heap::end_gen0_region_space = 0;

size_t      gc_heap::end_gen0_region_committed_space = 0;

size_t      gc_heap::gen0_pinned_free_space = 0;

bool        gc_heap::gen0_large_chunk_found = false;

size_t*     gc_heap::survived_per_region = nullptr;

size_t*     gc_heap::old_card_survived_per_region = nullptr;
#endif //USE_REGIONS

BOOL        gc_heap::blocking_collection = FALSE;

heap_segment* gc_heap::freeable_uoh_segment = 0;

uint64_t    gc_heap::time_bgc_last = 0;

// mark stack: top/bottom indices, capacity and storage
size_t      gc_heap::mark_stack_tos = 0;

size_t      gc_heap::mark_stack_bos = 0;

size_t      gc_heap::mark_stack_array_length = 0;

mark*       gc_heap::mark_stack_array = 0;

#if defined (_DEBUG) && defined (VERIFY_HEAP)
BOOL        gc_heap::verify_pinned_queue_p = FALSE;
#endif //_DEBUG && VERIFY_HEAP

uint8_t*    gc_heap::oldest_pinned_plug = 0;

size_t      gc_heap::num_pinned_objects = 0;

#ifdef FEATURE_LOH_COMPACTION
// LOH pinned queue: top/bottom indices, capacity and storage
size_t      gc_heap::loh_pinned_queue_tos = 0;

size_t      gc_heap::loh_pinned_queue_bos = 0;

size_t      gc_heap::loh_pinned_queue_length = 0;

mark*       gc_heap::loh_pinned_queue = 0;

BOOL        gc_heap::loh_compacted_p = FALSE;
#endif //FEATURE_LOH_COMPACTION

// background GC state (still inside the !MULTIPLE_HEAPS section)
#ifdef BACKGROUND_GC

EEThreadId  gc_heap::bgc_thread_id;

uint8_t*    gc_heap::background_written_addresses [array_size+2];

heap_segment* gc_heap::freeable_soh_segment = 0;

size_t      gc_heap::bgc_overflow_count = 0;

// per UOH generation sizes and allocation counters
size_t      gc_heap::bgc_begin_uoh_size[uoh_generation_count] = {};
size_t      gc_heap::bgc_uoh_current_size[uoh_generation_count] = {};
size_t      gc_heap::end_uoh_size[uoh_generation_count] = {};

size_t      gc_heap::uoh_a_no_bgc[uoh_generation_count] = {};
size_t      gc_heap::uoh_a_bgc_marking[uoh_generation_count] = {};
size_t      gc_heap::uoh_a_bgc_planning[uoh_generation_count] = {};
#ifdef BGC_SERVO_TUNING
size_t      gc_heap::bgc_maxgen_end_fl_size = 0;
#endif //BGC_SERVO_TUNING

size_t      gc_heap::background_soh_size_end_mark = 0;

size_t      gc_heap::background_soh_alloc_count = 0;

// background mark stack
uint8_t**   gc_heap::background_mark_stack_tos = 0;

uint8_t**   gc_heap::background_mark_stack_array = 0;

size_t      gc_heap::background_mark_stack_array_length = 0;

BOOL        gc_heap::processed_eph_overflow_p = FALSE;

// overflow tracking: a single flag with regions, address ranges with segments
#ifdef USE_REGIONS
BOOL        gc_heap::background_overflow_p = FALSE;
#else //USE_REGIONS
uint8_t*    gc_heap::background_min_overflow_address =0;

uint8_t*    gc_heap::background_max_overflow_address =0;

uint8_t*    gc_heap::background_min_soh_overflow_address =0;

uint8_t*    gc_heap::background_max_soh_overflow_address =0;

heap_segment* gc_heap::saved_overflow_ephemeral_seg = 0;

heap_segment* gc_heap::saved_sweep_ephemeral_seg = 0;

uint8_t*    gc_heap::saved_sweep_ephemeral_start = 0;
#endif //USE_REGIONS

Thread*     gc_heap::bgc_thread = 0;

uint8_t**   gc_heap::c_mark_list = 0;

size_t      gc_heap::c_mark_list_length = 0;

size_t      gc_heap::c_mark_list_index = 0;

gc_history_per_heap gc_heap::bgc_data_per_heap;

BOOL    gc_heap::bgc_thread_running;

CLRCriticalSection gc_heap::bgc_threads_timeout_cs;

#endif //BACKGROUND_GC

// mark list cursor for the heap
uint8_t**   gc_heap::mark_list;
uint8_t**   gc_heap::mark_list_index;
uint8_t**   gc_heap::mark_list_end;

#ifdef SNOOP_STATS
snoop_stats_data gc_heap::snoop_stat;
#endif //SNOOP_STATS

// The min values start at MAX_PTR and the max values at 0.
// NOTE(review): presumably so that the first recorded address replaces them - confirm at the use sites.
uint8_t*    gc_heap::min_overflow_address = MAX_PTR;

uint8_t*    gc_heap::max_overflow_address = 0;

uint8_t*    gc_heap::shigh = 0;

uint8_t*    gc_heap::slow = MAX_PTR;

#ifndef USE_REGIONS
// segments only: ordered plug / free space buckets and best fit state
size_t      gc_heap::ordered_free_space_indices[MAX_NUM_BUCKETS];

size_t      gc_heap::saved_ordered_free_space_indices[MAX_NUM_BUCKETS];

size_t      gc_heap::ordered_plug_indices[MAX_NUM_BUCKETS];

size_t      gc_heap::saved_ordered_plug_indices[MAX_NUM_BUCKETS];

BOOL        gc_heap::ordered_plug_indices_init = FALSE;

BOOL        gc_heap::use_bestfit = FALSE;

uint8_t*    gc_heap::bestfit_first_pin = 0;

BOOL        gc_heap::commit_end_of_seg = FALSE;

size_t      gc_heap::max_free_space_items = 0;

size_t      gc_heap::free_space_buckets = 0;

size_t      gc_heap::free_space_items = 0;

int         gc_heap::trimmed_free_space_index = 0;

size_t      gc_heap::total_ephemeral_plugs = 0;

seg_free_spaces* gc_heap::bestfit_seg = 0;

size_t      gc_heap::total_ephemeral_size = 0;
#endif //!USE_REGIONS

#ifdef HEAP_ANALYZE

size_t      gc_heap::internal_root_array_length = initial_internal_roots;

uint8_t**   gc_heap::internal_root_array = 0;

size_t      gc_heap::internal_root_array_index = 0;

BOOL        gc_heap::heap_analyze_success = TRUE;

uint8_t*    gc_heap::current_obj = 0;
size_t      gc_heap::current_obj_size = 0;

#endif //HEAP_ANALYZE

#ifdef GC_CONFIG_DRIVEN
size_t gc_heap::interesting_data_per_gc[max_idp_count];
//size_t gc_heap::interesting_data_per_heap[max_idp_count];
//size_t gc_heap::interesting_mechanisms_per_heap[max_im_count];
#endif //GC_CONFIG_DRIVEN
// closes the #ifndef MULTIPLE_HEAPS section
#endif //!MULTIPLE_HEAPS

// Definitions below do not depend on MULTIPLE_HEAPS.
no_gc_region_info gc_heap::current_no_gc_region_info;
FinalizerWorkItem* gc_heap::finalizer_work;
BOOL gc_heap::proceed_with_gc_p = FALSE;
GCSpinLock gc_heap::gc_lock;

#ifdef FEATURE_JAVAMARSHAL
uint8_t**   gc_heap::global_bridge_list;
size_t      gc_heap::num_global_bridge_objs;
#endif //FEATURE_JAVAMARSHAL

#ifdef BACKGROUND_GC
uint64_t gc_heap::total_uoh_a_last_bgc = 0;
#endif //BACKGROUND_GC

// Segment/region sizing globals.
// With regions: the global decommit and huge free region lists plus enable_special_regions_p.
// With segments: the ephemeral generation start size, the standby list and the minimum segment sizes.
#ifdef USE_REGIONS
region_free_list gc_heap::global_regions_to_decommit[count_free_region_kinds];
region_free_list gc_heap::global_free_huge_regions;
#else //USE_REGIONS
size_t gc_heap::eph_gen_starts_size = 0;
heap_segment* gc_heap::segment_standby_list;
#endif //USE_REGIONS
// a bool, so initialize it with false like the other bool members rather than with 0
bool          gc_heap::use_large_pages_p = false;
#ifdef HEAP_BALANCE_INSTRUMENTATION
size_t        gc_heap::last_gc_end_time_us = 0;
#endif //HEAP_BALANCE_INSTRUMENTATION
#ifdef USE_REGIONS
bool          gc_heap::enable_special_regions_p = false;
#else //USE_REGIONS
size_t        gc_heap::min_segment_size = 0;
size_t        gc_heap::min_uoh_segment_size = 0;
#endif //USE_REGIONS
size_t        gc_heap::min_segment_size_shr = 0;
size_t        gc_heap::soh_segment_size = 0;
size_t        gc_heap::segment_info_size = 0;

#ifdef GC_CONFIG_DRIVEN
// two counters.
// NOTE(review): presumably one for compacting and one for sweeping GCs - confirm at the use sites.
size_t gc_heap::compact_or_sweep_gcs[2];
#endif //GC_CONFIG_DRIVEN

#ifdef FEATURE_LOH_COMPACTION
BOOL                   gc_heap::loh_compaction_always_p = FALSE;
gc_loh_compaction_mode gc_heap::loh_compaction_mode = loh_compaction_default;
#endif //FEATURE_LOH_COMPACTION

// full GC approach/end events and related state
GCEvent gc_heap::full_gc_approach_event;

GCEvent gc_heap::full_gc_end_event;

uint32_t gc_heap::fgn_loh_percent = 0;

#ifdef BACKGROUND_GC
BOOL gc_heap::fgn_last_gc_was_concurrent = FALSE;
#endif //BACKGROUND_GC

VOLATILE(bool) gc_heap::full_gc_approach_event_set;

size_t gc_heap::full_gc_counts[gc_type_max];

bool gc_heap::maxgen_size_inc_p = false;

#ifndef USE_REGIONS
BOOL gc_heap::should_expand_in_full_gc = FALSE;
#endif //!USE_REGIONS

// Dynamic heap count state; only defined when DYNAMIC_HEAP_COUNT is on.
#ifdef DYNAMIC_HEAP_COUNT
int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default;
// Defined through the enclosing namespace like every other member in this file,
// instead of spelling out SVR:: explicitly.
gc_heap::dynamic_heap_count_data_t gc_heap::dynamic_heap_count_data;
size_t gc_heap::current_total_soh_stable_size = 0;
uint64_t gc_heap::last_suspended_end_time = 0;
uint64_t gc_heap::change_heap_count_time = 0;
uint64_t gc_heap::total_change_heap_count = 0;
uint64_t gc_heap::total_change_heap_count_time = 0;
size_t gc_heap::gc_index_full_gc_end = 0;
uint64_t gc_heap::before_distribute_free_regions_time = 0;
bool gc_heap::trigger_initial_gen2_p = false;

#ifdef BACKGROUND_GC
bool gc_heap::trigger_bgc_for_rethreading_p = false;
int gc_heap::total_bgc_threads = 0;
int gc_heap::last_bgc_n_heaps = 0;
int gc_heap::last_total_bgc_threads = 0;
#endif //BACKGROUND_GC

#ifdef STRESS_DYNAMIC_HEAP_COUNT
int gc_heap::heaps_in_this_gc = 0;
int gc_heap::bgc_to_ngc2_ratio = 0;
#endif //STRESS_DYNAMIC_HEAP_COUNT
#endif // DYNAMIC_HEAP_COUNT

// Provisional mode related stuff: flags and counters, all starting out false/0.
bool gc_heap::provisional_mode_triggered = false;
bool gc_heap::pm_trigger_full_gc = false;
size_t gc_heap::provisional_triggered_gc_count = 0;
size_t gc_heap::provisional_off_gc_count = 0;
size_t gc_heap::num_provisional_triggered = 0;
bool   gc_heap::pm_stress_on = false;

#ifdef HEAP_ANALYZE
BOOL        gc_heap::heap_analyze_enabled = FALSE;
#endif //HEAP_ANALYZE

// Definitions that only exist when MULTIPLE_HEAPS is not defined.
#ifndef MULTIPLE_HEAPS

// Each alloc list array has one element less than its NUM_*_ALIST constant.
alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST - 1];
alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST - 1];
alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST - 1];

#ifdef DOUBLY_LINKED_FL
// size we removed with no undo; only for recording purpose
size_t gc_heap::gen2_removed_no_undo = 0;
size_t gc_heap::saved_pinned_plug_index = INVALID_SAVED_PINNED_PLUG_INDEX;
#endif //DOUBLY_LINKED_FL

#ifdef FEATURE_EVENT_TRACE
etw_bucket_info gc_heap::bucket_info[NUM_GEN2_ALIST];
#endif //FEATURE_EVENT_TRACE

dynamic_data gc_heap::dynamic_data_table [total_generation_count];
gc_history_per_heap gc_heap::gc_data_per_heap;
size_t gc_heap::total_promoted_bytes = 0;
size_t gc_heap::finalization_promoted_bytes = 0;
size_t gc_heap::maxgen_pinned_compact_before_advance = 0;

uint8_t* gc_heap::alloc_allocated = 0;

size_t gc_heap::allocation_quantum = CLR_SIZE;

// one more space lock for SOH and one for UOH
GCSpinLock gc_heap::more_space_lock_soh;
GCSpinLock gc_heap::more_space_lock_uoh;

#ifdef BACKGROUND_GC
VOLATILE(int32_t) gc_heap::uoh_alloc_thread_count = 0;
#endif //BACKGROUND_GC

#ifdef SYNCHRONIZATION_STATS
unsigned int gc_heap::good_suspension = 0;
unsigned int gc_heap::bad_suspension = 0;
uint64_t     gc_heap::total_msl_acquire = 0;
unsigned int gc_heap::num_msl_acquired = 0;
unsigned int gc_heap::num_high_msl_acquire = 0;
unsigned int gc_heap::num_low_msl_acquire = 0;
#endif //SYNCHRONIZATION_STATS

size_t   gc_heap::alloc_contexts_used = 0;
size_t   gc_heap::soh_allocation_no_gc = 0;
size_t   gc_heap::loh_allocation_no_gc = 0;
bool     gc_heap::no_gc_oom_p = false;
heap_segment* gc_heap::saved_loh_segment_no_gc = 0;

#endif //!MULTIPLE_HEAPS

// More definitions that only exist when MULTIPLE_HEAPS is not defined.
#ifndef MULTIPLE_HEAPS

BOOL        gc_heap::gen0_bricks_cleared = FALSE;

int         gc_heap::gen0_must_clear_bricks = 0;

#ifdef FEATURE_PREMORTEM_FINALIZATION
CFinalize*  gc_heap::finalize_queue = 0;
#endif // FEATURE_PREMORTEM_FINALIZATION

#ifdef FEATURE_CARD_MARKING_STEALING
// Card marking chunk indices exist per SOH/LOH/POH, while the done flags
// exist per SOH/UOH only.
VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh;
VOLATILE(bool) gc_heap::card_mark_done_soh;
VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh;
VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh;
VOLATILE(bool) gc_heap::card_mark_done_uoh;
#endif // FEATURE_CARD_MARKING_STEALING

generation gc_heap::generation_table [total_generation_count];

// per heap recording arrays
size_t     gc_heap::interesting_data_per_heap[max_idp_count];

size_t     gc_heap::compact_reasons_per_heap[max_compact_reasons_count];

size_t     gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count];

size_t     gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count];

mark_queue_t gc_heap::mark_queue;

#ifdef USE_REGIONS
bool gc_heap::special_sweep_p = false;
#endif //USE_REGIONS

int gc_heap::loh_pinned_queue_decay = LOH_PIN_DECAY;

#endif //!MULTIPLE_HEAPS

/* end of per heap static initialization */

#ifdef USE_REGIONS
// all bits set.
// NOTE(review): presumably a sentinel meaning end_gen0_region_space has not been computed yet - confirm at the use sites.
const size_t uninitialized_end_gen0_region_space = (size_t)(-1);
#endif //USE_REGIONS

// budget smoothing: one entry per generation
size_t     gc_heap::smoothed_desired_total[total_generation_count];
/* end of static initialization */

// This is for methods that need to iterate through all SOH heap segments/regions.
// Returns the generation index such an iteration starts from: max_generation
// with segments, 0 with regions.
inline
int get_start_generation_index()
{
#ifndef USE_REGIONS
    return max_generation;
#else //!USE_REGIONS
    return 0;
#endif //!USE_REGIONS
}

// Returns the generation index an iteration over SOH segments/regions stops at:
// the condemned generation with segments, always 0 with regions.
inline
int get_stop_generation_index (int condemned_gen_number)
{
#ifndef USE_REGIONS
    return condemned_gen_number;
#else //!USE_REGIONS
    return 0;
#endif //!USE_REGIONS
}

// Logs the recorded condemn reasons for heap 'heap_num'.
// Only does work when DT_LOG is defined; otherwise it is a no-op.
void gen_to_condemn_tuning::print (int heap_num)
{
#ifndef DT_LOG
    UNREFERENCED_PARAMETER(heap_num);
#else //!DT_LOG
    dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition));

    // generation reasons: every reason fills the character at an even index
    // of str_reasons_gen
    dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header));
    for (int gen_reason = 0; gen_reason < gcrg_max; gen_reason++)
    {
        str_reasons_gen[gen_reason * 2] = get_gen_char (get_gen ((gc_condemn_reason_gen)gen_reason));
    }
    dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_gen));

    // condition reasons: same layout, in str_reasons_condition
    dprintf (DT_LOG_0, ("%s", record_condemn_reasons_condition_header));
    for (int cond_reason = 0; cond_reason < gcrc_max; cond_reason++)
    {
        str_reasons_condition[cond_reason * 2] = get_condition_char (get_condition ((gc_condemn_reason_condition)cond_reason));
    }
    dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_condition));
#endif //!DT_LOG
}

// Logs the before/after sizes, free list and free object space, and the
// in/survival/new allocation values recorded for generation 'gen_num' of heap 'heap_num'.
// Only does work when both SIMPLE_DPRINTF and DT_LOG are defined.
void gc_generation_data::print (int heap_num, int gen_num)
{
#if !(defined(SIMPLE_DPRINTF) && defined(DT_LOG))
    UNREFERENCED_PARAMETER(heap_num);
    UNREFERENCED_PARAMETER(gen_num);
#else //!(SIMPLE_DPRINTF && DT_LOG)
    dprintf (DT_LOG_0, ("[%2d]gen%d beg %zd fl %zd fo %zd end %zd fl %zd fo %zd in %zd p %zd np %zd alloc %zd",
                heap_num, gen_num,
                size_before, free_list_space_before, free_obj_space_before,
                size_after, free_list_space_after, free_obj_space_after,
                in,
                pinned_surv, npinned_surv,
                new_allocation));
#endif //!(SIMPLE_DPRINTF && DT_LOG)
}

// Records 'value' for the given per heap mechanism: the stored word is reset,
// mechanism_mask is set and then the bit at position 'value' is set.
// With DT_LOG the name of the mechanism and the description of 'value' are logged.
void gc_history_per_heap::set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value)
{
    uint32_t* mechanism = &mechanisms[mechanism_per_heap];
    *mechanism = 0;
    *mechanism |= mechanism_mask;
    // shift an unsigned 1: shifting the signed literal 1 by 31 or more bits is
    // not well defined, while the result for smaller values is the same
    *mechanism |= ((uint32_t)1 << value);

#ifdef DT_LOG
    gc_mechanism_descr* descr = &gc_mechanisms_descr[mechanism_per_heap];
    dprintf (DT_LOG_0, ("setting %s: %s",
            descr->name,
            (descr->descr)[value]));
#endif //DT_LOG
}

// Logs everything recorded for this heap: the data of each generation, the
// maxgen size info and every mechanism that has a value (get_mechanism >= 0).
// Only does work when both SIMPLE_DPRINTF and DT_LOG are defined.
void gc_history_per_heap::print()
{
#if defined(SIMPLE_DPRINTF) && defined(DT_LOG)
    const int gen_data_count = (int)(sizeof (gen_data) / sizeof (gc_generation_data));
    for (int gen_idx = 0; gen_idx < gen_data_count; gen_idx++)
    {
        gen_data[gen_idx].print (heap_index, gen_idx);
    }

    dprintf (DT_LOG_0, ("fla %zd flr %zd esa %zd ca %zd pa %zd paa %zd, rfle %d, ec %zd",
                    maxgen_size_info.free_list_allocated,
                    maxgen_size_info.free_list_rejected,
                    maxgen_size_info.end_seg_allocated,
                    maxgen_size_info.condemned_allocated,
                    maxgen_size_info.pinned_allocated,
                    maxgen_size_info.pinned_allocated_advance,
                    maxgen_size_info.running_free_list_efficiency,
                    extra_gen0_committed));

    for (int mech_idx = 0; mech_idx < max_mechanism_per_heap; mech_idx++)
    {
        const int mechanism = get_mechanism ((gc_mechanism_per_heap)mech_idx);

        // a negative value means there is nothing to report for this mechanism
        if (mechanism < 0)
        {
            continue;
        }

        gc_mechanism_descr* descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)mech_idx];
        dprintf (DT_LOG_0, ("[%2d]%s%s",
                    heap_index,
                    descr->name,
                    (descr->descr)[mechanism]));
    }
#endif //SIMPLE_DPRINTF && DT_LOG
}

// Dumps the global GC history to the DT log: one Y/N column per global
// mechanism, followed by the condemned generation, reason, pause mode,
// youngest generation budget and memory load.
// The body is compiled only when DT_LOG is defined.
void gc_history_global::print()
{
#ifdef DT_LOG
    // Pre-fill with column separators; the even positions are then replaced
    // with Y/N and the string is cut right after the last mechanism column.
    char str_settings[64];
    memset (str_settings, '|', sizeof (str_settings));
    str_settings[max_global_mechanisms_count*2] = 0;

    for (int mech = 0; mech < max_global_mechanisms_count; mech++)
    {
        if (get_mechanism_p ((gc_global_mechanism_p)mech))
        {
            str_settings[mech * 2] = 'Y';
        }
        else
        {
            str_settings[mech * 2] = 'N';
        }
    }

    dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|"));

    dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings));
    dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %zd(%d), memload %d",
                        condemned_generation,
                        str_gc_reasons[reason],
                        str_gc_pause_modes[pause_mode],
                        final_youngest_desired,
                        gen0_reduction_count,
                        mem_pressure));
#endif //DT_LOG
}

// Narrows a 64-bit time value to 32 bits, saturating at UINT32_MAX
// instead of wrapping when the value does not fit.
uint32_t limit_time_to_uint32 (uint64_t time)
{
    if (time > (uint64_t)UINT32_MAX)
    {
        return UINT32_MAX;
    }

    return (uint32_t)time;
}

// Fires the GCPerHeapHistory_V3 event for one heap's history data, then
// prints that data and its condemn reasons.
//   current_gc_data_per_heap - the per-heap history to report.
//   heap_num                 - heap number passed to the condemn reasons printer.
void gc_heap::fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per_heap, int heap_num)
{
    maxgen_size_increase& maxgen_info = current_gc_data_per_heap->maxgen_size_info;

    FIRE_EVENT(GCPerHeapHistory_V3,
               (void *)(maxgen_info.free_list_allocated),
               (void *)(maxgen_info.free_list_rejected),
               (void *)(maxgen_info.end_seg_allocated),
               (void *)(maxgen_info.condemned_allocated),
               (void *)(maxgen_info.pinned_allocated),
               (void *)(maxgen_info.pinned_allocated_advance),
               maxgen_info.running_free_list_efficiency,
               current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons0(),
               current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons1(),
               current_gc_data_per_heap->mechanisms[gc_heap_compact],
               current_gc_data_per_heap->mechanisms[gc_heap_expand],
               current_gc_data_per_heap->heap_index,
               (void *)(current_gc_data_per_heap->extra_gen0_committed),
               total_generation_count,
               (uint32_t)(sizeof (gc_generation_data)),
               (void *)&(current_gc_data_per_heap->gen_data[0]));

    // Debug output follows the event.
    current_gc_data_per_heap->print();
    current_gc_data_per_heap->gen_to_condemn_reasons.print (heap_num);
}

// Records the current GC settings into the global history data, prints it,
// and (when FEATURE_EVENT_TRACE is defined and informational events are
// enabled) fires the global history event, one per-heap history event for
// each heap, and - for a non-concurrent GC with LOH compaction - the LOH
// compact event.
void gc_heap::fire_pevents()
{
    gc_history_global* current_gc_data_global = get_gc_data_global();

    // Recording and printing happen regardless of whether events are enabled.
    settings.record (current_gc_data_global);
    current_gc_data_global->print();

#ifdef FEATURE_EVENT_TRACE
    if (!informational_event_enabled_p) return;

    // The number of time entries to report depends on the kind of GC:
    // concurrent, compacting or sweeping.
    uint32_t count_time_info = (settings.concurrent ? max_bgc_time_type :
                                (settings.compaction ? max_compact_time_type : max_sweep_time_type));

#ifdef BACKGROUND_GC
    uint64_t* time_info = (settings.concurrent ? bgc_time_info : gc_time_info);
#else
    uint64_t* time_info = gc_time_info;
#endif //BACKGROUND_GC
    // We don't want to have to fire the time info as 64-bit integers as there's no need to
    // so compress them down to 32-bit ones.
    // The narrowing is done in place: time_info_32 aliases the same buffer as
    // time_info. The 32-bit slot i never lies beyond the 64-bit slot i, so
    // writing it does not touch any 64-bit entry that is still to be read.
    // The original 64-bit values are overwritten by this loop.
    uint32_t* time_info_32 = (uint32_t*)time_info;
    for (uint32_t i = 0; i < count_time_info; i++)
    {
        time_info_32[i] = limit_time_to_uint32 (time_info[i]);
    }

    FIRE_EVENT(GCGlobalHeapHistory_V4,
               current_gc_data_global->final_youngest_desired,
               current_gc_data_global->num_heaps,
               current_gc_data_global->condemned_generation,
               current_gc_data_global->gen0_reduction_count,
               current_gc_data_global->reason,
               current_gc_data_global->global_mechanisms_p,
               current_gc_data_global->pause_mode,
               current_gc_data_global->mem_pressure,
               current_gc_data_global->gen_to_condemn_reasons.get_reasons0(),
               current_gc_data_global->gen_to_condemn_reasons.get_reasons1(),
               count_time_info,
               (uint32_t)(sizeof (uint32_t)),
               (void*)time_info_32);

#ifdef MULTIPLE_HEAPS
    // One per-heap history event for every heap.
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
        gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap();
        fire_per_heap_hist_event (current_gc_data_per_heap, hp->heap_number);
    }
#else
    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
    fire_per_heap_hist_event (current_gc_data_per_heap, heap_number);
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_LOH_COMPACTION
    if (!settings.concurrent && settings.loh_compaction)
    {
        // Not every heap will compact LOH, the ones that didn't will just have 0s
        // in its info.
        FIRE_EVENT(GCLOHCompact,
                   (uint16_t)get_num_heaps(),
                   (uint32_t)(sizeof (etw_loh_compact_info)),
                   (void *)loh_compact_info);
    }
#endif //FEATURE_LOH_COMPACTION
#endif //FEATURE_EVENT_TRACE
}

// This fires the amount of total committed in use, in free and on the decommit list.
// It's fired on entry and exit of each blocking GC and on entry of each BGC (not firing this on exit of a BGC
// because EE is not suspended then). On entry it's fired after the GCStart event, on exit it's fired before the GCStop event.
// Computes the committed byte totals via compute_committed_bytes and reports
// them through GCEventFireCommittedUsage_V1 as: in use (soh + loh + poh),
// in global decommit, in free, in global free and bookkeeping.
// Does nothing unless FEATURE_EVENT_TRACE is defined and the GCMarkWithType
// event is enabled.
void gc_heap::fire_committed_usage_event()
{
#ifdef FEATURE_EVENT_TRACE
    if (!EVENT_ENABLED (GCMarkWithType)) return;

    size_t total_committed = 0;
    size_t committed_decommit = 0;
    size_t committed_free = 0;
    size_t committed_bookkeeping = 0;
    size_t recomputed_total_committed;
    size_t recomputed_total_committed_bookkeeping;
    size_t committed_by_bucket[recorded_committed_bucket_counts];
    compute_committed_bytes(total_committed, committed_decommit, committed_free,
                            committed_bookkeeping, recomputed_total_committed, recomputed_total_committed_bookkeeping,
                            committed_by_bucket);

    size_t in_use = committed_by_bucket[soh] + committed_by_bucket[loh] + committed_by_bucket[poh];
#ifdef USE_REGIONS
    size_t in_global_decommit = committed_decommit;
    size_t in_free = committed_free;
    // Whatever is left in the free bucket after removing the free and
    // decommit amounts is reported as global free.
    size_t in_global_free = committed_by_bucket[recorded_committed_free_bucket] - in_free - in_global_decommit;
#else
    assert (committed_decommit == 0);
    assert (committed_free == 0);
    size_t in_global_decommit = 0;
    size_t in_free = 0;
    size_t in_global_free = 0;
    // For segments, bookkeeping committed does not include mark array
#endif //USE_REGIONS
    size_t bookkeeping = committed_bookkeeping;

    GCEventFireCommittedUsage_V1 (
        (uint64_t)in_use,
        (uint64_t)in_global_decommit,
        (uint64_t)in_free,
        (uint64_t)in_global_free,
        (uint64_t)bookkeeping
    );
#endif //FEATURE_EVENT_TRACE
}

// Decides, for the given tuning point, whether ephemeral space is low.
// For most tuning points this is the negation of ephemeral_gen_fit_p. For
// tuning_deciding_promote_ephemeral (segments only) it checks whether the
// planned ephemeral size plus the approximate new gen0 allocation exceeds
// what a normal SOH segment can hold.
// An unsupported tuning point asserts and yields FALSE.
inline BOOL
gc_heap::dt_low_ephemeral_space_p (gc_tuning_point tp)
{
    switch (tp)
    {
        case tuning_deciding_condemned_gen:
#ifndef USE_REGIONS
        case tuning_deciding_compaction:
        case tuning_deciding_expansion:
#endif //USE_REGIONS
        case tuning_deciding_full_gc:
            return (!ephemeral_gen_fit_p (tp));

#ifndef USE_REGIONS
        case tuning_deciding_promote_ephemeral:
        {
            size_t gen0_budget = approximate_new_allocation();
            ptrdiff_t planned_eph_size = total_ephemeral_size;

            dprintf (GTC_LOG, ("h%d: plan eph size is %zd, new gen0 is %zd",
                heap_number, planned_eph_size, gen0_budget));
            // If we were in no_gc_region we could have allocated a larger than normal segment,
            // and the next seg we allocate will be a normal sized seg so if we can't fit the new
            // ephemeral generations there, do an ephemeral promotion.
            return ((soh_segment_size - segment_info_size) < (planned_eph_size + gen0_budget));
        }
#endif //USE_REGIONS

        default:
            assert (!"invalid tuning reason");
            return FALSE;
    }
}

// Decides whether fragmentation is high enough to matter for the given
// tuning point. Only tuning_deciding_condemned_gen is handled; every other
// tuning point yields FALSE.
//   tp         - the tuning point being evaluated.
//   gen_number - the generation whose dynamic data supplies the limits.
//   elevate_p  - when TRUE, max_generation's fragmentation is compared with
//                the max size of gen_number; otherwise gen_number's unusable
//                fragmentation is checked against its fragmentation limit and
//                its fragmentation burden limit.
BOOL
gc_heap::dt_high_frag_p (gc_tuning_point tp,
                         int gen_number,
                         BOOL elevate_p)
{
    BOOL ret = FALSE;

    switch (tp)
    {
        case tuning_deciding_condemned_gen:
        {
            dynamic_data* dd = dynamic_data_of (gen_number);
            float fragmentation_burden = 0;

            if (elevate_p)
            {
                dynamic_data* dd_maxgen = dynamic_data_of (max_generation);
                ret = (dd_fragmentation (dd_maxgen) >= dd_max_size(dd));
                if (ret)
                {
                    // Log the fragmentation value the decision was actually based on
                    // (max_generation's), not the one of gen_number.
                    dprintf (6666, ("h%d: frag is %zd, max size is %zd",
                        heap_number, dd_fragmentation (dd_maxgen), dd_max_size(dd)));
                }
            }
            else
            {
#ifndef MULTIPLE_HEAPS
                // With a single heap, a max generation whose fragmentation is more
                // than 65% of its size is considered highly fragmented right away.
                if (gen_number == max_generation)
                {
                    size_t maxgen_size = generation_size (max_generation);
                    float frag_ratio = (maxgen_size ? ((float)dd_fragmentation (dynamic_data_of (max_generation)) / (float)maxgen_size) : 0.0f);
                    if (frag_ratio > 0.65)
                    {
                        dprintf (GTC_LOG, ("g2 FR: %d%%", (int)(frag_ratio*100)));
                        return TRUE;
                    }
                }
#endif //!MULTIPLE_HEAPS
                // Both the absolute limit and the burden (ratio to the generation
                // size) limit must be exceeded.
                size_t fr = generation_unusable_fragmentation (generation_of (gen_number), heap_number);
                ret = (fr > dd_fragmentation_limit(dd));
                if (ret)
                {
                    size_t gen_size = generation_size (gen_number);
                    fragmentation_burden = (gen_size ? ((float)fr / (float)gen_size) : 0.0f);
                    ret = (fragmentation_burden > dd_v_fragmentation_burden_limit (dd));
                }
                if (ret)
                {
                    dprintf (6666, ("h%d: gen%d, frag is %zd, alloc effi: %zu%%, unusable frag is %zd, ratio is %d",
                        heap_number, gen_number, dd_fragmentation (dd),
                        generation_allocator_efficiency_percent (generation_of (gen_number)),
                        fr, (int)(fragmentation_burden * 100)));
                }
            }
            break;
        }
        default:
            break;
    }

    return ret;
}

// For tuning_deciding_condemned_gen, tells whether the estimated reclaimable
// space of max_generation reaches the minimum reclaim fragmentation
// threshold for the current number of heaps.
// Any other tuning point yields FALSE; any generation other than
// max_generation asserts and yields FALSE.
inline BOOL
gc_heap::dt_estimate_reclaim_space_p (gc_tuning_point tp, int gen_number)
{
    if (tp != tuning_deciding_condemned_gen)
    {
        return FALSE;
    }

    if (gen_number != max_generation)
    {
        assert (0);
        return FALSE;
    }

    size_t est_maxgen_free = estimated_reclaim (gen_number);

#ifdef MULTIPLE_HEAPS
    uint32_t heap_count = gc_heap::n_heaps;
#else
    uint32_t heap_count = 1;
#endif //MULTIPLE_HEAPS

    size_t min_frag_th = min_reclaim_fragmentation_threshold (heap_count);
    dprintf (GTC_LOG, ("h%d, min frag is %zd", heap_number, min_frag_th));
    return (est_maxgen_free >= min_frag_th);
}

// DTREVIEW: Right now we only estimate gen2 fragmentation.
// On 64-bit though we should consider gen1 or even gen0 fragmentation as
// well.
//
// For tuning_deciding_condemned_gen, estimates the fragmentation of
// max_generation (its current fragmentation plus the already consumed part
// of its budget scaled by the current fragmentation ratio) and tells whether
// that estimate reaches the minimum high fragmentation threshold for
// `available_mem` and the current number of heaps.
// Any other tuning point yields FALSE; any generation other than
// max_generation asserts and yields FALSE.
inline BOOL
gc_heap::dt_estimate_high_frag_p (gc_tuning_point tp, int gen_number, uint64_t available_mem)
{
    if (tp != tuning_deciding_condemned_gen)
    {
        return FALSE;
    }

    if (gen_number != max_generation)
    {
        assert (0);
        return FALSE;
    }

    dynamic_data* dd = dynamic_data_of (gen_number);
    const size_t frag = dd_fragmentation (dd);
    const size_t current_size = dd_current_size (dd);

    float est_frag_ratio;
    if (current_size == 0)
    {
        est_frag_ratio = 1;
    }
    else if ((frag == 0) || (frag + current_size == 0))
    {
        est_frag_ratio = 0;
    }
    else
    {
        est_frag_ratio = (float)frag / (float)(frag + current_size);
    }

    size_t est_frag = (frag + (size_t)((dd_desired_allocation (dd) - dd_new_allocation (dd)) * est_frag_ratio));
    dprintf (GTC_LOG, ("h%d: gen%d: current_size is %zd, frag is %zd, est_frag_ratio is %d%%, estimated frag is %zd",
        heap_number,
        gen_number,
        current_size,
        frag,
        (int)(est_frag_ratio * 100),
        est_frag));

#ifdef MULTIPLE_HEAPS
    uint32_t heap_count = gc_heap::n_heaps;
#else
    uint32_t heap_count = 1;
#endif //MULTIPLE_HEAPS

    uint64_t min_frag_th = min_high_fragmentation_threshold(available_mem, heap_count);
    //dprintf (GTC_LOG, ("h%d, min frag is %zd", heap_number, min_frag_th));
    return (est_frag >= min_frag_th);
}

// For tuning_deciding_condemned_gen, tells whether generation_skip_ratio is
// below generation_skip_ratio_threshold. Any other tuning point yields FALSE.
inline BOOL
gc_heap::dt_low_card_table_efficiency_p (gc_tuning_point tp)
{
    if (tp == tuning_deciding_condemned_gen)
    {
        /* promote into max-generation if the card table has too many
        * generation faults besides the n -> 0
        */
        return (generation_skip_ratio < generation_skip_ratio_threshold);
    }

    return FALSE;
}

// Tells whether memory load is high: either the memory load recorded on GC
// entry reaches high_memory_load_th, or g_low_memory_status is set.
inline BOOL
gc_heap::dt_high_memory_load_p()
{
    if (settings.entry_memory_load >= high_memory_load_th)
    {
        return TRUE;
    }

    return (g_low_memory_status ? TRUE : FALSE);
}

// Tells whether `add` lies in the half-open range
// [heap_segment_mem (seg), heap_segment_reserved (seg)).
inline BOOL
in_range_for_segment(uint8_t* add, heap_segment* seg)
{
    if (add < heap_segment_mem (seg))
    {
        return FALSE;
    }

    return (add < heap_segment_reserved (seg));
}

#ifdef FEATURE_BASICFREEZE
// The array we allocate is organized as follows:
// The 0th element holds the address of the last array we allocated (it is
// used as a link, not as a bucket).
// Starting from the 1st element are the segment addresses; that's
// what buckets() returns.
//
// One element of that array: an address paired with a value.
struct bk
{
    uint8_t* add;   // the address (for element 0: the link to the previous array)
    size_t val;     // the value stored for this address
};

// A table of (address, value) pairs kept sorted by address.
// The bucket array always ends with an entry whose address is MAX_PTR (see
// clear()), and that entry is included in `count`.
// When the table grows, arrays that were replaced are chained on `old_slots`
// until delete_old_slots() frees them.
class sorted_table
{
private:
    ptrdiff_t size;     // number of buckets the current array can hold
    ptrdiff_t count;    // number of buckets in use, including the MAX_PTR entry
    bk* slots;          // current array; element 0 is the link, buckets follow
    // The usable buckets: everything after the link element.
    bk* buckets() { return (slots + 1); }
    // The link stored in element 0 of an array.
    uint8_t*& last_slot (bk* arr) { return arr[0].add; }
    bk* old_slots;      // head of the chain of replaced arrays
public:
    // Allocates a table together with its initial bucket array; returns 0 on failure.
    static  sorted_table* make_sorted_table ();
    // Inserts (add, val) at its sorted position; requires count < size.
    BOOL    insert (uint8_t* add, size_t val);
    // Finds the entry with the greatest address <= add. On success `add` is set to
    // that entry's address and its value is returned; otherwise `add` is set to 0
    // and 0 is returned.
    size_t  lookup (uint8_t*& add);
    // Removes the entry with the greatest address <= add.
    void    remove (uint8_t* add);
    // Resets the table to hold only the MAX_PTR entry.
    void    clear ();
    // Frees the current array (if it was separately allocated) and all replaced arrays.
    void    delete_sorted_table();
    // Frees all arrays on the old_slots chain.
    void    delete_old_slots();
    // Pushes a replaced array onto the old_slots chain.
    void    enqueue_old_slot(bk* sl);
    // Grows the array when it is full; returns FALSE if the allocation fails.
    BOOL    ensure_space_for_insert();
};

// Creates a sorted_table with room for 400 buckets. The table object and its
// initial array live in one allocation, with the array placed right after
// the object. Returns null if the allocation fails.
sorted_table*
sorted_table::make_sorted_table ()
{
    const size_t initial_size = 400;

    // One extra bk is allocated to store the older slot address.
    const size_t alloc_bytes = sizeof (sorted_table) + (initial_size + 1) * sizeof (bk);
    sorted_table* table = (sorted_table*)new (nothrow) char [alloc_bytes];
    if (table == nullptr)
    {
        return nullptr;
    }

    table->size = initial_size;
    table->slots = (bk*)(table + 1);
    table->old_slots = nullptr;
    table->clear();
    return table;
}

void
sorted_table::delete_sorted_table()
{
    if (slots != (bk*)(this+1))
    {
        delete[] slots;
    }
    delete_old_slots();
}
void
sorted_table::delete_old_slots()
{
    uint8_t* sl = (uint8_t*)old_slots;
    while (sl)
    {
        uint8_t* dsl = sl;
        sl = last_slot ((bk*)sl);
        delete[] dsl;
    }
    old_slots = 0;
}
// Pushes the array `sl` onto the front of the old_slots chain.
void
sorted_table::enqueue_old_slot(bk* sl)
{
    // Link the array to the current head first, then make it the new head.
    last_slot (sl) = (uint8_t*)old_slots;
    old_slots = sl;
}

// Binary-searches for the entry with the greatest address that is <= add.
// On success `add` is overwritten with that entry's address and the entry's
// value is returned. If there is no such entry `add` is set to 0 and 0 is
// returned.
inline
size_t
sorted_table::lookup (uint8_t*& add)
{
    bk* entries = buckets();
    ptrdiff_t lo = 0;
    ptrdiff_t hi = (count-1);

    while (lo <= hi)
    {
        const ptrdiff_t probe = ((lo + hi)/2);
        ptrdiff_t hit = -1;

        if (entries[probe].add > add)
        {
            // The probe is past `add`; its predecessor matches if it is not.
            if ((probe > 0) && (entries[probe-1].add <= add))
            {
                hit = probe - 1;
            }
            else
            {
                hi = probe - 1;
            }
        }
        else if (entries[probe+1].add > add)
        {
            // The probe is at or before `add` and its successor is past it.
            hit = probe;
        }
        else
        {
            lo = probe + 1;
        }

        if (hit >= 0)
        {
            add = entries[hit].add;
            return entries[hit].val;
        }
    }

    add = 0;
    return 0;
}

// Makes sure there is room for one more insert. When the table is full a new
// array 1.5x the current size is allocated, the buckets are copied over and
// the replaced array is put on the old_slots chain (unless it is the initial
// array that sits right after the table object).
// Returns FALSE if the new array cannot be allocated; the table is left
// unchanged in that case. Returns TRUE otherwise.
BOOL
sorted_table::ensure_space_for_insert()
{
    if (count == size)
    {
        // Compute the new capacity locally and only publish it once the
        // allocation has succeeded. Updating `size` up front would make a
        // failed grow look like a successful one to later callers and let
        // insert() write past the end of the old array.
        ptrdiff_t new_size = (size * 3)/2;
        assert((new_size * sizeof (bk)) > 0);
        bk* res = (bk*)new (nothrow) char [(new_size + 1) * sizeof (bk)];
        assert (res);
        if (!res)
            return FALSE;

        // Element 0 is the link; the buckets start at element 1.
        last_slot (res) = 0;
        memcpy (((bk*)res + 1), buckets(), count * sizeof (bk));
        bk* last_old_slots = slots;
        slots = res;
        size = new_size;
        if (last_old_slots != (bk*)(this + 1))
            enqueue_old_slot (last_old_slots);
    }
    return TRUE;
}

// Inserts (add, val) so that the buckets stay sorted by address. The caller
// must have made room beforehand: the table must not be full.
// Always returns TRUE. Falling out of the search without finding an
// insertion point asserts and inserts nothing.
BOOL
sorted_table::insert (uint8_t* add, size_t val)
{
    // There must be room for the new entry.
    assert (count < size);

    bk* entries = buckets();
    ptrdiff_t lo = 0;
    ptrdiff_t hi = (count-1);

    while (lo <= hi)
    {
        const ptrdiff_t probe = ((lo + hi)/2);
        ptrdiff_t insert_at = -1;

        if (entries[probe].add > add)
        {
            // Insert before the probe if nothing before it is past `add`.
            if ((probe == 0) || (entries[probe-1].add <= add))
            {
                insert_at = probe;
            }
            else
            {
                hi = probe - 1;
            }
        }
        else if (entries[probe+1].add > add)
        {
            // Insert between the probe and its successor.
            insert_at = probe + 1;
        }
        else
        {
            lo = probe + 1;
        }

        if (insert_at >= 0)
        {
            // Shift the tail up by one to open the slot.
            for (ptrdiff_t k = count; k > insert_at; k--)
            {
                entries[k] = entries[k-1];
            }
            entries[insert_at].add = add;
            entries[insert_at].val = val;
            count++;
            return TRUE;
        }
    }

    assert (0);
    return TRUE;
}

// Removes the entry with the greatest address that is <= add by shifting
// the following entries down by one. Asserts if there is no such entry.
void
sorted_table::remove (uint8_t* add)
{
    ptrdiff_t high = (count-1);
    ptrdiff_t low = 0;
    ptrdiff_t ti;
    ptrdiff_t mid;
    bk* buck = buckets();
    while (low <= high)
    {
        mid = ((low + high)/2);
        ti = mid;
        if (buck[ti].add > add)
        {
            // Guard against ti == 0, as lookup() does: buck[-1] is the link
            // element of the array, not a bucket, and must be neither
            // compared against nor overwritten by the shift below.
            if ((ti > 0) && (buck[ti-1].add <= add))
            {
                // Entry ti-1 is the one to remove.
                for (ptrdiff_t k = ti; k < count; k++)
                    buck[k-1] = buck[k];
                count--;
                return;
            }
            high = mid - 1;
        }
        else
        {
            if (buck[ti+1].add > add)
            {
                // Entry ti is the one to remove.
                for (ptrdiff_t k = ti+1; k < count; k++)
                    buck[k-1] = buck[k];
                count--;
                return;
            }
            low = mid + 1;
        }
    }
    assert (0);
}

void
sorted_table::clear()
{
    count = 1;
    buckets()[0].add = MAX_PTR;
}
#endif //FEATURE_BASICFREEZE

#ifdef USE_REGIONS
// Returns the address shifted right by gc_heap::min_segment_size_shr. The
// result is "skewed": it is not relative to g_gc_lowest_address.
// The address is asserted to be within [g_gc_lowest_address, g_gc_highest_address].
inline
size_t get_skewed_basic_region_index_for_address (uint8_t* address)
{
    assert ((g_gc_lowest_address <= address) && (address <= g_gc_highest_address));
    return ((size_t)address >> gc_heap::min_segment_size_shr);
}

// Returns the basic region index of `address` relative to the basic region
// index of g_gc_lowest_address.
inline
size_t get_basic_region_index_for_address (uint8_t* address)
{
    const size_t skewed_index = get_skewed_basic_region_index_for_address (address);
    const size_t skewed_lowest_index = get_skewed_basic_region_index_for_address (g_gc_lowest_address);
    return (skewed_index - skewed_lowest_index);
}

// Go from a random address to its region info. The random address could be
// in one of the basic regions of a larger region so we need to check for that:
// when the first field of the address's own entry is negative, it is applied
// as an offset (in entries) to get to the entry that is returned.
inline
heap_segment* get_region_info_for_address (uint8_t* address)
{
    const size_t own_index = (size_t)address >> gc_heap::min_segment_size_shr;
    heap_segment* own_entry = (heap_segment*)&seg_mapping_table[own_index];

    const ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated (own_entry);
    if (first_field >= 0)
    {
        return own_entry;
    }

    return ((heap_segment*)(&seg_mapping_table[own_index + first_field]));
}

// Go from the physical start of a region to its region info, which is the
// seg_mapping_table entry indexed by the start address shifted right by
// gc_heap::min_segment_size_shr.
inline
heap_segment* get_region_info (uint8_t* region_start)
{
    const size_t index = (size_t)region_start >> gc_heap::min_segment_size_shr;
    heap_segment* info = (heap_segment*)&seg_mapping_table[index];
    dprintf (REGIONS_LOG, ("region info for region %p is at %zd, %zx (alloc: %p)",
        region_start, index, (size_t)info, heap_segment_allocated (info)));
    return info;
}

// Go from the actual region info to its region start, which lies
// sizeof (aligned_plug_and_gap) bytes before heap_segment_mem.
inline
uint8_t* get_region_start (heap_segment* region_info)
{
    return (heap_segment_mem (region_info) - sizeof (aligned_plug_and_gap));
}

// Returns the number of bytes from the region start to heap_segment_reserved.
inline
size_t get_region_size (heap_segment* region_info)
{
    uint8_t* region_begin = get_region_start (region_info);
    uint8_t* region_reserved = heap_segment_reserved (region_info);
    return (size_t)(region_reserved - region_begin);
}

// Returns the number of bytes from the region start to heap_segment_committed.
inline
size_t get_region_committed_size (heap_segment* region)
{
    uint8_t* region_begin = get_region_start (region);
    return (heap_segment_committed (region) - region_begin);
}

// A region is considered free when its heap_segment_allocated is null.
inline bool is_free_region (heap_segment* region)
{
    uint8_t* allocated = heap_segment_allocated (region);
    return (allocated == nullptr);
}

// Initializes the allocator over [start, end) with the given unit alignment.
// The managed range is `start` aligned up and `end` aligned down; a map with
// one uint32_t per unit in that range is allocated and zeroed.
//   lowest / highest - on success receive the aligned start and end.
// Returns false (after logging to the host) if the map cannot be allocated;
// `lowest` and `highest` are not written in that case.
bool region_allocator::init (uint8_t* start, uint8_t* end, size_t alignment, uint8_t** lowest, uint8_t** highest)
{
    region_alignment = alignment;
    large_region_alignment = LARGE_REGION_FACTOR * alignment;

    global_region_start = (uint8_t*)align_region_up ((size_t)start);
    global_region_end = (uint8_t*)align_region_down ((size_t)end);

    // Nothing is used yet: the left side grows up from the start and the
    // right side grows down from the end.
    global_region_left_used = global_region_start;
    global_region_right_used = global_region_end;
    num_left_used_free_units = 0;
    num_right_used_free_units = 0;

    // Note: I am allocating a map that covers the whole reserved range.
    // We can optimize it to only cover the current heap range.
    const size_t map_units = (global_region_end - global_region_start) / region_alignment;
    total_free_units = (uint32_t)map_units;

    uint32_t* map = new (nothrow) uint32_t[map_units];
    if (map == nullptr)
    {
        log_init_error_to_host ("global region allocator failed to allocate %zd bytes during init", (map_units * sizeof (uint32_t)));
        return false;
    }

    memset (map, 0, sizeof (uint32_t) * map_units);

    region_map_left_start = map;
    region_map_left_end = region_map_left_start;

    region_map_right_start = map + map_units;
    region_map_right_end = region_map_right_start;

    dprintf (REGIONS_LOG, ("start: %zx, end: %zx, total %zdmb(alignment: %zdmb), map units %zd",
        (size_t)start, (size_t)end,
        (size_t)((end - start) / 1024 / 1024),
        (alignment / 1024 / 1024),
        map_units));

    *lowest = global_region_start;
    *highest = global_region_end;
    return true;
}

// Converts a pointer into the unit map to the address of the unit it
// describes.
inline
uint8_t* region_allocator::region_address_of (uint32_t* map_index)
{
    const ptrdiff_t unit_number = map_index - region_map_left_start;
    return (global_region_start + (unit_number * region_alignment));
}

// Converts an address to a pointer to the unit map entry of the unit that
// contains it.
inline
uint32_t* region_allocator::region_map_index_of (uint8_t* address)
{
    const ptrdiff_t byte_offset = address - global_region_start;
    return (region_map_left_start + (byte_offset / region_alignment));
}

// Marks the `num_units` units starting at `index_start` as one busy block by
// writing the unit count into the block's first and last map entries.
// The allocator lock must be held.
void region_allocator::make_busy_block (uint32_t* index_start, uint32_t num_units)
{
#ifdef _DEBUG
    dprintf (REGIONS_LOG, ("MBB[B: %zd] %d->%d", (size_t)num_units, (int)(index_start - region_map_left_start), (int)(index_start - region_map_left_start + num_units)));
#endif //_DEBUG
    ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock);

    uint32_t* last_entry = index_start + (num_units - 1);
    *last_entry = num_units;
    *index_start = num_units;
}

// Marks the `num_units` units starting at `index_start` as one free block by
// writing the unit count combined with region_alloc_free_bit into the
// block's first and last map entries. The allocator lock must be held.
void region_allocator::make_free_block (uint32_t* index_start, uint32_t num_units)
{
#ifdef _DEBUG
    dprintf (REGIONS_LOG, ("MFB[F: %zd] %d->%d", (size_t)num_units, (int)(index_start - region_map_left_start), (int)(index_start - region_map_left_start + num_units)));
#endif //_DEBUG
    ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock);

    uint32_t* last_entry = index_start + (num_units - 1);
    const uint32_t free_tag = region_alloc_free_bit | num_units;
    *last_entry = free_tag;
    *index_start = free_tag;
}

// Debug helper: logs every block of the unit map and cross-checks the free
// unit counters. The allocator lock must be held. Apart from the lock
// assertion, the body is compiled only in _DEBUG builds.
//   msg - text included in the header line of the dump.
void region_allocator::print_map (const char* msg)
{
    ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock);
#ifdef _DEBUG
    const char* heap_type = "UH";
    dprintf (REGIONS_LOG, ("[%s]-----printing----%s", heap_type, msg));

    uint32_t* current_index = region_map_left_start;
    uint32_t* end_index = region_map_left_end;
    uint32_t  count_free_units = 0;

    // Two passes: the first walks the left used range, the second walks the
    // right used range (the range is switched at the bottom of the loop body).
    for (int i = 0; i < 2; i++)
    {
        while (current_index < end_index)
        {
            uint32_t current_val = *current_index;
            uint32_t current_num_units = get_num_units (current_val);
            bool free_p = is_unit_memory_free (current_val);

            dprintf (REGIONS_LOG, ("[%s][%s: %zd]%d->%d", heap_type, (free_p ? "F" : "B"), (size_t)current_num_units,
                (int)(current_index - region_map_left_start),
                (int)(current_index - region_map_left_start + current_num_units)));

            if (free_p)
            {
                count_free_units += current_num_units;
            }

            // Jump to the first entry of the next block.
            current_index += current_num_units;
        }
        current_index = region_map_right_start;
        end_index = region_map_right_end;
        if (i == 0)
        {
            // After the left pass only the left free units have been counted.
            assert (count_free_units == num_left_used_free_units);
        }
        else
        {
            // After the right pass the count covers both used ranges.
            assert (count_free_units == num_left_used_free_units + num_right_used_free_units);
        }
    }

    // The units between the two used ranges have never been handed out and
    // count as free as well.
    count_free_units += (uint32_t)(region_map_right_start - region_map_left_end);
    assert(count_free_units == total_free_units);

    uint32_t total_regions = (uint32_t)((global_region_end - global_region_start) / region_alignment);

    dprintf (REGIONS_LOG, ("[%s]-----end printing----[%d total, left used %zd (free: %d), right used %zd (free: %d)]\n", heap_type, total_regions,
        (region_map_left_end - region_map_left_start), num_left_used_free_units, (region_map_right_end - region_map_right_start), num_right_used_free_units));
#endif //_DEBUG
}

// Allocates `num_units` units from the never-used gap between the left and
// the right used ranges. A forward allocation takes them from the left edge
// of the gap, a backward allocation from the right edge.
// Returns the address of the allocated block, or null if the gap cannot hold
// `num_units` units. The allocator lock must be held.
uint8_t* region_allocator::allocate_end (uint32_t num_units, allocate_direction direction)
{
    ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock);

    if (global_region_left_used >= global_region_right_used)
    {
        return nullptr;
    }

    const size_t gap_bytes = global_region_right_used - global_region_left_used;
    if ((gap_bytes / region_alignment) < num_units)
    {
        return nullptr;
    }

    if (direction == allocate_forward)
    {
        make_busy_block (region_map_left_end, num_units);
        region_map_left_end += num_units;

        uint8_t* block_start = global_region_left_used;
        global_region_left_used += num_units * region_alignment;
        return block_start;
    }

    assert(direction == allocate_backward);
    region_map_right_start -= num_units;
    make_busy_block (region_map_right_start, num_units);
    global_region_right_used -= num_units * region_alignment;
    return global_region_right_used;
}

// Acquires the allocator lock by spinning. The lock word is swapped from -1
// to 0 to take the lock; while that fails the thread spins until the lock
// word becomes negative again and then retries.
// In _DEBUG builds the holding thread is recorded once the lock is taken.
void region_allocator::enter_spin_lock()
{
    while (Interlocked::CompareExchange(&region_allocator_lock.lock, 0, -1) >= 0)
    {
        while (region_allocator_lock.lock >= 0)
        {
            YieldProcessor();           // indicate to the processor that we are spinning
        }
    }
#ifdef _DEBUG
    region_allocator_lock.holding_thread = GCToEEInterface::GetThread();
#endif //_DEBUG
}

// Releases the allocator lock by setting the lock word back to -1.
void region_allocator::leave_spin_lock()
{
#ifdef _DEBUG
    // Reset the recorded holder before the lock word is released.
    region_allocator_lock.holding_thread = (Thread*)-1;
#endif //_DEBUG
    region_allocator_lock.lock = -1;
}

// Allocates `num_units` contiguous units and returns the address of the
// block, or null if no space was found. The allocator lock is taken for the
// duration of the call.
//
// The used range for the requested direction (left range for
// allocate_forward, right range for allocate_backward) is searched first for
// a free block that is large enough, provided the free unit counter of that
// range says there could be one. A block that is larger than needed is split
// and the remainder stays free. If nothing is found, the units are taken
// from the unused gap via allocate_end.
//
//   fn - optional callback. It is invoked only when the units came from
//        allocate_end, and it is passed global_region_left_used; if it
//        returns false the allocation is undone with delete_region_impl and
//        null is returned.
uint8_t* region_allocator::allocate (uint32_t num_units, allocate_direction direction, region_allocator_callback_fn fn)
{
    enter_spin_lock();

    uint32_t* current_index;
    uint32_t* end_index;
    if (direction == allocate_forward)
    {
        current_index = region_map_left_start;
        end_index = region_map_left_end;
    }
    else
    {
        // A backward search walks the right range from its end down to its start.
        assert(direction == allocate_backward);
        current_index = region_map_right_end;
        end_index = region_map_right_start;
    }

    dprintf (REGIONS_LOG, ("searching %d->%d", (int)(current_index - region_map_left_start), (int)(end_index - region_map_left_start)));

    print_map ("before alloc");

    if (((direction == allocate_forward) && (num_left_used_free_units >= num_units)) ||
        ((direction == allocate_backward) && (num_right_used_free_units >= num_units)))
    {
        while (((direction == allocate_forward) && (current_index < end_index)) ||
            ((direction == allocate_backward) && (current_index > end_index)))
        {
            // Going backward, current_index is one past the block, so the block's
            // tag is read from the entry just before it (the block's last entry).
            uint32_t current_val = *(current_index - ((direction == allocate_backward) ? 1 : 0));
            uint32_t current_num_units = get_num_units (current_val);
            bool free_p = is_unit_memory_free (current_val);
            dprintf (REGIONS_LOG, ("ALLOC[%s: %zd]%d->%d", (free_p ? "F" : "B"), (size_t)current_num_units,
                (int)(current_index - region_map_left_start), (int)(current_index + current_num_units - region_map_left_start)));

            if (free_p)
            {
                if (current_num_units >= num_units)
                {
                    dprintf (REGIONS_LOG, ("found %zd contiguous free units(%d->%d), sufficient",
                        (size_t)current_num_units,
                        (int)(current_index - region_map_left_start),
                        (int)(current_index - region_map_left_start + current_num_units)));

                    if (direction == allocate_forward)
                    {
                        assert (num_left_used_free_units >= num_units);
                        num_left_used_free_units -= num_units;
                    }
                    else
                    {
                        assert (direction == allocate_backward);
                        assert (num_right_used_free_units >= num_units);
                        num_right_used_free_units -= num_units;
                    }

                    // The busy part is carved from the side of the free block the
                    // search came from; the remainder (if any) stays free.
                    uint32_t* busy_block;
                    uint32_t* free_block;
                    if (direction == allocate_forward)
                    {
                        busy_block = current_index;
                        free_block = current_index + num_units;
                    }
                    else
                    {
                        busy_block = current_index - num_units;
                        free_block = current_index - current_num_units;
                    }

                    make_busy_block (busy_block, num_units);
                    if ((current_num_units - num_units) > 0)
                    {
                        make_free_block (free_block, (current_num_units - num_units));
                    }

                    total_free_units -= num_units;
                    print_map ("alloc: found in free");

                    leave_spin_lock();

                    return region_address_of (busy_block);
                }
            }

            // Step over this block to the next one in the search direction.
            if (direction == allocate_forward)
            {
                current_index += current_num_units;
            }
            else
            {
                current_index -= current_num_units;
            }
        }
    }

    // No suitable free block in the used range; take units from the unused gap.
    uint8_t* alloc = allocate_end (num_units, direction);

    if (alloc)
    {
        total_free_units -= num_units;
        if (fn != nullptr)
        {
            if (!fn (global_region_left_used))
            {
                // The callback rejected the allocation; give the units back.
                delete_region_impl (alloc);
                alloc = nullptr;
            }
        }
        if (alloc)
        {
            print_map ("alloc: found at the end");
        }
    }
    else
    {
        dprintf (REGIONS_LOG, ("couldn't find memory at the end! only %zd bytes left", (global_region_right_used - global_region_left_used)));
    }

    leave_spin_lock();

    return alloc;
}

// Allocates a run of region units large enough for `size` bytes and reports its bounds.
//
// gen_num   - generation the region is for; only used to choose the segment type reported
//             in the GCCreateSegment_V1 event (LOH, POH, otherwise SOH).
// size      - requested size in bytes; rounded up with align_region_up before allocating.
// start/end - out parameters receiving the bounds of the allocated range. On failure both
//             are set to NULL.
// direction - forwarded to allocate.
// fn        - optional callback forwarded to allocate.
//
// Returns true if the allocation succeeded. The creation event is only fired on success so
// that a failed attempt does not report a segment at a bogus address.
bool region_allocator::allocate_region (int gen_num, size_t size, uint8_t** start, uint8_t** end, allocate_direction direction, region_allocator_callback_fn fn)
{
    size_t alignment = region_alignment;
    size_t alloc_size = align_region_up (size);

    uint32_t num_units = (uint32_t)(alloc_size / alignment);
    dprintf (REGIONS_LOG, ("----GET %u-----", num_units));

    uint8_t* alloc = allocate (num_units, direction, fn);
    *start = alloc;

    if (alloc == NULL)
    {
        // Nothing was allocated: avoid pointer arithmetic on NULL and do not announce
        // a segment that does not exist.
        *end = NULL;
        return false;
    }

    *end = alloc + alloc_size;

    gc_etw_segment_type segment_type;

    if (gen_num == loh_generation)
    {
        segment_type = gc_etw_segment_large_object_heap;
    }
    else if (gen_num == poh_generation)
    {
        segment_type = gc_etw_segment_pinned_object_heap;
    }
    else
    {
        segment_type = gc_etw_segment_small_object_heap;
    }

    // The event reports the range after the leading aligned_plug_and_gap, using the
    // requested (not the rounded-up) size.
    FIRE_EVENT(GCCreateSegment_V1, (alloc + sizeof (aligned_plug_and_gap)),
                                  size - sizeof (aligned_plug_and_gap),
                                  segment_type);

    return true;
}

bool region_allocator::allocate_basic_region (int gen_num, uint8_t** start, uint8_t** end, region_allocator_callback_fn fn)
{
    return allocate_region (gen_num, region_alignment, start, end, allocate_forward, fn);
}

// Large regions are 8x basic region sizes by default. If you need a larger region than that,
// call allocate_region with the size.
bool region_allocator::allocate_large_region (int gen_num, uint8_t** start, uint8_t** end, allocate_direction direction, size_t size, region_allocator_callback_fn fn)
{
    if (size == 0)
        size = large_region_alignment;
    else
    {
        // round up size to a multiple of large_region_alignment
        // for the below computation to work, large_region_alignment must be a power of 2
        assert (round_up_power2(large_region_alignment) == large_region_alignment);
        size = (size + (large_region_alignment - 1)) & ~(large_region_alignment - 1);
    }
    return allocate_region (gen_num, size, start, end, direction, fn);
}

// Whenever a region is deleted, it is expected that the memory and the mark array
// of the region is decommitted already.
//
// Locking wrapper: takes the allocator spin lock, hands the region that starts at
// region_start to delete_region_impl, and releases the lock again.
void region_allocator::delete_region (uint8_t* region_start)
{
    enter_spin_lock();
    delete_region_impl (region_start);
    leave_spin_lock();
}

// Returns the busy block that starts at region_start to the allocator's unit map.
// The caller must already hold region_allocator_lock (asserted below).
//
// The freed block is coalesced with a free block immediately before and/or after it.
// If the resulting free range touches the end of the left-used area or the start of the
// right-used area, that boundary is moved instead of recording a free block; otherwise
// the range is recorded with make_free_block.
void region_allocator::delete_region_impl (uint8_t* region_start)
{
    ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock);
    assert (is_region_aligned (region_start));

    print_map ("before delete");

    uint32_t* current_index = region_map_index_of (region_start);
    uint32_t current_val = *current_index;
    assert (!is_unit_memory_free (current_val));

    // Log the position of the block within the map (pointer difference), not the
    // difference between the values stored in the map entries.
    dprintf (REGIONS_LOG, ("----DEL %d (%u units)-----", (int)(current_index - region_map_left_start), current_val));
    // For a busy block the map value is used directly as its length in units.
    uint32_t* region_end_index = current_index + current_val;
    uint8_t* region_end = region_address_of (region_end_index);

    int free_block_size = current_val;
    uint32_t* free_index = current_index;

    // The units being freed now count as free-but-inside-the-used-range on whichever
    // side of the map the block lives.
    if (free_index <= region_map_left_end)
    {
        num_left_used_free_units += free_block_size;
    }
    else
    {
        assert (free_index >= region_map_right_start);
        num_right_used_free_units += free_block_size;
    }

    // Merge with the preceding block if it is free (there is no predecessor at the
    // very start of either side of the map).
    if ((current_index != region_map_left_start) && (current_index != region_map_right_start))
    {
        uint32_t previous_val = *(current_index - 1);
        if (is_unit_memory_free(previous_val))
        {
            uint32_t previous_size = get_num_units (previous_val);
            free_index -= previous_size;
            free_block_size += previous_size;
        }
    }
    // Merge with the following block if it is free (there is no successor at the end
    // of the left-used area or at the end of the whole range).
    if ((region_end != global_region_left_used) && (region_end != global_region_end))
    {
        uint32_t next_val = *region_end_index;
        if (is_unit_memory_free(next_val))
        {
            uint32_t next_size = get_num_units (next_val);
            free_block_size += next_size;
            // NOTE(review): region_end is a byte address while next_size is a unit count;
            // this looks like mixed units - confirm whether it is intentional.
            region_end += next_size;
        }
    }
    if (region_end == global_region_left_used)
    {
        // The free range is at the top of the left-used area: shrink the used area.
        num_left_used_free_units -= free_block_size;
        region_map_left_end = free_index;
        dprintf (REGIONS_LOG, ("adjust global left used from %p to %p",
            global_region_left_used, region_address_of (free_index)));
        global_region_left_used = region_address_of (free_index);
    }
    else if (region_start == global_region_right_used)
    {
        // The freed block is at the bottom of the right-used area: shrink the used area.
        num_right_used_free_units -= free_block_size;
        region_map_right_start = free_index + free_block_size;
        dprintf (REGIONS_LOG, ("adjust global right used from %p to %p",
            global_region_right_used, region_address_of (free_index + free_block_size)));
        global_region_right_used = region_address_of (free_index + free_block_size);
    }
    else
    {
        make_free_block (free_index, free_block_size);
    }

    // Only the units of the deleted block itself are newly free; merged neighbours
    // were already counted.
    total_free_units += current_val;

    print_map ("after delete");
}

// Walks the left part of the region map from its highest entry downwards and moves
// regions onto to_free_list until the budget of n units cannot cover the next candidate.
//
// A block is a candidate when it is busy in the map, its size class matches
// small_region_p (exactly one unit means small), its region is a free region and it is
// not already on to_free_list.
void region_allocator::move_highest_free_regions (int64_t n, bool small_region_p, region_free_list to_free_list[count_free_region_kinds])
{
    assert (n > 0);

    uint32_t* cursor = region_map_left_end - 1;
    uint32_t* const map_floor = region_map_left_start;

    while (cursor >= map_floor)
    {
        const uint32_t entry = *cursor;
        const uint32_t unit_count = get_num_units (entry);
        const bool busy_p = !is_unit_memory_free (entry);

        if (busy_p && ((unit_count == 1) == small_region_p))
        {
            // cursor is on the last unit of the block; step back to its first unit.
            uint32_t* first_unit = cursor - (unit_count - 1);
            heap_segment* region = get_region_info (region_address_of (first_unit));

            if (is_free_region (region) && !region_free_list::is_on_free_list (region, to_free_list))
            {
                if (n < unit_count)
                {
                    // Not enough budget left for this region - stop the walk.
                    break;
                }

                n -= unit_count;
                region_free_list::unlink_region (region);
                region_free_list::add_region (region, to_free_list);
            }
        }

        cursor -= unit_count;
    }
}
#endif //USE_REGIONS

// Rounds add up to the next multiple of (1 << gc_heap::min_segment_size_shr).
inline
uint8_t* align_on_segment (uint8_t* add)
{
    const size_t low_bits_mask = ((size_t)1 << gc_heap::min_segment_size_shr) - 1;
    return (uint8_t*)((size_t)(add + low_bits_mask) & ~low_bits_mask);
}

// Rounds add down to a multiple of (1 << gc_heap::min_segment_size_shr).
inline
uint8_t* align_lower_segment (uint8_t* add)
{
    const size_t low_bits_mask = ((size_t)1 << gc_heap::min_segment_size_shr) - 1;
    return (uint8_t*)((size_t)(add) & ~low_bits_mask);
}

// Returns the number of bytes of seg_mapping entries needed to cover [from, end) after
// widening the range outwards to min-segment-size boundaries (one entry per
// 1 << gc_heap::min_segment_size_shr bytes).
size_t size_seg_mapping_table_of (uint8_t* from, uint8_t* end)
{
    from = align_lower_segment (from);
    end = align_on_segment (end);

    const size_t table_bytes = sizeof (seg_mapping)*((size_t)(end - from) >> gc_heap::min_segment_size_shr);
    dprintf (1, ("from: %p, end: %p, size: %zx", from, end, table_bytes));
    return table_bytes;
}

// Returns the number of bytes needed for a table with one uint8_t entry per
// (1 << gc_heap::min_segment_size_shr) bytes of [from, end). Unlike
// size_seg_mapping_table_of, the range is used as given, without alignment.
size_t size_region_to_generation_table_of (uint8_t* from, uint8_t* end)
{
    const size_t table_bytes = sizeof (uint8_t)*((size_t)(end - from) >> gc_heap::min_segment_size_shr);
    dprintf (1, ("from: %p, end: %p, size: %zx", from, end, table_bytes));
    return table_bytes;
}

// Converts an address into its index in units of (1 << gc_heap::min_segment_size_shr) bytes.
inline
size_t seg_mapping_word_of (uint8_t* add)
{
    const size_t address_value = (size_t)add;
    return address_value >> gc_heap::min_segment_size_shr;
}

#ifdef FEATURE_BASICFREEZE
// Index of the first seg_mapping_table entry touched by a read-only segment, clamped
// from below to the entry that contains g_gc_lowest_address.
inline
size_t ro_seg_begin_index (heap_segment* seg)
{
#ifdef USE_REGIONS
    const size_t seg_start = (size_t)heap_segment_mem (seg);
#else
    const size_t seg_start = (size_t)seg;
#endif //USE_REGIONS
    const size_t first_index = seg_start >> gc_heap::min_segment_size_shr;
    const size_t lowest_index = (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr;
    return max (first_index, lowest_index);
}

// Index of the last seg_mapping_table entry touched by a read-only segment (computed from
// its last reserved byte), clamped from above to the entry for g_gc_highest_address.
inline
size_t ro_seg_end_index (heap_segment* seg)
{
    const size_t last_index = (size_t)(heap_segment_reserved (seg) - 1) >> gc_heap::min_segment_size_shr;
    const size_t highest_index = (size_t)g_gc_highest_address >> gc_heap::min_segment_size_shr;
    return min (last_index, highest_index);
}

// Flags every seg_mapping_table entry overlapped by a read-only segment with ro_in_entry.
// Segments lying entirely outside [g_gc_lowest_address, g_gc_highest_address) are ignored.
void seg_mapping_table_add_ro_segment (heap_segment* seg)
{
    const bool outside_gc_range_p =
        (heap_segment_reserved (seg) <= g_gc_lowest_address) || (heap_segment_mem (seg) >= g_gc_highest_address);

    if (!outside_gc_range_p)
    {
        size_t entry_index = ro_seg_begin_index (seg);
        while (entry_index <= ro_seg_end_index (seg))
        {
#ifdef USE_REGIONS
            // With regions the table entry itself is viewed as a heap_segment and the
            // flag is stored in its allocated field.
            heap_segment* region = (heap_segment*)&seg_mapping_table[entry_index];
            heap_segment_allocated (region) = (uint8_t*)ro_in_entry;
#else
            // Otherwise the flag is OR-ed into seg1, keeping whatever is stored there.
            seg_mapping* entry = &seg_mapping_table[entry_index];
            entry->seg1 = (heap_segment*)((size_t)(entry->seg1) | ro_in_entry);
#endif //USE_REGIONS
            entry_index++;
        }
    }
}

// Counterpart of seg_mapping_table_add_ro_segment. Intentionally a no-op: the
// ro_in_entry flag is left in place when a read-only segment is removed (see the
// note below); seg is unused.
void seg_mapping_table_remove_ro_segment (heap_segment* seg)
{
    UNREFERENCED_PARAMETER(seg);
#if 0
// POSSIBLE PERF TODO: right now we are not doing anything because we can't simply remove the flag. If it proves
// to be a perf problem, we can search in the current ro segs and see if any lands in this range and only
// remove the flag if none lands in this range.
#endif //0
}

// Looks up o in gc_heap::seg_table and returns the segment found there if o is within
// that segment's range, otherwise 0.
heap_segment* ro_segment_lookup (uint8_t* o)
{
    // Passed to lookup as an lvalue and tested afterwards.
    // NOTE(review): presumably lookup takes it by reference and updates it to the start
    // of the matching segment (0 when there is none) - confirm against sorted_table::lookup.
    uint8_t* ro_seg_start = o;
    heap_segment* seg = (heap_segment*)gc_heap::seg_table->lookup (ro_seg_start);

    const bool found_p = (ro_seg_start && in_range_for_segment (o, seg));
    if (!found_p)
    {
        return 0;
    }
    return seg;
}

#endif //FEATURE_BASICFREEZE

#ifndef USE_REGIONS
// Registers a segment in seg_mapping_table (non-regions builds only).
//
// The table has one entry per (1 << min_segment_size_shr) bytes of address space.
// The entry containing the segment's first byte gets seg in seg1, the entry containing
// its last reserved byte gets seg in seg0 plus the boundary (address of that last byte),
// and every entry strictly in between gets seg in seg1. With MULTIPLE_HEAPS the owning
// heap hp is recorded the same way in h1/h0.
void gc_heap::seg_mapping_table_add_segment (heap_segment* seg, gc_heap* hp)
{
    // Address of the last reserved byte of the segment.
    size_t seg_end = (size_t)(heap_segment_reserved (seg) - 1);
    size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr;
    seg_mapping* begin_entry = &seg_mapping_table[begin_index];
    size_t end_index = seg_end >> gc_heap::min_segment_size_shr;
    seg_mapping* end_entry = &seg_mapping_table[end_index];

    dprintf (2, ("adding seg %p(%zd)-%p(%zd)",
        seg, begin_index, heap_segment_reserved (seg), end_index));

    dprintf (2, ("before add: begin entry%zd: boundary: %p; end entry: %zd: boundary: %p",
        begin_index, (seg_mapping_table[begin_index].boundary + 1),
        end_index, (seg_mapping_table[end_index].boundary + 1)));

#ifdef MULTIPLE_HEAPS
#ifdef SIMPLE_DPRINTF
    dprintf (2, ("begin %zd: h0: %p(%d), h1: %p(%d); end %zd: h0: %p(%d), h1: %p(%d)",
        begin_index, (uint8_t*)(begin_entry->h0), (begin_entry->h0 ? begin_entry->h0->heap_number : -1),
        (uint8_t*)(begin_entry->h1), (begin_entry->h1 ? begin_entry->h1->heap_number : -1),
        end_index, (uint8_t*)(end_entry->h0), (end_entry->h0 ? end_entry->h0->heap_number : -1),
        (uint8_t*)(end_entry->h1), (end_entry->h1 ? end_entry->h1->heap_number : -1)));
#endif //SIMPLE_DPRINTF
    // The slots about to be written are expected to be unused.
    assert (end_entry->boundary == 0);
    assert (end_entry->h0 == 0);
    end_entry->h0 = hp;
    assert (begin_entry->h1 == 0);
    begin_entry->h1 = hp;
#else
    UNREFERENCED_PARAMETER(hp);
#endif //MULTIPLE_HEAPS

    end_entry->boundary = (uint8_t*)seg_end;

    dprintf (2, ("set entry %zd seg1 and %zd seg0 to %p", begin_index, end_index, seg));
    // seg1 of the begin entry may already carry the ro_in_entry flag; OR-ing keeps it.
    assert ((begin_entry->seg1 == 0) || ((size_t)(begin_entry->seg1) == ro_in_entry));
    begin_entry->seg1 = (heap_segment*)((size_t)(begin_entry->seg1) | (size_t)seg);
    end_entry->seg0 = seg;

    // for every entry inbetween we need to set its heap too.
    for (size_t entry_index = (begin_index + 1); entry_index <= (end_index - 1); entry_index++)
    {
        assert (seg_mapping_table[entry_index].boundary == 0);
#ifdef MULTIPLE_HEAPS
        assert (seg_mapping_table[entry_index].h0 == 0);
        seg_mapping_table[entry_index].h1 = hp;
#endif //MULTIPLE_HEAPS
        seg_mapping_table[entry_index].seg1 = seg;
    }

    dprintf (2, ("after add: begin entry%zd: boundary: %p; end entry: %zd: boundary: %p",
        begin_index, (seg_mapping_table[begin_index].boundary + 1),
        end_index, (seg_mapping_table[end_index].boundary + 1)));
#if defined(MULTIPLE_HEAPS) && defined(SIMPLE_DPRINTF)
    dprintf (2, ("begin %zd: h0: %p(%d), h1: %p(%d); end: %zd h0: %p(%d), h1: %p(%d)",
        begin_index, (uint8_t*)(begin_entry->h0), (begin_entry->h0 ? begin_entry->h0->heap_number : -1),
        (uint8_t*)(begin_entry->h1), (begin_entry->h1 ? begin_entry->h1->heap_number : -1),
        end_index, (uint8_t*)(end_entry->h0), (end_entry->h0 ? end_entry->h0->heap_number : -1),
        (uint8_t*)(end_entry->h1), (end_entry->h1 ? end_entry->h1->heap_number : -1)));
#endif //MULTIPLE_HEAPS && SIMPLE_DPRINTF
}

// Undoes seg_mapping_table_add_segment for seg (non-regions builds only): clears the
// boundary and seg0 of the end entry, clears seg1 of the begin entry and of every entry
// in between, and with MULTIPLE_HEAPS clears the matching h0/h1 heap slots. The asserts
// check that the table still holds what add_segment stored.
void gc_heap::seg_mapping_table_remove_segment (heap_segment* seg)
{
    // Address of the last reserved byte of the segment.
    size_t seg_end = (size_t)(heap_segment_reserved (seg) - 1);
    size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr;
    seg_mapping* begin_entry = &seg_mapping_table[begin_index];
    size_t end_index = seg_end >> gc_heap::min_segment_size_shr;
    seg_mapping* end_entry = &seg_mapping_table[end_index];
    dprintf (2, ("removing seg %p(%zd)-%p(%zd)",
        seg, begin_index, heap_segment_reserved (seg), end_index));

    assert (end_entry->boundary == (uint8_t*)seg_end);
    end_entry->boundary = 0;

#ifdef MULTIPLE_HEAPS
    gc_heap* hp = heap_segment_heap (seg);
    assert (end_entry->h0 == hp);
    end_entry->h0 = 0;
    assert (begin_entry->h1 == hp);
    begin_entry->h1 = 0;
#endif //MULTIPLE_HEAPS

    assert (begin_entry->seg1 != 0);
    // Drop the segment pointer but keep the ro_in_entry flag if it is set.
    begin_entry->seg1 = (heap_segment*)((size_t)(begin_entry->seg1) & ro_in_entry);
    end_entry->seg0 = 0;

    // for every entry inbetween we need to reset its heap too.
    for (size_t entry_index = (begin_index + 1); entry_index <= (end_index - 1); entry_index++)
    {
        assert (seg_mapping_table[entry_index].boundary == 0);
#ifdef MULTIPLE_HEAPS
        assert (seg_mapping_table[entry_index].h0 == 0);
        assert (seg_mapping_table[entry_index].h1 == hp);
        seg_mapping_table[entry_index].h1 = 0;
#endif //MULTIPLE_HEAPS
        seg_mapping_table[entry_index].seg1 = 0;
    }

    dprintf (2, ("after remove: begin entry%zd: boundary: %p; end entry: %zd: boundary: %p",
        begin_index, (seg_mapping_table[begin_index].boundary + 1),
        end_index, (seg_mapping_table[end_index].boundary + 1)));
#ifdef MULTIPLE_HEAPS
    dprintf (2, ("begin %zd: h0: %p, h1: %p; end: %zd h0: %p, h1: %p",
        begin_index, (uint8_t*)(begin_entry->h0), (uint8_t*)(begin_entry->h1),
        end_index, (uint8_t*)(end_entry->h0), (uint8_t*)(end_entry->h1)));
#endif //MULTIPLE_HEAPS
}
#endif //!USE_REGIONS

#ifdef MULTIPLE_HEAPS
// Returns the heap recorded in seg_mapping_table for address o. No range check is done
// here; callers are expected to have validated o against the GC address range.
//
// With regions the table entry is read as a heap_segment and its heap is returned.
// Otherwise the entry's boundary decides between h1 (o above the boundary) and h0.
// The _DEBUG/TRACE_GC part only logs whether o actually lies on the matching segment.
inline
gc_heap* seg_mapping_table_heap_of_worker (uint8_t* o)
{
    size_t index = (size_t)o >> gc_heap::min_segment_size_shr;
    seg_mapping* entry = &seg_mapping_table[index];

#ifdef USE_REGIONS
    gc_heap* hp = heap_segment_heap ((heap_segment*)entry);
#else
    gc_heap* hp = ((o > entry->boundary) ? entry->h1 : entry->h0);

    dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, h0: %p, seg0: %p, h1: %p, seg1: %p",
        o, index, (entry->boundary + 1),
        (uint8_t*)(entry->h0), (uint8_t*)(entry->seg0),
        (uint8_t*)(entry->h1), (uint8_t*)(entry->seg1)));

#ifdef _DEBUG
    heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0);
#ifdef FEATURE_BASICFREEZE
    // Strip the read-only flag that may be OR-ed into the segment pointer.
    if ((size_t)seg & ro_in_entry)
        seg = (heap_segment*)((size_t)seg & ~ro_in_entry);
#endif //FEATURE_BASICFREEZE

#ifdef TRACE_GC
    if (seg)
    {
        if (in_range_for_segment (o, seg))
        {
            dprintf (2, ("obj %p belongs to segment %p(-%p)", o, seg, (uint8_t*)heap_segment_allocated (seg)));
        }
        else
        {
            dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg",
                seg, (uint8_t*)heap_segment_allocated (seg), o));
        }
    }
    else
    {
        dprintf (2, ("could not find obj %p in any existing segments", o));
    }
#endif //TRACE_GC
#endif //_DEBUG
#endif //USE_REGIONS
    return hp;
}

// Returns the heap that owns address o, or 0 when o is outside
// [g_gc_lowest_address, g_gc_highest_address).
gc_heap* seg_mapping_table_heap_of (uint8_t* o)
{
    if ((o >= g_gc_lowest_address) && (o < g_gc_highest_address))
    {
        return seg_mapping_table_heap_of_worker (o);
    }

    return 0;
}

// Variant of seg_mapping_table_heap_of that only range-checks o when
// FEATURE_BASICFREEZE is defined; without it the table is consulted unconditionally.
gc_heap* seg_mapping_table_heap_of_gc (uint8_t* o)
{
#ifdef FEATURE_BASICFREEZE
    const bool in_gc_range_p = (o >= g_gc_lowest_address) && (o < g_gc_highest_address);
    if (!in_gc_range_p)
    {
        return 0;
    }
#endif //FEATURE_BASICFREEZE

    return seg_mapping_table_heap_of_worker (o);
}
#endif //MULTIPLE_HEAPS

// Only returns a valid seg if we can actually find o on the seg.
//
// Resolves address o to its heap_segment through seg_mapping_table. With
// FEATURE_BASICFREEZE, addresses outside the GC range, and addresses the table cannot
// resolve, are additionally looked up among the read-only segments.
heap_segment* seg_mapping_table_segment_of (uint8_t* o)
{
#ifdef FEATURE_BASICFREEZE
    if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address))
        return ro_segment_lookup (o);
#endif //FEATURE_BASICFREEZE

    size_t index = (size_t)o >> gc_heap::min_segment_size_shr;
    seg_mapping* entry = &seg_mapping_table[index];

#ifdef USE_REGIONS
    // REGIONS TODO: I think we could simplify this to having the same info for each
    // basic entry in a large region so we can get it right away instead of having to go
    // back some entries.
    // The entry's allocated field is read as a signed value: 0 means the entry belongs to
    // a freed region, and a negative value is added to index to step back to an earlier
    // entry, which is then used as the region.
    ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated ((heap_segment*)entry);
    if (first_field == 0)
    {
        dprintf (REGIONS_LOG, ("asked for seg for %p, in a freed region mem: %p, committed %p",
            o, heap_segment_mem ((heap_segment*)entry),
            heap_segment_committed ((heap_segment*)entry)));
        return 0;
    }
    // Regions are never going to intersect an ro seg, so this can never be ro_in_entry.
    assert (first_field != 0);
    assert (first_field != ro_in_entry);
    if (first_field < 0)
    {
        index += first_field;
    }
    heap_segment* seg = (heap_segment*)&seg_mapping_table[index];
#else //USE_REGIONS
    dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, seg0: %p, seg1: %p",
        o, index, (entry->boundary + 1),
        (uint8_t*)(entry->seg0), (uint8_t*)(entry->seg1)));

    // Addresses above the entry's boundary belong to seg1, the rest to seg0.
    heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0);
#ifdef FEATURE_BASICFREEZE
    // Strip the read-only flag that may be OR-ed into the segment pointer.
    if ((size_t)seg & ro_in_entry)
        seg = (heap_segment*)((size_t)seg & ~ro_in_entry);
#endif //FEATURE_BASICFREEZE
#endif //USE_REGIONS

    // The table only narrows the search down; o must still lie within the segment's range.
    if (seg)
    {
        if (in_range_for_segment (o, seg))
        {
            dprintf (2, ("obj %p belongs to segment %p(-%p)", o, (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg)));
        }
        else
        {
            dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg, setting it to 0",
                (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg), o));
            seg = 0;
        }
    }
    else
    {
        dprintf (2, ("could not find obj %p in any existing segments", o));
    }

#ifdef FEATURE_BASICFREEZE
    // TODO: This was originally written assuming that the seg_mapping_table would always contain entries for ro
    // segments whenever the ro segment falls into the [g_gc_lowest_address,g_gc_highest_address) range.  I.e., it had an
    // extra "&& (size_t)(entry->seg1) & ro_in_entry" expression.  However, at the moment, grow_brick_card_table does
    // not correctly go through the ro segments and add them back to the seg_mapping_table when the [lowest,highest)
    // range changes.  We should probably go ahead and modify grow_brick_card_table and put back the
    // "&& (size_t)(entry->seg1) & ro_in_entry" here.
    if (!seg)
    {
        // Fall back to the read-only segment table for anything the mapping table missed.
        seg = ro_segment_lookup (o);
        if (seg && !in_range_for_segment (o, seg))
            seg = 0;
    }
#endif //FEATURE_BASICFREEZE

    return seg;
}

size_t gcard_of ( uint8_t*);

// Flag bits that are OR-ed into the low bits of an object's method table pointer word
// (see CObjectHeader::SetMarked and friends).
#define GC_MARKED       (size_t)0x1
#ifdef DOUBLY_LINKED_FL
// This bit indicates that we'll need to set the bgc mark bit for this object during an FGC.
// We only do this when we decide to compact.
#define BGC_MARKED_BY_FGC (size_t)0x2
#define MAKE_FREE_OBJ_IN_COMPACT (size_t)0x4
#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED|BGC_MARKED_BY_FGC|MAKE_FREE_OBJ_IN_COMPACT)
#else //DOUBLY_LINKED_FL
#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED)
#endif //!DOUBLY_LINKED_FL

// Mask of the low bits that CObjectHeader::GetMethodTable strips from the raw method
// table pointer: three bits on 64-bit hosts, two otherwise.
#ifdef HOST_64BIT
#define SPECIAL_HEADER_BITS (0x7)
#else
#define SPECIAL_HEADER_BITS (0x3)
#endif

// Treats i as an array of pointers and yields element j+1 (i.e. skips the first word).
#define slot(i, j) ((uint8_t**)(i))[(j)+1]

// Size of a free object that has zero components.
#define free_object_base_size (plug_skew + sizeof(ArrayBase))

// Accessors for pointer-sized words of a free object x, relative to the object start:
// free_list_slot is the word at index 2 and free_list_undo is the word just before x.
#define free_list_slot(x) ((uint8_t**)(x))[2]
#define free_list_undo(x) ((uint8_t**)(x))[-1]
// Sentinel value for the undo word; presumably means "no undo information recorded".
#define UNDO_EMPTY ((uint8_t*)1)

#ifdef DOUBLY_LINKED_FL
#define free_list_prev(x) ((uint8_t**)(x))[3]
#define PREV_EMPTY ((uint8_t*)1)

// Marks the prev word of object o as PREV_EMPTY. Objects smaller than min_free_list are
// left untouched.
void check_and_clear_in_free_list (uint8_t* o, size_t size)
{
    if (size < min_free_list)
    {
        return;
    }

    free_list_prev (o) = PREV_EMPTY;
}
// Zeroes the prev word of o. Used for a free object that was only created temporarily
// during allocation and therefore is not really a free object. Objects smaller than
// min_free_list are left untouched.
void clear_prev_bit (uint8_t* o, size_t size)
{
    if (size < min_free_list)
    {
        return;
    }

    free_list_prev (o) = 0;
}
#endif //DOUBLY_LINKED_FL

// GC-side view of a managed object. Adds accessors for the flag bits the GC keeps in the
// low bits of the method table pointer word (mark bit and, with DOUBLY_LINKED_FL, two
// extra bits), for the pinned bit in the object header, and for turning a range of memory
// into a free object and back.
class CObjectHeader : public Object
{
public:

#if defined(FEATURE_NATIVEAOT) || defined(BUILD_AS_STANDALONE)
    // The GC expects the following methods that are provided by the Object class in the CLR but not provided
    // by NativeAOT's version of Object.

    // Reads the component count by viewing this object as an ArrayBase.
    uint32_t GetNumComponents()
    {
        return ((ArrayBase *)this)->GetNumComponents();
    }

    // Debug validation of this object: sanity-checks the method table, checks (unless
    // range checks are disabled by the heap verify level) that the object is a heap
    // pointer, checks alignment where the build requires it, and with bDeep and
    // HEAPVERIFY_GC also validates the object's members.
    void Validate(BOOL bDeep=TRUE, BOOL bVerifyNextHeader = FALSE, BOOL bVerifySyncBlock = FALSE)
    {
        // declaration of extra parameters just so the call site would need no #ifdefs
        UNREFERENCED_PARAMETER(bVerifyNextHeader);
        UNREFERENCED_PARAMETER(bVerifySyncBlock);

        MethodTable * pMT = GetMethodTable();

        _ASSERTE(pMT->SanityCheck());

        bool noRangeChecks =
            (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_NO_RANGE_CHECKS) == GCConfig::HEAPVERIFY_NO_RANGE_CHECKS;

        BOOL fSmallObjectHeapPtr = FALSE, fLargeObjectHeapPtr = FALSE;
        if (!noRangeChecks)
        {
            // First ask with the second argument TRUE, then fall back to the default query.
            fSmallObjectHeapPtr = g_theGCHeap->IsHeapPointer(this, TRUE);
            if (!fSmallObjectHeapPtr)
                fLargeObjectHeapPtr = g_theGCHeap->IsHeapPointer(this);

            _ASSERTE(fSmallObjectHeapPtr || fLargeObjectHeapPtr);
        }

#ifdef FEATURE_STRUCTALIGN
        _ASSERTE(IsStructAligned((uint8_t *)this, GetMethodTable()->GetBaseAlignment()));
#endif // FEATURE_STRUCTALIGN

#if defined(FEATURE_64BIT_ALIGNMENT) && !defined(FEATURE_NATIVEAOT)
        if (pMT->RequiresAlign8())
        {
            // Value types are expected at offset 4 within an 8-byte unit, everything else at 0.
            _ASSERTE((((size_t)this) & 0x7) == (pMT->IsValueType() ? 4U : 0U));
        }
#endif // FEATURE_64BIT_ALIGNMENT

#ifdef VERIFY_HEAP
        if (bDeep && (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC))
            g_theGCHeap->ValidateObjectMember(this);
#endif
        if (fSmallObjectHeapPtr)
        {
            // An object found by the small-object query must not be a large object, unless
            // (with FEATURE_BASICFREEZE) it lives in a frozen segment.
#ifdef FEATURE_BASICFREEZE
            _ASSERTE(!g_theGCHeap->IsLargeObject(this) || g_theGCHeap->IsInFrozenSegment(this));
#else
            _ASSERTE(!g_theGCHeap->IsLargeObject(this));
#endif
        }
    }

    // Convenience wrapper: Validate with the default values for the remaining parameters.
    void ValidateHeap(BOOL bDeep)
    {
        Validate(bDeep);
    }

#endif //FEATURE_NATIVEAOT || BUILD_AS_STANDALONE

    /////
    //
    // Header Status Information
    //

    // Returns the method table pointer with the SPECIAL_HEADER_BITS masked off.
    MethodTable    *GetMethodTable() const
    {
        return( (MethodTable *) (((size_t) RawGetMethodTable()) & (~SPECIAL_HEADER_BITS)));
    }

    // Sets the GC_MARKED bit in the method table pointer word.
    void SetMarked()
    {
        _ASSERTE(RawGetMethodTable());
        RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | GC_MARKED));
    }

    // TRUE if the GC_MARKED bit is set.
    BOOL IsMarked() const
    {
        return !!(((size_t)RawGetMethodTable()) & GC_MARKED);
    }

    // Sets the GC bit in the object header; asserted not to happen while
    // gc_heap::settings.concurrent is set.
    void SetPinned()
    {
        assert (!(gc_heap::settings.concurrent));
        GetHeader()->SetGCBit();
    }

    // TRUE if BIT_SBLK_GC_RESERVE is set in the object header bits.
    BOOL IsPinned() const
    {
        return !!((((CObjectHeader*)this)->GetHeader()->GetBits()) & BIT_SBLK_GC_RESERVE);
    }

    // With DOUBLY_LINKED_FL there are additional flag bits in the method table pointer
    // word, so only the mark bit may be cleared here. Without it, writing back the masked
    // method table pointer clears all special bits.
    void ClearMarked()
    {
#ifdef DOUBLY_LINKED_FL
        RawSetMethodTable ((MethodTable *)(((size_t) RawGetMethodTable()) & (~GC_MARKED)));
#else
        RawSetMethodTable (GetMethodTable());
#endif //DOUBLY_LINKED_FL
    }

#ifdef DOUBLY_LINKED_FL
    // Set / test / clear the BGC_MARKED_BY_FGC bit in the method table pointer word.
    void SetBGCMarkBit()
    {
        RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | BGC_MARKED_BY_FGC));
    }
    BOOL IsBGCMarkBitSet() const
    {
        return !!(((size_t)RawGetMethodTable()) & BGC_MARKED_BY_FGC);
    }
    void ClearBGCMarkBit()
    {
        RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~BGC_MARKED_BY_FGC)));
    }

    // Set / test / clear the MAKE_FREE_OBJ_IN_COMPACT bit in the method table pointer word.
    void SetFreeObjInCompactBit()
    {
        RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | MAKE_FREE_OBJ_IN_COMPACT));
    }
    BOOL IsFreeObjInCompactBitSet() const
    {
        return !!(((size_t)RawGetMethodTable()) & MAKE_FREE_OBJ_IN_COMPACT);
    }
    void ClearFreeObjInCompactBit()
    {
#ifdef _DEBUG
        // check this looks like an object, but do NOT validate pointers to other objects
        // as these may not be valid yet - we are calling this during compact_phase
        Validate(FALSE);
#endif //_DEBUG
        RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~MAKE_FREE_OBJ_IN_COMPACT)));
    }
#endif //DOUBLY_LINKED_FL

    // Clears every bit covered by SPECIAL_HEADER_BITS from the method table pointer word
    // and returns the bits that were set, so they can be restored with SetSpecialBits.
    // Asserts that only ALLOWED_SPECIAL_HEADER_BITS were present.
    size_t ClearSpecialBits()
    {
        size_t special_bits = ((size_t)RawGetMethodTable()) & SPECIAL_HEADER_BITS;
        if (special_bits != 0)
        {
            assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0);
            RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) & ~(SPECIAL_HEADER_BITS)));
        }
        return special_bits;
    }

    // ORs special_bits (which must be a subset of ALLOWED_SPECIAL_HEADER_BITS) into the
    // method table pointer word.
    void SetSpecialBits (size_t special_bits)
    {
        assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0);
        if (special_bits != 0)
        {
            RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) | special_bits));
        }
    }

    // Returns the CGCDesc of this object's method table; only valid for types that
    // contain GC pointers.
    CGCDesc *GetSlotMap ()
    {
        assert (GetMethodTable()->ContainsGCPointers());
        return CGCDesc::GetCGCDescFromMT(GetMethodTable());
    }

    // Formats this memory as a free object spanning size bytes: installs the free object
    // method table and stores (size - free_object_base_size) as the component count.
    void SetFree(size_t size)
    {
        assert (size >= free_object_base_size);

        assert (g_gc_pFreeObjectMethodTable->GetBaseSize() == free_object_base_size);
        assert (g_gc_pFreeObjectMethodTable->RawGetComponentSize() == 1);

        RawSetMethodTable( g_gc_pFreeObjectMethodTable );

        // The component count is written as a full size_t at the ArrayBase component
        // count offset.
        size_t* numComponentsPtr = (size_t*) &((uint8_t*) this)[ArrayBase::GetOffsetOfNumComponents()];
        *numComponentsPtr = size - free_object_base_size;
#ifdef VERIFY_HEAP
        //This introduces a bug in the free list management.
        //((void**) this)[-1] = 0;    // clear the sync block,
        assert (*numComponentsPtr >= 0);
        if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)
        {
            // Fill the payload with 0xcc so stale data is recognizable during verification.
            memset (((uint8_t*)this)+sizeof(ArrayBase), 0xcc, *numComponentsPtr);
#ifdef DOUBLY_LINKED_FL
            // However, in this case we can't leave the Next field uncleared because no one will clear it
            // so it remains 0xcc and that's not good for verification
            if (*numComponentsPtr > 0)
            {
                free_list_slot (this) = 0;
            }
#endif //DOUBLY_LINKED_FL
        }
#endif //VERIFY_HEAP

#ifdef DOUBLY_LINKED_FL
        // For background GC, we need to distinguish between a free object that's not on the free list
        // and one that is. So we always set its prev to PREV_EMPTY to indicate that it's a free
        // object that's not on the free list. If it should be on the free list, it will be set to the
        // appropriate non zero value.
        check_and_clear_in_free_list ((uint8_t*)this, size);
#endif //DOUBLY_LINKED_FL
    }

    // Zeroes the leading pointer-sized words of the object that cover
    // (free_object_base_size - plug_skew) bytes.
    void UnsetFree()
    {
        size_t size = free_object_base_size - plug_skew;

        // since we only need to clear 2 ptr size, we do it manually
        PTR_PTR m = (PTR_PTR) this;
        for (size_t i = 0; i < size / sizeof(PTR_PTR); i++)
            *(m++) = 0;
    }

    // TRUE if this object's method table (special bits ignored) is the free object
    // method table.
    BOOL IsFree () const
    {
        return (GetMethodTable() == g_gc_pFreeObjectMethodTable);
    }

#ifdef FEATURE_STRUCTALIGN
    // Forwards to the method table's required alignment.
    int GetRequiredAlignment () const
    {
        return GetMethodTable()->GetRequiredAlignment();
    }
#endif // FEATURE_STRUCTALIGN

    // Forwards to the method table (special bits ignored).
    BOOL ContainsGCPointers() const
    {
        return GetMethodTable()->ContainsGCPointers();
    }

#ifdef COLLECTIBLE_CLASS
    // Forwards to the method table (special bits ignored).
    BOOL Collectible() const
    {
        return GetMethodTable()->Collectible();
    }

    // Combined query that fetches the method table only once.
    FORCEINLINE BOOL ContainsGCPointersOrCollectible() const
    {
        MethodTable *pMethodTable = GetMethodTable();
        return (pMethodTable->ContainsGCPointers() || pMethodTable->Collectible());
    }
#endif //COLLECTIBLE_CLASS

    // Returns this object viewed as a plain Object.
    Object* GetObjectBase() const
    {
        return (Object*) this;
    }
};

// header(i) views address i as a CObjectHeader; method_table(o) returns the method table
// of the object at o with the special header bits masked off.
#define header(i) ((CObjectHeader*)(i))
#define method_table(o) ((CObjectHeader*)(o))->GetMethodTable()

#ifdef DOUBLY_LINKED_FL
// Returns TRUE when o is a free object of at least min_free_list bytes whose prev word
// is something other than PREV_EMPTY; FALSE in every other case.
inline
BOOL is_on_free_list (uint8_t* o, size_t size)
{
    const bool free_obj_with_prev_p =
        (size >= min_free_list) && (header(o)->GetMethodTable() == g_gc_pFreeObjectMethodTable);

    if (!free_obj_with_prev_p)
    {
        return FALSE;
    }

    return (free_list_prev (o) != PREV_EMPTY);
}

// Sets the BGC_MARKED_BY_FGC bit on the object at node.
inline
void set_plug_bgc_mark_bit (uint8_t* node)
{
    header(node)->SetBGCMarkBit();
}

// Tests the BGC_MARKED_BY_FGC bit on the object at node.
inline
BOOL is_plug_bgc_mark_bit_set (uint8_t* node)
{
    return header(node)->IsBGCMarkBitSet();
}

// Clears the BGC_MARKED_BY_FGC bit on the object at node.
inline
void clear_plug_bgc_mark_bit (uint8_t* node)
{
    header(node)->ClearBGCMarkBit();
}

// Sets the MAKE_FREE_OBJ_IN_COMPACT bit on the object at node.
inline
void set_free_obj_in_compact_bit (uint8_t* node)
{
    header(node)->SetFreeObjInCompactBit();
}

// Tests the MAKE_FREE_OBJ_IN_COMPACT bit on the object at node.
inline
BOOL is_free_obj_in_compact_bit_set (uint8_t* node)
{
    return header(node)->IsFreeObjInCompactBitSet();
}

// Clears the MAKE_FREE_OBJ_IN_COMPACT bit on the object at node (in _DEBUG builds the
// object is shallowly validated first).
inline
void clear_free_obj_in_compact_bit (uint8_t* node)
{
    header(node)->ClearFreeObjInCompactBit();
}
#endif //DOUBLY_LINKED_FL

// "Plug padded" flag helpers. With SHORT_PLUGS the flag is stored in the mark bit of the
// object at node; without SHORT_PLUGS the setters do nothing and the query is always FALSE.
#ifdef SHORT_PLUGS
inline
void set_plug_padded (uint8_t* node)
{
    header(node)->SetMarked();
}
inline
void clear_plug_padded (uint8_t* node)
{
    header(node)->ClearMarked();
}
inline
BOOL is_plug_padded (uint8_t* node)
{
    return header(node)->IsMarked();
}
#else //SHORT_PLUGS
// No-op stand-ins so call sites need no #ifdefs.
inline void set_plug_padded (uint8_t* node){}
inline void clear_plug_padded (uint8_t* node){}
inline
BOOL is_plug_padded (uint8_t* node){return FALSE;}
#endif //SHORT_PLUGS

// Clears the special header bits of the object at node and returns the bits that were
// set, so the caller can restore them later with set_special_bits.
inline
size_t clear_special_bits (uint8_t* node)
{
    return header(node)->ClearSpecialBits();
}

// ORs special_bits (as previously returned by clear_special_bits) back into the header
// of the object at node.
inline
void set_special_bits (uint8_t* node, size_t special_bits)
{
    header(node)->SetSpecialBits (special_bits);
}

// Returns the total size in bytes of the free object at p: the free object base size plus
// the component count stored in it. p must be a free object.
inline size_t unused_array_size(uint8_t * p)
{
    assert(((CObjectHeader*)p)->IsFree());

    const size_t component_count = *(size_t*)(p + ArrayBase::GetOffsetOfNumComponents());
    return free_object_base_size + component_count;
}

// Starting at ns, returns the first segment in the list that is not swept in plan
// (possibly ns itself), or 0 if the list ends first. Without USE_REGIONS ns is returned
// unchanged.
inline
heap_segment* heap_segment_non_sip (heap_segment* ns)
{
#ifdef USE_REGIONS
    // Skip over every swept-in-plan region, logging each one that is passed.
    while ((ns != 0) && heap_segment_swept_in_plan (ns))
    {
        dprintf (REGIONS_LOG, ("region %p->%p SIP",
            heap_segment_mem (ns), heap_segment_allocated (ns)));

        ns = heap_segment_next (ns);
    }
#endif //USE_REGIONS
    return ns;
}

// Returns the first segment after seg that is not swept in plan; without USE_REGIONS this
// is simply the next segment.
inline
heap_segment* heap_segment_next_non_sip (heap_segment* seg)
{
    heap_segment* successor = heap_segment_next (seg);
#ifdef USE_REGIONS
    successor = heap_segment_non_sip (successor);
#endif //USE_REGIONS
    return successor;
}

// Starting at ns, returns the first segment in the list that is not read-only (possibly
// ns itself), or 0 if the list ends first.
heap_segment* heap_segment_rw (heap_segment* ns)
{
    while ((ns != 0) && heap_segment_read_only_p (ns))
    {
        ns = heap_segment_next (ns);
    }

    return ns;
}

// Returns the first non read-only segment that follows seg, or 0 if there is none.
heap_segment* heap_segment_next_rw (heap_segment* seg)
{
    return heap_segment_rw (heap_segment_next (seg));
}

// Walks the non read-only segments that follow begin (begin itself is the
// starting predecessor and is not filtered) and returns the segment visited
// just before seg. Returns 0 when seg is not met during the walk; note that
// seg == begin is never met because the walk starts after begin.
heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg)
{
    assert (begin != 0);

    heap_segment* trailing = begin;
    heap_segment* walker = heap_segment_next_rw (begin);

    for (; (walker != 0) && (walker != seg); walker = heap_segment_next_rw (walker))
    {
        trailing = walker;
    }

    return (walker == seg) ? trailing : 0;
}

// Walks every segment that follows begin (read-only ones included) and returns
// the segment visited just before seg. Returns 0 when seg is not met during
// the walk; seg == begin is never met because the walk starts after begin.
heap_segment* heap_segment_prev (heap_segment* begin, heap_segment* seg)
{
    assert (begin != 0);

    heap_segment* trailing = begin;
    heap_segment* walker = heap_segment_next (begin);

    for (; (walker != 0) && (walker != seg); walker = heap_segment_next (walker))
    {
        trailing = walker;
    }

    return (walker == seg) ? trailing : 0;
}

// Returns ns itself when heap_segment_in_range_p holds for it; otherwise
// follows the list via heap_segment_next and returns the first segment for
// which it holds, or 0 if the list ends first. A 0 input is returned as is.
heap_segment* heap_segment_in_range (heap_segment* ns)
{
    while ((ns != 0) && !heap_segment_in_range_p (ns))
    {
        ns = heap_segment_next (ns);
    }

    return ns;
}

// Returns the first in-range segment that follows seg, or 0 if there is none.
heap_segment* heap_segment_next_in_range (heap_segment* seg)
{
    return heap_segment_in_range (heap_segment_next (seg));
}

// One initial block handed to a heap: just the address at which it starts.
// The block's size is not stored here; it comes from initial_memory_details.
struct imemory_data
{
    uint8_t* memory_base;   // start address of the block
};

// One contiguous reservation made for a NUMA node. gc_heap::reserve_initial_memory
// first accumulates the required size in block_size and then stores the address
// returned by virtual_alloc in memory_base.
struct numa_reserved_block
{
    uint8_t*        memory_base;    // start of the reservation, nullptr if none was made
    size_t          block_size;     // total bytes wanted/reserved for this block

    // A fresh entry describes no reservation at all.
    numa_reserved_block()
        : memory_base(nullptr)
        , block_size(0)
    {
    }
};

// Bookkeeping for the memory reserved up front for the initial segments of
// all heaps: where each heap's SOH/LOH/POH block starts, how big the blocks
// are, and which reservation strategy produced them (needed to release them).
struct initial_memory_details
{
    imemory_data *initial_memory;      // block_count entries each for SOH, LOH and POH, in that order
    imemory_data *initial_normal_heap; // points into initial_memory (SOH entries)
    imemory_data *initial_large_heap;  // points into initial_memory (LOH entries)
    imemory_data *initial_pinned_heap; // points into initial_memory (POH entries)

    size_t block_size_normal;          // bytes per heap for SOH
    size_t block_size_large;           // bytes per heap for LOH
    size_t block_size_pinned;          // bytes per heap for POH

    int block_count;                // # of blocks in each
    int current_block_normal;
    int current_block_large;
    int current_block_pinned;

    // How the memory was reserved. Values start at 1, so a zeroed
    // allocation_pattern matches none of them.
    enum
    {
        ALLATONCE = 1,
        EACH_GENERATION,
        EACH_BLOCK,
        ALLATONCE_SEPARATED_POH,
        EACH_NUMA_NODE
    };

    size_t allocation_pattern;

    // Size of the i-th entry of initial_memory: the first block_count entries
    // are SOH blocks, the next block_count LOH, the next block_count POH.
    size_t block_size(int i)
    {
        const int section = i / block_count;

        if (section == 0)
            return block_size_normal;
        if (section == 1)
            return block_size_large;
        if (section == 2)
            return block_size_pinned;

        UNREACHABLE();
    }

    // Base address of the initial block of heap h_number for generation gen.
    // All three SOH generations share the heap's SOH block.
    void* get_initial_memory (int gen, int h_number)
    {
        if ((gen == soh_gen0) || (gen == soh_gen1) || (gen == soh_gen2))
            return initial_normal_heap[h_number].memory_base;
        if (gen == loh_generation)
            return initial_large_heap[h_number].memory_base;
        if (gen == poh_generation)
            return initial_pinned_heap[h_number].memory_base;

        UNREACHABLE();
    }

    // Size of the initial block that backs generation gen.
    size_t get_initial_size (int gen)
    {
        if ((gen == soh_gen0) || (gen == soh_gen1) || (gen == soh_gen2))
            return block_size_normal;
        if (gen == loh_generation)
            return block_size_large;
        if (gen == poh_generation)
            return block_size_pinned;

        UNREACHABLE();
    }

    // Per-NUMA-node reservations, only populated for EACH_NUMA_NODE.
    int numa_reserved_block_count;
    numa_reserved_block* numa_reserved_block_table;
};

// The single record of the initial reservations. It has static storage
// duration and no constructor, so it starts out zero-initialized;
// gc_heap::reserve_initial_memory asserts that initial_memory is still 0 on entry.
initial_memory_details memory_details;

// Reserves the address space for the initial SOH, LOH and POH blocks of all heaps and
// records every heap's base addresses in memory_details.
//
// Strategies are attempted in this order, stopping at the first that succeeds:
//   1. EACH_NUMA_NODE  - one reservation per NUMA node (plus a separate POH reservation per
//                        node when separated_poh_p); only when heap_no_to_numa_node is given
//                        and the per-node table can be allocated.
//   2. ALLATONCE / ALLATONCE_SEPARATED_POH - a single reservation for everything (with a
//                        second one for POH when separated_poh_p).
//   3. EACH_GENERATION - one reservation each for SOH, LOH and POH.
//   4. EACH_BLOCK      - one reservation per entry of memory_details.initial_memory; only
//                        attempted when the LOH reservation of step 3 failed and there is
//                        more than one heap.
//
// Parameters:
//   normal_size, large_size, pinned_size - bytes to reserve per heap for SOH, LOH and POH.
//   num_heaps            - number of heaps to reserve for.
//   use_large_pages_p    - forwarded to virtual_alloc to request large pages.
//   separated_poh_p      - keep POH out of the combined SOH/LOH reservation; each heap's POH
//                          range is committed before returning.
//   heap_no_to_numa_node - optional table mapping heap number to NUMA node; may be nullptr.
//
// Returns TRUE on success, FALSE otherwise. On success g_gc_lowest_address and
// g_gc_highest_address span the reserved ranges.
BOOL gc_heap::reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size,
                                      int num_heaps, bool use_large_pages_p, bool separated_poh_p, uint16_t* heap_no_to_numa_node)
{
    BOOL reserve_success = FALSE;

    // should only be called once
    assert (memory_details.initial_memory == 0);

    // soh + loh + poh segments * num_heaps
    memory_details.initial_memory = new (nothrow) imemory_data[num_heaps * (total_generation_count - ephemeral_generation_count)];
    if (memory_details.initial_memory == 0)
    {
        dprintf (2, ("failed to reserve %zd bytes for imemory_data",
            num_heaps * (total_generation_count - ephemeral_generation_count) * sizeof (imemory_data)));
        return FALSE;
    }

    // the table is laid out as [SOH entries][LOH entries][POH entries], num_heaps entries each
    memory_details.initial_normal_heap = memory_details.initial_memory;
    memory_details.initial_large_heap = memory_details.initial_normal_heap + num_heaps;
    memory_details.initial_pinned_heap = memory_details.initial_large_heap + num_heaps;
    memory_details.block_size_normal = normal_size;
    memory_details.block_size_large = large_size;
    memory_details.block_size_pinned = pinned_size;

    memory_details.block_count = num_heaps;

    memory_details.current_block_normal = 0;
    memory_details.current_block_large = 0;
    memory_details.current_block_pinned = 0;

    // start with an empty address range; successful reservations widen it
    g_gc_lowest_address = MAX_PTR;
    g_gc_highest_address = 0;

    if (((size_t)MAX_PTR - large_size) < normal_size)
    {
        // we are already overflowing with just one heap.
        dprintf (2, ("0x%zx + 0x%zx already overflow", normal_size, large_size));
        return FALSE;
    }

    if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size + pinned_size))
    {
        dprintf (2, ("(0x%zx + 0x%zx)*0x%x overflow", normal_size, large_size, memory_details.block_count));
        return FALSE;
    }

    // figure out number of NUMA nodes and allocate additional table for NUMA local reservation
    memory_details.numa_reserved_block_count = 0;
    memory_details.numa_reserved_block_table = nullptr;
    int numa_node_count = 0;
    if (heap_no_to_numa_node != nullptr)
    {
        uint16_t highest_numa_node = 0;

        // figure out the highest NUMA node
        for (int heap_no = 0; heap_no < num_heaps; heap_no++)
        {
            uint16_t heap_numa_node = heap_no_to_numa_node[heap_no];
            highest_numa_node = max (highest_numa_node, heap_numa_node);
        }

        assert (highest_numa_node < MAX_SUPPORTED_CPUS);

        // table layout: [0, numa_node_count) holds the SOH/LOH(/POH) block of each node;
        // with separated_poh_p, [numa_node_count, 2 * numa_node_count) holds the POH blocks
        numa_node_count = highest_numa_node + 1;
        memory_details.numa_reserved_block_count = numa_node_count * (1 + separated_poh_p);
        memory_details.numa_reserved_block_table = new (nothrow) numa_reserved_block[memory_details.numa_reserved_block_count];
        if (memory_details.numa_reserved_block_table == nullptr)
        {
            // we couldn't get the memory - continue as if doing the non-NUMA case
            dprintf(2, ("failed to reserve %zd bytes for numa_reserved_block data", memory_details.numa_reserved_block_count * sizeof(numa_reserved_block)));
            memory_details.numa_reserved_block_count = 0;
        }
    }

    if (memory_details.numa_reserved_block_table != nullptr)
    {
        // figure out how much to reserve on each NUMA node
        // note this can be very different between NUMA nodes, depending on
        // which processors our heaps are associated with
        size_t merged_pinned_size = separated_poh_p ? 0 : pinned_size;
        for (int heap_no = 0; heap_no < num_heaps; heap_no++)
        {
            uint16_t heap_numa_node = heap_no_to_numa_node[heap_no];

            numa_reserved_block * block = &memory_details.numa_reserved_block_table[heap_numa_node];

            // add the size required for this heap
            block->block_size += normal_size + large_size + merged_pinned_size;

            if (separated_poh_p)
            {
                numa_reserved_block* pinned_block = &memory_details.numa_reserved_block_table[numa_node_count + heap_numa_node];

                // add the pinned size required for this heap
                pinned_block->block_size += pinned_size;
            }
        }

        // reserve the appropriate size on each NUMA node
        bool failure = false;
        for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++)
        {
            numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index];

            if (block->block_size == 0)
                continue;

            int numa_node = block_index % numa_node_count;
            bool pinned_block = block_index >= numa_node_count;
            block->memory_base = (uint8_t*)virtual_alloc (block->block_size, use_large_pages_p && !pinned_block, (uint16_t)numa_node);
            if (block->memory_base == nullptr)
            {
                dprintf(2, ("failed to reserve %zd bytes for on NUMA node %u", block->block_size, numa_node));
                failure = true;
                break;
            }
            else
            {
                g_gc_lowest_address = min(g_gc_lowest_address, block->memory_base);
                g_gc_highest_address = max(g_gc_highest_address, block->memory_base + block->block_size);
            }
        }

        if (failure)
        {
            // if we had any failures, undo the work done so far
            // we will instead use one of the other allocation patterns
            // we could try to use what we did succeed to reserve, but that gets complicated
            for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++)
            {
                numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index];

                if (block->memory_base != nullptr)
                {
                    virtual_free(block->memory_base, block->block_size);
                    block->memory_base = nullptr;
                }
            }
            delete [] memory_details.numa_reserved_block_table;
            memory_details.numa_reserved_block_table = nullptr;
            memory_details.numa_reserved_block_count = 0;

            // the reservations that widened the address range have just been released;
            // go back to an empty range so that a fallback pattern which only min/max-es
            // against the current bounds (EACH_BLOCK) does not inherit the stale ones
            g_gc_lowest_address = MAX_PTR;
            g_gc_highest_address = 0;
        }
        else
        {
            // for each NUMA node, give out the memory to its heaps
            for (uint16_t numa_node = 0; numa_node < numa_node_count; numa_node++)
            {
                numa_reserved_block * block = &memory_details.numa_reserved_block_table[numa_node];

                numa_reserved_block* pinned_block = separated_poh_p ?
                    &memory_details.numa_reserved_block_table[numa_node_count + numa_node] : nullptr;

                // if the block's size is 0, there can be no heaps on this NUMA node
                if (block->block_size == 0)
                {
                    assert((pinned_block == nullptr) || (pinned_block->block_size == 0));
                    continue;
                }

                uint8_t* memory_base = block->memory_base;
                uint8_t* pinned_memory_base = ((pinned_block == nullptr) ? nullptr : pinned_block->memory_base);
                for (int heap_no = 0; heap_no < num_heaps; heap_no++)
                {
                    uint16_t heap_numa_node = heap_no_to_numa_node[heap_no];

                    if (heap_numa_node != numa_node)
                    {
                        // this heap is on another NUMA node
                        continue;
                    }

                    memory_details.initial_normal_heap[heap_no].memory_base = memory_base;
                    memory_base += normal_size;

                    memory_details.initial_large_heap[heap_no].memory_base = memory_base;
                    memory_base += large_size;

                    if (separated_poh_p)
                    {
                        memory_details.initial_pinned_heap[heap_no].memory_base = pinned_memory_base;
                        pinned_memory_base += pinned_size;
                    }
                    else
                    {
                        memory_details.initial_pinned_heap[heap_no].memory_base = memory_base;
                        memory_base += pinned_size;
                    }
                }
                // sanity check - we should be at the end of the memory block for this NUMA node
                assert (memory_base == block->memory_base + block->block_size);
                assert ((pinned_block == nullptr) || (pinned_memory_base == pinned_block->memory_base + pinned_block->block_size));
            }
            memory_details.allocation_pattern = initial_memory_details::EACH_NUMA_NODE;
            reserve_success = TRUE;
        }
    }

    if (!reserve_success)
    {
        // first fallback: everything in one reservation (POH in a second one when separated)
        size_t temp_pinned_size = (separated_poh_p ? 0 : pinned_size);
        size_t separate_pinned_size = memory_details.block_count * pinned_size;
        size_t requestedMemory = memory_details.block_count * (normal_size + large_size + temp_pinned_size);

        uint8_t* allatonce_block = (uint8_t*)virtual_alloc(requestedMemory, use_large_pages_p);
        uint8_t* separated_poh_block = nullptr;
        if (allatonce_block && separated_poh_p)
        {
            separated_poh_block = (uint8_t*)virtual_alloc(separate_pinned_size, false);
            if (!separated_poh_block)
            {
                // both reservations are needed; give the first one back and fall through
                virtual_free(allatonce_block, requestedMemory);
                allatonce_block = nullptr;
            }
        }
        if (allatonce_block)
        {
            if (separated_poh_p)
            {
                g_gc_lowest_address = min(allatonce_block, separated_poh_block);
                g_gc_highest_address = max((allatonce_block + requestedMemory),
                    (separated_poh_block + separate_pinned_size));
                memory_details.allocation_pattern = initial_memory_details::ALLATONCE_SEPARATED_POH;
            }
            else
            {
                g_gc_lowest_address = allatonce_block;
                g_gc_highest_address = allatonce_block + requestedMemory;
                memory_details.allocation_pattern = initial_memory_details::ALLATONCE;
            }

            // layout inside the single block: all SOH blocks, then all LOH blocks, then
            // (unless separated) all POH blocks
            for (int i = 0; i < memory_details.block_count; i++)
            {
                memory_details.initial_normal_heap[i].memory_base = allatonce_block +
                    (i * normal_size);
                memory_details.initial_large_heap[i].memory_base = allatonce_block +
                    (memory_details.block_count * normal_size) + (i * large_size);
                if (separated_poh_p)
                {
                    memory_details.initial_pinned_heap[i].memory_base = separated_poh_block +
                        (i * pinned_size);
                }
                else
                {
                    memory_details.initial_pinned_heap[i].memory_base = allatonce_block +
                        (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size);
                }
            }
            reserve_success = TRUE;
        }
        else
        {
            // try to allocate 3 blocks
            uint8_t* b1 = (uint8_t*)virtual_alloc(memory_details.block_count * normal_size, use_large_pages_p);
            uint8_t* b2 = (uint8_t*)virtual_alloc(memory_details.block_count * large_size, use_large_pages_p);
            uint8_t* b3 = (uint8_t*)virtual_alloc(memory_details.block_count * pinned_size, use_large_pages_p && !separated_poh_p);

            if (b1 && b2 && b3)
            {
                memory_details.allocation_pattern = initial_memory_details::EACH_GENERATION;
                g_gc_lowest_address = min(b1, min(b2, b3));
                g_gc_highest_address = max(b1 + memory_details.block_count * normal_size,
                    max(b2 + memory_details.block_count * large_size,
                        b3 + memory_details.block_count * pinned_size));

                for (int i = 0; i < memory_details.block_count; i++)
                {
                    memory_details.initial_normal_heap[i].memory_base = b1 + (i * normal_size);
                    memory_details.initial_large_heap[i].memory_base = b2 + (i * large_size);
                    memory_details.initial_pinned_heap[i].memory_base = b3 + (i * pinned_size);
                }

                reserve_success = TRUE;
            }
            else
            {
                // allocation failed, we'll go on to try allocating each block.
                // We could preserve the b1 alloc, but code complexity increases
                if (b1)
                    virtual_free(b1, memory_details.block_count * normal_size);
                if (b2)
                    virtual_free(b2, memory_details.block_count * large_size);
                if (b3)
                    virtual_free(b3, memory_details.block_count * pinned_size);
            }

            // last resort: one reservation per entry of initial_memory
            if ((b2 == NULL) && (memory_details.block_count > 1))
            {
                memory_details.allocation_pattern = initial_memory_details::EACH_BLOCK;

                imemory_data* current_block = memory_details.initial_memory;
                for (int i = 0; i < (memory_details.block_count * (total_generation_count - ephemeral_generation_count)); i++, current_block++)
                {
                    size_t block_size = memory_details.block_size(i);
                    uint16_t numa_node = NUMA_NODE_UNDEFINED;
                    if (heap_no_to_numa_node != nullptr)
                    {
                        // entries repeat per heap within each SOH/LOH/POH section
                        int heap_no = i % memory_details.block_count;
                        numa_node = heap_no_to_numa_node[heap_no];
                    }
                    current_block->memory_base =
                        (uint8_t*)virtual_alloc(block_size, use_large_pages_p, numa_node);
                    if (current_block->memory_base == 0)
                    {
                        // Free the blocks that we've allocated so far
                        current_block = memory_details.initial_memory;
                        for (int j = 0; j < i; j++, current_block++) {
                            if (current_block->memory_base != 0) {
                                // release with the size of block j itself; the block i that
                                // failed may belong to a section with a different size
                                virtual_free(current_block->memory_base, memory_details.block_size(j));
                            }
                        }
                        reserve_success = FALSE;
                        break;
                    }
                    else
                    {
                        if (current_block->memory_base < g_gc_lowest_address)
                            g_gc_lowest_address = current_block->memory_base;
                        if (((uint8_t*)current_block->memory_base + block_size) > g_gc_highest_address)
                            g_gc_highest_address = (current_block->memory_base + block_size);
                    }
                    reserve_success = TRUE;
                }
            }
        }
    }

    if (reserve_success && separated_poh_p)
    {
        // commit every heap's POH range; the first failure stops the loop and fails the call
        for (int heap_no = 0; (reserve_success && (heap_no < num_heaps)); heap_no++)
        {
            if (!GCToOSInterface::VirtualCommit(memory_details.initial_pinned_heap[heap_no].memory_base, pinned_size))
            {
                reserve_success = FALSE;
            }
        }
    }

    return reserve_success;
}

// Releases everything gc_heap::reserve_initial_memory set up: the reserved address
// ranges (how they are freed depends on memory_details.allocation_pattern), the
// per-NUMA-node table if one was used, and the initial_memory table itself.
// Does nothing when initial_memory is NULL. All table pointers in memory_details
// are nulled afterwards so none of them is left dangling.
void gc_heap::destroy_initial_memory()
{
    if (memory_details.initial_memory != NULL)
    {
        switch (memory_details.allocation_pattern)
        {
            case initial_memory_details::ALLATONCE:
                // a single reservation that starts at the first SOH block
                virtual_free (memory_details.initial_memory[0].memory_base,
                    memory_details.block_count*(memory_details.block_size_normal +
                    memory_details.block_size_large + memory_details.block_size_pinned));
                break;

            case initial_memory_details::ALLATONCE_SEPARATED_POH:
                // one reservation for SOH + LOH, a second one for POH
                virtual_free(memory_details.initial_memory[0].memory_base,
                    memory_details.block_count * (memory_details.block_size_normal +
                        memory_details.block_size_large));
                virtual_free(memory_details.initial_pinned_heap[0].memory_base,
                    memory_details.block_count * (memory_details.block_size_pinned));
                break;

            case initial_memory_details::EACH_GENERATION:
                // one reservation each for SOH, LOH and POH
                virtual_free (memory_details.initial_normal_heap[0].memory_base,
                    memory_details.block_count*memory_details.block_size_normal);

                virtual_free (memory_details.initial_large_heap[0].memory_base,
                    memory_details.block_count*memory_details.block_size_large);

                virtual_free (memory_details.initial_pinned_heap[0].memory_base,
                    memory_details.block_count*memory_details.block_size_pinned);
                break;

            case initial_memory_details::EACH_BLOCK:
            {
                // every entry of initial_memory is its own reservation
                imemory_data* current_block = memory_details.initial_memory;
                int total_block_count = memory_details.block_count *
                    (total_generation_count - ephemeral_generation_count);
                for (int i = 0; i < total_block_count; i++, current_block++)
                {
                    size_t block_size = memory_details.block_size (i);
                    if (current_block->memory_base != NULL)
                    {
                        virtual_free (current_block->memory_base, block_size);
                    }
                }
                break;
            }
            case initial_memory_details::EACH_NUMA_NODE:
                // one reservation per used entry of the NUMA table
                for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++)
                {
                    numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index];

                    if (block->memory_base != nullptr)
                    {
                        virtual_free (block->memory_base, block->block_size);
                    }
                }
                delete [] memory_details.numa_reserved_block_table;
                // like the other table pointers below, do not leave this one dangling
                memory_details.numa_reserved_block_table = nullptr;
                memory_details.numa_reserved_block_count = 0;
                break;

            default:
                assert (!"unexpected allocation_pattern");
                break;
        }

        delete [] memory_details.initial_memory;
        memory_details.initial_memory = NULL;
        memory_details.initial_normal_heap = NULL;
        memory_details.initial_large_heap = NULL;
        memory_details.initial_pinned_heap = NULL;
    }
}

// Builds the initial heap segment of heap h_number for generation gen on top of
// the block recorded for it in memory_details. Returns whatever
// gc_heap::make_heap_segment returns.
heap_segment* make_initial_segment (int gen, int h_number, gc_heap* hp)
{
    uint8_t* block_start = (uint8_t*)memory_details.get_initial_memory (gen, h_number);
    size_t block_bytes = memory_details.get_initial_size (gen);

    return gc_heap::make_heap_segment (block_start, block_bytes, hp, gen);
}

// Convenience overload: reserves size bytes without large pages.
// NOTE(review): the two-argument call relies on a default value for numa_node
// that must be declared outside this part of the file - confirm.
void* virtual_alloc (size_t size)
{
    return virtual_alloc(size, false);
}

// Reserves size bytes of address space (reserve + commit when use_large_pages_p)
// on the given NUMA node and accounts for them in gc_heap::reserved_memory.
//
// gc_heap::reserved_memory_limit is raised by size when the remaining headroom is
// too small; if the headroom is still too small afterwards, 0 is returned without
// asking the OS. A range that ends too close to the top of the address space is
// released again and reported as a failure.
//
// Returns the start of the reserved range, or 0 on failure.
void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node)
{
    size_t requested_size = size;

    if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size)
    {
        gc_heap::reserved_memory_limit = gc_heap::reserved_memory_limit + requested_size;
        if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size)
        {
            return 0;
        }
    }

    uint32_t flags = VirtualReserveFlags::None;
#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    if (virtual_alloc_hardware_write_watch)
    {
        flags = VirtualReserveFlags::WriteWatch;
    }
#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

    void* prgmem;
    if (use_large_pages_p)
    {
        prgmem = GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size, numa_node);
    }
    else
    {
        prgmem = GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags, numa_node);
    }

    if (prgmem)
    {
        // We don't want (prgmem + size) to be right at the end of the address space
        // because we'd have to worry about that everytime we do (address + size).
        // We also want to make sure that we leave loh_size_threshold at the end
        // so we allocate a small object we don't need to worry about overflow there
        // when we do alloc_ptr+size.
        uint8_t* end_mem = (uint8_t*)prgmem + requested_size;

        if ((end_mem == 0) || ((size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC))
        {
            GCToOSInterface::VirtualRelease (prgmem, requested_size);
            dprintf (2, ("Virtual Alloc size %zd returned memory right against 4GB [%zx, %zx[ - discarding",
                        requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size)));
            prgmem = 0;
        }
        else
        {
            // only ranges that are actually kept count as reserved
            gc_heap::reserved_memory += requested_size;
        }
    }

    dprintf (2, ("Virtual Alloc size %zd: [%zx, %zx[",
                 requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size)));

    return prgmem;
}

// Works out the segment size to use.
//
// The configured size (GCConfig::GetSegmentSize(), halved when large_seg) is taken
// as is when IsValidSegmentSize accepts it. Otherwise a configured size of at least
// 2 bytes but below 4MB is bumped to 4MB, and anything else falls back to the
// built-in default (LHEAP_ALLOC for large_seg, INITIAL_ALLOC otherwise), which under
// MULTIPLE_HEAPS is halved once above 4 and once more above 8 processors (on 64-bit
// hosts only for the non-large case). The result is finally rounded to a power
// of 2: up on 64-bit hosts, down otherwise.
static size_t get_valid_segment_size (BOOL large_seg=FALSE)
{
    size_t fallback_size;
    size_t seg_size = static_cast<size_t>(GCConfig::GetSegmentSize());

    if (large_seg)
    {
        fallback_size = LHEAP_ALLOC;
        seg_size = seg_size / 2;
    }
    else
    {
        fallback_size = INITIAL_ALLOC;
    }

#ifdef MULTIPLE_HEAPS
#ifdef HOST_64BIT
    const bool shrink_fallback = !large_seg;
#else // HOST_64BIT
    const bool shrink_fallback = true;
#endif // HOST_64BIT
    if (shrink_fallback)
    {
        if (g_num_processors > 4)
            fallback_size /= 2;
        if (g_num_processors > 8)
            fallback_size /= 2;
    }
#endif //MULTIPLE_HEAPS

    // if seg_size is small but not 0 (0 is default if config not set)
    // then set the segment to the minimum size
    if (!g_theGCHeap->IsValidSegmentSize(seg_size))
    {
        const size_t min_seg_size = 1024*1024*4;

        // a requested size of at least 2 bytes but under 4MB gets the minimum
        if ((seg_size >= 2) && (seg_size < min_seg_size))
            seg_size = min_seg_size;
        else
            seg_size = fallback_size;
    }

#ifdef HOST_64BIT
    return round_up_power2 (seg_size);
#else
    return round_down_power2 (seg_size);
#endif // HOST_64BIT
}

#ifndef USE_REGIONS
void
gc_heap::compute_new_ephemeral_size()
{
    int eph_gen_max = max_generation - 1 - (settings.promotion ? 1 : 0);
    size_t padding_size = 0;

    for (int i = 0; i <= eph_gen_max; i++)
    {
        dynamic_data* dd = dynamic_data_of (i);
        total_ephemeral_size += (dd_survived_size (dd) - dd_pinned_survived_size (dd));
#ifdef RESPECT_LARGE_ALIGNMENT
        total_ephemeral_size += dd_num_npinned_plugs (dd) * switch_alignment_size (FALSE);
#endif //RESPECT_LARGE_ALIGNMENT
#ifdef FEATURE_STRUCTALIGN
        total_ephemeral_size += dd_num_npinned_plugs (dd) * MAX_STRUCTALIGN;
#endif //FEATURE_STRUCTALIGN

#ifdef SHORT_PLUGS
        padding_size += dd_padding_size (dd);
#endif //SHORT_PLUGS
    }

    total_ephemeral_size += eph_gen_starts_size;

#ifdef RESPECT_LARGE_ALIGNMENT
    size_t planned_ephemeral_size = heap_segment_plan_allocated (ephemeral_heap_segment) -
                                       generation_plan_allocation_start (generation_of (max_generation-1));
    total_ephemeral_size = min (total_ephemeral_size, planned_ephemeral_size);
#endif //RESPECT_LARGE_ALIGNMENT

#ifdef SHORT_PLUGS
    total_ephemeral_size = Align ((size_t)((double)total_ephemeral_size * short_plugs_pad_ratio) + 1);
    total_ephemeral_size += Align (DESIRED_PLUG_LENGTH);
#endif //SHORT_PLUGS

    dprintf (3, ("total ephemeral size is %zx, padding %zx(%zx)",
        total_ephemeral_size,
        padding_size, (total_ephemeral_size - padding_size)));
}

// Picks the segment the ephemeral generations will expand into.
//
// Unless the pause mode is low latency / no GC (or a background GC is running), the
// existing gen2 segments are searched first, walking backwards from
// ephemeral_heap_segment through heap_segment_prev_rw and stopping at the first
// read-write gen2 segment (which itself is not considered). For a segment that
// can_expand_into_p accepts:
//   - when the condemned generation is max_generation, that segment is returned for reuse;
//   - otherwise, unless the pause mode is sustained low latency, 0 is returned and the
//     expand mechanism is recorded as expand_next_full_gc; with sustained low latency the
//     search simply continues and ends up acquiring a new segment.
// If no segment is reused, a new one is requested through get_segment.
//
// Returns the segment to expand into, or 0 when none is available (see above) or the
// new segment could not be allocated. The outcome is recorded with set_mechanism.
heap_segment*
gc_heap::soh_get_segment_to_expand()
{
    size_t size = soh_segment_size;

    ordered_plug_indices_init = FALSE;
    use_bestfit = FALSE;

    //compute the size of the new ephemeral heap segment.
    compute_new_ephemeral_size();

    if ((settings.pause_mode != pause_low_latency) &&
        (settings.pause_mode != pause_no_gc)
#ifdef BACKGROUND_GC
        && (!gc_heap::background_running_p())
#endif //BACKGROUND_GC
        )
    {
        assert (settings.condemned_generation <= max_generation);
        // no gen2 allocator is handed to can_expand_into_p when gen2 itself is condemned
        allocator*  gen_alloc = ((settings.condemned_generation == max_generation) ? nullptr :
                              generation_allocator (generation_of (max_generation)));
        dprintf (2, ("(gen%d)soh_get_segment_to_expand", settings.condemned_generation));

        // try to find one in the gen 2 segment list, search backwards because the first segments
        // tend to be more compact than the later ones.
        heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation)));

        _ASSERTE(fseg != NULL);

#ifdef SEG_REUSE_STATS
        int try_reuse = 0;
#endif //SEG_REUSE_STATS

        heap_segment* seg = ephemeral_heap_segment;
        while ((seg = heap_segment_prev_rw (fseg, seg)) && (seg != fseg))
        {
#ifdef SEG_REUSE_STATS
            try_reuse++;
#endif //SEG_REUSE_STATS

            if (can_expand_into_p (seg, size/3, total_ephemeral_size, gen_alloc))
            {
                get_gc_data_per_heap()->set_mechanism (gc_heap_expand,
                    (use_bestfit ? expand_reuse_bestfit : expand_reuse_normal));
                if (settings.condemned_generation == max_generation)
                {
                    if (use_bestfit)
                    {
                        build_ordered_free_spaces (seg);
                        dprintf (GTC_LOG, ("can use best fit"));
                    }

#ifdef SEG_REUSE_STATS
                    dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse",
                        settings.condemned_generation, try_reuse));
#endif //SEG_REUSE_STATS
                    dprintf (GTC_LOG, ("max_gen: Found existing segment to expand into %zx", (size_t)seg));
                    return seg;
                }
                else
                {
#ifdef SEG_REUSE_STATS
                    dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse - returning",
                        settings.condemned_generation, try_reuse));
#endif //SEG_REUSE_STATS
                    dprintf (GTC_LOG, ("max_gen-1: Found existing segment to expand into %zx", (size_t)seg));

                    // If we return 0 here, the allocator will think since we are short on end
                    // of seg we need to trigger a full compacting GC. So if sustained low latency
                    // is set we should acquire a new seg instead, that way we wouldn't be short.
                    // The real solution, of course, is to actually implement seg reuse in gen1.
                    if (settings.pause_mode != pause_sustained_low_latency)
                    {
                        // this branch runs when sustained low latency is NOT set
                        dprintf (GTC_LOG, ("max_gen-1: SustainedLowLatency is not set, expand on next full GC"));
                        get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_next_full_gc);
                        return 0;
                    }
                }
            }
        }
    }

    heap_segment* result = get_segment (size, gc_oh_num::soh);

    if(result)
    {
#ifdef BACKGROUND_GC
        if (current_c_gc_state == c_gc_state_planning)
        {
            // When we expand heap during bgc sweep, we set the seg to be swept so
            // we'll always look at cards for objects on the new segment.
            result->flags |= heap_segment_flags_swept;
        }
#endif //BACKGROUND_GC

        FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(result),
                                  (size_t)(heap_segment_reserved (result) - heap_segment_mem(result)),
                                  gc_etw_segment_small_object_heap);
    }

    get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (result ? expand_new_seg : expand_no_memory));

    if (result == 0)
    {
        dprintf (2, ("h%d: failed to allocate a new segment!", heap_number));
    }
    else
    {
#ifdef MULTIPLE_HEAPS
        heap_segment_heap (result) = this;
#endif //MULTIPLE_HEAPS
    }

    dprintf (GTC_LOG, ("(gen%d)creating new segment %p", settings.condemned_generation, result));
    return result;
}

// Obtains a segment of `size` bytes for the object heap `oh`. A hoarded segment
// from segment_standby_list is reused when one of a fitting size exists;
// otherwise fresh memory is reserved with virtual_alloc.
// Returns 0 in case of allocation failure, and NULL unconditionally when
// heap_hard_limit is set.
heap_segment*
gc_heap::get_segment (size_t size, gc_oh_num oh)
{
    assert(oh != gc_oh_num::unknown);
    BOOL uoh_p = (oh == gc_oh_num::loh) || (oh == gc_oh_num::poh);
    if (heap_hard_limit)
        return NULL;

    // Look through the standby list for a segment that is big enough for the
    // request but less than twice as big, and unlink the first match.
    heap_segment* result = segment_standby_list;
    heap_segment* prev = 0;
    for (; result != 0; prev = result, result = heap_segment_next (result))
    {
        size_t reserved_bytes = (size_t)(heap_segment_reserved (result) - (uint8_t*)result);
        if ((reserved_bytes < size) || ((reserved_bytes / 2) >= size))
        {
            continue;
        }

        dprintf (2, ("Hoarded segment %zx found", (size_t) result));
        if (prev != 0)
        {
            heap_segment_next (prev) = heap_segment_next (result);
        }
        else
        {
            segment_standby_list = heap_segment_next (result);
        }
        break;
    }

    if (result != 0)
    {
        init_heap_segment (result, __this);
#ifdef BACKGROUND_GC
        if (is_bgc_in_progress())
        {
            dprintf (GC_TABLE_LOG, ("hoarded seg %p, mark_array is %p", result, mark_array));
            if (!commit_mark_array_new_seg (__this, result))
            {
                dprintf (GC_TABLE_LOG, ("failed to commit mark array for hoarded seg"));
                // The segment is unusable right now; push it back onto the
                // front of the standby list.
                if (segment_standby_list != 0)
                {
                    heap_segment_next (result) = segment_standby_list;
                }
                segment_standby_list = result;

                result = 0;
            }
        }
#endif //BACKGROUND_GC

        if (result != 0)
        {
            seg_mapping_table_add_segment (result, __this);
        }
    }

    if (result == 0)
    {
        // No hoarded segment could be used, reserve new memory.
        void* mem = virtual_alloc (size);
        if (mem == 0)
        {
            fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p);
            return 0;
        }

        result = make_heap_segment ((uint8_t*)mem, size, __this, (oh + max_generation));

        if (result == 0)
        {
            fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p);
            virtual_free (mem, size);
        }
        else
        {
            // The range handed to grow_brick_card_tables is the current
            // [lowest, highest) range widened to include the new memory.
            uint8_t* mem_end = (uint8_t*)mem + size;
            uint8_t* start = (mem < g_gc_lowest_address) ? (uint8_t*)mem : (uint8_t*)g_gc_lowest_address;
            uint8_t* end = (mem_end > g_gc_highest_address) ? mem_end : (uint8_t*)g_gc_highest_address;

            if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, uoh_p) != 0)
            {
                // release_segment needs the flags to decrement the proper bucket
                size_t flags = 0;
                if (oh == poh)
                {
                    flags = heap_segment_flags_poh;
                }
                else if (oh == loh)
                {
                    flags = heap_segment_flags_loh;
                }
                result->flags |= flags;
                release_segment (result);
                return 0;
            }

            seg_mapping_table_add_segment (result, __this);
        }
    }

#ifdef BACKGROUND_GC
    if (result != 0)
    {
        ::record_changed_seg ((uint8_t*)result, heap_segment_reserved (result),
                            settings.gc_index, current_bgc_state,
                            seg_added);
        bgc_verify_mark_array_cleared (result);
    }
#endif //BACKGROUND_GC

    dprintf (GC_TABLE_LOG, ("h%d: new seg: %p-%p (%zd)", heap_number, result, ((uint8_t*)result + size), size));
    return result;
}

// Releases the segment `sg`: fires the GCFreeSegment_V1 event, subtracts the
// segment's committed size (measured from the segment header to
// heap_segment_committed) from the committed-bytes accounting for the
// segment's object heap, and frees the whole reserved range.
// In the non-MULTIPLE_HEAPS build the heap number passed to
// reduce_committed_bytes is -1.
void gc_heap::release_segment (heap_segment* sg)
{
    FIRE_EVENT(GCFreeSegment_V1, heap_segment_mem(sg));
    // The reserved range starts at the segment header itself.
    size_t reserved_size = (uint8_t*)heap_segment_reserved (sg) - (uint8_t*)sg;
    reduce_committed_bytes (
        sg,
        ((uint8_t*)heap_segment_committed (sg) - (uint8_t*)sg),
        (int) heap_segment_oh (sg)
#ifdef MULTIPLE_HEAPS
        , heap_segment_heap (sg)->heap_number
#else
        , -1
#endif
        , true
        );
    virtual_free (sg, reserved_size, sg);
}

// Sets heap_segment_flags_inrange on `seg` and records in ro_segments_in_range
// that such a segment exists. Always returns TRUE.
BOOL gc_heap::set_ro_segment_in_range (heap_segment* seg)
{
    seg->flags = seg->flags | heap_segment_flags_inrange;
    ro_segments_in_range = TRUE;

    return TRUE;
}
#endif //!USE_REGIONS

// Gets a new segment (a new region when USE_REGIONS is defined) of `size`
// bytes for the UOH generation `gen_number` on heap `hp`.
// On success the segment is associated with `hp` (MULTIPLE_HEAPS only) and
// reported through DiagAddNewRegion. In the segments build it is additionally
// tagged with the LOH/POH flag, announced with GCCreateSegment_V1 and threaded
// onto the generation's segment list.
// Returns 0 when no segment could be obtained.
heap_segment* gc_heap::get_segment_for_uoh (int gen_number, size_t size
#ifdef MULTIPLE_HEAPS
                                           , gc_heap* hp
#endif //MULTIPLE_HEAPS
                                           )
{
#ifndef MULTIPLE_HEAPS
    gc_heap* hp = 0;
#endif //MULTIPLE_HEAPS

#ifdef USE_REGIONS
    heap_segment* new_seg = hp->get_new_region (gen_number, size);
#else //USE_REGIONS
    heap_segment* new_seg = hp->get_segment (size, gen_to_oh (gen_number));
#endif //USE_REGIONS

    if (new_seg == 0)
    {
        return new_seg;
    }

#ifdef MULTIPLE_HEAPS
    heap_segment_heap (new_seg) = hp;
#endif //MULTIPLE_HEAPS

    const bool for_poh_p = (gen_number == poh_generation);
    size_t flags = heap_segment_flags_loh;
    if (for_poh_p)
    {
        flags = heap_segment_flags_poh;
    }

#ifdef USE_REGIONS
    // in the regions case, flags are set by get_new_region
    assert ((new_seg->flags & (heap_segment_flags_loh | heap_segment_flags_poh)) == flags);
#else //USE_REGIONS
    new_seg->flags |= flags;

    FIRE_EVENT(GCCreateSegment_V1,
        heap_segment_mem(new_seg),
        (size_t)(heap_segment_reserved (new_seg) - heap_segment_mem(new_seg)),
        for_poh_p ?
            gc_etw_segment_pinned_object_heap :
            gc_etw_segment_large_object_heap);

#ifdef MULTIPLE_HEAPS
    hp->thread_uoh_segment (gen_number, new_seg);
#else
    thread_uoh_segment (gen_number, new_seg);
#endif //MULTIPLE_HEAPS
#endif //USE_REGIONS

    GCToEEInterface::DiagAddNewRegion(
                        gen_number,
                        heap_segment_mem (new_seg),
                        heap_segment_allocated (new_seg),
                        heap_segment_reserved (new_seg)
                    );

    return new_seg;
}

void gc_heap::thread_uoh_segment (int gen_number, heap_segment* new_seg)
{
    heap_segment* seg = generation_allocation_segment (generation_of (gen_number));

    while (heap_segment_next_rw (seg))
        seg = heap_segment_next_rw (seg);

    heap_segment_next (seg) = new_seg;
}

// Gets a new UOH segment of `size` bytes for generation `gen_number` on this heap.
//
// The function releases more_space_lock_uoh, does the segment acquisition under
// gc_heap::gc_lock, and then re-acquires more_space_lock_uoh before returning.
//
// did_full_compact_gc - set to TRUE if the full compacting GC count increased
//                       between function entry and the point where gc_lock was
//                       taken, FALSE otherwise.
// msl_status          - receives msl_retry_different_heap when should_move_heap
//                       says so, otherwise the result of re-entering
//                       more_space_lock_uoh.
//
// Returns the new segment, or NULL if none could be obtained or if *msl_status is
// msl_retry_different_heap. Note that on the should_move_heap path the function
// returns without re-entering more_space_lock_uoh.
heap_segment*
gc_heap::get_uoh_segment (int gen_number, size_t size, BOOL* did_full_compact_gc, enter_msl_status* msl_status)
{
    *did_full_compact_gc = FALSE;
    size_t last_full_compact_gc_count = get_full_compact_gc_count();

    //access to get_segment needs to be serialized
    add_saved_spinlock_info (true, me_release, mt_get_large_seg, msl_entered);
    leave_spin_lock (&more_space_lock_uoh);
    enter_spin_lock (&gc_heap::gc_lock);
    dprintf (SPINLOCK_LOG, ("[%d]Seg: Egc", heap_number));
    // if a GC happened between here and before we ask for a segment in
    // get_uoh_segment, we need to count that GC.
    size_t current_full_compact_gc_count = get_full_compact_gc_count();

    if (current_full_compact_gc_count > last_full_compact_gc_count)
    {
        *did_full_compact_gc = TRUE;
    }

    // Bail out (dropping only gc_lock) when the caller should retry on a different heap.
    if (should_move_heap (&more_space_lock_uoh))
    {
        *msl_status = msl_retry_different_heap;
        leave_spin_lock (&gc_heap::gc_lock);
        return NULL;
    }

    heap_segment* res = get_segment_for_uoh (gen_number, size
#ifdef MULTIPLE_HEAPS
                                            , this
#endif //MULTIPLE_HEAPS
                                            );

    dprintf (SPINLOCK_LOG, ("[%d]Seg: A Lgc", heap_number));
    leave_spin_lock (&gc_heap::gc_lock);
    // Swap back from gc_lock to more_space_lock_uoh; this can also ask for a retry
    // on a different heap, in which case res is not returned.
    *msl_status = enter_spin_lock_msl (&more_space_lock_uoh);
    if (*msl_status == msl_retry_different_heap)
        return NULL;

    add_saved_spinlock_info (true, me_acquire, mt_get_large_seg, *msl_status);

    return res;
}


#ifdef MULTIPLE_HEAPS
// get_cycle_count: per-platform helper that returns a cycle counter reading. It is
// used by heap_select::access_time to time a memory access. Exactly one variant is
// selected by the preprocessor chain below.
#ifdef HOST_X86
#ifdef _MSC_VER
// MSVC x86: the function body is just the rdtsc instruction with no return
// statement; warning 4035 is disabled around it.
#pragma warning(disable:4035)
    static ptrdiff_t  get_cycle_count()
    {
        __asm   rdtsc
    }
#pragma warning(default:4035)
#elif defined(__GNUC__)
    // GCC-style x86: rdtsc output is read from EAX ("=a") and EDX ("=d"); only the
    // EAX part is returned, the EDX part is discarded.
    static ptrdiff_t  get_cycle_count()
    {
        ptrdiff_t cycles;
        ptrdiff_t cyclesHi;
        __asm__ __volatile__
        ("rdtsc":"=a" (cycles), "=d" (cyclesHi));
        return cycles;
    }
#else //_MSC_VER
#error Unknown compiler
#endif //_MSC_VER
#elif defined(TARGET_AMD64)
#ifdef _MSC_VER
// MSVC x64: use the __rdtsc intrinsic.
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
    static ptrdiff_t get_cycle_count()
    {
        return (ptrdiff_t)__rdtsc();
    }
#elif defined(__GNUC__)
    // GCC-style x64: combine the EDX ("=d") and EAX ("=a") halves into one value.
    static ptrdiff_t get_cycle_count()
    {
        ptrdiff_t cycles;
        ptrdiff_t cyclesHi;
        __asm__ __volatile__
        ("rdtsc":"=a" (cycles), "=d" (cyclesHi));
        return (cyclesHi << 32) | cycles;
    }
#else // _MSC_VER
    // Other compilers: the implementation is expected to be provided externally.
    extern "C" ptrdiff_t get_cycle_count(void);
#endif // _MSC_VER
#elif defined(TARGET_LOONGARCH64)
    // LOONGARCH64: not implemented yet; executes a "break 0" instruction and returns 0.
    static ptrdiff_t get_cycle_count()
    {
        ////FIXME: TODO for LOONGARCH64:
        //ptrdiff_t  cycle;
        __asm__ volatile ("break 0 \n");
        return 0;
    }
#else
    static ptrdiff_t get_cycle_count()
    {
        // @ARMTODO, @ARM64TODO, @WASMTODO: cycle counter is not exposed to user mode. For now (until we can show this
        // makes a difference on the configurations on which we'll run) just return 0. This will result in
        // all buffer access times being reported as equal in access_time().
        return 0;
    }
#endif //HOST_X86

// Number of GC heaps on one NUMA node.
// We may not be on contiguous numa nodes so need to store
// the node index as well.
struct node_heap_count
{
    // NUMA node number (taken from heap_select::heap_no_to_numa_node).
    int node_no;
    // How many heaps are on that node.
    int heap_count;
};

// heap_select holds the static tables that map between processors, heaps and
// NUMA nodes, and picks the heap an allocating thread should use.
//
// When the OS can report the current processor number, heap selection is a
// table lookup (proc_no_to_heap_no). Otherwise a "sniff buffer" is allocated
// and the heap is chosen by timing reads of per-heap cache lines
// (see access_time / mark_heap / select_heap).
//
// The class is never instantiated (private constructor); everything is static.
class heap_select
{
    heap_select() {}
public:
    // Sniff buffer state; only allocated when the current processor number
    // is not available. The byte for (heap, sniff index) lives at
    // (1 + heap*n_sniff_buffers + sniff_index) * HS_CACHE_LINE_SIZE.
    static uint8_t* sniff_buffer;
    static unsigned n_sniff_buffers;
    static unsigned cur_sniff_index;

    // Lookup tables. Processor numbers are reduced modulo MAX_SUPPORTED_CPUS
    // before indexing proc_no_to_heap_no.
    static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
    static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
    static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
    // [node] is the first heap number on that node, [node + 1] the first heap
    // number not on it (see init_numa_node_to_heap_map).
    static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];

#ifdef HEAP_BALANCE_INSTRUMENTATION
    // Note this is the total numa nodes GC heaps are on. There might be
    // more on the machine if GC threads aren't using all of them.
    static uint16_t total_numa_nodes;
    static node_heap_count heaps_on_node[MAX_SUPPORTED_NODES];
#endif

    // Returns the number of cycles (per get_cycle_count) it took to read the
    // sniff byte belonging to (heap_number, sniff_index). The byte is expected
    // to be 0.
    static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers)
    {
        ptrdiff_t start_cycles = get_cycle_count();
        uint8_t sniff = sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE];
        assert (sniff == 0);
        ptrdiff_t elapsed_cycles = get_cycle_count() - start_cycles;
        // add sniff here just to defeat the optimizer
        elapsed_cycles += sniff;
        return (int) elapsed_cycles;
    }

public:
    // One-time initialization for n_heaps heaps.
    // Allocates and zeroes the sniff buffer when needed, then fills
    // heap_no_to_proc_no and heap_no_to_numa_node so that heaps on the same
    // NUMA node get contiguous heap numbers.
    // Returns FALSE if the sniff buffer size overflows or cannot be allocated,
    // TRUE otherwise.
    static BOOL init(int n_heaps)
    {
        assert (sniff_buffer == NULL && n_sniff_buffers == 0);
        if (!GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            n_sniff_buffers = n_heaps*2+1;
            // One leading line, n_sniff_buffers lines per heap, one trailing line.
            size_t n_cache_lines = 1 + n_heaps * n_sniff_buffers + 1;
            size_t sniff_buf_size = n_cache_lines * HS_CACHE_LINE_SIZE;
            if (sniff_buf_size / HS_CACHE_LINE_SIZE != n_cache_lines) // check for overflow
            {
                return FALSE;
            }

            sniff_buffer = new (nothrow) uint8_t[sniff_buf_size];
            if (sniff_buffer == 0)
                return FALSE;
            memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t));
        }

        bool do_numa = GCToOSInterface::CanEnableGCNumaAware();

        // we want to assign heap indices such that there is a contiguous
        // range of heap numbers for each numa node

        // we do this in two passes:
        // 1. gather processor numbers and numa node numbers for all heaps
        // 2. assign heap numbers for each numa node

        // Pass 1: gather processor numbers and numa node numbers
        uint16_t proc_no[MAX_SUPPORTED_CPUS];
        uint16_t node_no[MAX_SUPPORTED_CPUS];
        uint16_t max_node_no = 0;
        uint16_t heap_num;
        for (heap_num = 0; heap_num < n_heaps; heap_num++)
        {
            // Stop at the first heap the OS layer cannot place; only the heaps
            // gathered so far take part in pass 2.
            if (!GCToOSInterface::GetProcessorForHeap (heap_num, &proc_no[heap_num], &node_no[heap_num]))
                break;
            assert(proc_no[heap_num] < MAX_SUPPORTED_CPUS);
            // Without NUMA awareness (or with an undefined node) everything is on node 0.
            if (!do_numa || node_no[heap_num] == NUMA_NODE_UNDEFINED)
                node_no[heap_num] = 0;
            max_node_no = max(max_node_no, node_no[heap_num]);
        }

        // Pass 2: assign heap numbers by numa node
        int cur_heap_no = 0;
        for (uint16_t cur_node_no = 0; cur_node_no <= max_node_no; cur_node_no++)
        {
            for (int i = 0; i < heap_num; i++)
            {
                if (node_no[i] != cur_node_no)
                    continue;

                // we found a heap on cur_node_no
                heap_no_to_proc_no[cur_heap_no] = proc_no[i];
                heap_no_to_numa_node[cur_heap_no] = cur_node_no;

                cur_heap_no++;
            }
        }

        return TRUE;
    }

    // Records that the processor the caller is currently running on maps to
    // heap_number. Does nothing when the current processor number is unavailable.
    static void init_cpu_mapping(int heap_number)
    {
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber();
            // For a 32-bit process running on a machine with > 64 procs,
            // even though the process can only use up to 32 procs, the processor
            // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0,
            // the GetCurrentProcessorNumber will return a number that's >= 64.
            proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS] = (uint16_t)heap_number;
        }
    }

    // Sniff-buffer mode only: performs a read-modify-write on every sniff byte
    // of heap_number. The bytes start out as 0 and "&= 1" keeps them 0, so the
    // stored value does not change.
    // NOTE(review): presumably the point is to pull these cache lines close to
    // the calling processor so access_time is low there - confirm.
    static void mark_heap(int heap_number)
    {
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
            return;

        for (unsigned sniff_index = 0; sniff_index < n_sniff_buffers; sniff_index++)
            sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1;
    }

    // Returns the heap number the calling thread should allocate on.
    // acontext is only used for logging.
    static int select_heap(alloc_context* acontext)
    {
#ifndef TRACE_GC
        UNREFERENCED_PARAMETER(acontext); // only referenced by dprintf
#endif //TRACE_GC

        // Fast path: look the heap up by the current processor number.
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber();
            // For a 32-bit process running on a machine with > 64 procs,
            // even though the process can only use up to 32 procs, the processor
            // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0,
            // the GetCurrentProcessorNumber will return a number that's >= 64.
            int adjusted_heap = proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS];
            // with dynamic heap count, need to make sure the value is in range.
            if (adjusted_heap >= gc_heap::n_heaps)
            {
                adjusted_heap %= gc_heap::n_heaps;
            }
            return adjusted_heap;
        }

        // Slow path: time a read of each heap's sniff byte (rotating through the
        // sniff indices on successive calls) and pick the heap with the lowest time.
        unsigned sniff_index = Interlocked::Increment(&cur_sniff_index);
        sniff_index %= n_sniff_buffers;

        int best_heap = 0;
        int best_access_time = 1000*1000*1000;
        int second_best_access_time = best_access_time;

        uint8_t *l_sniff_buffer = sniff_buffer;
        unsigned l_n_sniff_buffers = n_sniff_buffers;
        for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++)
        {
            int this_access_time = access_time(l_sniff_buffer, heap_number, sniff_index, l_n_sniff_buffers);
            if (this_access_time < best_access_time)
            {
                second_best_access_time = best_access_time;
                best_access_time = this_access_time;
                best_heap = heap_number;
            }
            else if (this_access_time < second_best_access_time)
            {
                second_best_access_time = this_access_time;
            }
        }

        // A "crisp" result is one where the best time is less than half of the
        // second best; in that case touch the winning heap's sniff byte again.
        if (best_access_time*2 < second_best_access_time)
        {
            sniff_buffer[(1 + best_heap*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1;

            dprintf (3, ("select_heap yields crisp %d for context %p\n", best_heap, (void *)acontext));
        }
        else
        {
            dprintf (3, ("select_heap yields vague %d for context %p\n", best_heap, (void *)acontext ));
        }

        return best_heap;
    }

    // True when select_heap can use the processor-number lookup.
    static bool can_find_heap_fast()
    {
        return GCToOSInterface::CanGetCurrentProcessorNumber();
    }

    // Returns the processor number recorded for heap_number by init.
    static uint16_t find_proc_no_from_heap_no(int heap_number)
    {
        return heap_no_to_proc_no[heap_number];
    }

    // Returns the NUMA node recorded for heap_number by init.
    static uint16_t find_numa_node_from_heap_no(int heap_number)
    {
        return heap_no_to_numa_node[heap_number];
    }

    // Builds numa_node_to_heap_map (and, with HEAP_BALANCE_INSTRUMENTATION,
    // heaps_on_node / total_numa_nodes) from heap_no_to_numa_node for heaps
    // 0..nheaps-1. Relies on heaps of the same node being numbered contiguously.
    static void init_numa_node_to_heap_map(int nheaps)
    {
        // Called right after GCHeap::Init() for each heap
        // For each NUMA node used by the heaps, the
        // numa_node_to_heap_map[numa_node] is set to the first heap number on that node and
        // numa_node_to_heap_map[numa_node + 1] is set to the first heap number not on that node
        // Set the start of the heap number range for the first NUMA node
        numa_node_to_heap_map[heap_no_to_numa_node[0]] = 0;
#ifdef HEAP_BALANCE_INSTRUMENTATION
        // While iterating, total_numa_nodes is the index of the node currently
        // being counted; it becomes the real total after the loop.
        total_numa_nodes = 0;
        memset (heaps_on_node, 0, sizeof (heaps_on_node));
        heaps_on_node[0].node_no = heap_no_to_numa_node[0];
        heaps_on_node[0].heap_count = 1;
#endif //HEAP_BALANCE_INSTRUMENTATION

        for (int i=1; i < nheaps; i++)
        {
            if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1])
            {
#ifdef HEAP_BALANCE_INSTRUMENTATION
                total_numa_nodes++;
                heaps_on_node[total_numa_nodes].node_no = heap_no_to_numa_node[i];
#endif

                // Set the end of the heap number range for the previous NUMA node
                numa_node_to_heap_map[heap_no_to_numa_node[i-1] + 1] =
                // Set the start of the heap number range for the current NUMA node
                numa_node_to_heap_map[heap_no_to_numa_node[i]] = (uint16_t)i;
            }
#ifdef HEAP_BALANCE_INSTRUMENTATION
            (heaps_on_node[total_numa_nodes].heap_count)++;
#endif
        }

        // Set the end of the heap range for the last NUMA node
        numa_node_to_heap_map[heap_no_to_numa_node[nheaps-1] + 1] = (uint16_t)nheaps; //mark the end with nheaps

#ifdef HEAP_BALANCE_INSTRUMENTATION
        total_numa_nodes++;
#endif
    }

    // Queries the OS layer for the processor/NUMA node at `index` (the same
    // GetProcessorForHeap call init uses for heap numbers) and reports the
    // heap range [*start_heap, *end_heap) recorded for that node.
    // Returns false if the OS layer cannot provide the information.
    static bool get_info_proc (int index, uint16_t* proc_no, uint16_t* node_no, int* start_heap, int* end_heap)
    {
        if (!GCToOSInterface::GetProcessorForHeap ((uint16_t)index, proc_no, node_no))
            return false;

        if (*node_no == NUMA_NODE_UNDEFINED)
            *node_no = 0;

        *start_heap = (int)numa_node_to_heap_map[*node_no];
        *end_heap = (int)(numa_node_to_heap_map[*node_no + 1]);

        return true;
    }

    // Fills proc_no_to_heap_no for the processors with index
    // gc_heap::n_heaps .. g_num_active_processors-1. Does nothing when an
    // affinity config was specified.
    //
    // distribute_all_p == true:  every such processor gets a heap, round-robin
    //                            over the heaps on its own NUMA node, or over all
    //                            heaps when its node has none.
    // distribute_all_p == false: processors are handed the heaps of their node
    //                            one by one; once a node's heaps are used up, or
    //                            if the node has no heaps, the entry is left
    //                            untouched.
    static void distribute_other_procs (bool distribute_all_p)
    {
        if (affinity_config_specified_p)
            return;

        if (distribute_all_p)
        {
            // Per-node round-robin counters, indexed by NUMA node number.
            uint16_t current_heap_no_on_node[MAX_SUPPORTED_CPUS];
            memset (current_heap_no_on_node, 0, sizeof (current_heap_no_on_node));
            // Round-robin counter for processors whose node has no heaps.
            uint16_t current_heap_no = 0;

            uint16_t proc_no = 0;
            uint16_t node_no = 0;

            for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
            {
                int start_heap, end_heap;
                if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap))
                    break;

                // This indicates there are heaps on this node
                if ((end_heap - start_heap) > 0)
                {
                    proc_no_to_heap_no[proc_no] = (current_heap_no_on_node[node_no] % (uint16_t)(end_heap - start_heap)) + (uint16_t)start_heap;
                    (current_heap_no_on_node[node_no])++;
                }
                else
                {
                    proc_no_to_heap_no[proc_no] = current_heap_no % gc_heap::n_heaps;
                    (current_heap_no)++;
                }
            }
        }
        else
        {
            // This is for scenarios where GCHeapCount is specified as something like
            // (g_num_active_processors - 2) to allow less randomization to the Server GC threads.
            // In this case we want to assign the right heaps to those procs, ie if they share
            // the same numa node we want to assign local heaps to those procs. Otherwise we
            // let the heap balancing mechanism take over for now.
            uint16_t proc_no = 0;
            uint16_t node_no = 0;
            // Node of the most recently handled processor and the next heap to
            // hand out on that node.
            int current_node_no = -1;
            int current_heap_on_node = -1;

            for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
            {
                int start_heap, end_heap;
                if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap))
                    break;

                if ((end_heap - start_heap) > 0)
                {
                    if (node_no == current_node_no)
                    {
                        // We already iterated through all heaps on this node, don't add more procs to these
                        // heaps.
                        if (current_heap_on_node >= end_heap)
                        {
                            continue;
                        }
                    }
                    else
                    {
                        current_node_no = node_no;
                        current_heap_on_node = start_heap;
                    }

                    proc_no_to_heap_no[proc_no] = (uint16_t)current_heap_on_node;

                    current_heap_on_node++;
                }
            }
        }
    }

    // Reports the heap range [*start, *end) of the NUMA node that heap hn is on.
    static void get_heap_range_for_heap(int hn, int* start, int* end)
    {
        uint16_t numa_node = heap_no_to_numa_node[hn];
        *start = (int)numa_node_to_heap_map[numa_node];
        *end   = (int)(numa_node_to_heap_map[numa_node+1]);
#ifdef HEAP_BALANCE_INSTRUMENTATION
        dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end));
#endif //HEAP_BALANCE_INSTRUMENTATION
    }
};
// Out-of-class definitions of heap_select's static data members. They have no
// explicit initializers, so as objects with static storage duration they are
// zero-initialized.
uint8_t* heap_select::sniff_buffer;
unsigned heap_select::n_sniff_buffers;
unsigned heap_select::cur_sniff_index;
uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
#ifdef HEAP_BALANCE_INSTRUMENTATION
uint16_t  heap_select::total_numa_nodes;
node_heap_count heap_select::heaps_on_node[MAX_SUPPORTED_NODES];
#endif

#ifdef HEAP_BALANCE_INSTRUMENTATION
// This records info we use to look at effect of different strategies
// for heap balancing. One entry is written per add_to_hb_numa call.
struct heap_balance_info
{
    // Raw high precision timestamp taken when the entry was recorded.
    uint64_t timestamp;
    // Thread id of the recording thread.
    //
    // This also encodes when we detect the thread runs on
    // different proc during a balance attempt. Sometimes
    // I observe this happens multiple times during one attempt!
    // If this happens, I just record the last proc we observe
    // and set MSB.
    int tid;
    // This records the final alloc_heap for the thread.
    //
    // This also encodes the reason why we needed to set_home_heap
    // in balance_heaps.
    // If we set it because the home heap is not the same as the proc,
    // we set MSB.
    //
    // If we set ideal proc, we set the 2nd MSB.
    int alloc_heap;
    // Ideal processor number as passed to add_to_hb_numa.
    int ideal_proc_no;
};

// This means in between each GC we can log at most this many entries per proc.
// This is usually enough. Most of the time we only need to log something every 128k
// of allocations in balance_heaps and gen0 budget is <= 200mb.
#define default_max_hb_heap_balance_info 4096

// Per-processor log of heap balancing entries.
struct heap_balance_info_proc
{
    // add_to_hb_numa treats index == count as "buffer full".
    // NOTE(review): presumably set to the capacity of hb_info where these
    // buffers are allocated (not visible here) - confirm.
    int count;
    // Slot the next entry is written to; incremented by add_to_hb_numa and
    // reset to 0 by gc_heap::hb_log_balance_activities.
    int index;
    heap_balance_info hb_info[default_max_hb_heap_balance_info];
};

// Per-NUMA-node container of the per-processor logs.
struct heap_balance_info_numa
{
    // Array indexed by the processor index within the node (as returned by
    // get_proc_index_numa).
    heap_balance_info_proc* hb_info_procs;
};

// Globals used by the heap balancing instrumentation.

// Base timestamp; logged timestamps are printed relative to it.
uint64_t start_raw_ts = 0;
// When true, get_proc_index_numa splits processor numbers as 64 procs per group.
bool cpu_group_enabled_p = false;
// Divisor used by get_proc_index_numa when CPU groups are not enabled; also the
// per-node loop bound in gc_heap::hb_log_balance_activities.
uint32_t procs_per_numa_node = 0;
// Number of entries iterated in hb_info_numa_nodes.
uint16_t total_numa_nodes_on_machine = 0;
uint32_t procs_per_cpu_group = 0;
uint16_t total_cpu_groups_on_machine = 0;
// Note this is still on one of the numa nodes, so we'll incur a remote access
// no matter what.
heap_balance_info_numa* hb_info_numa_nodes = NULL;

// Splits a processor number into a NUMA node number (stored in *numa_no) and
// the processor's index within that node (the return value).
// TODO: This doesn't work for multiple nodes per CPU group yet.
int get_proc_index_numa (int proc_no, int* numa_no)
{
    // Only one node: the processor number already is the index.
    if (total_numa_nodes_on_machine == 1)
    {
        *numa_no = 0;
        return proc_no;
    }

    if (!cpu_group_enabled_p)
    {
        *numa_no = proc_no / procs_per_numa_node;
        return (proc_no % procs_per_numa_node);
    }

    // see vm\gcenv.os.cpp GroupProcNo implementation.
    *numa_no = proc_no >> 6;
    return (proc_no % 64);
}

// We could consider optimizing it so we don't need to get the tid
// everytime but it's not very expensive to get.
void add_to_hb_numa (
    int proc_no,
    int ideal_proc_no,
    int alloc_heap,
    bool multiple_procs_p,
    bool alloc_count_p,
    bool set_ideal_p)
{
    int tid = (int)GCToOSInterface::GetCurrentThreadIdForLogging ();
    uint64_t timestamp = RawGetHighPrecisionTimeStamp ();

    int saved_proc_no = proc_no;
    int numa_no = -1;
    proc_no = get_proc_index_numa (proc_no, &numa_no);

    heap_balance_info_numa* hb_info_numa_node = &hb_info_numa_nodes[numa_no];

    heap_balance_info_proc* hb_info_proc = &(hb_info_numa_node->hb_info_procs[proc_no]);
    int index = hb_info_proc->index;
    int count = hb_info_proc->count;

    if (index == count)
    {
        // Too much info inbetween GCs. This can happen if the thread is scheduled on a different
        // processor very often so it caused us to log many entries due to that reason. You could
        // increase default_max_hb_heap_balance_info but this usually indicates a problem that
        // should be investigated.
        dprintf (HEAP_BALANCE_LOG, ("too much info between GCs, already logged %d entries", index));
        GCToOSInterface::DebugBreak ();
    }
    heap_balance_info* hb_info = &(hb_info_proc->hb_info[index]);

    dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMP[p%3d->%3d(i:%3d), N%d] #%4d: %zd, tid %d, ah: %d, m: %d, p: %d, i: %d",
        saved_proc_no, proc_no, ideal_proc_no, numa_no, index,
        (timestamp - start_raw_ts) / 1000, tid, alloc_heap, (int)multiple_procs_p, (int)(!alloc_count_p), (int)set_ideal_p));

    if (multiple_procs_p)
    {
        tid |= (1 << (sizeof (tid) * 8 - 1));
    }

    if (!alloc_count_p)
    {
        alloc_heap |= (1 << (sizeof (alloc_heap) * 8 - 1));
    }

    if (set_ideal_p)
    {
        alloc_heap |= (1 << (sizeof (alloc_heap) * 8 - 2));
    }

    hb_info->timestamp = timestamp;
    hb_info->tid = tid;
    hb_info->alloc_heap = alloc_heap;
    hb_info->ideal_proc_no = ideal_proc_no;
    (hb_info_proc->index)++;
}

// Scratch buffer used to assemble heap balancing log lines
// (see gc_heap::hb_log_new_allocation).
const int hb_log_buffer_size = 4096;
static char hb_log_buffer[hb_log_buffer_size];
// gc_index of the last GC for which gc_heap::hb_log_balance_activities ran;
// used there to detect logging twice for the same GC.
int last_hb_recorded_gc_index = -1;
#endif //HEAP_BALANCE_INSTRUMENTATION

// This logs what we recorded in balance_heaps
// The format for this is
//
// [ms since last GC end]
// [cpu index]
// all elements we stored before this GC for this CPU in the format
// timestamp,tid, alloc_heap_no
// repeat this for each CPU
//
// the timestamp here is just the result of calling QPC,
// it's not converted to ms. The conversion will be done when we process
// the log.
//
// After everything is logged, the per-processor entry counts are reset to 0 so
// the buffers can be reused until the next GC.
// The whole body is compiled only with HEAP_BALANCE_INSTRUMENTATION.
void gc_heap::hb_log_balance_activities()
{
#ifdef HEAP_BALANCE_INSTRUMENTATION
    char* log_buffer = hb_log_buffer;

    uint64_t now = GetHighPrecisionTimeStamp();
    size_t time_since_last_gc_ms = (size_t)((now - last_gc_end_time_us) / 1000);
    // Print the operands that were actually used to compute time_since_last_gc_ms.
    dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMP%zd - %zd = %zd", now, last_gc_end_time_us, time_since_last_gc_ms));

    // We want to get the min and the max timestamp for all procs because it helps with our post processing
    // to know how big an array to allocate to display the history in between the GCs.
    uint64_t min_timestamp = 0xffffffffffffffff;
    uint64_t max_timestamp = 0;

    for (int numa_node_index = 0; numa_node_index < total_numa_nodes_on_machine; numa_node_index++)
    {
        heap_balance_info_proc* hb_info_procs = hb_info_numa_nodes[numa_node_index].hb_info_procs;
        for (int proc_index = 0; proc_index < (int)procs_per_numa_node; proc_index++)
        {
            heap_balance_info_proc* hb_info_proc = &hb_info_procs[proc_index];
            int total_entries_on_proc = hb_info_proc->index;

            if (total_entries_on_proc > 0)
            {
                // Only the first and the last entry of each proc are looked at.
                min_timestamp = min (min_timestamp, hb_info_proc->hb_info[0].timestamp);
                max_timestamp = max (max_timestamp, hb_info_proc->hb_info[total_entries_on_proc - 1].timestamp);
            }
        }
    }

    dprintf (HEAP_BALANCE_LOG, ("[GCA#%zd %zd-%zd-%zd]",
        settings.gc_index, time_since_last_gc_ms, (min_timestamp - start_raw_ts), (max_timestamp - start_raw_ts)));

    // Logging twice for the same GC index is unexpected.
    if (last_hb_recorded_gc_index == (int)settings.gc_index)
    {
        GCToOSInterface::DebugBreak ();
    }

    last_hb_recorded_gc_index = (int)settings.gc_index;

    // When we print out the proc index we need to convert it to the actual proc index (this is contiguous).
    // It helps with post processing.
    for (int numa_node_index = 0; numa_node_index < total_numa_nodes_on_machine; numa_node_index++)
    {
        heap_balance_info_proc* hb_info_procs = hb_info_numa_nodes[numa_node_index].hb_info_procs;
        for (int proc_index = 0; proc_index < (int)procs_per_numa_node; proc_index++)
        {
            heap_balance_info_proc* hb_info_proc = &hb_info_procs[proc_index];
            int total_entries_on_proc = hb_info_proc->index;
            if (total_entries_on_proc > 0)
            {
                int total_exec_time_ms =
                    (int)((double)(hb_info_proc->hb_info[total_entries_on_proc - 1].timestamp -
                                   hb_info_proc->hb_info[0].timestamp) * qpf_ms);
                dprintf (HEAP_BALANCE_LOG, ("[p%d]-%d-%dms",
                    (proc_index + numa_node_index * procs_per_numa_node),
                    total_entries_on_proc, total_exec_time_ms));
            }

            for (int i = 0; i < hb_info_proc->index; i++)
            {
                heap_balance_info* hb_info = &hb_info_proc->hb_info[i];
                bool multiple_procs_p = false;
                bool alloc_count_p = true;
                bool set_ideal_p = false;
                int tid = hb_info->tid;
                int alloc_heap = hb_info->alloc_heap;

                // Undo the flag encoding done by add_to_hb_numa.
                if (tid & (1 << (sizeof (tid) * 8 - 1)))
                {
                    multiple_procs_p = true;
                    tid &= ~(1 << (sizeof (tid) * 8 - 1));
                }

                if (alloc_heap & (1 << (sizeof (alloc_heap) * 8 - 1)))
                {
                    alloc_count_p = false;
                    alloc_heap &= ~(1 << (sizeof (alloc_heap) * 8 - 1));
                }

                if (alloc_heap & (1 << (sizeof (alloc_heap) * 8 - 2)))
                {
                    set_ideal_p = true;
                    alloc_heap &= ~(1 << (sizeof (alloc_heap) * 8 - 2));
                }

                // TODO - This assumes ideal proc is in the same cpu group which is not true
                // when we don't have CPU groups.
                int ideal_proc_no = hb_info->ideal_proc_no;
                int ideal_node_no = -1;
                ideal_proc_no = get_proc_index_numa (ideal_proc_no, &ideal_node_no);
                ideal_proc_no = ideal_proc_no + ideal_node_no * procs_per_numa_node;

                dprintf (HEAP_BALANCE_LOG, ("%zd,%d,%d,%d%s%s%s",
                    (hb_info->timestamp - start_raw_ts),
                    tid,
                    ideal_proc_no,
                    (int)alloc_heap,
                    (multiple_procs_p ? "|m" : ""), (!alloc_count_p ? "|p" : ""), (set_ideal_p ? "|i" : "")));
            }
        }
    }

    // Everything has been logged; start over with empty per-proc buffers.
    for (int numa_node_index = 0; numa_node_index < total_numa_nodes_on_machine; numa_node_index++)
    {
        heap_balance_info_proc* hb_info_procs = hb_info_numa_nodes[numa_node_index].hb_info_procs;
        for (int proc_index = 0; proc_index < (int)procs_per_numa_node; proc_index++)
        {
            heap_balance_info_proc* hb_info_proc = &hb_info_procs[proc_index];
            hb_info_proc->index = 0;
        }
    }
#endif //HEAP_BALANCE_INSTRUMENTATION
}

// Logs how much of its gen0 budget each heap has consumed, grouped by NUMA node.
//
// The format for this is
//
// [GC_alloc_mb]
// [N#<budget_mb>]h0_alloc_mb,h1_alloc_mb,...
//
// with one "[N#...]" line per NUMA node. <budget_mb> is heap 0's gen0 desired
// allocation and each h*_alloc_mb is (desired allocation - new allocation) of gen0
// for a heap on that node; all values are in MB.
//
// Does nothing unless HEAP_BALANCE_INSTRUMENTATION is defined.
//
void gc_heap::hb_log_new_allocation()
{
#ifdef HEAP_BALANCE_INSTRUMENTATION
    int desired_alloc_mb = (int)(dd_desired_allocation (g_heaps[0]->dynamic_data_of (0)) / 1024 / 1024);

    int buffer_pos = sprintf_s (hb_log_buffer, hb_log_buffer_size, "[GC_alloc_mb]\n");

    // Index in g_heaps of the first heap that belongs to the node being logged. Keeping a
    // running sum (instead of multiplying the node index by the current node's heap count)
    // stays correct when nodes do not all have the same number of heaps.
    // NOTE(review): like the previous formula, this assumes heaps are numbered contiguously
    // per node in heaps_on_node order - confirm against heap_select.
    int first_heap_on_node = 0;

    for (int numa_node_index = 0; numa_node_index < heap_select::total_numa_nodes; numa_node_index++)
    {
        int node_allocated_mb = 0;

        // I'm printing out the budget here instead of the numa node index so we know how much
        // of the budget we consumed.
        buffer_pos += sprintf_s (hb_log_buffer + buffer_pos, hb_log_buffer_size - buffer_pos, "[N#%3d]",
            desired_alloc_mb);

        int heaps_on_node = heap_select::heaps_on_node[numa_node_index].heap_count;

        for (int heap_index = 0; heap_index < heaps_on_node; heap_index++)
        {
            int actual_heap_index = first_heap_on_node + heap_index;
            gc_heap* hp = g_heaps[actual_heap_index];
            dynamic_data* dd0 = hp->dynamic_data_of (0);
            int allocated_mb = (int)((dd_desired_allocation (dd0) - dd_new_allocation (dd0)) / 1024 / 1024);
            node_allocated_mb += allocated_mb;
            buffer_pos += sprintf_s (hb_log_buffer + buffer_pos, hb_log_buffer_size - buffer_pos, "%d,",
                allocated_mb);
        }

        first_heap_on_node += heaps_on_node;

        dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPN#%d a %dmb(%dmb)",
            numa_node_index, node_allocated_mb, desired_alloc_mb));

        buffer_pos += sprintf_s (hb_log_buffer + buffer_pos, hb_log_buffer_size - buffer_pos, "\n");
    }

    dprintf (HEAP_BALANCE_LOG, ("%s", hb_log_buffer));
#endif //HEAP_BALANCE_INSTRUMENTATION
}

// Creates the synchronization objects shared by the GC threads: gc_start_event
// (manual reset), ee_suspend_event (auto reset) and the gc_t_join join structure
// sized for number_of_heaps.
//
// Returns TRUE when everything was created. On the first failure the remaining
// objects are not created, destroy_thread_support() is called and FALSE is returned.
BOOL gc_heap::create_thread_support (int number_of_heaps)
{
    // && short-circuits, so creation happens in this order and stops at the first failure.
    bool all_created_p = gc_start_event.CreateOSManualEventNoThrow (FALSE) &&
                         ee_suspend_event.CreateOSAutoEventNoThrow (FALSE) &&
                         gc_t_join.init (number_of_heaps, join_flavor_server_gc);

    if (all_created_p)
    {
        return TRUE;
    }

    destroy_thread_support();
    return FALSE;
}

// Closes ee_suspend_event and gc_start_event. Each event is closed only if it is
// currently valid, so this can be called after a partially failed
// create_thread_support().
void gc_heap::destroy_thread_support ()
{
    const bool suspend_event_valid_p = ee_suspend_event.IsValid();
    if (suspend_event_valid_p)
    {
        ee_suspend_event.CloseEvent();
    }

    const bool start_event_valid_p = gc_start_event.IsValid();
    if (start_event_valid_p)
    {
        gc_start_event.CloseEvent();
    }
}

// Asks the OS layer to affinitize the calling thread to processor proc_no.
// A failure is only logged; heap_number is used solely for that log message.
void set_thread_affinity_for_heap (int heap_number, uint16_t proc_no)
{
    if (GCToOSInterface::SetThreadAffinity (proc_no))
    {
        return;
    }

    dprintf (1, ("Failed to set thread affinity for GC thread %d on proc #%d", heap_number, proc_no));
}

// Asks the EE to create the GC thread for this heap. The thread starts in
// gc_thread_stub with this heap as its argument and is named ".NET Server GC".
// Returns whatever GCToEEInterface::CreateThread reports.
bool gc_heap::create_gc_thread ()
{
    dprintf (3, ("Creating gc thread\n"));

    bool thread_created_p = GCToEEInterface::CreateThread(gc_thread_stub, this, false, ".NET Server GC");
    return thread_created_p;
}

#ifdef _MSC_VER
#pragma warning(disable:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path
#endif //_MSC_VER
// Loop run by the GC thread of this heap; the while (1) below has no exit.
//
// On each iteration:
// - heap 0's thread waits on ee_suspend_event (with a timeout when gradual decommit or a
//   heap count change is pending), suspends the EE, decides whether a GC should actually
//   happen and, if so, sets gc_start_event;
// - every other heap's thread waits on gc_start_event (and, with DYNAMIC_HEAP_COUNT, may
//   instead take part in a heap count change or go idle);
// - threads that proceed call garbage_collect;
// - heap 0's thread then does the post-GC work, releases more_space_lock_soh of each heap
//   and gc_lock, restarts the EE and signals that the GC is done, while the other threads
//   spin until it is safe to do so and then call set_gc_done.
void gc_heap::gc_thread_function ()
{
    assert (gc_done_event.IsValid());
    assert (gc_start_event.IsValid());
    dprintf (3, ("gc thread started"));

    heap_select::init_cpu_mapping(heap_number);

    while (1)
    {
#ifdef DYNAMIC_HEAP_COUNT
        if (gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
        {
            // Inactive GC threads may observe gc_t_join.joined() being true here.
            // Before the 1st GC happens, h0's GC thread can also observe gc_t_join.joined() being true because it's
            // also inactive as the main thread (that inits the GC) will act as h0 (to call change_heap_count).
            assert (((heap_number == 0) && (VolatileLoadWithoutBarrier (&settings.gc_index) == 0)) ||
                    (n_heaps <= heap_number) ||
                    !gc_t_join.joined());
        }
        else
#endif //DYNAMIC_HEAP_COUNT
        {
            assert (!gc_t_join.joined());
        }

        if (heap_number == 0)
        {
            // Only wait with a timeout when there is periodic work to do (gradual decommit
            // or a pending heap count change); otherwise wait indefinitely.
            bool wait_on_time_out_p = gradual_decommit_in_progress_p;
            uint32_t wait_time = DECOMMIT_TIME_STEP_MILLISECONDS;
#ifdef DYNAMIC_HEAP_COUNT
            // background_running_p can only change from false to true during suspension.
            if (
#ifdef BACKGROUND_GC
                !gc_heap::background_running_p () &&
#endif
                dynamic_heap_count_data.should_change_heap_count)
            {
                assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes);

                wait_on_time_out_p = true;
                dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
                // Clamp the wait to [1, DECOMMIT_TIME_STEP_MILLISECONDS] based on the sample's elapsed_between_gcs.
                wait_time = min (wait_time, (uint32_t)(sample.elapsed_between_gcs / 1000 / 3));
                wait_time = max (wait_time, 1u);

                dprintf (6666, ("gc#0 thread waiting for %d ms (betwen GCs %I64d)", wait_time, sample.elapsed_between_gcs));
            }
#endif //DYNAMIC_HEAP_COUNT
            uint32_t wait_result = gc_heap::ee_suspend_event.Wait(wait_on_time_out_p ? wait_time : INFINITE, FALSE);
#ifdef DYNAMIC_HEAP_COUNT
            dprintf (9999, ("waiting for ee done res %d (timeout %d, %I64d ms since last suspend end)(should_change_heap_count is %d) (gradual_decommit_in_progress_p %d)",
                wait_result, wait_time, ((GetHighPrecisionTimeStamp() - last_suspended_end_time) / 1000),
                dynamic_heap_count_data.should_change_heap_count, gradual_decommit_in_progress_p));
#endif //DYNAMIC_HEAP_COUNT
            if (wait_result == WAIT_TIMEOUT)
            {
                // No GC was requested during the wait; do the periodic work and wait again.
#ifdef DYNAMIC_HEAP_COUNT
                if (dynamic_heap_count_data.should_change_heap_count)
                {
#ifdef BACKGROUND_GC
                    if (!gc_heap::background_running_p ())
#endif //BACKGROUND_GC
                    {
                        dprintf (6666, ("changing heap count due to timeout"));
                        add_to_hc_history (hc_record_before_check_timeout);
                        check_heap_count();
                    }
                }
#endif //DYNAMIC_HEAP_COUNT

                if (gradual_decommit_in_progress_p)
                {
#ifdef COMMITTED_BYTES_SHADOW
                    decommit_lock.Enter ();
#endif //COMMITTED_BYTES_SHADOW
                    gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS);
#ifdef COMMITTED_BYTES_SHADOW
                    decommit_lock.Leave ();
#endif //COMMITTED_BYTES_SHADOW
                }
                continue;
            }

#ifdef DYNAMIC_HEAP_COUNT
            // We might want to consider also doing this when a BGC finishes.
            if (dynamic_heap_count_data.should_change_heap_count)
            {
#ifdef BACKGROUND_GC
                if (!gc_heap::background_running_p ())
#endif //BACKGROUND_GC
                {
                    // this was a request to do a GC so make sure we follow through with one.
                    dprintf (6666, ("changing heap count at a GC start"));
                    add_to_hc_history (hc_record_before_check_gc_start);
                    check_heap_count ();
                }
            }

            // wait till the threads that should have gone idle at least reached the place where they are about to wait on the idle event.
            if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) &&
                (n_heaps != dynamic_heap_count_data.last_n_heaps))
            {
                int spin_count = 1024;
                int idle_thread_count = n_max_heaps - n_heaps;
                dprintf (9999, ("heap count changed %d->%d, idle should be %d and is %d", dynamic_heap_count_data.last_n_heaps, n_heaps,
                    idle_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count)));
                if (idle_thread_count != dynamic_heap_count_data.idle_thread_count)
                {
                    spin_and_wait (spin_count, (idle_thread_count == dynamic_heap_count_data.idle_thread_count));
                    dprintf (9999, ("heap count changed %d->%d, now idle is %d", dynamic_heap_count_data.last_n_heaps, n_heaps,
                        VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count)));
                }

                add_to_hc_history (hc_record_set_last_heaps);

                dynamic_heap_count_data.last_n_heaps = n_heaps;
            }
#endif //DYNAMIC_HEAP_COUNT

            suspended_start_time = GetHighPrecisionTimeStamp();
            BEGIN_TIMING(suspend_ee_during_log);
            dprintf (9999, ("h0 suspending EE in GC!"));
            GCToEEInterface::SuspendEE(SUSPEND_FOR_GC);
            dprintf (9999, ("h0 suspended EE in GC!"));
            END_TIMING(suspend_ee_during_log);

            proceed_with_gc_p = TRUE;

            if (!should_proceed_with_gc())
            {
                // No GC will run this time, so gc_start_event is not set and the other
                // GC threads stay in their wait.
                update_collection_counts_for_no_gc();
                proceed_with_gc_p = FALSE;
            }
            else
            {
                settings.init_mechanisms();
#ifdef DYNAMIC_HEAP_COUNT
                if (gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
                {
                    // make sure the other gc threads cannot see this as a request to change heap count
                    // see explanation below about the cases when we return from gc_start_event.Wait
                    assert (dynamic_heap_count_data.new_n_heaps == n_heaps);
                }
#endif //DYNAMIC_HEAP_COUNT
                dprintf (9999, ("GC thread %d setting_gc_start_in_gc(h%d)", heap_number, n_heaps));
                gc_start_event.Set();
            }
            dprintf (3, (ThreadStressLog::gcServerThread0StartMsg(), heap_number));
        }
        else
        {
            dprintf (9999, ("GC thread %d waiting_for_gc_start(%d)(gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier(&settings.gc_index)));
            gc_start_event.Wait(INFINITE, FALSE);
#ifdef DYNAMIC_HEAP_COUNT
            dprintf (9999, ("GC thread %d waiting_done_gc_start(%d-%d)(i: %d)(gc%Id)",
                heap_number, n_heaps, dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.init_only_p, VolatileLoadWithoutBarrier (&settings.gc_index)));

            // new_n_heaps differing from n_heaps means this wake-up is a heap count change
            // rather than a GC.
            if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) &&
                (dynamic_heap_count_data.new_n_heaps != n_heaps))
            {
                // The reason why we need to do this is -
                // + for threads that were participating, we need them to do work for change_heap_count
                // + for threads that were not participating but will need to participate, we need to make sure they are woken now instead of
                // randomly sometime later.
                int old_n_heaps = n_heaps;
                int new_n_heaps = dynamic_heap_count_data.new_n_heaps;
                int num_threads_to_wake = max (new_n_heaps, old_n_heaps);
                if (heap_number < num_threads_to_wake)
                {
                    dprintf (9999, ("h%d < %d, calling change", heap_number, num_threads_to_wake));
                    change_heap_count (dynamic_heap_count_data.new_n_heaps);
                    if (new_n_heaps < old_n_heaps)
                    {
                        dprintf (9999, ("h%d after change", heap_number));
                        // at the end of change_heap_count we've changed join's heap count to the new one if it's smaller. So we need to make sure
                        // only that many threads will participate in the following GCs.
                        if (heap_number < new_n_heaps)
                        {
                            add_to_hc_history (hc_record_still_active);
                            dprintf (9999, ("h%d < %d participating (dec)", heap_number, new_n_heaps));
                        }
                        else
                        {
                            Interlocked::Increment (&dynamic_heap_count_data.idle_thread_count);
                            add_to_hc_history (hc_record_became_inactive);

                            dprintf (9999, ("GC thread %d wait_on_idle(%d < %d)(gc%Id), total idle %d", heap_number, old_n_heaps, new_n_heaps,
                                VolatileLoadWithoutBarrier (&settings.gc_index), VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count)));
                            gc_idle_thread_event.Wait (INFINITE, FALSE);
                            dprintf (9999, ("GC thread %d waking_from_idle(%d)(gc%Id) after doing change", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index)));
                        }
                    }
                    else
                    {
                        add_to_hc_history ((heap_number < old_n_heaps) ? hc_record_still_active : hc_record_became_active);
                        dprintf (9999, ("h%d < %d participating (inc)", heap_number, new_n_heaps));
                    }
                }
                else
                {
                    Interlocked::Increment (&dynamic_heap_count_data.idle_thread_count);
                    add_to_hc_history (hc_record_inactive_waiting);
                    dprintf (9999, ("GC thread %d wait_on_idle(< max %d)(gc%Id), total  idle %d", heap_number, num_threads_to_wake,
                        VolatileLoadWithoutBarrier (&settings.gc_index), VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count)));
                    gc_idle_thread_event.Wait (INFINITE, FALSE);
                    dprintf (9999, ("GC thread %d waking_from_idle(%d)(gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index)));
                }

                continue;
            }
#endif //DYNAMIC_HEAP_COUNT
            dprintf (3, (ThreadStressLog::gcServerThreadNStartMsg(), heap_number));
        }

        // Only heap 0's thread can get here without a GC to do.
        assert ((heap_number == 0) || proceed_with_gc_p);

        if (proceed_with_gc_p)
        {
            garbage_collect (GCHeap::GcCondemnedGeneration);

            if (pm_trigger_full_gc)
            {
                garbage_collect_pm_full_gc();
            }
        }

        if (heap_number == 0)
        {
            if (proceed_with_gc_p && (!settings.concurrent))
            {
                do_post_gc();
            }

#ifdef BACKGROUND_GC
            recover_bgc_settings();
#endif //BACKGROUND_GC

#ifdef MULTIPLE_HEAPS
#ifdef STRESS_DYNAMIC_HEAP_COUNT
            dynamic_heap_count_data.lowest_heap_with_msl_uoh = -1;
#endif //STRESS_DYNAMIC_HEAP_COUNT
            // Release more_space_lock_soh of every heap.
            for (int i = 0; i < gc_heap::n_heaps; i++)
            {
                gc_heap* hp = gc_heap::g_heaps[i];
                leave_spin_lock(&hp->more_space_lock_soh);

#ifdef STRESS_DYNAMIC_HEAP_COUNT
                if ((dynamic_heap_count_data.lowest_heap_with_msl_uoh == -1) && (hp->uoh_msl_before_gc_p))
                {
                    dynamic_heap_count_data.lowest_heap_with_msl_uoh = i;
                }

                if (hp->uoh_msl_before_gc_p)
                {
                    dprintf (5555, ("h%d uoh msl was taken before GC", i));
                    hp->uoh_msl_before_gc_p = false;
                }
#endif //STRESS_DYNAMIC_HEAP_COUNT
            }
#endif //MULTIPLE_HEAPS

            gc_heap::gc_started = FALSE;

#ifdef BACKGROUND_GC
            gc_heap::add_bgc_pause_duration_0();
#endif //BACKGROUND_GC
            BEGIN_TIMING(restart_ee_during_log);
            GCToEEInterface::RestartEE(TRUE);
            END_TIMING(restart_ee_during_log);
            process_sync_log_stats();

            dprintf (SPINLOCK_LOG, ("GC Lgc"));
            leave_spin_lock (&gc_heap::gc_lock);

            // The other GC threads spin on this flag (see the else branch below).
            gc_heap::internal_gc_done = true;

            if (proceed_with_gc_p)
                set_gc_done();
            else
            {
                // If we didn't actually do a GC, it means we didn't wake up the other threads,
                // we still need to set the gc_done_event for those threads.
                for (int i = 0; i < gc_heap::n_heaps; i++)
                {
                    gc_heap* hp = gc_heap::g_heaps[i];
                    hp->set_gc_done();
                }
            }

            // check if we should do some decommitting
            if (gradual_decommit_in_progress_p)
            {
#ifdef COMMITTED_BYTES_SHADOW
                decommit_lock.Enter ();
#endif //COMMITTED_BYTES_SHADOW
                gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS);
#ifdef COMMITTED_BYTES_SHADOW
                decommit_lock.Leave ();
#endif //COMMITTED_BYTES_SHADOW
            }
        }
        else
        {
            int spin_count = 32 * (gc_heap::n_heaps - 1);

            // wait until RestartEE has progressed to a stage where we can restart user threads
            while (!gc_heap::internal_gc_done && !GCHeap::SafeToRestartManagedThreads())
            {
                spin_and_switch (spin_count, (gc_heap::internal_gc_done || GCHeap::SafeToRestartManagedThreads()));
            }
            set_gc_done();
        }
    }
}
#ifdef _MSC_VER
#pragma warning(default:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path
#endif //_MSC_VER

#endif //MULTIPLE_HEAPS

// Commits [addr, addr + size) for heap h_number.
//
// With MULTIPLE_HEAPS, when the OS layer reports NUMA awareness can be enabled, the
// commit is first attempted on the NUMA node that heap_select associates with h_number.
// If NUMA awareness is not available, or that attempt fails, this falls back to a plain
// VirtualCommit. Returns true if either commit succeeded.
bool gc_heap::virtual_alloc_commit_for_heap (void* addr, size_t size, int h_number)
{
#ifdef MULTIPLE_HEAPS
    if (GCToOSInterface::CanEnableGCNumaAware())
    {
        const uint16_t heap_numa_node = heap_select::find_numa_node_from_heap_no(h_number);
        const bool committed_on_node_p = GCToOSInterface::VirtualCommit (addr, size, heap_numa_node);
        if (committed_on_node_p)
        {
            return true;
        }
    }
#else //MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(h_number);
#endif //MULTIPLE_HEAPS

    // Either NUMA awareness is off or the node-specific commit failed.
    const bool committed_p = GCToOSInterface::VirtualCommit(addr, size);
    return committed_p;
}

// Commits [address, address + size) and records the commit in the commit accounting.
//
// address, size         - the range to commit.
// bucket                - the accounting bucket the commit is charged to (see the cases below).
// h_number              - the heap the memory is for, or -1 when it is not for a particular heap.
// hard_limit_exceeded_p - optional out parameter. When the commit is accounted for, it is set
//                         to whether the commit was rejected because of the hard limit.
//
// Returns true if the memory was committed. Returns false if the commit would exceed the hard
// limit (nothing is committed) or if the actual commit failed (the accounting is rolled back).
bool gc_heap::virtual_commit (void* address, size_t size, int bucket, int h_number, bool* hard_limit_exceeded_p)
{
    /**
     * Here are all the possible cases for the commits:
     *
     * Case 1: This is for a particular generation - the bucket will be one of the gc_oh_num != unknown, and the h_number will be the right heap
     * Case 2: This is for bookkeeping - the bucket will be recorded_committed_bookkeeping_bucket, and the h_number will be -1
     *
     * Note  : We never commit into free directly, so bucket != recorded_committed_free_bucket
     */

    assert(0 <= bucket && bucket < recorded_committed_bucket_counts);
    assert(bucket < total_oh_count || h_number == -1);
#ifdef USE_REGIONS
    assert(bucket != recorded_committed_free_bucket);
#endif //USE_REGIONS

    dprintf(3, ("commit-accounting:  commit in %d [%p, %p) for heap %d", bucket, address, ((uint8_t*)address + size), h_number));
    // Without regions, commits into the ignored bucket are not accounted for.
    bool should_count =
#ifdef USE_REGIONS
        true;
#else
        (bucket != recorded_committed_ignored_bucket);
#endif //USE_REGIONS

    if (should_count)
    {
        // The limit check and the accounting update happen under check_commit_cs.
        check_commit_cs.Enter();
        bool exceeded_p = false;

        if (heap_hard_limit_oh[soh] != 0)
        {
            // Per object heap limits are in effect: check against the limit of this bucket.
            if ((bucket < total_oh_count) && (committed_by_oh[bucket] + size) > heap_hard_limit_oh[bucket])
            {
                exceeded_p = true;
            }
        }
        else
        {
            // Otherwise check the total committed against the overall hard limit.
            size_t base = current_total_committed;
            size_t limit = heap_hard_limit;

            if ((base + size) > limit)
            {
                dprintf (2, ("%zd + %zd = %zd > limit %zd ", base, size, (base + size), limit));
                exceeded_p = true;
            }
        }

        // With no hard limit configured nothing can be exceeded.
        if (!heap_hard_limit) {
            exceeded_p = false;
        }

        if (!exceeded_p)
        {
            // Account for the commit up front; it is undone below if the commit itself fails.
#if defined(MULTIPLE_HEAPS) && defined(_DEBUG)
            if ((h_number != -1) && (bucket < total_oh_count))
            {
                g_heaps[h_number]->committed_by_oh_per_heap[bucket] += size;
            }
#endif // MULTIPLE_HEAPS && _DEBUG
            committed_by_oh[bucket] += size;
            current_total_committed += size;
            if (h_number < 0)
                current_total_committed_bookkeeping += size;
        }

        check_commit_cs.Leave();

        if (hard_limit_exceeded_p)
            *hard_limit_exceeded_p = exceeded_p;

        if (exceeded_p)
        {
            dprintf (1, ("can't commit %zx for %zd bytes > HARD LIMIT %zd", (size_t)address, size, heap_hard_limit));
            return false;
        }
    }

    // If it's a valid heap number it means it's committing for memory on the GC heap.
    // In addition if large pages is enabled, we set commit_succeeded_p to true because memory is already committed.
    bool commit_succeeded_p = ((h_number >= 0) ? (use_large_pages_p ? true :
                              virtual_alloc_commit_for_heap (address, size, h_number)) :
                              GCToOSInterface::VirtualCommit(address, size));

    if (!commit_succeeded_p && should_count)
    {
        // The commit failed: undo the accounting that was done above.
        check_commit_cs.Enter();
        committed_by_oh[bucket] -= size;
#if defined(MULTIPLE_HEAPS) && defined(_DEBUG)
        if ((h_number != -1) && (bucket < total_oh_count))
        {
            assert (g_heaps[h_number]->committed_by_oh_per_heap[bucket] >= size);
            g_heaps[h_number]->committed_by_oh_per_heap[bucket] -= size;
        }
#endif // MULTIPLE_HEAPS && _DEBUG
        dprintf (1, ("commit failed, updating %zd to %zd",
                current_total_committed, (current_total_committed - size)));
        current_total_committed -= size;
        if (h_number < 0)
        {
            assert (current_total_committed_bookkeeping >= size);
            current_total_committed_bookkeeping -= size;
        }

        check_commit_cs.Leave();
    }
    return commit_succeeded_p;
}

// Removes size bytes from the commit accounting of bucket after a decommit of
// [address, address + size).
//
// Nothing is updated when decommit_succeeded_p is false or, without regions, when
// bucket is recorded_committed_ignored_bucket. Otherwise, under check_commit_cs, the
// per-bucket and total committed counters are reduced (and the bookkeeping counter
// as well when bucket is recorded_committed_bookkeeping_bucket).
void gc_heap::reduce_committed_bytes (void* address, size_t size, int bucket, int h_number, bool decommit_succeeded_p)
{
    assert(0 <= bucket && bucket < recorded_committed_bucket_counts);
    assert(bucket < total_oh_count || h_number == -1);

    dprintf(3, ("commit-accounting:  decommit in %d [%p, %p) for heap %d", bucket, address, ((uint8_t*)address + size), h_number));

    if (!decommit_succeeded_p)
    {
        return;
    }

#ifndef USE_REGIONS
    if (bucket == recorded_committed_ignored_bucket)
    {
        return;
    }
#endif //!USE_REGIONS

    check_commit_cs.Enter();

    assert (committed_by_oh[bucket] >= size);
    committed_by_oh[bucket] -= size;

#if defined(MULTIPLE_HEAPS) && defined(_DEBUG)
    const bool per_heap_accounting_p = ((h_number != -1) && (bucket < total_oh_count));
    if (per_heap_accounting_p)
    {
        assert (g_heaps[h_number]->committed_by_oh_per_heap[bucket] >= size);
        g_heaps[h_number]->committed_by_oh_per_heap[bucket] -= size;
    }
#endif // MULTIPLE_HEAPS && _DEBUG

    assert (current_total_committed >= size);
    current_total_committed -= size;

    if (bucket == recorded_committed_bookkeeping_bucket)
    {
        assert (current_total_committed_bookkeeping >= size);
        current_total_committed_bookkeeping -= size;
    }

    check_commit_cs.Leave();
}

// Decommits [address, address + size) and updates the commit accounting through
// reduce_committed_bytes. Returns whether the decommit is considered successful.
//
// Possible callers' cases:
//   1. A particular generation: bucket is one of the gc_oh_num values other than unknown
//      and h_number is the owning heap.
//   2. Bookkeeping: bucket is recorded_committed_bookkeeping_bucket and h_number is -1.
//   3. Free: bucket is recorded_committed_free_bucket and h_number is -1.
bool gc_heap::virtual_decommit (void* address, size_t size, int bucket, int h_number)
{
    bool decommit_succeeded_p;

    if ((bucket != recorded_committed_bookkeeping_bucket) && use_large_pages_p)
    {
        // With large pages, non-bookkeeping memory is not handed to the OS decommit call;
        // the decommit is simply reported as successful.
        decommit_succeeded_p = true;
    }
    else
    {
        decommit_succeeded_p = GCToOSInterface::VirtualDecommit (address, size);
    }

    reduce_committed_bytes (address, size, bucket, h_number, decommit_succeeded_p);

    return decommit_succeeded_p;
}

// Releases the allocated_size bytes of virtual address space starting at add.
// reserved_memory is reduced only if the release succeeds. sg is not used here.
void gc_heap::virtual_free (void* add, size_t allocated_size, heap_segment* sg)
{
    if (!GCToOSInterface::VirtualRelease (add, allocated_size))
    {
        return;
    }

    reserved_memory -= allocated_size;
    dprintf (2, ("Virtual Free size %zd: [%zx, %zx[",
                allocated_size, (size_t)add, (size_t)((uint8_t*)add + allocated_size)));
}

class mark
{
public:
    uint8_t* first;
    size_t len;

    // If we want to save space we can have a pool of plug_and_gap's instead of
    // always having 2 allocated for each pinned plug.
    gap_reloc_pair saved_pre_plug;
    // If we decide to not compact, we need to restore the original values.
    gap_reloc_pair saved_pre_plug_reloc;

    gap_reloc_pair saved_post_plug;

    // Supposedly Pinned objects cannot have references but we are seeing some from pinvoke
    // frames. Also if it's an artificially pinned plug created by us, it can certainly
    // have references.
    // We know these cases will be rare so we can optimize this to be only allocated on demand.
    gap_reloc_pair saved_post_plug_reloc;

    // We need to calculate this after we are done with plan phase and before compact
    // phase because compact phase will change the bricks so relocate_address will no
    // longer work.
    uint8_t* saved_pre_plug_info_reloc_start;

    // We need to save this because we will have no way to calculate it, unlike the
    // pre plug info start which is right before this plug.
    uint8_t* saved_post_plug_info_start;

#ifdef SHORT_PLUGS
    uint8_t* allocation_context_start_region;
#endif //SHORT_PLUGS

    // How the bits in these bytes are organized:
    // MSB --> LSB
    // bit to indicate whether it's a short obj | 3 bits for refs in this short obj | 2 unused bits | bit to indicate if it's collectible | last bit
    // last bit indicates if there's pre or post info associated with this plug. If it's not set all other bits will be 0.
    BOOL saved_pre_p;
    BOOL saved_post_p;

#ifdef _DEBUG
    // We are seeing this is getting corrupted for a PP with a NP after.
    // Save it when we first set it and make sure it doesn't change.
    gap_reloc_pair saved_post_plug_debug;
#endif //_DEBUG

    size_t get_max_short_bits()
    {
        return (sizeof (gap_reloc_pair) / sizeof (uint8_t*));
    }

    // pre bits
    size_t get_pre_short_start_bit ()
    {
        return (sizeof (saved_pre_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*)));
    }

    BOOL pre_short_p()
    {
        return (saved_pre_p & (1 << (sizeof (saved_pre_p) * 8 - 1)));
    }

    void set_pre_short()
    {
        saved_pre_p |= (1 << (sizeof (saved_pre_p) * 8 - 1));
    }

    void set_pre_short_bit (size_t bit)
    {
        saved_pre_p |= 1 << (get_pre_short_start_bit() + bit);
    }

    BOOL pre_short_bit_p (size_t bit)
    {
        return (saved_pre_p & (1 << (get_pre_short_start_bit() + bit)));
    }

#ifdef COLLECTIBLE_CLASS
    void set_pre_short_collectible()
    {
        saved_pre_p |= 2;
    }

    BOOL pre_short_collectible_p()
    {
        return (saved_pre_p & 2);
    }
#endif //COLLECTIBLE_CLASS

    // post bits
    size_t get_post_short_start_bit ()
    {
        return (sizeof (saved_post_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*)));
    }

    BOOL post_short_p()
    {
        return (saved_post_p & (1 << (sizeof (saved_post_p) * 8 - 1)));
    }

    void set_post_short()
    {
        saved_post_p |= (1 << (sizeof (saved_post_p) * 8 - 1));
    }

    void set_post_short_bit (size_t bit)
    {
        saved_post_p |= 1 << (get_post_short_start_bit() + bit);
    }

    BOOL post_short_bit_p (size_t bit)
    {
        return (saved_post_p & (1 << (get_post_short_start_bit() + bit)));
    }

#ifdef COLLECTIBLE_CLASS
    void set_post_short_collectible()
    {
        saved_post_p |= 2;
    }

    BOOL post_short_collectible_p()
    {
        return (saved_post_p & 2);
    }
#endif //COLLECTIBLE_CLASS

    uint8_t* get_plug_address() { return first; }

    BOOL has_pre_plug_info() { return saved_pre_p; }
    BOOL has_post_plug_info() { return saved_post_p; }

    gap_reloc_pair* get_pre_plug_reloc_info() { return &saved_pre_plug_reloc; }
    gap_reloc_pair* get_post_plug_reloc_info() { return &saved_post_plug_reloc; }
    void set_pre_plug_info_reloc_start (uint8_t* reloc) { saved_pre_plug_info_reloc_start = reloc; }
    uint8_t* get_post_plug_info_start() { return saved_post_plug_info_start; }

    // We need to temporarily recover the shortened plugs for compact phase so we can
    // copy over the whole plug and their related info (mark bits/cards). But we will
    // need to set the artificial gap back so compact phase can keep reading the plug info.
    // We also need to recover the saved info because we'll need to recover it later.
    //
    // So we would call swap_p*_plug_and_saved once to recover the object info; then call
    // it again to recover the artificial gap.
    void swap_pre_plug_and_saved()
    {
        gap_reloc_pair temp;
        memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp));
        memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc));
        saved_pre_plug_reloc = temp;
    }

    void swap_post_plug_and_saved()
    {
        gap_reloc_pair temp;
        memcpy (&temp, saved_post_plug_info_start, sizeof (temp));
        memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc));
        saved_post_plug_reloc = temp;
    }

    void swap_pre_plug_and_saved_for_profiler()
    {
        gap_reloc_pair temp;
        memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp));
        memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug));
        saved_pre_plug = temp;
    }

    void swap_post_plug_and_saved_for_profiler()
    {
        gap_reloc_pair temp;
        memcpy (&temp, saved_post_plug_info_start, sizeof (temp));
        memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug));
        saved_post_plug = temp;
    }

    // We should think about whether it's really necessary to have to copy back the pre plug
    // info since it was already copied during compacting plugs. But if a plug doesn't move
    // by >= 3 ptr size (the size of gap_reloc_pair), it means we'd have to recover pre plug info.
    size_t recover_plug_info()
    {
        // We need to calculate the size for sweep case in order to correctly record the
        // free_obj_space - sweep would've made these artificial gaps into free objects and
        // we would need to deduct the size because now we are writing into those free objects.
        size_t recovered_sweep_size = 0;

        if (saved_pre_p)
        {
            if (gc_heap::settings.compaction)
            {
                dprintf (3, ("%p: REC Pre: %p-%p",
                    first,
                    &saved_pre_plug_reloc,
                    saved_pre_plug_info_reloc_start));
                memcpy (saved_pre_plug_info_reloc_start, &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc));
            }
            else
            {
                dprintf (3, ("%p: REC Pre: %p-%p",
                    first,
                    &saved_pre_plug,
                    (first - sizeof (plug_and_gap))));
                memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug));
                recovered_sweep_size += sizeof (saved_pre_plug);
            }
        }

        if (saved_post_p)
        {
            if (gc_heap::settings.compaction)
            {
                dprintf (3, ("%p: REC Post: %p-%p",
                    first,
                    &saved_post_plug_reloc,
                    saved_post_plug_info_start));
                memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc));
            }
            else
            {
                dprintf (3, ("%p: REC Post: %p-%p",
                    first,
                    &saved_post_plug,
                    saved_post_plug_info_start));
                memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug));
                recovered_sweep_size += sizeof (saved_post_plug);
            }
        }

        return recovered_sweep_size;
    }
};


void gc_mechanisms::init_mechanisms()
{
    // Resets the per-GC mechanism settings to their starting values.

    // What this GC will do.
    condemned_generation = 0;
    compaction = TRUE;
    promotion = FALSE;
    demotion = FALSE;
    concurrent = FALSE;
    heap_expansion = FALSE;
    elevation_reduced = FALSE;
    found_finalizers = FALSE;

#ifdef FEATURE_LOH_COMPACTION
    loh_compaction = gc_heap::loh_compaction_requested();
#else //FEATURE_LOH_COMPACTION
    loh_compaction = FALSE;
#endif //FEATURE_LOH_COMPACTION

#ifdef BACKGROUND_GC
    background_p = gc_heap::background_running_p() != FALSE;
#endif //BACKGROUND_GC

    // Memory load bookkeeping.
    entry_memory_load = 0;
    entry_available_physical_mem = 0;
    exit_memory_load = 0;

#ifdef STRESS_HEAP
    stress_induced = FALSE;
#endif // STRESS_HEAP
}

void gc_mechanisms::first_init()
{
    // One time initialization of the settings that persist across GCs; the per-GC
    // settings are then reset by init_mechanisms.
    reason = reason_empty;
    gc_index = 0;
    gen0_reduction_count = 0;
    elevation_locked_count = 0;
    should_lock_elevation = FALSE;

#ifdef BACKGROUND_GC
    if (gc_heap::gc_can_use_concurrent)
    {
        pause_mode = pause_interactive;
    }
    else
    {
        pause_mode = pause_batch;
    }
#ifdef _DEBUG
    // In debug builds a non negative latency mode from the config overrides the default.
    const int configured_pause_mode = static_cast<int>(GCConfig::GetLatencyMode());
    if (configured_pause_mode >= 0)
    {
        assert (configured_pause_mode <= pause_sustained_low_latency);
        pause_mode = (gc_pause_mode)configured_pause_mode;
    }
#endif //_DEBUG
#else //BACKGROUND_GC
    pause_mode = pause_batch;
#endif //BACKGROUND_GC

    init_mechanisms();
}

void gc_mechanisms::record (gc_history_global* history)
{
    // Copies the current settings into the global GC history record.
#ifdef MULTIPLE_HEAPS
    history->num_heaps = gc_heap::n_heaps;
#else //MULTIPLE_HEAPS
    history->num_heaps = 1;
#endif //MULTIPLE_HEAPS

    history->reason = reason;
    history->pause_mode = (int)pause_mode;
    history->mem_pressure = entry_memory_load;
    history->condemned_generation = condemned_generation;
    history->gen0_reduction_count = gen0_reduction_count;

    // Start from an empty mechanism set and flag each mechanism that is in effect.
    history->global_mechanisms_p = 0;

    if (concurrent)
    {
        history->set_mechanism_p (global_concurrent);
    }
    if (compaction)
    {
        history->set_mechanism_p (global_compaction);
    }
    if (promotion)
    {
        history->set_mechanism_p (global_promotion);
    }
    if (demotion)
    {
        history->set_mechanism_p (global_demotion);
    }
    if (card_bundles)
    {
        history->set_mechanism_p (global_card_bundles);
    }
    if (elevation_reduced)
    {
        history->set_mechanism_p (global_elevation);
    }
}

/**********************************
   called at the beginning of GC to fix the allocated size to
   what is really allocated, or to turn the free area into an unused object
   It needs to be called after all of the other allocation contexts have been
   fixed since it relies on alloc_allocated.
 ********************************/

// Sets the allocated end of the ephemeral segment to alloc_allocated.
// The gen 0 allocation pointer and limit are expected to be null at this point.
void gc_heap::fix_youngest_allocation_area()
{
    // The gen 0 alloc context is never used for allocation in the allocator path. It's
    // still used in the allocation path during GCs.
    assert (generation_allocation_pointer (youngest_generation) == nullptr);
    assert (generation_allocation_limit (youngest_generation) == nullptr);
    heap_segment_allocated (ephemeral_heap_segment) = alloc_allocated;
    assert (heap_segment_mem (ephemeral_heap_segment) <= heap_segment_allocated (ephemeral_heap_segment));
    assert (heap_segment_allocated (ephemeral_heap_segment) <= heap_segment_reserved (ephemeral_heap_segment));
}

//for_gc_p indicates that the work is being done for GC,
//as opposed to concurrent heap verification
void gc_heap::fix_allocation_context (alloc_context* acontext, BOOL for_gc_p,
                                      BOOL record_ac_p)
{
    // Either turns the unused tail of the allocation context into an unused array, or
    // (for GC only) pulls alloc_allocated back to the context's alloc_ptr.
    // record_ac_p: when for_gc_p is set, also count this context in alloc_contexts_used.
    dprintf (3, ("Fixing allocation context %zx: ptr: %zx, limit: %zx",
                 (size_t)acontext,
                 (size_t)acontext->alloc_ptr, (size_t)acontext->alloc_limit));

    // Nothing to fix for a context with a null alloc_ptr.
    if (acontext->alloc_ptr == 0)
    {
        return;
    }
    int align_const = get_alignment_constant (TRUE);
#ifdef USE_REGIONS
    bool is_ephemeral_heap_segment = in_range_for_segment (acontext->alloc_limit, ephemeral_heap_segment);
#else // USE_REGIONS
    bool is_ephemeral_heap_segment = true;
#endif // USE_REGIONS
    // Fill the rest of the context with an unused array when the context is not on the
    // ephemeral segment, when alloc_allocated is more than a min object beyond the
    // context's limit, or when this is not being done for a GC.
    if ((!is_ephemeral_heap_segment) || ((size_t)(alloc_allocated - acontext->alloc_limit) > Align (min_obj_size, align_const)) ||
        !for_gc_p)
    {
        uint8_t*  point = acontext->alloc_ptr;
        size_t  size = (acontext->alloc_limit - acontext->alloc_ptr);
        // the allocation area was from the free list
        // it was shortened by Align (min_obj_size) to make room for
        // at least the shortest unused object
        size += Align (min_obj_size, align_const);
        assert ((size >= Align (min_obj_size)));

        dprintf(3,("Making unused area [%zx, %zx[", (size_t)point,
                    (size_t)point + size ));
        make_unused_array (point, size);

        if (for_gc_p)
        {
            // The filler object is accounted as free object space in gen0.
            generation_free_obj_space (generation_of (0)) += size;
            if (record_ac_p)
                alloc_contexts_used ++;
        }
    }
    else if (for_gc_p)
    {
        // Otherwise no filler object is made; the unused tail is given back by moving
        // alloc_allocated down to alloc_ptr.
        assert (is_ephemeral_heap_segment);
        alloc_allocated = acontext->alloc_ptr;
        assert (heap_segment_allocated (ephemeral_heap_segment) <=
                heap_segment_committed (ephemeral_heap_segment));
        if (record_ac_p)
            alloc_contexts_used ++;
    }

    if (for_gc_p)
    {
        // We need to update the alloc_bytes to reflect the portion that we have not used
        acontext->alloc_bytes -= (acontext->alloc_limit - acontext->alloc_ptr);
        total_alloc_bytes_soh -= (acontext->alloc_limit - acontext->alloc_ptr);

        // Leave the context empty: both alloc_ptr and alloc_limit end up null.
        acontext->alloc_ptr = 0;
        acontext->alloc_limit = acontext->alloc_ptr;
    }
}

//used by the heap verification for concurrent gc.
//it nulls out the words set by fix_allocation_context for heap_verification
void repair_allocation (gc_alloc_context* acontext, void*)
{
    // Contexts with a null alloc_ptr have nothing to clear.
    uint8_t* alloc_start = acontext->alloc_ptr;
    if (alloc_start == 0)
    {
        return;
    }

    dprintf (3, ("Clearing [%zx, %zx[", (size_t)acontext->alloc_ptr,
                 (size_t)acontext->alloc_limit+Align(min_obj_size)));

    // Zero from plug_skew bytes before alloc_ptr, for the remaining length of the
    // context plus one aligned min object.
    memclr (alloc_start - plug_skew,
            (acontext->alloc_limit - alloc_start) + Align (min_obj_size));
}

// Empties an allocation context (null alloc_ptr and alloc_limit) without touching
// the memory it covered.
void void_allocation (gc_alloc_context* acontext, void*)
{
    if (acontext->alloc_ptr == 0)
    {
        // Already empty.
        return;
    }

    dprintf (3, ("Void [%zx, %zx[", (size_t)acontext->alloc_ptr,
                 (size_t)acontext->alloc_limit+Align(min_obj_size)));
    acontext->alloc_ptr = 0;
    acontext->alloc_limit = 0;
}

// Enumerates every allocation context and either repairs it (repair_p) or voids it.
void gc_heap::repair_allocation_contexts (BOOL repair_p)
{
    if (repair_p)
    {
        GCToEEInterface::GcEnumAllocContexts (repair_allocation, NULL);
    }
    else
    {
        GCToEEInterface::GcEnumAllocContexts (void_allocation, NULL);
    }
}

// Arguments passed through GcEnumAllocContexts to the fix_alloc_context callback.
struct fix_alloc_context_args
{
    // Forwarded as the for_gc_p argument of FixAllocContext.
    BOOL for_gc_p;
    // Forwarded as the heap argument of FixAllocContext.
    void* heap;
};

// Allocation context enumeration callback: param is a fix_alloc_context_args*
// whose fields are forwarded to FixAllocContext.
void fix_alloc_context (gc_alloc_context* acontext, void* param)
{
    fix_alloc_context_args* fix_args = static_cast<fix_alloc_context_args*>(param);
    void* for_gc_arg = (void*)(size_t)(fix_args->for_gc_p);
    g_theGCHeap->FixAllocContext(acontext, for_gc_arg, fix_args->heap);
}

// Fixes every allocation context and then the youngest allocation area, which has
// to come last since it relies on alloc_allocated.
void gc_heap::fix_allocation_contexts (BOOL for_gc_p)
{
    fix_alloc_context_args enum_args;
    enum_args.heap = __this;
    enum_args.for_gc_p = for_gc_p;

    GCToEEInterface::GcEnumAllocContexts(fix_alloc_context, &enum_args);

    fix_youngest_allocation_area();
}

// Closes out the allocation area of older_gen and leaves its allocation pointer
// and limit null.
void gc_heap::fix_older_allocation_area (generation* older_gen)
{
    heap_segment* older_gen_seg = generation_allocation_segment (older_gen);

    if (generation_allocation_limit (older_gen) ==
        heap_segment_plan_allocated (older_gen_seg))
    {
        // The area ends at the segment's plan allocated, so just pull that back to
        // the allocation pointer.
        assert (older_gen_seg != ephemeral_heap_segment);
        heap_segment_plan_allocated (older_gen_seg) =
            generation_allocation_pointer (older_gen);
        generation_allocation_limit (older_gen) =
            generation_allocation_pointer (older_gen);
    }
    else
    {
        uint8_t* point = generation_allocation_pointer (older_gen);
        size_t size = (generation_allocation_limit (older_gen) - generation_allocation_pointer (older_gen));

        if (size != 0)
        {
            assert ((size >= Align (min_obj_size)));
            dprintf(3,("Making unused area [%zx, %zx[", (size_t)point, (size_t)point+size));
            make_unused_array (point, size);

            if (size < min_free_list)
            {
                // Too small for the free list; it only counts as free object space.
                generation_free_obj_space (older_gen) += size;
            }
            else
            {
                generation_allocator (older_gen)->thread_item_front (point, size);
                add_gen_free (older_gen->gen_num, size);
                generation_free_list_space (older_gen) += size;
            }
        }
    }

    generation_allocation_pointer (older_gen) = 0;
    generation_allocation_limit (older_gen) = 0;
}

#ifdef MULTIPLE_HEAPS
// make sure this allocation context does not point to idle heaps
void gc_heap::fix_allocation_context_heaps (gc_alloc_context* gc_context, void*)
{
    alloc_context* acontext = (alloc_context*)gc_context;

    // Home heap: a heap number at or past n_heaps is wrapped with a modulo.
    GCHeap* home_heap = acontext->get_home_heap ();
    int home_index = 0;
    if (home_heap)
    {
        home_index = home_heap->pGenGCHeap->heap_number;
    }
    if (home_index >= gc_heap::n_heaps)
    {
        home_index = home_index % gc_heap::n_heaps;
        acontext->set_home_heap (GCHeap::GetHeap (home_index));
    }

    // Alloc heap: same wrapping, and the heap that takes over the context gets its
    // alloc_context_count bumped.
    GCHeap* alloc_heap = acontext->get_alloc_heap ();
    int alloc_index = 0;
    if (alloc_heap)
    {
        alloc_index = alloc_heap->pGenGCHeap->heap_number;
    }
    if (alloc_index >= gc_heap::n_heaps)
    {
        alloc_index = alloc_index % gc_heap::n_heaps;
        acontext->set_alloc_heap (GCHeap::GetHeap (alloc_index));
        acontext->get_alloc_heap ()->pGenGCHeap->alloc_context_count++;
    }
}

// make sure no allocation contexts point to idle heaps
void gc_heap::fix_allocation_contexts_heaps()
{
    // Run fix_allocation_context_heaps over every allocation context; no extra
    // argument is needed by the callback.
    GCToEEInterface::GcEnumAllocContexts (&gc_heap::fix_allocation_context_heaps, nullptr);
}
#endif //MULTIPLE_HEAPS

// Points the allocation segment of gen at the segment it should allocate from.
void gc_heap::set_allocation_heap_segment (generation* gen)
{
#ifdef USE_REGIONS
    // With regions the allocation segment is simply the generation's start segment.
    heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
    dprintf (REGIONS_LOG, ("set gen%d alloc seg to start seg %p", gen->gen_num, heap_segment_mem (seg)));
#else //USE_REGIONS
    uint8_t* gen_start = generation_allocation_start (gen);
    assert (gen_start);

    // Nothing to do if the current allocation segment already contains the start.
    heap_segment* seg = generation_allocation_segment (gen);
    if (in_range_for_segment (gen_start, seg))
    {
        return;
    }

    // try ephemeral heap segment in case of heap expansion
    seg = ephemeral_heap_segment;
    if (!in_range_for_segment (gen_start, seg))
    {
        // Otherwise walk the generation's segments until the one holding the start
        // is found.
        seg = heap_segment_rw (generation_start_segment (gen));
        _ASSERTE(seg != NULL);

        while (!in_range_for_segment (gen_start, seg))
        {
            seg = heap_segment_next_rw (seg);
            _ASSERTE(seg != NULL);
        }
    }
#endif //USE_REGIONS

    generation_allocation_segment (gen) = seg;
}

// Clears the allocation pointer and limit of gen and re-selects its allocation
// segment. start has to be a non null, aligned address.
void gc_heap::reset_allocation_pointers (generation* gen, uint8_t* start)
{
    assert (start);
    assert (Align ((size_t)start) == (size_t)start);

#ifndef USE_REGIONS
    // Has to be set before set_allocation_heap_segment, which reads it.
    generation_allocation_start (gen) = start;
#endif //!USE_REGIONS

    generation_allocation_limit (gen) = 0;
    generation_allocation_pointer (gen) = 0;

    set_allocation_heap_segment (gen);
}

// Returns false when the allocation budget of gen_number is used up (negative), or,
// for a single heap, when a gen0 GC looks overdue.
bool gc_heap::new_allocation_allowed (int gen_number)
{
    if (dd_new_allocation (dynamic_data_of (gen_number)) < 0)
    {
        return false;
    }

#ifndef MULTIPLE_HEAPS
    if ((gen_number == 0) && (settings.pause_mode != pause_no_gc))
    {
        dynamic_data* dd0 = dynamic_data_of (0);
        dprintf (3, ("evaluating, running amount %zd - new %zd = %zd",
            allocation_running_amount, dd_new_allocation (dd0),
            (allocation_running_amount - dd_new_allocation (dd0))));

        // More than dd_min_size has been allocated since the running amount was
        // last sampled.
        if ((allocation_running_amount - dd_new_allocation (dd0)) >
            dd_min_size (dd0))
        {
            uint64_t ctime = GCToOSInterface::GetLowPrecisionTimeStamp();
            if ((ctime - allocation_running_time) > 1000)
            {
                dprintf (2, (">1s since last gen0 gc"));
                return false;
            }

            // Not overdue yet; resample the running amount.
            allocation_running_amount = dd_new_allocation (dd0);
        }
    }
#endif //MULTIPLE_HEAPS

    return true;
}

inline
ptrdiff_t gc_heap::get_desired_allocation (int gen_number)
{
    return dd_desired_allocation (dynamic_data_of (gen_number));
}

inline
ptrdiff_t  gc_heap::get_new_allocation (int gen_number)
{
    return dd_new_allocation (dynamic_data_of (gen_number));
}

//return the amount allocated so far in gen_number
inline
ptrdiff_t  gc_heap::get_allocation (int gen_number)
{
    dynamic_data* dd = dynamic_data_of (gen_number);

    return dd_desired_allocation (dd) - dd_new_allocation (dd);
}

// Grows the mark stack m to max (init_len, 2*len) entries, preserving the first len
// entries. On success m and len are updated and TRUE is returned; on allocation
// failure m and len are left untouched and FALSE is returned.
inline
BOOL grow_mark_stack (mark*& m, size_t& len, size_t init_len)
{
    size_t new_size = max (init_len, 2*len);
    mark* tmp = new (nothrow) mark [new_size];
    if (tmp)
    {
        memcpy (tmp, m, len * sizeof (mark));
        delete[] m;
        m = tmp;
        len = new_size;
        return TRUE;
    }
    else
    {
        // Report the size that was actually requested, not the current size.
        dprintf (1, ("Failed to allocate %zd bytes for mark stack", (new_size * sizeof (mark))));
        return FALSE;
    }
}

// Returns the start address of the pinned plug described by m.
inline
uint8_t* pinned_plug (mark* m)
{
    uint8_t* plug_start = m->first;
    return plug_start;
}

// Returns a writable reference to the len field of the pinned plug entry m.
inline
size_t& pinned_len (mark* m)
{
    size_t& plug_len = m->len;
    return plug_len;
}

// Records in m the distance from pin_free_space_start to the pinned plug and, with
// SHORT_PLUGS, where that space starts.
inline
void set_new_pin_info (mark* m, uint8_t* pin_free_space_start)
{
    uint8_t* plug_start = pinned_plug (m);
    m->len = plug_start - pin_free_space_start;
#ifdef SHORT_PLUGS
    m->allocation_context_start_region = pin_free_space_start;
#endif //SHORT_PLUGS
}

#ifdef SHORT_PLUGS
// Returns a writable reference to the allocation_context_start_region field of m.
inline
uint8_t*& pin_allocation_context_start_region (mark* m)
{
    uint8_t*& start_region = m->allocation_context_start_region;
    return start_region;
}

// Maps old_loc, an address within the plug_and_gap sized area right before the pinned
// plug, to the same offset inside the entry's saved pre plug reloc info.
uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry)
{
    uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info());
    uint8_t* pre_plug_info_start = pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap);
    uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - pre_plug_info_start);

    dprintf (2, ("EP: %p(%p), %p", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved));
    return plug_start_in_saved;
}

// Marks a plug as padded, either at old_loc itself or, when set_padding_on_saved_p
// is set, at the matching location in the pinned plug entry's saved info.
inline
void set_padding_in_expand (uint8_t* old_loc,
                            BOOL set_padding_on_saved_p,
                            mark* pinned_plug_entry)
{
    uint8_t* padded_plug = old_loc;
    if (set_padding_on_saved_p)
    {
        padded_plug = get_plug_start_in_saved (old_loc, pinned_plug_entry);
    }

    set_plug_padded (padded_plug);
}

// Clears the padded mark of a plug, either at old_loc itself or, when
// set_padding_on_saved_p is set, at the matching location in the pinned plug entry's
// saved info.
inline
void clear_padding_in_expand (uint8_t* old_loc,
                              BOOL set_padding_on_saved_p,
                              mark* pinned_plug_entry)
{
    uint8_t* padded_plug = old_loc;
    if (set_padding_on_saved_p)
    {
        padded_plug = get_plug_start_in_saved (old_loc, pinned_plug_entry);
    }

    clear_plug_padded (padded_plug);
}
#endif //SHORT_PLUGS

// Empties the pinned plug queue: both ends go back to index 0.
void gc_heap::reset_pinned_queue()
{
    mark_stack_bos = 0;
    mark_stack_tos = 0;
}

// Rewinds only the bottom of the pinned plug queue to index 0; the top is untouched.
void gc_heap::reset_pinned_queue_bos()
{
    mark_stack_bos = 0;
}

// last_pinned_plug is only for asserting purpose.
void gc_heap::merge_with_last_pinned_plug (uint8_t* last_pinned_plug, size_t plug_size)
{
    // Nothing to merge with when there is no last pinned plug.
    if (!last_pinned_plug)
    {
        return;
    }

    // The most recently queued entry is the one right below the top of the stack.
    mark& last_m = mark_stack_array[mark_stack_tos - 1];
    assert (last_pinned_plug == last_m.first);

    if (last_m.saved_post_p)
    {
        last_m.saved_post_p = FALSE;
        dprintf (3, ("setting last plug %p post to false", last_m.first));
        // We need to recover what the gap has overwritten.
        uint8_t* overwritten_start = last_m.first + last_m.len - sizeof (plug_and_gap);
        memcpy (overwritten_start, &(last_m.saved_post_plug), sizeof (gap_reloc_pair));
    }

    // The plug that follows becomes part of the last pinned plug.
    last_m.len += plug_size;
    dprintf (3, ("recovered the last part of plug %p, setting its plug size to %zx", last_m.first, last_m.len));
}

// If the oldest pinned plug lies within [allocation pointer, allocation limit) of gen,
// the limit is pulled back to the start of that plug.
void gc_heap::set_allocator_next_pin (generation* gen)
{
    dprintf (3, ("SANP: gen%d, ptr; %p, limit: %p", gen->gen_num, generation_allocation_pointer (gen), generation_allocation_limit (gen)));
    if (pinned_plug_que_empty_p())
    {
        return;
    }

    uint8_t* plug = pinned_plug (oldest_pin());
    const bool plug_in_alloc_area_p = ((plug >= generation_allocation_pointer (gen)) &&
                                       (plug <  generation_allocation_limit (gen)));
    if (plug_in_alloc_area_p)
    {
#ifdef USE_REGIONS
        assert (region_of (generation_allocation_pointer (gen)) ==
                region_of (generation_allocation_limit (gen) - 1));
#endif //USE_REGIONS
        generation_allocation_limit (gen) = plug;
        dprintf (3, ("SANP: get next pin free space in gen%d for alloc: %p->%p(%zd)",
            gen->gen_num,
            generation_allocation_pointer (gen), generation_allocation_limit (gen),
            (generation_allocation_limit (gen) - generation_allocation_pointer (gen))));
    }
    else
    {
        // The plug is not expected to sit between the start of the allocation segment
        // and the allocation pointer.
        assert (!((plug < generation_allocation_pointer (gen)) &&
                  (plug >= heap_segment_mem (generation_allocation_segment (gen)))));
    }
}

// After we set the info, we increase tos.
void gc_heap::set_pinned_info (uint8_t* last_pinned_plug, size_t plug_len, generation* gen)
{
#ifndef _DEBUG
    UNREFERENCED_PARAMETER(last_pinned_plug);
#endif //_DEBUG

    mark& m = mark_stack_array[mark_stack_tos];
    assert (m.first == last_pinned_plug);

    m.len = plug_len;
    mark_stack_tos++;
    assert (gen != 0);
    // Why are we checking here? gen is never 0.
    if (gen != 0)
    {
        set_allocator_next_pin (gen);
    }
}

// Removes the oldest entry from the pinned plug queue and returns its index.
size_t gc_heap::deque_pinned_plug ()
{
    dprintf (3, ("deque: %zd->%p", mark_stack_bos, pinned_plug (pinned_plug_of (mark_stack_bos))));
    return mark_stack_bos++;
}

// Returns the pinned plug entry stored at index bos of the mark stack array.
inline
mark* gc_heap::pinned_plug_of (size_t bos)
{
    return (mark_stack_array + bos);
}

// Returns the entry at the bottom of the pinned plug queue.
inline
mark* gc_heap::oldest_pin ()
{
    mark* oldest_entry = pinned_plug_of (mark_stack_bos);
    return oldest_entry;
}

// The queue is empty when its bottom has caught up with its top.
inline
BOOL gc_heap::pinned_plug_que_empty_p ()
{
    return (mark_stack_tos == mark_stack_bos);
}

// Returns the entry right below the bottom of the queue, or 0 if the bottom is at
// the very start of the array.
inline
mark* gc_heap::before_oldest_pin()
{
    if (mark_stack_bos < 1)
    {
        return 0;
    }

    return pinned_plug_of (mark_stack_bos-1);
}

// Tells whether o is in an ephemeral generation: with regions this is decided by the
// object's generation number, otherwise by the [ephemeral_low, ephemeral_high) range.
inline
BOOL gc_heap::ephemeral_pointer_p (uint8_t* o)
{
#ifdef USE_REGIONS
    int gen_num = object_gennum ((uint8_t*)o);
    assert (gen_num >= 0);
    return (gen_num < max_generation);
#else //USE_REGIONS
    return !((o < ephemeral_low) || (o >= ephemeral_high));
#endif //USE_REGIONS
}

// This needs to check the range that's covered by bookkeeping because find_object will
// need to look at the brick table.
inline
bool gc_heap::is_in_find_object_range (uint8_t* o)
{
    // A null pointer is never in range.
    if (o == nullptr)
    {
        return false;
    }

#if defined(USE_REGIONS) && defined(FEATURE_CONSERVATIVE_GC)
    // Here the upper bound is what bookkeeping has committed rather than the highest
    // GC address.
    return ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed));
#else //USE_REGIONS && FEATURE_CONSERVATIVE_GC
    const bool in_gc_range_p = ((o >= g_gc_lowest_address) && (o < g_gc_highest_address));
#ifdef USE_REGIONS
    // Anything within the GC range has to be covered by committed bookkeeping.
    assert (!in_gc_range_p ||
            ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)));
#endif //USE_REGIONS
    return in_gc_range_p;
#endif //USE_REGIONS && FEATURE_CONSERVATIVE_GC
}

#ifdef USE_REGIONS
// This assumes o is guaranteed to be in a region.
inline
bool gc_heap::is_in_condemned_gc (uint8_t* o)
{
    assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address));

    const int condemned_gen = settings.condemned_generation;
    if (condemned_gen >= max_generation)
    {
        // No generation check is done when max_generation (or above) is condemned.
        return true;
    }

    // Otherwise o is condemned only if its region's generation is not older than the
    // condemned one.
    int region_gen = get_region_gen_num (o);
    return (region_gen <= condemned_gen);
}

inline
bool gc_heap::should_check_brick_for_reloc (uint8_t* o)
{
    assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address));

    const size_t region_index = get_skewed_basic_region_index_for_address (o);

    // return true if the region is not SIP and the generation is <= condemned generation
    return ((map_region_to_generation_skewed[region_index] & (RI_SIP|RI_GEN_MASK)) <= settings.condemned_generation);
}
#endif //USE_REGIONS

#ifdef MH_SC_MARK
// Returns this heap's slot in g_mark_stack_busy. Slots are HS_CACHE_LINE_SIZE bytes
// apart and the first two are skipped.
inline
int& gc_heap::mark_stack_busy()
{
    const size_t slot_index = (heap_number+2)*HS_CACHE_LINE_SIZE/sizeof(int);
    return g_mark_stack_busy [slot_index];
}
#endif //MH_SC_MARK

// Installs arr as the mark stack array with its initial length and an empty pinned
// plug queue.
void gc_heap::make_mark_stack (mark* arr)
{
    reset_pinned_queue();

    mark_stack_array_length = MARK_STACK_INITIAL_LENGTH;
    mark_stack_array = arr;

#ifdef MH_SC_MARK
    mark_stack_busy() = 0;
#endif //MH_SC_MARK
}

#ifdef BACKGROUND_GC
// Returns the background promoted byte counter for thread. With multiple heaps the
// counters are 16 elements apart in g_bpromoted; otherwise there is just one.
inline
size_t& gc_heap::bpromoted_bytes(int thread)
{
#ifndef MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(thread);
    return g_bpromoted;
#else //!MULTIPLE_HEAPS
    return g_bpromoted [thread*16];
#endif //!MULTIPLE_HEAPS
}

// Installs arr as the background mark stack: initial length, top of stack at the
// start of the array.
void gc_heap::make_background_mark_stack (uint8_t** arr)
{
    background_mark_stack_array_length = MARK_STACK_INITIAL_LENGTH;
    background_mark_stack_array = arr;
    background_mark_stack_tos = arr;
}

// Installs arr as the (empty) c_mark_list, sized at one entry more than the number
// of minimum sized objects that fit in an OS page.
void gc_heap::make_c_mark_list (uint8_t** arr)
{
    c_mark_list_length = 1 + (OS_PAGE_SIZE / MIN_OBJECT_SIZE);
    c_mark_list_index = 0;
    c_mark_list = arr;
}
#endif //BACKGROUND_GC

#ifdef CARD_BUNDLE
// The card bundle keeps track of groups of card words.
// Number of card bundle bits held in one card bundle word (a uint32_t).
static const size_t card_bundle_word_width = 32;

// How do we express the fact that 32 bits (card_word_width) is one uint32_t?
// Number of card words covered by one card bundle bit: GC_PAGE_SIZE divided by
// (sizeof(uint32_t) * card_bundle_word_width).
static const size_t card_bundle_size = (size_t)(GC_PAGE_SIZE / (sizeof(uint32_t)*card_bundle_word_width));

// Index of the card bundle word that holds the bit for card bundle cardb.
inline
size_t card_bundle_word (size_t cardb)
{
    const size_t word_index = cardb / card_bundle_word_width;
    return word_index;
}

// Bit position of card bundle cardb within its card bundle word.
inline
uint32_t card_bundle_bit (size_t cardb)
{
    const size_t bit_index = cardb % card_bundle_word_width;
    return (uint32_t)bit_index;
}

// Rounds the card word index cardw up to the next multiple of card_bundle_size.
size_t align_cardw_on_bundle (size_t cardw)
{
    const size_t bundle_mask = card_bundle_size - 1;
    return ((size_t)(cardw + bundle_mask) & ~bundle_mask);
}

// Get the card bundle representing a card word
// Maps a card word index to the index of the card bundle that covers it.
size_t cardw_card_bundle (size_t cardw)
{
    const size_t bundle_index = cardw / card_bundle_size;
    return bundle_index;
}

// Get the first card word in a card bundle
// Maps a card bundle index to the index of the first card word it covers.
size_t card_bundle_cardw (size_t cardb)
{
    const size_t first_card_word = cardb * card_bundle_size;
    return first_card_word;
}

// Clear the specified card bundle
// Clears the bit for card bundle cardb in the card bundle table.
void gc_heap::card_bundle_clear (size_t cardb)
{
    // Build the mask from an unsigned literal: the bit index can be 31, and shifting
    // a signed 1 into the sign bit is best avoided.
    const uint32_t bit = (1u << card_bundle_bit (cardb));
    uint32_t* bundle = &card_bundle_table[card_bundle_word (cardb)];
#ifdef MULTIPLE_HEAPS
    // card bundles may straddle segments and heaps, thus bits may be cleared concurrently
    if ((*bundle & bit) != 0)
    {
        Interlocked::And (bundle, ~bit);
    }
#else
    *bundle &= ~bit;
#endif

    // check for races
    assert ((*bundle & bit) == 0);

    dprintf (2, ("Cleared card bundle %zx [%zx, %zx[", cardb, (size_t)card_bundle_cardw (cardb),
              (size_t)card_bundle_cardw (cardb+1)));
}

// Sets every bit of bits in *bundle.
inline void set_bundle_bits (uint32_t* bundle, uint32_t bits)
{
#ifdef MULTIPLE_HEAPS
    // card bundles may straddle segments and heaps, thus bits may be set concurrently
    const bool already_set_p = ((*bundle & bits) == bits);
    if (!already_set_p)
    {
        Interlocked::Or (bundle, bits);
    }
#else //MULTIPLE_HEAPS
    *bundle = (*bundle | bits);
#endif //MULTIPLE_HEAPS

    // check for races: none of the requested bits may be missing now
    assert ((bits & ~(*bundle)) == 0);
}

// Sets the bit for card bundle cardb in the card bundle table.
void gc_heap::card_bundle_set (size_t cardb)
{
    // Unsigned literal: the bit index can be 31, which must not be shifted into the
    // sign bit of a signed int.
    const uint32_t bits = (1u << card_bundle_bit (cardb));
    set_bundle_bits (&card_bundle_table [card_bundle_word (cardb)], bits);
}

// Set the card bundle bits between start_cardb and end_cardb
void gc_heap::card_bundles_set (size_t start_cardb, size_t end_cardb)
{
    // An empty range still sets the single bundle it names.
    if (start_cardb == end_cardb)
    {
        card_bundle_set(start_cardb);
        return;
    }

    const size_t first_word = card_bundle_word (start_cardb);
    const size_t last_word = card_bundle_word (end_cardb);

    if (first_word >= last_word)
    {
        // The whole range lives in one word: set the bits from the start bit up to,
        // but not including, the end bit.
        uint32_t bits = (highbits (~0u, card_bundle_bit (start_cardb)) &
                          lowbits (~0u, card_bundle_bit (end_cardb)));
        set_bundle_bits (&card_bundle_table [first_word], bits);
        return;
    }

    // Partial first word: everything from the start bit upwards.
    uint32_t bits = highbits (~0u, card_bundle_bit (start_cardb));
    set_bundle_bits (&card_bundle_table [first_word], bits);

    // Partial last word: everything below the end bit, if there is anything.
    if (card_bundle_bit (end_cardb))
    {
        bits = lowbits (~0u, card_bundle_bit (end_cardb));
        set_bundle_bits (&card_bundle_table [last_word], bits);
    }

    // Full words in between.
    for (size_t word = first_word + 1; word < last_word; word++)
    {
        card_bundle_table [word] = ~0u;
    }
}

// Indicates whether the specified bundle is set.
// Returns non zero if the bit for card bundle cardb is set in the card bundle table.
BOOL gc_heap::card_bundle_set_p (size_t cardb)
{
    // Unsigned literal so that testing bit 31 does not shift into the sign bit of a
    // signed int.
    return (card_bundle_table[card_bundle_word(cardb)] & (1u << card_bundle_bit (cardb)));
}

// Returns the size (in bytes) of a card bundle representing the region from 'from' to 'end'
size_t size_card_bundle_of (uint8_t* from, uint8_t* end)
{
    // Number of heap bytes represented by a card bundle word
    const size_t cbw_span = card_size * card_word_width * card_bundle_size * card_bundle_word_width;
    const size_t span_mask = cbw_span - 1;

    // Widen the region to whole card bundle words: start rounded down, end rounded up.
    uint8_t* aligned_from = (uint8_t*)((size_t)from & ~span_mask);
    uint8_t* aligned_end = (uint8_t*)((size_t)(end + span_mask) & ~span_mask);

    // Make sure they're really aligned
    assert (((size_t)aligned_from & span_mask) == 0);
    assert (((size_t)aligned_end  & span_mask) == 0);

    // One uint32_t per card bundle word.
    return ((aligned_end - aligned_from) / cbw_span) * sizeof (uint32_t);
}

// Takes a pointer to a card bundle table and an address, and returns a pointer that represents
// where a theoretical card bundle table that represents every address (starting from 0) would
// start if the bundle word representing the address were to be located at the pointer passed in.
// The returned 'translated' pointer makes it convenient/fast to calculate where the card bundle
// for a given address is using a simple shift operation on the address.
uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address)
{
    // The number of bytes of heap memory represented by a card bundle word
    const size_t heap_bytes_for_bundle_word = card_size * card_word_width * card_bundle_size * card_bundle_word_width;

    // Number of bundle words that would come before the one for lowest_address.
    const size_t words_before_lowest = (size_t)lowest_address / heap_bytes_for_bundle_word;

    // Each card bundle word is 32 bits
    return (uint32_t*)((uint8_t*)cb - (words_before_lowest * sizeof (uint32_t)));
}

// Turns card bundles on, if write watch can be used for the card table and they are
// not on already.
void gc_heap::enable_card_bundles ()
{
    if (!can_use_write_watch_for_card_table() || card_bundles_enabled())
    {
        return;
    }

    dprintf (1, ("Enabling card bundles"));

    // We initially set all of the card bundles
    const size_t first_bundle = cardw_card_bundle (card_word (card_of (lowest_address)));
    const size_t last_bundle = cardw_card_bundle (align_cardw_on_bundle (card_word (card_of (highest_address))));
    card_bundles_set (first_bundle, last_bundle);

    settings.card_bundles = TRUE;
}

// Reports the card_bundles flag of the current settings.
BOOL gc_heap::card_bundles_enabled ()
{
    BOOL enabled_p = settings.card_bundles;
    return enabled_p;
}
#endif // CARD_BUNDLE

// brick_size is the divisor/multiplier that brick_of and brick_address use to map
// between heap addresses and brick indices.
#ifdef HOST_64BIT
#define brick_size ((size_t)4096)
#else //HOST_64BIT
#define brick_size ((size_t)2048)
#endif //HOST_64BIT

// Index of the brick that covers add, counted from lowest_address.
inline
size_t gc_heap::brick_of (uint8_t* add)
{
    const size_t offset_from_lowest = (size_t)(add - lowest_address);
    return offset_from_lowest / brick_size;
}

// First address covered by the given brick.
inline
uint8_t* gc_heap::brick_address (size_t brick)
{
    const size_t offset_from_lowest = brick_size * brick;
    return lowest_address + offset_from_lowest;
}


void gc_heap::clear_brick_table (uint8_t* from, uint8_t* end)
{
    size_t from_brick = brick_of (from);
    size_t end_brick = brick_of (end);
    memset (&brick_table[from_brick], 0, sizeof(brick_table[from_brick])*(end_brick-from_brick));
}

//codes for the brick entries:
//entry == 0 -> not assigned
//entry >0 offset is entry-1
//entry <0 jump back entry bricks


// Stores val in the brick table at index using the brick entry encoding:
// non negative values are stored as val+1, negative values as is (clamped to -32767).
inline
void gc_heap::set_brick (size_t index, ptrdiff_t val)
{
    // Negative entries cannot go below what a short can hold.
    val = (val < -32767) ? -32767 : val;
    assert (val < 32767);

    if (val < 0)
    {
        brick_table [index] = (short)val;
    }
    else
    {
        brick_table [index] = (short)val+1;
    }

    dprintf (3, ("set brick[%zx] to %d\n", index, (short)val));
}

// Reads the raw brick table entry at index.
inline
int gc_heap::get_brick_entry (size_t index)
{
#ifndef MULTIPLE_HEAPS
    return brick_table[index];
#else //!MULTIPLE_HEAPS
    return VolatileLoadWithoutBarrier(&brick_table [index]);
#endif //!MULTIPLE_HEAPS
}


// Rounds add up to the next multiple of brick_size.
inline
uint8_t* align_on_brick (uint8_t* add)
{
    const size_t brick_mask = brick_size - 1;
    return (uint8_t*)((size_t)(add + brick_mask) & ~brick_mask);
}

// Rounds add down to a multiple of brick_size.
inline
uint8_t* align_lower_brick (uint8_t* add)
{
    const size_t brick_mask = brick_size - 1;
    return (uint8_t*)(((size_t)add) & ~brick_mask);
}

// Size in bytes of the brick table entries (one short each) covering [from, end[.
// Both ends have to be brick aligned.
size_t size_brick_of (uint8_t* from, uint8_t* end)
{
    assert (((size_t)from & (brick_size-1)) == 0);
    assert (((size_t)end  & (brick_size-1)) == 0);

    const size_t brick_count = (end - from) / brick_size;
    return brick_count * sizeof (short);
}

// First address covered by the given card.
inline
uint8_t* gc_heap::card_address (size_t card)
{
    const size_t address_value = card_size * card;
    return (uint8_t*) address_value;
}

// Index of the card that covers the given address.
inline
size_t gc_heap::card_of ( uint8_t* object)
{
    const size_t address_value = (size_t)(object);
    return address_value / card_size;
}

// Rounds add up to the next multiple of card_size.
inline
uint8_t* align_on_card (uint8_t* add)
{
    const size_t card_mask = card_size - 1;
    return (uint8_t*)((size_t)(add + card_mask) & ~card_mask);
}
inline
uint8_t* align_on_card_word (uint8_t* add)
{
    return (uint8_t*) ((size_t)(add + (card_size*card_word_width)-1) & ~(card_size*card_word_width - 1));
}

// Rounds add down to a multiple of card_size.
inline
uint8_t* align_lower_card (uint8_t* add)
{
    const size_t card_mask = card_size-1;
    return (uint8_t*)((size_t)add & ~card_mask);
}

// Clears the bit for card in the card table.
inline
void gc_heap::clear_card (size_t card)
{
    // Unsigned literal: the bit index can be 31, which must not be shifted into the
    // sign bit of a signed int.
    card_table [card_word (card)] =
        (card_table [card_word (card)] & ~(1u << card_bit (card)));
    dprintf (3,("Cleared card %zx [%zx, %zx[", card, (size_t)card_address (card),
              (size_t)card_address (card+1)));
}

// Sets the bit for card in the card table and, when card bundles are managed
// manually, the card bundle covering that card word as well.
inline
void gc_heap::set_card (size_t card)
{
    size_t word = card_word (card);
    // Unsigned literal: the bit index can be 31, which must not be shifted into the
    // sign bit of a signed int.
    card_table[word] = (card_table [word] | (1u << card_bit (card)));

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    // Also set the card bundle that corresponds to the card
    size_t bundle_to_set = cardw_card_bundle(word);

    card_bundle_set(bundle_to_set);

    dprintf (3,("Set card %zx [%zx, %zx[ and bundle %zx", card, (size_t)card_address (card), (size_t)card_address (card+1), bundle_to_set));
#endif
}

// Returns non zero if the bit for card is set in the card table.
inline
BOOL  gc_heap::card_set_p (size_t card)
{
    // Unsigned literal so that testing bit 31 does not shift into the sign bit of a
    // signed int.
    return ( card_table [ card_word (card) ] & (1u << card_bit (card)));
}

// Returns the number of DWORDs in the card table that cover the
// range of addresses [from, end[.
size_t count_card_of (uint8_t* from, uint8_t* end)
{
    return card_word (gcard_of (end - 1)) - card_word (gcard_of (from)) + 1;
}

// Returns the number of bytes to allocate for a card table
// that covers the range of addresses [from, end[.
size_t size_card_of (uint8_t* from, uint8_t* end)
{
    // One uint32_t per card word in the range.
    const size_t card_word_count = count_card_of (from, end);
    return card_word_count * sizeof(uint32_t);
}

// We don't store seg_mapping_table in card_table_info because there's only ever one view.
// Header stored immediately before a card table: the card_table_* accessors find it by
// subtracting sizeof (card_table_info) from the (untranslated) card table pointer.
// The offsets of size and next_card_table are checked against dac_card_table_info by
// the static_asserts after this class, so fields must not be reordered.
class card_table_info
{
public:
    // Has to stay the first field: card_table_refcount reads it straight from the
    // start of the header.
    unsigned    recount;
    size_t      size;
    uint32_t*   next_card_table;

    uint8_t*    lowest_address;
    uint8_t*    highest_address;
    short*      brick_table;

#ifdef CARD_BUNDLE
    uint32_t*   card_bundle_table;
#endif //CARD_BUNDLE

    // mark_array is always at the end of the data structure because we
    // want to be able to make one commit call for everything before it.
#ifdef BACKGROUND_GC
    uint32_t*   mark_array;
#endif //BACKGROUND_GC
};

// Compile time check that size and next_card_table sit at the same offsets in
// card_table_info and dac_card_table_info.
static_assert(offsetof(dac_card_table_info, size) == offsetof(card_table_info, size), "DAC card_table_info layout mismatch");
static_assert(offsetof(dac_card_table_info, next_card_table) == offsetof(card_table_info, next_card_table), "DAC card_table_info layout mismatch");

//These are accessors on untranslated cardtable
inline
unsigned& card_table_refcount (uint32_t* c_table)
{
    return *(unsigned*)((char*)c_table - sizeof (card_table_info));
}

inline
uint8_t*& card_table_lowest_address (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->lowest_address;
}

// Moves the card table pointer back by the card words that would precede the one for
// the table's lowest address, so the result can be indexed by an address' card word.
uint32_t* translate_card_table (uint32_t* ct)
{
    const size_t lowest_card_word = card_word (gcard_of (card_table_lowest_address (ct)));
    return (uint32_t*)((uint8_t*)ct - lowest_card_word * sizeof(uint32_t));
}

inline
uint8_t*& card_table_highest_address (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->highest_address;
}

inline
short*& card_table_brick_table (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->brick_table;
}

#ifdef CARD_BUNDLE
inline
uint32_t*& card_table_card_bundle_table (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->card_bundle_table;
}
#endif //CARD_BUNDLE

#ifdef BACKGROUND_GC
inline
uint32_t*& card_table_mark_array (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->mark_array;
}

// mark_bit_pitch: number of address bytes represented by one mark bit.
#ifdef HOST_64BIT
#define mark_bit_pitch ((size_t)16)
#else
#define mark_bit_pitch ((size_t)8)
#endif // HOST_64BIT
// mark_word_width: number of mark bits held in one mark array word.
#define mark_word_width ((size_t)32)
// mark_word_size: number of address bytes covered by one mark array word.
#define mark_word_size (mark_word_width * mark_bit_pitch)

// Rounds add up to the next multiple of mark_bit_pitch.
inline
uint8_t* align_on_mark_bit (uint8_t* add)
{
    uint8_t* bumped = add + (mark_bit_pitch - 1);
    size_t aligned = (size_t)bumped & ~(mark_bit_pitch - 1);
    return (uint8_t*)aligned;
}

// Rounds add down to a multiple of mark_bit_pitch.
inline
uint8_t* align_lower_mark_bit (uint8_t* add)
{
    size_t aligned = (size_t)add & ~(mark_bit_pitch - 1);
    return (uint8_t*)aligned;
}

// Returns nonzero when add is a multiple of mark_word_size, i.e. when none
// of the address bits below the mark word granularity are set.
inline
BOOL is_aligned_on_mark_word (uint8_t* add)
{
    return (((size_t)add & (mark_word_size - 1)) == 0);
}

// Rounds add up to the next multiple of mark_word_size.
inline
uint8_t* align_on_mark_word (uint8_t* add)
{
    uint8_t* bumped = add + mark_word_size - 1;
    size_t aligned = (size_t)bumped & ~(mark_word_size - 1);
    return (uint8_t*)aligned;
}

// Rounds add down to a multiple of mark_word_size.
inline
uint8_t* align_lower_mark_word (uint8_t* add)
{
    size_t aligned = (size_t)add & ~(mark_word_size - 1);
    return (uint8_t*)aligned;
}

// Returns the absolute mark bit index for address add
// (one mark bit per mark_bit_pitch bytes).
inline
size_t mark_bit_of (uint8_t* add)
{
    size_t address = (size_t)add;
    return address / mark_bit_pitch;
}

// Returns the position of mark_bit within its mark array word.
inline
unsigned int mark_bit_bit (size_t mark_bit)
{
    size_t bit_in_word = mark_bit % mark_word_width;
    return (unsigned int)bit_in_word;
}

// Returns the index of the mark array word that holds mark_bit.
inline
size_t mark_bit_word (size_t mark_bit)
{
    size_t word_index = mark_bit / mark_word_width;
    return word_index;
}

// Returns the absolute mark array word index for address add
// (one mark word per mark_word_size bytes).
inline
size_t mark_word_of (uint8_t* add)
{
    size_t address = (size_t)add;
    return address / mark_word_size;
}

// Inverse of mark_word_of: returns the first address covered by mark word wd.
uint8_t* mark_word_address (size_t wd)
{
    size_t address = wd * mark_word_size;
    return (uint8_t*)address;
}

// Inverse of mark_bit_of: returns the first address covered by mark_bit.
uint8_t* mark_bit_address (size_t mark_bit)
{
    size_t address = mark_bit * mark_bit_pitch;
    return (uint8_t*)address;
}

// Returns the position, within its mark array word, of the mark bit
// that corresponds to address add.
inline
size_t mark_bit_bit_of (uint8_t* add)
{
    size_t absolute_mark_bit = (size_t)add / mark_bit_pitch;
    return absolute_mark_bit % mark_word_width;
}

// Returns a nonzero value when the mark bit for address add is set in
// mark_array, and 0 otherwise. The returned value is the masked word, not
// a normalized 0/1.
inline
unsigned int gc_heap::mark_array_marked(uint8_t* add)
{
    // Build the mask from an unsigned 32-bit one: the bit index can be 31,
    // and shifting a signed int into its sign bit is not portable.
    const uint32_t bit_mask = ((uint32_t)1 << mark_bit_bit_of (add));
    return mark_array [mark_word_of (add)] & bit_mask;
}

// Returns nonzero when the mark bit for address add is set in mark_array.
inline
BOOL gc_heap::is_mark_bit_set (uint8_t* add)
{
    // Build the mask from an unsigned 32-bit one: the bit index can be 31,
    // and shifting a signed int into its sign bit is not portable.
    const uint32_t bit_mask = ((uint32_t)1 << mark_bit_bit_of (add));
    return (mark_array [mark_word_of (add)] & bit_mask);
}

// Sets the mark bit for address add in mark_array.
// With MULTIPLE_HEAPS the update is done with an interlocked OR; otherwise
// a plain read-modify-write is used.
inline
void gc_heap::mark_array_set_marked (uint8_t* add)
{
    size_t index = mark_word_of (add);
    // Build the mask from an unsigned 32-bit one: the bit index can be 31,
    // and shifting a signed int into its sign bit is not portable.
    uint32_t val = ((uint32_t)1 << mark_bit_bit_of (add));
#ifdef MULTIPLE_HEAPS
    Interlocked::Or (&(mark_array [index]), val);
#else
    mark_array [index] |= val;
#endif
}

// Clears the mark bit for address add in mark_array using a plain
// (non-interlocked) read-modify-write.
inline
void gc_heap::mark_array_clear_marked (uint8_t* add)
{
    // Build the mask from an unsigned 32-bit one: the bit index can be 31,
    // and shifting a signed int into its sign bit is not portable.
    const uint32_t bit_mask = ((uint32_t)1 << mark_bit_bit_of (add));
    mark_array [mark_word_of (add)] &= ~bit_mask;
}

// Byte size of the mark array portion covering [from, end[: one uint32_t
// per mark_word_size bytes. Both bounds are asserted to be mark word aligned.
size_t size_mark_array_of (uint8_t* from, uint8_t* end)
{
    assert (((size_t)from & ((mark_word_size)-1)) == 0);
    assert (((size_t)end  & ((mark_word_size)-1)) == 0);

    size_t mark_word_count = (end - from) / mark_word_size;
    return sizeof (uint32_t) * mark_word_count;
}

// The mark array is biased by g_gc_lowest_address so that the mark array
// computations (mark_word_of, etc) can work on absolute addresses without
// subtracting the lowest address first.
uint32_t* translate_mark_array (uint32_t* ma)
{
    size_t bias_in_bytes = size_mark_array_of (0, g_gc_lowest_address);
    return (uint32_t*)((uint8_t*)ma - bias_in_bytes);
}

#ifdef FEATURE_BASICFREEZE
// Clears the mark bits for the address range [from, end[.
// end must be aligned on a mark word boundary (asserted below).
void gc_heap::clear_mark_array (uint8_t* from, uint8_t* end)
{
    assert (gc_can_use_concurrent);
    assert (end == align_on_mark_word (end));

    uint8_t* saved_lowest = background_saved_lowest_address;
    uint8_t* saved_highest = background_saved_highest_address;

    // The addresses can be outside of the covered range because of a newly
    // allocated large object segment; there is nothing to clear in that case.
    if ((from < saved_lowest) || (end > saved_highest))
    {
        return;
    }

    // Mark words from first_whole_word up to (not including) last_word are
    // cleared in bulk; end is aligned up to make sure the address is covered.
    size_t first_whole_word = mark_word_of (align_on_mark_word (from));
    size_t last_word = mark_word_of (align_on_mark_word (end));
    dprintf (3, ("Calling clearing mark array [%zx, %zx[ for addresses [%zx, %zx[",
                 (size_t)mark_word_address (first_whole_word),
                 (size_t)mark_word_address (last_word),
                 (size_t)from, (size_t)end));

    // Clear bit by bit from from up to the first whole mark word.
    for (uint8_t* cur = from; cur < mark_word_address (first_whole_word); cur += mark_bit_pitch)
    {
        mark_array_clear_marked (cur);
    }

    memset (&mark_array[first_whole_word], 0, (last_word - first_whole_word)*sizeof (uint32_t));

#ifdef _DEBUG
    // Beware, it is assumed that the mark array word straddling
    // start has been cleared before.
    // Verify that the array is empty.
    size_t verify_end_word = mark_word_of (align_on_mark_word (end));
    for (size_t word = mark_word_of (align_on_mark_word (from)); word < verify_end_word; word++)
    {
        assert (!(mark_array [word]));
    }
    for (uint8_t* p = mark_word_address (verify_end_word); p < end; p++)
    {
        assert (!(mark_array_marked (p)));
    }
#endif //_DEBUG
}
#endif // FEATURE_BASICFREEZE
#endif //BACKGROUND_GC

//These work on untranslated card tables
inline
uint32_t*& card_table_next (uint32_t* c_table)
{
    // NOTE:  The dac takes a dependency on card_table_info being right before c_table.
    //        It's 100% ok to change this implementation detail as long as a matching change
    //        is made to DacGCBookkeepingEnumerator::Init in daccess.cpp.
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->next_card_table;
}

inline
size_t& card_table_size (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->size;
}

// Takes a reference on the untranslated card table c_table by bumping its
// reference count.
void own_card_table (uint32_t* c_table)
{
    unsigned& refcount = card_table_refcount (c_table);
    refcount += 1;
}

void destroy_card_table (uint32_t* c_table);

// Walks the chain of card tables that follows c_table, deepest first, and
// destroys the immediate successor when its reference count is zero. The
// link from c_table is cleared only when the successor was destroyed.
void delete_next_card_table (uint32_t* c_table)
{
    uint32_t* successor = card_table_next (c_table);
    if (!successor)
    {
        return;
    }

    // Give the rest of the chain a chance to be released first.
    if (card_table_next (successor))
    {
        delete_next_card_table (successor);
    }

    if (card_table_refcount (successor) != 0)
    {
        // Still referenced; leave it linked.
        return;
    }

    destroy_card_table (successor);
    card_table_next (c_table) = 0;
}

// Drops one reference on the untranslated card table c_table.
// When the count reaches zero, tries to release the tables chained after it
// and, if none remain linked, destroys c_table itself and unlinks it: either
// by resetting the globals (when c_table is the current global card table)
// or by clearing the next pointer of the table that points at it.
void release_card_table (uint32_t* c_table)
{
    assert (card_table_refcount (c_table) >0);
    card_table_refcount (c_table) -= 1;
    if (card_table_refcount (c_table) == 0)
    {
        delete_next_card_table (c_table);
        if (card_table_next (c_table) == 0)
        {
            destroy_card_table (c_table);
            // sever the link from the parent
            if (&g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))] == c_table)
            {
                g_gc_card_table = 0;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
                g_gc_card_bundle_table = 0;
#endif
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
                SoftwareWriteWatch::StaticClose();
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
            }
            else
            {
                // Start from the untranslated global card table and look for
                // the table whose next pointer is c_table.
                uint32_t* p_table = &g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))];
                while (p_table && (card_table_next (p_table) != c_table))
                    p_table = card_table_next (p_table);

                // The walk ends with p_table == 0 when c_table is not in the
                // chain; there is no link to clear then, and writing through
                // a null table pointer must be avoided.
                if (p_table)
                {
                    card_table_next (p_table) = 0;
                }
            }
        }
    }
}

// Releases the virtual memory block that holds the untranslated card table
// c_table. The block starts at the card table's header (where the reference
// count lives) and its size is the one recorded in that header.
void destroy_card_table (uint32_t* c_table)
{
    unsigned* block_start = &card_table_refcount (c_table);
    // Read the size before the block goes away.
    size_t block_size = card_table_size (c_table);

    gc_heap::destroy_card_table_helper (c_table);
    GCToOSInterface::VirtualRelease (block_start, block_size);
    dprintf (2, ("Table Virtual Free : %zx", (size_t)block_start));
}

// Accounting half of destroy_card_table: recomputes the element layout for
// the range recorded in c_table's header and subtracts everything up to the
// end of the seg mapping table from the committed bookkeeping bytes.
void gc_heap::destroy_card_table_helper (uint32_t* c_table)
{
    get_card_table_element_layout (card_table_lowest_address (c_table),
                                   card_table_highest_address (c_table),
                                   card_table_element_layout);

    // Offset of the element following the seg mapping table, i.e. the size
    // of everything that precedes the mark array.
    size_t bytes_before_mark_array = card_table_element_layout[seg_mapping_table_element + 1];
    gc_heap::reduce_committed_bytes (&card_table_refcount(c_table), bytes_before_mark_array, recorded_committed_bookkeeping_bucket, -1, true);

    // If we don't put the mark array committed in the ignored bucket, then this is where to account for the decommit of it
}

// Fills sizes[] with the byte size each bookkeeping element needs to cover
// the address range [start, end[. Elements that are compiled out or not
// enabled at runtime keep a size of zero.
void gc_heap::get_card_table_element_sizes (uint8_t* start, uint8_t* end, size_t sizes[total_bookkeeping_elements])
{
    for (int element = 0; element < total_bookkeeping_elements; element++)
    {
        sizes[element] = 0;
    }

    sizes[card_table_element] = size_card_of (start, end);
    sizes[brick_table_element] = size_brick_of (start, end);

#ifdef CARD_BUNDLE
    if (can_use_write_watch_for_card_table())
    {
        sizes[card_bundle_table_element] = size_card_bundle_of (start, end);
    }
#endif //CARD_BUNDLE

#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && defined (BACKGROUND_GC)
    if (gc_can_use_concurrent)
    {
        sizes[software_write_watch_table_element] = SoftwareWriteWatch::GetTableByteSize(start, end);
    }
#endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && BACKGROUND_GC

#ifdef USE_REGIONS
    sizes[region_to_generation_table_element] = size_region_to_generation_table_of (start, end);
#endif //USE_REGIONS

    sizes[seg_mapping_table_element] = size_seg_mapping_table_of (start, end);

#ifdef BACKGROUND_GC
    if (gc_can_use_concurrent)
    {
        sizes[mark_array_element] = size_mark_array_of (start, end);
    }
#endif //BACKGROUND_GC
}

// Computes the byte offset of every bookkeeping element within the
// bookkeeping block covering [start, end[.
// layout[e] is the offset of element e; the extra trailing entry
// layout[total_bookkeeping_elements] is the total size of the block.
// The card table itself starts after the card_table_info header.
void gc_heap::get_card_table_element_layout (uint8_t* start, uint8_t* end, size_t layout[total_bookkeeping_elements + 1])
{
    size_t sizes[total_bookkeeping_elements];
    get_card_table_element_sizes(start, end, sizes);

    const size_t alignment[total_bookkeeping_elements + 1] =
    {
        sizeof (uint32_t), // card_table_element
        sizeof (short),    // brick_table_element
#ifdef CARD_BUNDLE
        sizeof (uint32_t), // card_bundle_table_element
#endif //CARD_BUNDLE
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        sizeof(size_t),    // software_write_watch_table_element
#endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
#ifdef USE_REGIONS
        sizeof (uint8_t),  // region_to_generation_table_element
#endif //USE_REGIONS
        sizeof (uint8_t*), // seg_mapping_table_element
#ifdef BACKGROUND_GC
        // In order to avoid a dependency between commit_mark_array_by_range and this logic, it is easier to make sure
        // pages for mark array never overlaps with pages in the seg mapping table. That way commit_mark_array_by_range
        // will never commit a page that is already committed here for the seg mapping table.
        OS_PAGE_SIZE,      // mark_array_element
#endif //BACKGROUND_GC
        // commit_mark_array_by_range extends the end pointer of the commit to the next page boundary, we better make sure it
        // is reserved
        OS_PAGE_SIZE       // total_bookkeeping_elements
    };

    layout[card_table_element] = ALIGN_UP(sizeof(card_table_info), alignment[card_table_element]);

    for (int idx = brick_table_element; idx <= total_bookkeeping_elements; idx++)
    {
        // Each element starts where the previous one ends...
        size_t offset = layout[idx - 1] + sizes[idx - 1];

        // ...rounded up to its own alignment, unless the element is empty or
        // this is the trailing total-size entry.
        const bool is_real_element = (idx != total_bookkeeping_elements);
        if (is_real_element && (sizes[idx] != 0))
        {
            offset = ALIGN_UP(offset, alignment[idx]);
        }

        layout[idx] = offset;
    }
}

#ifdef USE_REGIONS
// Makes sure the GC bookkeeping data structures are committed far enough to
// cover addresses up to new_used.
// Returns true when the coverage is already sufficient or was extended, and
// false only when even the minimal commit (exactly up to new_used) failed.
// NOTE(review): presumably called when the used part of the region range
// grows - confirm against callers.
bool gc_heap::on_used_changed (uint8_t* new_used)
{
#if defined(WRITE_BARRIER_CHECK) && !defined (SERVER_GC)
    if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_BARRIERCHECK)
    {
        // Grow the committed part of the GC shadow so that it spans the same
        // distance as [g_gc_lowest_address, new_used[.
        size_t shadow_covered = g_GCShadowEnd - g_GCShadow;
        size_t used_heap_range = new_used - g_gc_lowest_address;
        if (used_heap_range > shadow_covered)
        {
            size_t extra = used_heap_range - shadow_covered;
            if (!GCToOSInterface::VirtualCommit (g_GCShadowEnd, extra))
            {
                _ASSERTE(!"Not enough memory to run HeapVerify level 2");
                // If after the assert we decide to allow the program to continue
                // running we need to be in a state that will not trigger any
                // additional AVs while we fail to allocate a shadow segment, i.e.
                // ensure calls to updateGCShadow() checkGCWriteBarrier() don't AV
                deleteGCShadow();
            }
            else
            {
                g_GCShadowEnd += extra;
            }
        }
    }
#endif //WRITE_BARRIER_CHECK && !SERVER_GC

    if (new_used > bookkeeping_covered_committed)
    {
        // false: first try a speculative (larger) commit.
        // true: only try the minimal commit up to new_used.
        bool speculative_commit_tried = false;
#ifdef STRESS_REGIONS
        if (gc_rand::get_rand(10) > 3)
        {
            dprintf (REGIONS_LOG, ("skipping speculative commit under stress regions"));
            speculative_commit_tried = true;
        }
#endif
        // At most two iterations: the speculative attempt, then the minimal one.
        while (true)
        {
            uint8_t* new_bookkeeping_covered_committed = nullptr;
            if (speculative_commit_tried)
            {
                new_bookkeeping_covered_committed = new_used;
            }
            else
            {
                // Speculatively double the covered range, capped at the whole
                // [g_gc_lowest_address, g_gc_highest_address[ range, but never
                // cover less than new_used.
                uint64_t committed_size = (uint64_t)(bookkeeping_covered_committed - g_gc_lowest_address);
                uint64_t total_size = (uint64_t)(g_gc_highest_address - g_gc_lowest_address);
                assert (committed_size <= total_size);
                assert (committed_size < (UINT64_MAX / 2));
                uint64_t new_committed_size = min(committed_size * 2, total_size);
                assert ((UINT64_MAX - new_committed_size) > (uint64_t)g_gc_lowest_address);
                uint8_t* double_commit = g_gc_lowest_address + new_committed_size;
                new_bookkeeping_covered_committed = max(double_commit, new_used);
                dprintf (REGIONS_LOG, ("committed_size                           = %zd", committed_size));
                dprintf (REGIONS_LOG, ("total_size                               = %zd", total_size));
                dprintf (REGIONS_LOG, ("new_committed_size                       = %zd", new_committed_size));
                dprintf (REGIONS_LOG, ("double_commit                            = %p", double_commit));
            }
            dprintf (REGIONS_LOG, ("bookkeeping_covered_committed     = %p", bookkeeping_covered_committed));
            dprintf (REGIONS_LOG, ("new_bookkeeping_covered_committed = %p", new_bookkeeping_covered_committed));

            if (inplace_commit_card_table (bookkeeping_covered_committed, new_bookkeeping_covered_committed))
            {
                bookkeeping_covered_committed = new_bookkeeping_covered_committed;
                break;
            }
            else
            {
                // The attempted target was already the minimum; nothing smaller to retry with.
                if (new_bookkeeping_covered_committed == new_used)
                {
                    dprintf (REGIONS_LOG, ("The minimal commit for the GC bookkeeping data structure failed, giving up"));
                    return false;
                }
                dprintf (REGIONS_LOG, ("The speculative commit for the GC bookkeeping data structure failed, retry for minimal commit"));
                speculative_commit_tried = true;
            }
        }
    }
    return true;
}

// Computes, for every bookkeeping element up to and including the seg mapping
// table, the page-aligned range that has to be committed in order to extend
// the bookkeeping coverage from [g_gc_lowest_address, from[ to
// [g_gc_lowest_address, to[.
//
//   from, to       - old and new end of the covered address range.
//   commit_begins  - out: start of the range to commit for each element.
//   commit_sizes   - out: size of the range to commit for each element (may be 0).
//   new_sizes      - out: element sizes needed to cover [g_gc_lowest_address, to[.
//
// Returns false when there is nothing to do (from is not the lowest address
// and to does not exceed from); in that case the commit_* outputs are not
// written. Returns true otherwise.
bool gc_heap::get_card_table_commit_layout (uint8_t* from, uint8_t* to,
                    uint8_t* commit_begins[total_bookkeeping_elements],
                    size_t commit_sizes[total_bookkeeping_elements],
                    size_t new_sizes[total_bookkeeping_elements])
{
    uint8_t* start = g_gc_lowest_address;
    uint8_t* end = g_gc_highest_address;

    // Initial commit: nothing has been covered yet (from is the lowest address).
    // Additional commit: growing an already covered range.
    bool initial_commit = (from == start);
    bool additional_commit = !initial_commit && (to > from);

    if (!initial_commit && !additional_commit)
    {
        return false;
    }
#ifdef _DEBUG
    // Check that the cached layout matches a freshly computed one for the full range.
    size_t offsets[total_bookkeeping_elements + 1];
    get_card_table_element_layout(start, end, offsets);

    dprintf (REGIONS_LOG, ("layout"));
    for (int i = card_table_element; i <= total_bookkeeping_elements; i++)
    {
        assert (offsets[i] == card_table_element_layout[i]);
        dprintf (REGIONS_LOG, ("%zd", card_table_element_layout[i]));
    }
#endif //_DEBUG
    get_card_table_element_sizes (start, to, new_sizes);
#ifdef _DEBUG
    dprintf (REGIONS_LOG, ("new_sizes"));
    for (int i = card_table_element; i < total_bookkeeping_elements; i++)
    {
        dprintf (REGIONS_LOG, ("%zd", new_sizes[i]));
    }
    if (additional_commit)
    {
        // Check that the recorded sizes match what covering [start, from[ requires.
        size_t current_sizes[total_bookkeeping_elements];
        get_card_table_element_sizes (start, from, current_sizes);
        dprintf (REGIONS_LOG, ("old_sizes"));
        for (int i = card_table_element; i < total_bookkeeping_elements; i++)
        {
            assert (current_sizes[i] == bookkeeping_sizes[i]);
            dprintf (REGIONS_LOG, ("%zd", bookkeeping_sizes[i]));
        }
    }
#endif //_DEBUG
    for (int i = card_table_element; i <= seg_mapping_table_element; i++)
    {
        uint8_t* required_begin = nullptr;
        uint8_t* required_end = nullptr;
        uint8_t* commit_begin = nullptr;
        uint8_t* commit_end = nullptr;
        if (initial_commit)
        {
            // For the card table the required range starts at the very beginning of the
            // bookkeeping block, so it includes what precedes the card table's layout offset.
            required_begin = bookkeeping_start + ((i == card_table_element) ? 0 : card_table_element_layout[i]);
            required_end = bookkeeping_start + card_table_element_layout[i] + new_sizes[i];
            commit_begin = align_lower_page(required_begin);
        }
        else
        {
            assert (additional_commit);
            // Only the part beyond the currently recorded size of the element is required.
            required_begin = bookkeeping_start + card_table_element_layout[i] + bookkeeping_sizes[i];
            required_end = required_begin + new_sizes[i] - bookkeeping_sizes[i];
            // NOTE(review): rounds up, presumably because the page containing the old end
            // was already committed by an earlier call - confirm.
            commit_begin = align_on_page(required_begin);
        }
        assert (required_begin <= required_end);
        commit_end = align_on_page(required_end);

        // Do not commit past the last page boundary at or below the start of the next element.
        commit_end = min (commit_end, align_lower_page(bookkeeping_start + card_table_element_layout[i + 1]));
        // Clamp so that the range is never negative; it can be empty.
        commit_begin = min (commit_begin, commit_end);
        assert (commit_begin <= commit_end);

        dprintf (REGIONS_LOG, ("required = [%p, %p), size = %zd", required_begin, required_end, required_end - required_begin));
        dprintf (REGIONS_LOG, ("commit   = [%p, %p), size = %zd", commit_begin, commit_end, commit_end - commit_begin));

        commit_begins[i] = commit_begin;
        commit_sizes[i] = (size_t)(commit_end - commit_begin);
    }
    dprintf (REGIONS_LOG, ("---------------------------------------"));
    return true;
}

// Commits the additional bookkeeping memory needed to extend the covered
// address range from `from` to `to`.
//
// The ranges to commit are computed by get_card_table_commit_layout for every
// element up to and including the seg mapping table. If any commit fails, the
// ranges committed earlier by this call are decommitted again and false is
// returned. On success bookkeeping_sizes is updated to the new element sizes.
// Returns true as well when get_card_table_commit_layout reports that there
// is nothing to commit.
bool gc_heap::inplace_commit_card_table (uint8_t* from, uint8_t* to)
{
    dprintf (REGIONS_LOG, ("inplace_commit_card_table(%p, %p), size = %zd", from, to, to - from));

    uint8_t* commit_begins[total_bookkeeping_elements];
    size_t commit_sizes[total_bookkeeping_elements];
    size_t new_sizes[total_bookkeeping_elements];

    if (!get_card_table_commit_layout(from, to, commit_begins, commit_sizes, new_sizes))
    {
        return true;
    }

    // Index of the first element whose commit failed, or -1 if all succeeded.
    int failed_commit = -1;
    for (int i = card_table_element; i <= seg_mapping_table_element; i++)
    {
        if (commit_sizes[i] == 0)
        {
            continue;
        }

        if (!virtual_commit (commit_begins[i], commit_sizes[i], recorded_committed_bookkeeping_bucket))
        {
            log_init_error_to_host ("Committing %zd bytes (%.3f mb) for GC bookkeeping element#%d failed", commit_sizes[i], mb (commit_sizes[i]), i);
            failed_commit = i;
            break;
        }
    }

    if (failed_commit != -1)
    {
        // Roll back the elements that were committed before the failure.
        for (int i = card_table_element; i < failed_commit; i++)
        {
            bool succeed;
            if (commit_sizes[i] > 0)
            {
                succeed = virtual_decommit (commit_begins[i], commit_sizes[i], recorded_committed_bookkeeping_bucket);
                assert (succeed);
            }
        }
        return false;
    }

    for (int i = card_table_element; i < total_bookkeeping_elements; i++)
    {
        bookkeeping_sizes[i] = new_sizes[i];
    }
    return true;
}
#endif //USE_REGIONS

// Reserves one block of virtual memory for all GC bookkeeping elements
// covering [start, end[, commits the initial part of it, and initializes
// the card_table_info header and the pointers to the individual elements.
// start and end must equal g_gc_lowest_address and g_gc_highest_address.
// Returns the translated card table, or 0 when reserving or committing fails.
uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end)
{
    assert (g_gc_lowest_address == start);
    assert (g_gc_highest_address == end);

    uint32_t virtual_reserve_flags = VirtualReserveFlags::None;
#ifdef CARD_BUNDLE
    if (can_use_write_watch_for_card_table())
    {
#ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
        // If we're not manually managing the card bundles, we will need to use OS write
        // watch APIs over this region to track changes.
        virtual_reserve_flags |= VirtualReserveFlags::WriteWatch;
#endif
    }
#endif //CARD_BUNDLE

    get_card_table_element_layout(start, end, card_table_element_layout);

    // The last layout entry is the total size of the block.
    size_t alloc_size = card_table_element_layout[total_bookkeeping_elements];
    uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (alloc_size, 0, virtual_reserve_flags);
    bookkeeping_start = mem;

    if (!mem)
    {
        log_init_error_to_host ("Reserving %zd bytes (%.3f mb) for GC bookkeeping failed", alloc_size, mb (alloc_size));
        return 0;
    }

    dprintf (2, ("Init - Card table alloc for %zd bytes: [%zx, %zx[",
                 alloc_size, (size_t)mem, (size_t)(mem+alloc_size)));

#ifdef USE_REGIONS
    // With regions, only commit what is needed to cover the range up to the
    // region allocator's current left-used mark.
    if (!inplace_commit_card_table (g_gc_lowest_address, global_region_allocator.get_left_used_unsafe()))
    {
        dprintf (1, ("Card table commit failed"));
        GCToOSInterface::VirtualRelease (mem, alloc_size);
        return 0;
    }
    bookkeeping_covered_committed = global_region_allocator.get_left_used_unsafe();
#else
    // in case of background gc, the mark array will be committed separately (per segment).
    size_t commit_size = card_table_element_layout[seg_mapping_table_element + 1];

    if (!virtual_commit (mem, commit_size, recorded_committed_bookkeeping_bucket))
    {
        dprintf (1, ("Card table commit failed"));
        GCToOSInterface::VirtualRelease (mem, alloc_size);
        return 0;
    }
#endif //USE_REGIONS

    // initialize the ref count
    // ct is the untranslated card table; its header sits right before it.
    uint32_t* ct = (uint32_t*)(mem + card_table_element_layout[card_table_element]);
    card_table_refcount (ct) = 0;
    card_table_lowest_address (ct) = start;
    card_table_highest_address (ct) = end;
    card_table_brick_table (ct) = (short*)(mem + card_table_element_layout[brick_table_element]);
    card_table_size (ct) = alloc_size;
    card_table_next (ct) = 0;

#ifdef CARD_BUNDLE
    card_table_card_bundle_table (ct) = (uint32_t*)(mem + card_table_element_layout[card_bundle_table_element]);

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), g_gc_lowest_address);
#endif
#endif //CARD_BUNDLE

#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && defined (BACKGROUND_GC)
    if (gc_can_use_concurrent)
    {
        SoftwareWriteWatch::InitializeUntranslatedTable(mem + card_table_element_layout[software_write_watch_table_element], start);
    }
#endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && BACKGROUND_GC

#ifdef USE_REGIONS
    map_region_to_generation = (region_info*)(mem + card_table_element_layout[region_to_generation_table_element]);
    // Biased copy of the pointer, offset by the table size of [0, g_gc_lowest_address[.
    map_region_to_generation_skewed = map_region_to_generation - size_region_to_generation_table_of (0, g_gc_lowest_address);
#endif //USE_REGIONS

    // The seg mapping table pointer is biased by the table size of
    // [0, align_lower_segment (g_gc_lowest_address)[.
    seg_mapping_table = (seg_mapping*)(mem + card_table_element_layout[seg_mapping_table_element]);
    seg_mapping_table = (seg_mapping*)((uint8_t*)seg_mapping_table -
                                        size_seg_mapping_table_of (0, (align_lower_segment (g_gc_lowest_address))));

#ifdef BACKGROUND_GC
    if (gc_can_use_concurrent)
        card_table_mark_array (ct) = (uint32_t*)(mem + card_table_element_layout[mark_array_element]);
    else
        card_table_mark_array (ct) = NULL;
#endif //BACKGROUND_GC

    return translate_card_table(ct);
}

// Records a "failed to get memory" result (failure kind f, size s, and the
// loh_p flag). With MULTIPLE_HEAPS the result is recorded on every heap.
void gc_heap::set_fgm_result (failure_get_memory f, size_t s, BOOL loh_p)
{
#ifdef MULTIPLE_HEAPS
    for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
    {
        gc_heap::g_heaps [heap_index]->fgm_result.set_fgm (f, s, loh_p);
    }
#else //MULTIPLE_HEAPS
    fgm_result.set_fgm (f, s, loh_p);
#endif //MULTIPLE_HEAPS
}

#ifndef USE_REGIONS
// Returns 0 for success, -1 otherwise.
// We are doing all the decommitting here because we want to make sure we have
// enough memory to do so - if we did this during copy_brick_card_table and
// failed to decommit, it would make the failure case very complicated to
// handle. This way we can waste some decommit if we call this multiple
// times before the next FGC, but it's easier to handle the failure case.
int gc_heap::grow_brick_card_tables (uint8_t* start,
                                     uint8_t* end,
                                     size_t size,
                                     heap_segment* new_seg,
                                     gc_heap* hp,
                                     BOOL uoh_p)
{
    uint8_t* la = g_gc_lowest_address;
    uint8_t* ha = g_gc_highest_address;
    uint8_t* saved_g_lowest_address = min (start, g_gc_lowest_address);
    uint8_t* saved_g_highest_address = max (end, g_gc_highest_address);
    seg_mapping* new_seg_mapping_table = nullptr;
#ifdef BACKGROUND_GC
    // This value is only for logging purpose - it's not necessarily exactly what we
    // would commit for mark array but close enough for diagnostics purpose.
    size_t logging_ma_commit_size = size_mark_array_of (0, (uint8_t*)size);
#endif //BACKGROUND_GC

    // See if the address is already covered
    if ((la != saved_g_lowest_address ) || (ha != saved_g_highest_address))
    {
        {
            //modify the highest address so the span covered
            //is twice the previous one.
            uint8_t* top = (uint8_t*)0 + Align (GCToOSInterface::GetVirtualMemoryMaxAddress());
            // On non-Windows systems, we get only an approximate value that can possibly be
            // slightly lower than the saved_g_highest_address.
            // In such case, we set the top to the saved_g_highest_address so that the
            // card and brick tables always cover the whole new range.
            if (top < saved_g_highest_address)
            {
                top = saved_g_highest_address;
            }
            size_t ps = ha-la;
#ifdef HOST_64BIT
            if (ps > (uint64_t)200*1024*1024*1024)
                ps += (uint64_t)100*1024*1024*1024;
            else
#endif // HOST_64BIT
                ps *= 2;

            if (saved_g_lowest_address < g_gc_lowest_address)
            {
                if (ps > (size_t)g_gc_lowest_address)
                    saved_g_lowest_address = (uint8_t*)(size_t)OS_PAGE_SIZE;
                else
                {
                    assert (((size_t)g_gc_lowest_address - ps) >= OS_PAGE_SIZE);
                    saved_g_lowest_address = min (saved_g_lowest_address, (g_gc_lowest_address - ps));
                }
            }

            if (saved_g_highest_address > g_gc_highest_address)
            {
                saved_g_highest_address = max ((saved_g_lowest_address + ps), saved_g_highest_address);
                if (saved_g_highest_address > top)
                    saved_g_highest_address = top;
            }
        }
        dprintf (GC_TABLE_LOG, ("Growing card table [%zx, %zx[",
                                (size_t)saved_g_lowest_address,
                                (size_t)saved_g_highest_address));

        bool write_barrier_updated = false;
        uint32_t virtual_reserve_flags = VirtualReserveFlags::None;
        uint32_t* saved_g_card_table = g_gc_card_table;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
        uint32_t* saved_g_card_bundle_table = g_gc_card_bundle_table;
#endif
        get_card_table_element_layout(saved_g_lowest_address, saved_g_highest_address, card_table_element_layout);
        size_t cb = 0;
        uint32_t* ct = 0;
        uint32_t* translated_ct = 0;

#ifdef CARD_BUNDLE
        if (can_use_write_watch_for_card_table())
        {
            cb = size_card_bundle_of (saved_g_lowest_address, saved_g_highest_address);

#ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
            // If we're not manually managing the card bundles, we will need to use OS write
            // watch APIs over this region to track changes.
            virtual_reserve_flags |= VirtualReserveFlags::WriteWatch;
#endif
        }
#endif //CARD_BUNDLE

        size_t alloc_size = card_table_element_layout[total_bookkeeping_elements];
        size_t commit_size = 0;
        uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (alloc_size, 0, virtual_reserve_flags);

        if (!mem)
        {
            set_fgm_result (fgm_grow_table, alloc_size, uoh_p);
            goto fail;
        }

        dprintf (GC_TABLE_LOG, ("Table alloc for %zd bytes: [%zx, %zx[",
                                 alloc_size, (size_t)mem, (size_t)((uint8_t*)mem+alloc_size)));

        {
            // in case of background gc, the mark array will be committed separately (per segment).
            commit_size = card_table_element_layout[seg_mapping_table_element + 1];

            if (!virtual_commit (mem, commit_size, recorded_committed_bookkeeping_bucket))
            {
                commit_size = 0;
                dprintf (GC_TABLE_LOG, ("Table commit failed"));
                set_fgm_result (fgm_commit_table, commit_size, uoh_p);
                goto fail;
            }

        }

        ct = (uint32_t*)(mem + card_table_element_layout[card_table_element]);
        card_table_refcount (ct) = 0;
        card_table_lowest_address (ct) = saved_g_lowest_address;
        card_table_highest_address (ct) = saved_g_highest_address;
        card_table_next (ct) = &g_gc_card_table[card_word (gcard_of (la))];

        //clear the card table
/*
        memclr ((uint8_t*)ct,
                (((saved_g_highest_address - saved_g_lowest_address)*sizeof (uint32_t) /
                  (card_size * card_word_width))
                 + sizeof (uint32_t)));
*/
        // No initialization needed, will be done in copy_brick_card

        card_table_brick_table (ct) = (short*)(mem + card_table_element_layout[brick_table_element]);

#ifdef CARD_BUNDLE
        card_table_card_bundle_table (ct) = (uint32_t*)(mem + card_table_element_layout[card_bundle_table_element]);
        //set all bundle to look at all of the cards
        memset(card_table_card_bundle_table (ct), 0xFF, cb);
#endif //CARD_BUNDLE

        new_seg_mapping_table = (seg_mapping*)(mem + card_table_element_layout[seg_mapping_table_element]);
        new_seg_mapping_table = (seg_mapping*)((uint8_t*)new_seg_mapping_table -
                                            size_seg_mapping_table_of (0, (align_lower_segment (saved_g_lowest_address))));
        memcpy(&new_seg_mapping_table[seg_mapping_word_of(g_gc_lowest_address)],
            &seg_mapping_table[seg_mapping_word_of(g_gc_lowest_address)],
            size_seg_mapping_table_of(g_gc_lowest_address, g_gc_highest_address));

        // new_seg_mapping_table gets assigned to seg_mapping_table at the bottom of this function,
        // not here. The reason for this is that, if we fail at mark array committing (OOM) and we've
        // already switched seg_mapping_table to point to the new mapping table, we'll decommit it and
        // run into trouble. By not assigning here, we're making sure that we will not change seg_mapping_table
        // if an OOM occurs.

#ifdef BACKGROUND_GC
        if(gc_can_use_concurrent)
            card_table_mark_array (ct) = (uint32_t*)(mem + card_table_element_layout[mark_array_element]);
        else
            card_table_mark_array (ct) = NULL;
#endif //BACKGROUND_GC

        translated_ct = translate_card_table (ct);

#ifdef BACKGROUND_GC
        dprintf (GC_TABLE_LOG, ("card table: %zx(translated: %zx), seg map: %zx, mark array: %zx",
            (size_t)ct, (size_t)translated_ct, (size_t)new_seg_mapping_table, (size_t)card_table_mark_array (ct)));

        if (is_bgc_in_progress())
        {
            dprintf (GC_TABLE_LOG, ("new low: %p, new high: %p, latest mark array is %p(translate: %p)",
                                    saved_g_lowest_address, saved_g_highest_address,
                                    card_table_mark_array (ct),
                                    translate_mark_array (card_table_mark_array (ct))));
            uint32_t* new_mark_array = (uint32_t*)((uint8_t*)card_table_mark_array (ct) - size_mark_array_of (0, saved_g_lowest_address));
            if (!commit_new_mark_array_global (new_mark_array))
            {
                dprintf (GC_TABLE_LOG, ("failed to commit portions in the mark array for existing segments"));
                set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p);
                goto fail;
            }

            if (!commit_mark_array_new_seg (hp, new_seg, translated_ct, saved_g_lowest_address))
            {
                dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new seg"));
                set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p);
                goto fail;
            }
        }
        else
        {
            clear_commit_flag_global();
        }
#endif //BACKGROUND_GC

#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && defined(BACKGROUND_GC)
        if (gc_can_use_concurrent)
        {
            // The current design of software write watch requires that the runtime is suspended during resize. Suspending
            // on resize is preferred because it is a far less frequent operation than GetWriteWatch() / ResetWriteWatch().
            // Suspending here allows copying dirty state from the old table into the new table, and not have to merge old
            // table info lazily as done for card tables.

            // Either this thread was the thread that did the suspension which means we are suspended; or this is called
            // from a GC thread which means we are in a blocking GC and also suspended.
            bool is_runtime_suspended = GCToEEInterface::IsGCThread();
            if (!is_runtime_suspended)
            {
                // Note on points where the runtime is suspended anywhere in this function. Upon an attempt to suspend the
                // runtime, a different thread may suspend first, causing this thread to block at the point of the suspend call.
                // So, at any suspend point, externally visible state needs to be consistent, as code that depends on that state
                // may run while this thread is blocked. This includes updates to g_gc_card_table, g_gc_lowest_address, and
                // g_gc_highest_address.
                suspend_EE();
            }

            g_gc_card_table = translated_ct;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
            g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), saved_g_lowest_address);
#endif

            SoftwareWriteWatch::SetResizedUntranslatedTable(
                mem + card_table_element_layout[software_write_watch_table_element],
                saved_g_lowest_address,
                saved_g_highest_address);

            seg_mapping_table = new_seg_mapping_table;

            // Since the runtime is already suspended, update the write barrier here as well.
            // This passes a bool telling whether we need to switch to the post
            // grow version of the write barrier.  This test tells us if the new
            // segment was allocated at a lower address than the old, requiring
            // that we start doing an upper bounds check in the write barrier.
            g_gc_lowest_address = saved_g_lowest_address;
            g_gc_highest_address = saved_g_highest_address;
            stomp_write_barrier_resize(true, la != saved_g_lowest_address);
            write_barrier_updated = true;

            if (!is_runtime_suspended)
            {
                restart_EE();
            }
        }
        else
#endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && BACKGROUND_GC
        {
            g_gc_card_table = translated_ct;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
            g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), saved_g_lowest_address);
#endif
        }

        if (!write_barrier_updated)
        {
            seg_mapping_table = new_seg_mapping_table;
            minipal_memory_barrier_process_wide();
            g_gc_lowest_address = saved_g_lowest_address;
            g_gc_highest_address = saved_g_highest_address;

            // This passes a bool telling whether we need to switch to the post
            // grow version of the write barrier.  This test tells us if the new
            // segment was allocated at a lower address than the old, requiring
            // that we start doing an upper bounds check in the write barrier.
            // This will also suspend the runtime if the write barrier type needs
            // to be changed, so we are doing this after all global state has
            // been updated. See the comment above suspend_EE() above for more
            // info.
            stomp_write_barrier_resize(GCToEEInterface::IsGCThread(), la != saved_g_lowest_address);
        }

        return 0;

fail:
        if (mem)
        {
            assert(g_gc_card_table == saved_g_card_table);

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
            assert(g_gc_card_bundle_table  == saved_g_card_bundle_table);
#endif

            if (!GCToOSInterface::VirtualRelease (mem, alloc_size))
            {
                dprintf (GC_TABLE_LOG, ("GCToOSInterface::VirtualRelease failed"));
                assert (!"release failed");
            }
            reduce_committed_bytes (mem, commit_size, recorded_committed_bookkeeping_bucket, -1, true);
        }

        return -1;
    }
    else
    {
#ifdef BACKGROUND_GC
        if (is_bgc_in_progress())
        {
            dprintf (GC_TABLE_LOG, ("in range new seg %p, mark_array is %p", new_seg, hp->mark_array));
            if (!commit_mark_array_new_seg (hp, new_seg))
            {
                dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new seg in range"));
                set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p);
                return -1;
            }
        }
#endif //BACKGROUND_GC
    }

    return 0;
}

// Copy all of the arrays managed by the card table for a page aligned range.
//
// Brings this heap's current bookkeeping tables up to date for [start, end[ using
// the tables that were in use before the card table was replaced:
//  - brick table entries are copied from old_brick_table (when one is supplied),
//  - mark bits are copied from the old mark array while a background GC is running,
//  - card words are OR-ed in from the card tables on the card_table_next chain.
//
// Parameters:
//  la              - address the old tables are indexed relative to (the caller in
//                    this file passes the heap's previous lowest_address)
//  old_card_table  - the previous (translated) card table of this heap
//  old_brick_table - the previous brick table, or NULL to skip the brick copy
//  start, end      - the address range to copy for
void gc_heap::copy_brick_card_range (uint8_t* la, uint32_t* old_card_table,
                                     short* old_brick_table,
                                     uint8_t* start, uint8_t* end)
{
    // The old brick table is indexed relative to la, so compute the offset of start in it.
    ptrdiff_t brick_offset = brick_of (start) - brick_of (la);
    dprintf (2, ("copying tables for range [%zx %zx[", (size_t)start, (size_t)end));

    // copy brick table
    short* brick_start = &brick_table [brick_of (start)];
    if (old_brick_table)
    {
        // segments are always on page boundaries
        memcpy (brick_start, &old_brick_table[brick_offset],
                size_brick_of (start, end));
    }

    // Entry for la in the old card table; the card_table_* accessors below are applied to it.
    uint32_t* old_ct = &old_card_table[card_word (card_of (la))];

#ifdef BACKGROUND_GC
    if (gc_heap::background_running_p())
    {
        uint32_t* old_mark_array = card_table_mark_array (old_ct);

        // We don't need to go through all the card tables here because
        // we only need to copy from the GC version of the mark array - when we
        // mark (even in allocate_uoh_object) we always use that mark array.
        if ((card_table_highest_address (old_ct) >= start) &&
            (card_table_lowest_address (old_ct) <= end))
        {
            // Only the part of the range that overlaps the address range saved for
            // the background GC has mark bits worth copying.
            if ((background_saved_highest_address >= start) &&
                (background_saved_lowest_address <= end))
            {
                //copy the mark bits
                // segments are always on page boundaries
                uint8_t* m_start = max (background_saved_lowest_address, start);
                uint8_t* m_end = min (background_saved_highest_address, end);
                // The destination is indexed by absolute mark word, the old mark array
                // relative to the mark word of la.
                memcpy (&mark_array[mark_word_of (m_start)],
                        &old_mark_array[mark_word_of (m_start) - mark_word_of (la)],
                        size_mark_array_of (m_start, m_end));
            }
        }
        else
        {
            //only large segments can be out of range
            assert (old_brick_table == 0);
        }
    }
#endif //BACKGROUND_GC

    // n way merge with all of the card table ever used in between
    // Start with the table that follows the current card table on the card_table_next chain.
    uint32_t* ct = card_table_next (&card_table[card_word (card_of(lowest_address))]);

    assert (ct);
    // Walk the chain until ct reaches the table that follows old_ct.
    while (card_table_next (old_ct) != ct)
    {
        //copy if old card table contained [start, end[
        if ((card_table_highest_address (ct) >= end) &&
            (card_table_lowest_address (ct) <= start))
        {
            // or the card_tables
            size_t start_word = card_word (card_of (start));

            uint32_t* dest = &card_table[start_word];
            uint32_t* src = &((translate_card_table (ct))[start_word]);
            ptrdiff_t count = count_card_of (start, end);
            for (int x = 0; x < count; x++)
            {
                // OR rather than assign so that cards already set in the destination are kept.
                *dest |= *src;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
                // A non-zero source word means the corresponding card bundle must be set too.
                if (*src != 0)
                {
                    card_bundle_set(cardw_card_bundle(start_word+x));
                }
#endif

                dest++;
                src++;
            }
        }
        ct = card_table_next (ct);
    }
}

// Switches this heap over to the current global card table (g_gc_card_table) and
// carries the contents of the heap's previous tables over to it.
//
// The steps are:
//  1. remember the heap's current card/brick tables and lowest address,
//  2. take ownership of the global card table (own_card_table) and update the
//     per-heap card table, address range, brick table, mark array and card
//     bundle table from it,
//  3. for every segment of every generation, copy the old bookkeeping data for
//     the segment's range with copy_brick_card_range (read-only segments that are
//     not in range are only re-checked for being in range),
//  4. release the heap's previous card table.
void gc_heap::copy_brick_card_table()
{
    // Remember the tables we are switching away from; they are the copy source below.
    uint32_t* old_card_table = card_table;
    short* old_brick_table = brick_table;

    uint8_t* la = lowest_address;
#ifdef _DEBUG
    // The old card table must describe exactly the range this heap currently uses.
    uint8_t* ha = highest_address;
    assert (la == card_table_lowest_address (&old_card_table[card_word (card_of (la))]));
    assert (ha == card_table_highest_address (&old_card_table[card_word (card_of (la))]));
#endif //_DEBUG

    /* todo: Need a global lock for this */
    // ct is the entry for g_gc_lowest_address in the global card table; the
    // card_table_* accessors below are applied to it.
    uint32_t* ct = &g_gc_card_table[card_word (gcard_of (g_gc_lowest_address))];
    own_card_table (ct);
    card_table = translate_card_table (ct);
    bookkeeping_start = (uint8_t*)ct - sizeof(card_table_info);
    card_table_size(ct) = card_table_element_layout[total_bookkeeping_elements];
    /* End of global lock */
    highest_address = card_table_highest_address (ct);
    lowest_address = card_table_lowest_address (ct);

    brick_table = card_table_brick_table (ct);

#ifdef BACKGROUND_GC
    if (gc_can_use_concurrent)
    {
        mark_array = translate_mark_array (card_table_mark_array (ct));
        // g_gc_highest_address is expected to already be aligned on a mark word.
        assert (mark_word_of (g_gc_highest_address) ==
            mark_word_of (align_on_mark_word (g_gc_highest_address)));
    }
    else
        mark_array = NULL;
#endif //BACKGROUND_GC

#ifdef CARD_BUNDLE
    card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct), g_gc_lowest_address);

    // Ensure that the word that represents g_gc_lowest_address in the translated table is located at the
    // start of the untranslated table.
    assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_gc_lowest_address))))] ==
            card_table_card_bundle_table (ct));

    //set the card bundles if we are in a heap growth scenario
    // (all bundles covering [lowest_address, highest_address] are set)
    if (card_bundles_enabled())
    {
        card_bundles_set (cardw_card_bundle (card_word (card_of (lowest_address))),
                          cardw_card_bundle (align_cardw_on_bundle (card_word (card_of (highest_address)))));
    }
    //check if we need to turn on card_bundles.
#ifdef MULTIPLE_HEAPS
    // use INT64 arithmetic here because of possible overflow on 32p
    uint64_t th = (uint64_t)MH_TH_CARD_BUNDLE*gc_heap::n_heaps;
#else
    // use INT64 arithmetic here because of possible overflow on 32p
    uint64_t th = (uint64_t)SH_TH_CARD_BUNDLE;
#endif //MULTIPLE_HEAPS
    // Card bundles are enabled once the reserved memory reaches the threshold.
    if (reserved_memory >= th)
    {
        enable_card_bundles();
    }
#endif //CARD_BUNDLE

    // for each of the segments and heaps, copy the brick table and
    // or the card table
    for (int i = get_start_generation_index(); i < total_generation_count; i++)
    {
        heap_segment* seg = generation_start_segment (generation_of (i));
        while (seg)
        {
            if (heap_segment_read_only_p (seg) && !heap_segment_in_range_p (seg))
            {
                //check if it became in range
                // (the range may have changed with the new card table)
                if ((heap_segment_reserved (seg) > lowest_address) &&
                    (heap_segment_mem (seg) < highest_address))
                {
                    set_ro_segment_in_range (seg);
                }
            }
            else
            {
                // Copy for the page aligned range [mem, allocated[ of the segment.
                // The old brick table is only passed for generations below uoh_start_generation.
                uint8_t* end = align_on_page (heap_segment_allocated (seg));
                copy_brick_card_range (la, old_card_table,
                    (i < uoh_start_generation) ? old_brick_table : NULL,
                    align_lower_page (heap_segment_mem (seg)),
                    end);
            }
            seg = heap_segment_next (seg);
        }
    }

    // This heap no longer uses its previous card table.
    release_card_table (&old_card_table[card_word (card_of(la))]);
}

void gc_heap::copy_brick_card_table_on_growth ()
{
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        if (g_gc_card_table != hp->card_table)
        {
            hp->copy_brick_card_table ();
        }
    }
}
#endif //!USE_REGIONS

#ifdef FEATURE_BASICFREEZE
// Inserts a read-only segment into this heap.
// Note that we always insert at the head of the max_generation segment list.
//
// The whole operation runs under gc_heap::gc_lock.
//
// Returns FALSE (with nothing inserted) if the segment table cannot make room for
// the new entry or, while a background GC is in progress, if the mark array for
// the segment cannot be committed. Returns TRUE otherwise.
BOOL gc_heap::insert_ro_segment (heap_segment* seg)
{
#ifdef FEATURE_EVENT_TRACE
    // Record that frozen segments are in use.
    if (!use_frozen_segments_p)
        use_frozen_segments_p = true;
#endif //FEATURE_EVENT_TRACE

    enter_spin_lock (&gc_heap::gc_lock);

    // Do everything that can fail before touching any list or table.
    if (!gc_heap::seg_table->ensure_space_for_insert ()
#ifdef BACKGROUND_GC
        || (is_bgc_in_progress() && !commit_mark_array_new_seg(__this, seg))
#endif //BACKGROUND_GC
        )
    {
        leave_spin_lock(&gc_heap::gc_lock);
        return FALSE;
    }

    // Link the segment in as the new head of the max_generation segment list.
    generation* gen2 = generation_of (max_generation);
    heap_segment* oldhead = generation_start_segment (gen2);
    heap_segment_next (seg) = oldhead;
    generation_start_segment (gen2) = seg;

#ifdef USE_REGIONS
    dprintf (REGIONS_LOG, ("setting gen2 start seg to %zx(%p)->%p",
        (size_t)seg, heap_segment_mem (seg), heap_segment_mem (oldhead)));

    // The first read-only segment inserted becomes the tail ro region; later ones
    // are inserted in front of it.
    if (generation_tail_ro_region (gen2) == 0)
    {
        dprintf (REGIONS_LOG, ("setting gen2 tail ro -> %p", heap_segment_mem (seg)));
        generation_tail_ro_region (gen2) = seg;
    }
#endif //USE_REGIONS

    // Register the segment in the segment table (keyed by its start address) and
    // in the segment mapping table.
    seg_table->insert (heap_segment_mem(seg), (size_t)seg);

    seg_mapping_table_add_ro_segment (seg);

#ifdef USE_REGIONS
    // For regions ro segments are always out of range.
    assert (!((heap_segment_reserved (seg) > lowest_address) &&
        (heap_segment_mem (seg) < highest_address)));
#else
    // Mark the segment as in range if it overlaps [lowest_address, highest_address].
    if ((heap_segment_reserved (seg) > lowest_address) &&
        (heap_segment_mem (seg) < highest_address))
    {
        set_ro_segment_in_range (seg);
    }
#endif //USE_REGIONS

    FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(seg), (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)), gc_etw_segment_read_only_heap);

    leave_spin_lock (&gc_heap::gc_lock);
    return TRUE;
}

// Updates the allocated and committed pointers of a read-only segment.
// The update is performed while holding gc_heap::gc_lock.
//
// Expected (checked by asserts only): seg is a read-only segment and
// allocated <= committed <= heap_segment_reserved (seg).
void gc_heap::update_ro_segment (heap_segment* seg, uint8_t* allocated, uint8_t* committed)
{
    enter_spin_lock (&gc_heap::gc_lock);

    assert (heap_segment_read_only_p (seg));
    assert (allocated <= committed);
    assert (committed <= heap_segment_reserved (seg));
    heap_segment_allocated (seg) = allocated;
    heap_segment_committed (seg) = committed;

    leave_spin_lock (&gc_heap::gc_lock);
}

// Removes a read-only segment from this heap: clears its mark array bits (when
// concurrent GC can be used and the segment has mark array memory committed),
// removes it from the segment table and the segment mapping table, and unlinks
// it from the max_generation segment list.
//
// No one is calling this function right now. If this is getting called we need
// to take care of decommitting the mark array for it - we will need to remember
// which portion of the mark array was committed and only decommit that.
void gc_heap::remove_ro_segment (heap_segment* seg)
{
    //clear the mark bits so a new segment allocated in its place will have a clear mark bits
    // Note that this happens before gc_lock is taken.
#ifdef BACKGROUND_GC
    if (gc_can_use_concurrent)
    {
        // Only segments with a fully or partially committed mark array have bits to clear.
        if ((seg->flags & heap_segment_flags_ma_committed) || (seg->flags & heap_segment_flags_ma_pcommitted))
        {
            seg_clear_mark_array_bits_soh (seg);
        }
    }
#endif //BACKGROUND_GC

    enter_spin_lock (&gc_heap::gc_lock);

    seg_table->remove (heap_segment_mem (seg));
    seg_mapping_table_remove_ro_segment (seg);

    // Locate segment (and previous segment) in the list.
    generation* gen2 = generation_of (max_generation);

#ifdef USE_REGIONS
    // If the segment being removed is the recorded tail ro region, reset that record.
    if (generation_tail_ro_region (gen2) == seg)
    {
        generation_tail_ro_region (gen2) = 0;
    }
#endif //USE_REGIONS

    heap_segment* curr_seg = generation_start_segment (gen2);
    heap_segment* prev_seg = NULL;

    while (curr_seg && curr_seg != seg)
    {
        prev_seg = curr_seg;
        curr_seg = heap_segment_next (curr_seg);
    }
    // The segment is expected to be on the list; this is only checked by the assert.
    assert (curr_seg == seg);

    // Patch previous segment (or list head if there is none) to skip the removed segment.
    if (prev_seg)
        heap_segment_next (prev_seg) = heap_segment_next (curr_seg);
    else
        generation_start_segment (gen2) = heap_segment_next (curr_seg);

    leave_spin_lock (&gc_heap::gc_lock);
}
#endif //FEATURE_BASICFREEZE

// Allocates an array of `size` mark list entries (object pointers) and returns it.
// The allocation uses the nothrow form of new, so callers are expected to check
// the result for a failed allocation.
uint8_t** make_mark_list (size_t size)
{
    return new (nothrow) uint8_t* [size];
}

#define swap(a,b){uint8_t* t; t = a; a = b; b = t;}

// Verifies that the entries in [low, high] (both ends inclusive) are in
// non-decreasing order. Any adjacent pair that is out of order triggers
// FATAL_GC_ERROR.
void verify_qsort_array (uint8_t* *low, uint8_t* *high)
{
    for (uint8_t** cur = low + 1; cur <= high; cur++)
    {
        uint8_t** prev = cur - 1;
        if (*cur < *prev)
        {
            FATAL_GC_ERROR();
        }
    }
}

#ifndef USE_INTROSORT
// Sorts the entries in [low, high] (both ends inclusive) in ascending order.
//
// Small ranges, and ranges reached at a recursion depth above 100, are handled
// with an insertion sort. Everything else is partitioned around a
// median-of-three pivot and the two halves are sorted recursively.
void qsort1( uint8_t* *low, uint8_t* *high, unsigned int depth)
{
    const bool use_insertion_sort = ((low + 16) >= high) || (depth > 100);
    if (use_insertion_sort)
    {
        // Insertion sort: shift larger entries up and drop each key into its slot.
        for (uint8_t** cur = low + 1; cur <= high; cur++)
        {
            uint8_t* key = *cur;
            uint8_t** slot = cur;
            while ((slot > low) && (key < *(slot - 1)))
            {
                *slot = *(slot - 1);
                slot--;
            }
            *slot = key;
        }
        return;
    }

    // Order the first, middle and last entries so that
    // *low <= *mid <= *high; the middle one becomes the pivot.
    uint8_t** mid = low + ((high - low) / 2);
    if (*mid < *low)
        swap (*mid, *low);
    if (*high < *low)
        swap (*low, *high);
    if (*high < *mid)
        swap (*mid, *high);

    // Park the pivot just below the top entry. *low and the parked pivot act as
    // sentinels for the two scans below, so neither scan needs a bounds check.
    uint8_t** pivot_slot = high - 1;
    swap (*mid, *pivot_slot);
    uint8_t* pivot = *pivot_slot;

    uint8_t** left = low;
    uint8_t** right = pivot_slot;
    for (;;)
    {
        do { right--; } while (*right > pivot);
        do { left++; } while (*left < pivot);
        if (left >= right)
            break;
        swap (*left, *right);
    }

    // Move the pivot to its final position and sort both sides of it.
    swap (*left, *pivot_slot);
    qsort1 (low, left - 1, depth + 1);
    qsort1 (left + 1, high, depth + 1);
}
#endif //USE_INTROSORT

#ifdef USE_VXSORT
// Sorts item_array[0 .. item_count - 1] in place.
//
// A vectorized sort (AVX512F or AVX2 on AMD64, NEON on ARM64) is used when the
// instruction set is supported and the list is larger than the corresponding
// threshold; otherwise the list is sorted with introsort.
//
// range_low and range_high are passed through to the vectorized sorts. Debug
// builds additionally assert that the result is in ascending order and that
// every element lies within [range_low, range_high].
static void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high)
{
    // above this threshold, using AVX2 for sorting will likely pay off
    // despite possible downclocking on some devices
    const ptrdiff_t AVX2_THRESHOLD_SIZE = 8 * 1024;

    // above this threshold, using AVX512F for sorting will likely pay off
    // despite possible downclocking on current devices
    const ptrdiff_t AVX512F_THRESHOLD_SIZE = 128 * 1024;

    // above this threshold, using NEON for sorting will likely pay off
    const ptrdiff_t NEON_THRESHOLD_SIZE = 1024;

    // Nothing to sort for zero or one item.
    if (item_count <= 1)
        return;

    // Note: the "if" of the if/else below is supplied by whichever platform
    // section is compiled in; the "else" after the #endif is shared.
#if defined(TARGET_AMD64)
    if (IsSupportedInstructionSet (InstructionSet::AVX2) && (item_count > AVX2_THRESHOLD_SIZE))
    {
        dprintf(3, ("Sorting mark lists"));

        // use AVX512F only if the list is large enough to pay for downclocking impact
        if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE))
        {
            do_vxsort_avx512 (item_array, &item_array[item_count - 1], range_low, range_high);
        }
        else
        {
            do_vxsort_avx2 (item_array, &item_array[item_count - 1], range_low, range_high);
        }
    }
#elif defined(TARGET_ARM64)
    if (IsSupportedInstructionSet (InstructionSet::NEON) && (item_count > NEON_THRESHOLD_SIZE))
    {
        dprintf(3, ("Sorting mark lists"));
        do_vxsort_neon (item_array, &item_array[item_count - 1], range_low, range_high);
    }
#endif
    else
    {
        // Fallback: instruction set not supported or list at or below the threshold.
        dprintf (3, ("Sorting mark lists"));
        introsort::sort (item_array, &item_array[item_count - 1], 0);
    }
#ifdef _DEBUG
    // check the array is sorted
    for (ptrdiff_t i = 0; i < item_count - 1; i++)
    {
        assert (item_array[i] <= item_array[i + 1]);
    }
    // check that the ends of the array are indeed in range
    // together with the above this implies all elements are in range
    assert ((range_low <= item_array[0]) && (item_array[item_count - 1] <= range_high));
#endif
}
#endif //USE_VXSORT

#ifdef MULTIPLE_HEAPS
// Returns the number of mark list entries heap `heap_number` should hold when
// `total_mark_count` entries are spread evenly over `heap_count` heaps.
// Every heap gets the average rounded down; the last heap (heap_count - 1)
// additionally gets whatever is left over from the rounding.
static size_t target_mark_count_for_heap (size_t total_mark_count, int heap_count, int heap_number)
{
    // share for every heap (rounded down)
    const size_t even_share = total_mark_count / heap_count;

    const bool is_last_heap = (heap_number == (heap_count - 1));
    if (!is_last_heap)
    {
        return even_share;
    }

    // entries that do not divide evenly go to the last heap
    const size_t leftover = total_mark_count % heap_count;
    return (even_share + leftover);
}
// Evens out the mark list sizes across heaps so that each heap ends up with its
// target share (see target_mark_count_for_heap) of the total.
//
// Only this heap's own mark list is written: the redistribution for the heaps
// with an index lower than heap_number is merely replayed on a local copy of the
// counts, and entries are actually copied only when the heap being filled is
// this heap.
//
// total_mark_list_size is the total number of entries over all heaps (it is only
// used in an assert against the locally computed total).
//
// Returns a pointer just past the last entry of this heap's mark list after
// equalization.
NOINLINE
uint8_t** gc_heap::equalize_mark_lists (size_t total_mark_list_size)
{
    size_t local_mark_count[MAX_SUPPORTED_CPUS];
    size_t total_mark_count = 0;

    // compute mark count per heap into a local array
    // compute the total
    for (int i = 0; i < n_heaps; i++)
    {
        gc_heap* hp = g_heaps[i];
        size_t mark_count = hp->mark_list_index - hp->mark_list;
        local_mark_count[i] = mark_count;
        total_mark_count += mark_count;
    }

    // this should agree with our input parameter
    assert(total_mark_count == total_mark_list_size);

    // compute the target count for this heap
    size_t this_target_mark_count = target_mark_count_for_heap (total_mark_count, n_heaps, heap_number);

    // if our heap has sufficient entries, we can exit early
    // (only the first this_target_mark_count entries are kept as this heap's list)
    if (local_mark_count[heap_number] >= this_target_mark_count)
        return (mark_list + this_target_mark_count);

    // In the following, we try to fill the deficit in heap "deficit_heap_index" with
    // surplus from "surplus_heap_index".
    // If there is no deficit or surplus (anymore), the indices are advanced.
    // surplus_heap_index is deliberately kept across iterations of the outer loop.
    int surplus_heap_index = 0;
    for (int deficit_heap_index = 0; deficit_heap_index <= heap_number; deficit_heap_index++)
    {
        // compute the target count for this heap - last heap has the remainder
        size_t deficit_target_mark_count = target_mark_count_for_heap (total_mark_count, n_heaps, deficit_heap_index);

        // if this heap has the target or larger count, skip it
        if (local_mark_count[deficit_heap_index] >= deficit_target_mark_count)
            continue;

        // while this heap is lower than average, fill it up
        while ((surplus_heap_index < n_heaps) && (local_mark_count[deficit_heap_index] < deficit_target_mark_count))
        {
            size_t deficit = deficit_target_mark_count - local_mark_count[deficit_heap_index];

            size_t surplus_target_mark_count = target_mark_count_for_heap(total_mark_count, n_heaps, surplus_heap_index);

            if (local_mark_count[surplus_heap_index] > surplus_target_mark_count)
            {
                size_t surplus = local_mark_count[surplus_heap_index] - surplus_target_mark_count;
                size_t amount_to_transfer = min(deficit, surplus);
                // Entries are taken from the end of the surplus heap's list, so its
                // count is reduced first and then used as the source index.
                local_mark_count[surplus_heap_index] -= amount_to_transfer;
                if (deficit_heap_index == heap_number)
                {
                    // copy amount_to_transfer mark list items
                    // (appended after the entries this heap currently has)
                    memcpy(&g_heaps[deficit_heap_index]->mark_list[local_mark_count[deficit_heap_index]],
                           &g_heaps[surplus_heap_index]->mark_list[local_mark_count[surplus_heap_index]],
                           (amount_to_transfer*sizeof(mark_list[0])));
                }
                local_mark_count[deficit_heap_index] += amount_to_transfer;
            }
            else
            {
                // this heap has no surplus (left) - move on to the next candidate
                surplus_heap_index++;
            }
        }
    }
    return (mark_list + local_mark_count[heap_number]);
}

NOINLINE
size_t gc_heap::sort_mark_list()
{
    if ((settings.condemned_generation >= max_generation)
#ifdef USE_REGIONS
      || (g_mark_list_piece == nullptr)
#endif //USE_REGIONS
        )
    {
        // fake a mark list overflow so merge_mark_lists knows to quit early
        mark_list_index = mark_list_end + 1;
        return 0;
    }

    // if this heap had a mark list overflow, we don't do anything
    if (mark_list_index > mark_list_end)
    {
        dprintf (2, ("h%d sort_mark_list overflow", heap_number));
        mark_list_overflow = true;
        return 0;
    }

    // if any other heap had a mark list overflow, we fake one too,
    // so we don't use an incomplete mark list by mistake
    for (int i = 0; i < n_heaps; i++)
    {
        if (g_heaps[i]->mark_list_index > g_heaps[i]->mark_list_end)
        {
            mark_list_index = mark_list_end + 1;
            dprintf (2, ("h%d sort_mark_list: detected overflow on heap %d", heap_number, i));
            return 0;
        }
    }

    // compute total mark list size and total ephemeral size
    size_t total_mark_list_size = 0;
    size_t total_ephemeral_size = 0;
    uint8_t* low = (uint8_t*)~0;
    uint8_t* high = 0;
    for (int i = 0; i < n_heaps; i++)
    {
        gc_heap* hp = g_heaps[i];
        total_mark_list_size += (hp->mark_list_index - hp->mark_list);
#ifdef USE_REGIONS
        // iterate through the ephemeral regions to get a tighter bound
        for (int gen_num = settings.condemned_generation; gen_num >= 0; gen_num--)
        {
            generation* gen = hp->generation_of (gen_num);
            for (heap_segment* seg = generation_start_segment (gen); seg != nullptr; seg = heap_segment_next (seg))
            {
                size_t ephemeral_size = heap_segment_allocated (seg) - heap_segment_mem (seg);
                total_ephemeral_size += ephemeral_size;
                low = min (low, heap_segment_mem (seg));
                high = max (high, heap_segment_allocated (seg));
            }
        }
#else //USE_REGIONS
        size_t ephemeral_size = heap_segment_allocated (hp->ephemeral_heap_segment) - hp->gc_low;
        total_ephemeral_size += ephemeral_size;
        low = min (low, hp->gc_low);
        high = max (high, heap_segment_allocated (hp->ephemeral_heap_segment));
#endif //USE_REGIONS
    }

    // give up if the mark list size is unreasonably large
    if (total_mark_list_size > (total_ephemeral_size / 256))
    {
        mark_list_index = mark_list_end + 1;
        // let's not count this as a mark list overflow
        dprintf (2, ("h%d total mark list %zd is too large > (%zd / 256), don't use",
            heap_number, total_mark_list_size, total_ephemeral_size));
        mark_list_overflow = false;
        return 0;
    }

    uint8_t **local_mark_list_index = equalize_mark_lists (total_mark_list_size);

#ifdef USE_VXSORT
    ptrdiff_t item_count = local_mark_list_index - mark_list;
//#define WRITE_SORT_DATA
#if defined(_DEBUG) || defined(WRITE_SORT_DATA)
        // in debug, make a copy of the mark list
        // for checking and debugging purposes
    uint8_t** mark_list_copy = &g_mark_list_copy[heap_number * mark_list_size];
    uint8_t** mark_list_copy_index = &mark_list_copy[item_count];
    for (ptrdiff_t i = 0; i < item_count; i++)
    {
        uint8_t* item = mark_list[i];
        assert ((low <= item) && (item < high));
        mark_list_copy[i] = item;
    }
#endif // _DEBUG || WRITE_SORT_DATA

    do_vxsort (mark_list, item_count, low, high);

#ifdef WRITE_SORT_DATA
    char file_name[256];
    sprintf_s (file_name, ARRAY_SIZE(file_name), "sort_data_gc%d_heap%d", settings.gc_index, heap_number);

    FILE* f;
    errno_t err = fopen_s (&f, file_name, "wb");

    if (err == 0)
    {
        size_t magic = 'SDAT';
        if (fwrite (&magic, sizeof(magic), 1, f) != 1)
            dprintf (3, ("fwrite failed\n"));
        if (fwrite (&elapsed_cycles, sizeof(elapsed_cycles), 1, f) != 1)
            dprintf (3, ("fwrite failed\n"));
        if (fwrite (&low, sizeof(low), 1, f) != 1)
            dprintf (3, ("fwrite failed\n"));
        if (fwrite (&item_count, sizeof(item_count), 1, f) != 1)
            dprintf (3, ("fwrite failed\n"));
        if (fwrite (mark_list_copy, sizeof(mark_list_copy[0]), item_count, f) != item_count)
            dprintf (3, ("fwrite failed\n"));
        if (fwrite (&magic, sizeof(magic), 1, f) != 1)
            dprintf (3, ("fwrite failed\n"));
        if (fclose (f) != 0)
            dprintf (3, ("fclose failed\n"));
    }
#endif

#ifdef _DEBUG
    // in debug, sort the copy as well using the proven sort, so we can check we got the right result
    if (mark_list_copy_index > mark_list_copy)
    {
        introsort::sort (mark_list_copy, mark_list_copy_index - 1, 0);
    }
    for (ptrdiff_t i = 0; i < item_count; i++)
    {
        uint8_t* item = mark_list[i];
        assert (mark_list_copy[i] == item);
    }
#endif //_DEBUG

#else //USE_VXSORT
    dprintf (3, ("Sorting mark lists"));
    if (local_mark_list_index > mark_list)
    {
        introsort::sort (mark_list, local_mark_list_index - 1, 0);
    }
#endif //USE_VXSORT

    uint8_t** x = mark_list;

#ifdef USE_REGIONS
    // first set the pieces for all regions to empty
    assert (g_mark_list_piece_size >= region_count);
    assert (g_mark_list_piece_total_size >= region_count*n_heaps);
    for (size_t region_index = 0; region_index < region_count; region_index++)
    {
        mark_list_piece_start[region_index] = NULL;
        mark_list_piece_end[region_index] = NULL;
    }

    // predicate means: x is still within the mark list, and within the bounds of this region
#define predicate(x) (((x) < local_mark_list_index) && (*(x) < region_limit))

    while (x < local_mark_list_index)
    {
        heap_segment* region = get_region_info_for_address (*x);

        // sanity check - the object on the mark list should be within the region
        assert ((heap_segment_mem (region) <= *x) && (*x < heap_segment_allocated (region)));

        size_t region_index = get_basic_region_index_for_address (heap_segment_mem (region));
        uint8_t* region_limit = heap_segment_allocated (region);

        // Due to GC holes, x can point to something in a region that already got freed. And that region's
        // allocated would be 0 and cause an infinite loop which is much harder to handle on production than
        // simply throwing an exception.
        if (region_limit == 0)
        {
            FATAL_GC_ERROR();
        }

        uint8_t*** mark_list_piece_start_ptr = &mark_list_piece_start[region_index];
        uint8_t*** mark_list_piece_end_ptr = &mark_list_piece_end[region_index];
#else // USE_REGIONS

// predicate means: x is still within the mark list, and within the bounds of this heap
#define predicate(x) (((x) < local_mark_list_index) && (*(x) < heap->ephemeral_high))

    // first set the pieces for all heaps to empty
    int heap_num;
    for (heap_num = 0; heap_num < n_heaps; heap_num++)
    {
        mark_list_piece_start[heap_num] = NULL;
        mark_list_piece_end[heap_num] = NULL;
    }

    heap_num = -1;
    while (x < local_mark_list_index)
    {
        gc_heap* heap;
        // find the heap x points into - searching cyclically from the last heap,
        // because in many cases the right heap is the next one or comes soon after
#ifdef _DEBUG
        int last_heap_num = heap_num;
#endif //_DEBUG
        do
        {
            heap_num++;
            if (heap_num >= n_heaps)
                heap_num = 0;
            assert(heap_num != last_heap_num); // we should always find the heap - infinite loop if not!
            heap = g_heaps[heap_num];
        }
        while (!(*x >= heap->ephemeral_low && *x < heap->ephemeral_high));

        uint8_t*** mark_list_piece_start_ptr = &mark_list_piece_start[heap_num];
        uint8_t*** mark_list_piece_end_ptr = &mark_list_piece_end[heap_num];
#endif // USE_REGIONS

        // x is the start of the mark list piece for this heap/region
        *mark_list_piece_start_ptr = x;

        // to find the end of the mark list piece for this heap/region, find the first x
        // that has !predicate(x), i.e. that is either not in this heap, or beyond the end of the list
        if (predicate(x))
        {
            // let's see if we get lucky and the whole rest belongs to this piece
            if (predicate(local_mark_list_index -1))
            {
                x = local_mark_list_index;
                *mark_list_piece_end_ptr = x;
                break;
            }

            // we play a variant of binary search to find the point sooner.
            // the first loop advances by increasing steps until the predicate turns false.
            // then we retreat the last step, and the second loop advances by decreasing steps, keeping the predicate true.
            unsigned inc = 1;
            do
            {
                inc *= 2;
                uint8_t** temp_x = x;
                x += inc;
                if (temp_x > x)
                {
                    break;
                }
            }
            while (predicate(x));
            // we know that only the last step was wrong, so we undo it
            x -= inc;
            do
            {
                // loop invariant - predicate holds at x, but not x + inc
                assert (predicate(x) && !(((x + inc) > x) && predicate(x + inc)));
                inc /= 2;
                if (((x + inc) > x) && predicate(x + inc))
                {
                    x += inc;
                }
            }
            while (inc > 1);
            // the termination condition and the loop invariant together imply this:
            assert(predicate(x) && !predicate(x + inc) && (inc == 1));
            // so the spot we're looking for is one further
            x += 1;
        }
        *mark_list_piece_end_ptr = x;
    }

#undef predicate

    return total_mark_list_size;
}

// Copies the slots in [start, end) to the current write position of this heap's mark list
// and advances mark_list_index past them. If fewer slots remain than were requested, the
// copy is clamped to the space left (through mark_list_end + 1).
void gc_heap::append_to_mark_list (uint8_t **start, uint8_t **end)
{
    size_t requested = end - start;
    size_t remaining = mark_list_end + 1 - mark_list_index;
    size_t copy_count = (requested < remaining) ? requested : remaining;

    memcpy(mark_list_index, start, copy_count*sizeof(uint8_t*));
    mark_list_index += copy_count;

    dprintf (3, ("h%d: appended %zd slots to mark_list\n", heap_number, copy_count));
}

#ifdef _DEBUG

// Provide a definition for __cdecl on compilers that do not supply one themselves.
#if !defined(_MSC_VER)
#if !defined(__cdecl)
#if defined(__i386__)
#define __cdecl __attribute__((cdecl))
#else
#define __cdecl
#endif
#endif
#endif

// Three-way comparison callback in the shape bsearch expects: both arguments point at
// mark list slots, and the addresses stored in those slots are what gets compared.
// Returns -1, 0 or 1 when the key's address is lower than, equal to or higher than the datum's.
static int __cdecl cmp_mark_list_item (const void* vkey, const void* vdatum)
{
    uint8_t* key_item = *((uint8_t**)vkey);
    uint8_t* datum_item = *((uint8_t**)vdatum);

    if (key_item == datum_item)
    {
        return 0;
    }
    return (key_item < datum_item) ? -1 : 1;
}
#endif // _DEBUG

// Merges the sorted mark list pieces that the heaps prepared for one "source number" into
// a single sorted list. The same body is compiled under two different headers:
//  - USE_REGIONS: get_region_mark_list - the source number is the basic region index of
//    start. Returns the merged list and stores its end in *mark_list_end_ptr; if the merged
//    output does not fit, use_mark_list is set to false and nullptr is returned.
//    The end parameter is not referenced in this variant.
//  - otherwise: merge_mark_lists - the source number is this heap's number and the result
//    is left in mark_list / mark_list_index / mark_list_end.
#ifdef USE_REGIONS
uint8_t** gc_heap::get_region_mark_list (BOOL& use_mark_list, uint8_t* start, uint8_t* end, uint8_t*** mark_list_end_ptr)
{
    size_t region_number = get_basic_region_index_for_address (start);
    size_t source_number = region_number;
#else //USE_REGIONS
void gc_heap::merge_mark_lists (size_t total_mark_list_size)
{
    // in case of mark list overflow, don't bother
    if (total_mark_list_size == 0)
    {
        return;
    }

#ifdef _DEBUG
    // if we had more than the average number of mark list items,
    // make sure these got copied to another heap, i.e. didn't get lost
    size_t this_mark_list_size = target_mark_count_for_heap (total_mark_list_size, n_heaps, heap_number);
    for (uint8_t** p = mark_list + this_mark_list_size; p < mark_list_index; p++)
    {
        uint8_t* item = *p;
        uint8_t** found_slot = nullptr;
        // look for the item in each heap's slice of g_mark_list
        for (int i = 0; i < n_heaps; i++)
        {
            uint8_t** heap_mark_list = &g_mark_list[i * mark_list_size];
            size_t heap_mark_list_size = target_mark_count_for_heap (total_mark_list_size, n_heaps, i);
            found_slot = (uint8_t**)bsearch (&item, heap_mark_list, heap_mark_list_size, sizeof(item), cmp_mark_list_item);
            if (found_slot != nullptr)
                break;
        }
        assert ((found_slot != nullptr) && (*found_slot == item));
    }
#endif

    dprintf(3, ("merge_mark_lists: heap_number = %d  starts out with %zd entries",
        heap_number, (mark_list_index - mark_list)));

    int source_number = (size_t)heap_number;
#endif //USE_REGIONS

    // source[k] .. source_end[k] is the k-th non-empty piece still to be merged;
    // source_heap[k] records which heap it came from (only read by the logging below).
    uint8_t** source[MAX_SUPPORTED_CPUS];
    uint8_t** source_end[MAX_SUPPORTED_CPUS];
    int source_heap[MAX_SUPPORTED_CPUS];
    int source_count = 0;

    // collect the non-empty pieces every heap recorded for source_number
    for (int i = 0; i < n_heaps; i++)
    {
        gc_heap* heap = g_heaps[i];
        if (heap->mark_list_piece_start[source_number] < heap->mark_list_piece_end[source_number])
        {
            source[source_count] = heap->mark_list_piece_start[source_number];
            source_end[source_count] = heap->mark_list_piece_end[source_number];
            source_heap[source_count] = i;
            // NOTE(review): this bound is checked after the stores above, so it does not
            // protect them; presumably n_heaps <= MAX_SUPPORTED_CPUS makes it moot - confirm.
            if (source_count < MAX_SUPPORTED_CPUS)
                source_count++;
        }
    }

    dprintf(3, ("source_number = %zd  has %d sources\n", (size_t)source_number, source_count));

#if defined(_DEBUG) || defined(TRACE_GC)
    for (int j = 0; j < source_count; j++)
    {
        dprintf(3, ("source_number = %zd  ", (size_t)source_number));
        dprintf(3, (" source from heap %zd = %zx .. %zx (%zd entries)",
            (size_t)(source_heap[j]), (size_t)(source[j][0]),
            (size_t)(source_end[j][-1]), (size_t)(source_end[j] - source[j])));
       // the sources should all be sorted
        for (uint8_t **x = source[j]; x < source_end[j] - 1; x++)
        {
            if (x[0] > x[1])
            {
                dprintf(3, ("oops, mark_list from source %d for heap %zd isn't sorted\n", j,  (size_t)source_number));
                assert (0);
            }
        }
    }
#endif //_DEBUG || TRACE_GC

    // the merged output goes to this heap's slice of g_mark_list_copy
    mark_list = &g_mark_list_copy [heap_number*mark_list_size];
    mark_list_index = mark_list;
    mark_list_end = &mark_list [mark_list_size-1];
    int piece_count = 0;
    if (source_count == 0)
    {
        ; // nothing to do
    }
    else if (source_count == 1)
    {
        // a single source needs no merging - point the mark list directly at that piece
        // instead of copying it
        mark_list = source[0];
        mark_list_index = source_end[0];
        mark_list_end = mark_list_index;
        piece_count++;
    }
    else
    {
        while (source_count > 1)
        {
            // find the lowest and second lowest value in the sources we're merging from
            int lowest_source = 0;
            uint8_t *lowest = *source[0];
            uint8_t *second_lowest = *source[1];
            for (int i = 1; i < source_count; i++)
            {
                if (lowest > *source[i])
                {
                    second_lowest = lowest;
                    lowest = *source[i];
                    lowest_source = i;
                }
                else if (second_lowest > *source[i])
                {
                    second_lowest = *source[i];
                }
            }

            // find the point in the lowest source where it either runs out or is not <= second_lowest anymore
            // let's first try to get lucky and see if the whole source is <= second_lowest -- this is actually quite common
            uint8_t **x;
            if (source_end[lowest_source][-1] <= second_lowest)
                x = source_end[lowest_source];
            else
            {
                // use linear search to find the end -- could also use binary search as in sort_mark_list,
                // but saw no improvement doing that
                for (x = source[lowest_source]; x < source_end[lowest_source] && *x <= second_lowest; x++)
                    ;
            }

            // blast this piece to the mark list
            append_to_mark_list(source[lowest_source], x);
#ifdef USE_REGIONS
            // append_to_mark_list clamps the copy to the space available, so an index past
            // mark_list_end means the output is full - give up on the mark list for this region
            if (mark_list_index > mark_list_end)
            {
                use_mark_list = false;
                return nullptr;
            }
#endif //USE_REGIONS
            piece_count++;

            source[lowest_source] = x;

            // check whether this source is now exhausted
            if (x >= source_end[lowest_source])
            {
                // if it's not the source with the highest index, copy the source with the highest index
                // over it so the non-empty sources are always at the beginning
                // (source_heap is not moved along; it is only read by the logging earlier)
                if (lowest_source < source_count-1)
                {
                    source[lowest_source] = source[source_count-1];
                    source_end[lowest_source] = source_end[source_count-1];
                }
                source_count--;
            }
        }
        // we're left with just one source that we copy
        append_to_mark_list(source[0], source_end[0]);
#ifdef USE_REGIONS
        if (mark_list_index > mark_list_end)
        {
            use_mark_list = false;
            return nullptr;
        }
#endif //USE_REGIONS
        piece_count++;
    }

#if defined(_DEBUG) || defined(TRACE_GC)
    // the final mark list must be sorted
    for (uint8_t **x = mark_list; x < mark_list_index - 1; x++)
    {
        if (x[0] > x[1])
        {
            dprintf(3, ("oops, mark_list for heap %d isn't sorted at the end of merge_mark_lists", heap_number));
            assert (0);
        }
    }
#endif //_DEBUG || TRACE_GC

#ifdef USE_REGIONS
    // hand the merged range back to the caller: [returned pointer, *mark_list_end_ptr)
    *mark_list_end_ptr = mark_list_index;
    return mark_list;
#endif // USE_REGIONS
}
#else

#ifdef USE_REGIONS
// Lower-bound flavor of binary search over the sorted range [left, right):
// rather than looking for an exact match, it returns a pointer to the first slot
// whose value is >= e, or right when every slot holds a value smaller than e.
static uint8_t** binary_search (uint8_t** left, uint8_t** right, uint8_t* e)
{
    if (left == right)
    {
        return left;
    }
    assert (left < right);

    uint8_t** lo = left;
    uint8_t** hi = right;

    // shrink [lo, hi) until a single candidate slot is left at lo
    while ((hi - lo) >= 2)
    {
        uint8_t** mid = lo + (hi - lo) / 2;

        // the range spans at least 2 slots, so lo, mid and hi are distinct
        assert ((lo < mid) && (mid < hi));

        if (*mid < e)
        {
            lo = mid;
        }
        else
        {
            hi = mid;
        }
    }

    // lo is the last candidate: the answer is either lo itself or the slot after it
    return (*lo < e) ? (lo + 1) : lo;
}

// Locates the part of the sorted mark list that falls inside [start, end).
// Returns a pointer to the first entry >= start and stores a pointer to the first
// entry >= end in *mark_list_end_ptr. use_mark_list is left untouched in this variant.
uint8_t** gc_heap::get_region_mark_list (BOOL& use_mark_list, uint8_t* start, uint8_t* end, uint8_t*** mark_list_end_ptr)
{
    // find the upper bound first so that the search for the lower bound
    // only has to look at entries below it
    uint8_t** region_list_end = binary_search (mark_list, mark_list_index, end);
    *mark_list_end_ptr = region_list_end;

    uint8_t** region_list_start = binary_search (mark_list, region_list_end, start);
    return region_list_start;
}
#endif //USE_REGIONS
#endif //MULTIPLE_HEAPS

void gc_heap::grow_mark_list ()
{
    // with vectorized sorting, we can use bigger mark lists
    bool use_big_lists = false;
#if defined(USE_VXSORT) && defined(TARGET_AMD64)
    use_big_lists = IsSupportedInstructionSet (InstructionSet::AVX2);
#elif defined(USE_VXSORT) && defined(TARGET_ARM64)
    use_big_lists = IsSupportedInstructionSet (InstructionSet::NEON);
#endif //USE_VXSORT

#ifdef MULTIPLE_HEAPS
    const size_t MAX_MARK_LIST_SIZE = use_big_lists ? (1000 * 1024) : (200 * 1024);
#else //MULTIPLE_HEAPS
    const size_t MAX_MARK_LIST_SIZE = use_big_lists ? (32 * 1024) : (16 * 1024);
#endif //MULTIPLE_HEAPS

    size_t new_mark_list_size = min (mark_list_size * 2, MAX_MARK_LIST_SIZE);
    size_t new_mark_list_total_size = new_mark_list_size*n_heaps;
    if (new_mark_list_total_size == g_mark_list_total_size)
        return;

#ifdef MULTIPLE_HEAPS
    uint8_t** new_mark_list = make_mark_list (new_mark_list_total_size);
    uint8_t** new_mark_list_copy = make_mark_list (new_mark_list_total_size);

    if ((new_mark_list != nullptr) && (new_mark_list_copy != nullptr))
    {
        delete[] g_mark_list;
        g_mark_list = new_mark_list;
        delete[] g_mark_list_copy;
        g_mark_list_copy = new_mark_list_copy;
        mark_list_size = new_mark_list_size;
        g_mark_list_total_size = new_mark_list_total_size;
    }
    else
    {
        delete[] new_mark_list;
        delete[] new_mark_list_copy;
    }

#else //MULTIPLE_HEAPS
    uint8_t** new_mark_list = make_mark_list (new_mark_list_size);
    if (new_mark_list != nullptr)
    {
        delete[] mark_list;
        g_mark_list = new_mark_list;
        mark_list_size = new_mark_list_size;
        g_mark_list_total_size = new_mark_list_size;
    }
#endif //MULTIPLE_HEAPS
}

#ifndef USE_REGIONS
// Keeps track of a set of free spaces - the gaps in front of pinned plugs and the unused
// space at the end of a segment - grouped into power-of-2 size buckets, and fits plugs
// into them (see fit).
//
// Storage is a single allocation made by alloc(): an array of free_space_bucket followed
// by an array of seg_free_space. Bucket i covers sizes whose highest set bit is
// (base_power2 + i); its items are the slots from free_space_buckets[i].free_space up to
// the start of the next bucket.
//
// Expected call order as far as this class shows: alloc, add_buckets, add (once per free
// space), fit (once per plug), cleanup.
class seg_free_spaces
{
    // One free space. When is_plug is set, start is a mark* for a pinned plug and the free
    // space is the pinned_len bytes in front of that plug; otherwise start is a
    // heap_segment* and the free space runs from plan_allocated to committed.
    struct seg_free_space
    {
        BOOL is_plug;
        void* start;
    };

    struct free_space_bucket
    {
        seg_free_space* free_space; // First item of this bucket inside seg_free_space_array.
        ptrdiff_t count_add; // Assigned when we first construct the array.
        ptrdiff_t count_fit; // How many items left when we are fitting plugs.
    };

    // Moves one free space from bucket old_power2 down to bucket new_power2 (both are
    // bucket indices, i.e. already relative to base_power2). The item being moved is the
    // first item of the old bucket. Each bucket boundary in between is advanced by one
    // slot, and items are swapped along the way so the moved item ends up inside the
    // range of the new bucket.
    void move_bucket (int old_power2, int new_power2)
    {
        // PREFAST warning 22015: old_power2 could be negative
        assert (old_power2 >= 0);
        assert (old_power2 >= new_power2);

        if (old_power2 == new_power2)
        {
            return;
        }

        seg_free_space* src_index = free_space_buckets[old_power2].free_space;
        for (int i = old_power2; i > new_power2; i--)
        {
            // bucket i gives up its first slot to the bucket below it
            seg_free_space** dest = &(free_space_buckets[i].free_space);
            (*dest)++;

            seg_free_space* dest_index = free_space_buckets[i - 1].free_space;
            if (i > (new_power2 + 1))
            {
                seg_free_space temp = *src_index;
                *src_index = *dest_index;
                *dest_index = temp;
            }
            src_index = dest_index;
        }

        free_space_buckets[old_power2].count_fit--;
        free_space_buckets[new_power2].count_fit++;
    }

#ifdef _DEBUG

    // Logs the start address and length of one free space.
    void dump_free_space (seg_free_space* item)
    {
        uint8_t* addr = 0;
        size_t len = 0;

        if (item->is_plug)
        {
            mark* m = (mark*)(item->start);
            len = pinned_len (m);
            addr = pinned_plug (m) - len;
        }
        else
        {
            heap_segment* seg = (heap_segment*)(item->start);
            addr = heap_segment_plan_allocated (seg);
            len = heap_segment_committed (seg) - addr;
        }

        dprintf (SEG_REUSE_LOG_1, ("[%d]0x%p %zd", heap_num, addr, len));
    }

    // Logs every bucket together with the free spaces it currently holds.
    void dump()
    {
        seg_free_space* item = NULL;
        int i = 0;

        dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------\nnow the free spaces look like:", heap_num));
        for (i = 0; i < (free_space_bucket_count - 1); i++)
        {
            dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i)));
            dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len"));
            item = free_space_buckets[i].free_space;
            while (item < free_space_buckets[i + 1].free_space)
            {
                dump_free_space (item);
                item++;
            }
            dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num));
        }

        // the last bucket has no successor, so it ends at the end of the item array
        dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i)));
        dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len"));
        item = free_space_buckets[i].free_space;

        while (item <= &seg_free_space_array[free_space_item_count - 1])
        {
            dump_free_space (item);
            item++;
        }
        dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num));
    }

#endif //_DEBUG

    free_space_bucket* free_space_buckets;  // Start of the single allocation made by alloc().
    seg_free_space* seg_free_space_array;   // Interior pointer into that allocation, set by add_buckets().
    ptrdiff_t free_space_bucket_count;
    ptrdiff_t free_space_item_count;
    int base_power2;
    int heap_num;
#ifdef _DEBUG
    BOOL has_end_of_seg;
#endif //_DEBUG

public:

    // h_number is only used to tag log output.
    seg_free_spaces (int h_number)
    {
        heap_num = h_number;
        // Start out with no storage so that cleanup() is safe to call even if alloc()
        // or add_buckets() never ran.
        free_space_buckets = nullptr;
        seg_free_space_array = nullptr;
    }

    // Allocates the backing storage, sized for MAX_NUM_BUCKETS buckets plus
    // MAX_NUM_FREE_SPACES items. Returns FALSE if the allocation failed.
    BOOL alloc ()
    {
        size_t total_prealloc_size =
            MAX_NUM_BUCKETS * sizeof (free_space_bucket) +
            MAX_NUM_FREE_SPACES * sizeof (seg_free_space);

        free_space_buckets = (free_space_bucket*) new (nothrow) uint8_t[total_prealloc_size];

        return (!!free_space_buckets);
    }

    // We take the ordered free space array we got from the 1st pass,
    // and feed the portion that we decided to use to this method, ie,
    // the largest item_count free spaces.
    // base is the power of 2 of the smallest bucket, ordered_free_spaces[i] is the number
    // of items that belong to bucket i, and the counts must add up to item_count.
    void add_buckets (int base, size_t* ordered_free_spaces, int bucket_count, size_t item_count)
    {
        assert (free_space_buckets);
        assert (item_count <= (size_t)MAX_PTR);

        free_space_bucket_count = bucket_count;
        free_space_item_count = item_count;
        base_power2 = base;
#ifdef _DEBUG
        has_end_of_seg = FALSE;
#endif //_DEBUG

        ptrdiff_t total_item_count = 0;
        ptrdiff_t i = 0;

        // the item array lives right behind the buckets in the same allocation
        seg_free_space_array = (seg_free_space*)(free_space_buckets + free_space_bucket_count);

        for (i = 0; i < (ptrdiff_t)item_count; i++)
        {
            seg_free_space_array[i].start = 0;
            seg_free_space_array[i].is_plug = FALSE;
        }

        // carve the item array into consecutive per-bucket ranges
        for (i = 0; i < bucket_count; i++)
        {
            free_space_buckets[i].count_add = ordered_free_spaces[i];
            free_space_buckets[i].count_fit = ordered_free_spaces[i];
            free_space_buckets[i].free_space = &seg_free_space_array[total_item_count];
            total_item_count += free_space_buckets[i].count_add;
        }

        assert (total_item_count == (ptrdiff_t)item_count);
    }

    // If we are adding a free space before a plug we pass the
    // mark stack position so we can update the length; we could
    // also be adding the free space after the last plug in which
    // case start is the segment which we'll need to update the
    // heap_segment_plan_allocated.
    // When first_p is set, gc_heap::eph_gen_starts_size bytes are taken off the front of
    // the free space before it is recorded.
    void add (void* start, BOOL plug_p, BOOL first_p)
    {
        size_t size = (plug_p ?
                       pinned_len ((mark*)start) :
                       (heap_segment_committed ((heap_segment*)start) -
                           heap_segment_plan_allocated ((heap_segment*)start)));

        if (plug_p)
        {
            dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space before plug: %zd", heap_num, size));
        }
        else
        {
            dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space at end of seg: %zd", heap_num, size));
#ifdef _DEBUG
            has_end_of_seg = TRUE;
#endif //_DEBUG
        }

        if (first_p)
        {
            size_t eph_gen_starts = gc_heap::eph_gen_starts_size;
            size -= eph_gen_starts;
            if (plug_p)
            {
                mark* m = (mark*)(start);
                pinned_len (m) -= eph_gen_starts;
            }
            else
            {
                heap_segment* seg = (heap_segment*)start;
                heap_segment_plan_allocated (seg) += eph_gen_starts;
            }
        }

        // spaces smaller than the smallest bucket are not tracked
        int bucket_power2 = index_of_highest_set_bit (size);
        if (bucket_power2 < base_power2)
        {
            return;
        }

        free_space_bucket* bucket = &free_space_buckets[bucket_power2 - base_power2];

        seg_free_space* bucket_free_space = bucket->free_space;
        assert (plug_p || (!plug_p && bucket->count_add));

        if (bucket->count_add == 0)
        {
            dprintf (SEG_REUSE_LOG_1, ("[%d]Already have enough of 2^%d", heap_num, bucket_power2));
            return;
        }

        // the slots of a bucket are filled from the back towards the front
        ptrdiff_t index = bucket->count_add - 1;

        dprintf (SEG_REUSE_LOG_1, ("[%d]Building free spaces: adding %p; len: %zd (2^%d)",
                    heap_num,
                    (plug_p ?
                        (pinned_plug ((mark*)start) - pinned_len ((mark*)start)) :
                        heap_segment_plan_allocated ((heap_segment*)start)),
                    size,
                    bucket_power2));

        if (plug_p)
        {
            bucket_free_space[index].is_plug = TRUE;
        }

        bucket_free_space[index].start = start;
        bucket->count_add--;
    }

#ifdef _DEBUG

    // Do a consistency check after all free spaces are added.
    void check()
    {
        ptrdiff_t i = 0;
        int end_of_seg_count = 0;

        for (i = 0; i < free_space_item_count; i++)
        {
            assert (seg_free_space_array[i].start);
            if (!(seg_free_space_array[i].is_plug))
            {
                end_of_seg_count++;
            }
        }

        if (has_end_of_seg)
        {
            assert (end_of_seg_count == 1);
        }
        else
        {
            assert (end_of_seg_count == 0);
        }

        for (i = 0; i < free_space_bucket_count; i++)
        {
            assert (free_space_buckets[i].count_add == 0);
        }
    }

#endif //_DEBUG

    // Finds a free space for a plug of plug_size bytes that currently lives at old_loc
    // (old_loc may be 0), carves the plug out of the front of that free space, moves the
    // remainder to the bucket matching its new size and returns the plug's new address.
    // A free space is accepted if the plug fits exactly or leaves at least
    // Align (min_obj_size) bytes behind.
    uint8_t* fit (uint8_t* old_loc,
               size_t plug_size
               REQD_ALIGN_AND_OFFSET_DCL)
    {
        if (old_loc)
        {
#ifdef SHORT_PLUGS
            assert (!is_plug_padded (old_loc));
#endif //SHORT_PLUGS
            assert (!node_realigned (old_loc));
        }

        size_t saved_plug_size = plug_size;

#ifdef FEATURE_STRUCTALIGN
        // BARTOKTODO (4841): this code path is disabled (see can_fit_all_blocks_p) until we take alignment requirements into account
        _ASSERTE(requiredAlignment == DATA_ALIGNMENT && false);
#endif // FEATURE_STRUCTALIGN

        size_t plug_size_to_fit = plug_size;

        // best fit is only done for gen1 to gen2 and we do not pad in gen2.
        // however we must account for requirements of large alignment.
        // which may result in realignment padding.
#ifdef RESPECT_LARGE_ALIGNMENT
        plug_size_to_fit += switch_alignment_size(FALSE);
#endif //RESPECT_LARGE_ALIGNMENT

        int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size)));
        ptrdiff_t i;
        uint8_t* new_address = 0;

        if (plug_power2 < base_power2)
        {
            plug_power2 = base_power2;
        }

        int chosen_power2 = plug_power2 - base_power2;
retry:
        // pick the first bucket, starting at chosen_power2, that still has free spaces
        for (i = chosen_power2; i < free_space_bucket_count; i++)
        {
            if (free_space_buckets[i].count_fit != 0)
            {
                break;
            }
            chosen_power2++;
        }

        dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting plug len %zd (2^%d) using 2^%d free space",
            heap_num,
            plug_size,
            plug_power2,
            (chosen_power2 + base_power2)));

        assert (i < free_space_bucket_count);

        seg_free_space* bucket_free_space = free_space_buckets[chosen_power2].free_space;
        ptrdiff_t free_space_count = free_space_buckets[chosen_power2].count_fit;
        size_t new_free_space_size = 0;
        BOOL can_fit = FALSE;
        size_t pad = 0;

        // try the free spaces of the chosen bucket in order until one can take the plug
        for (i = 0; i < free_space_count; i++)
        {
            size_t free_space_size = 0;
            pad = 0;

            if (bucket_free_space[i].is_plug)
            {
                mark* m = (mark*)(bucket_free_space[i].start);
                uint8_t* plug_free_space_start = pinned_plug (m) - pinned_len (m);

                // the plug would change its large alignment at the new spot - pad to compensate
                if (!((old_loc == 0) || same_large_alignment_p (old_loc, plug_free_space_start)))
                {
                    pad = switch_alignment_size (FALSE);
                }

                plug_size = saved_plug_size + pad;

                free_space_size = pinned_len (m);
                new_address = pinned_plug (m) - pinned_len (m);

                if (free_space_size >= (plug_size + Align (min_obj_size)) ||
                    free_space_size == plug_size)
                {
                    new_free_space_size = free_space_size - plug_size;
                    pinned_len (m) = new_free_space_size;
#ifdef SIMPLE_DPRINTF
                    dprintf (SEG_REUSE_LOG_0, ("[%d]FP: 0x%p->0x%p(%zx)(%zx), [0x%p (2^%d) -> [0x%p (2^%d)",
                                heap_num,
                                old_loc,
                                new_address,
                                (plug_size - pad),
                                pad,
                                pinned_plug (m),
                                index_of_highest_set_bit (free_space_size),
                                (pinned_plug (m) - pinned_len (m)),
                                index_of_highest_set_bit (new_free_space_size)));
#endif //SIMPLE_DPRINTF

                    if (pad != 0)
                    {
                        set_node_realigned (old_loc);
                    }

                    can_fit = TRUE;
                }
            }
            else
            {
                heap_segment* seg = (heap_segment*)(bucket_free_space[i].start);
                free_space_size = heap_segment_committed (seg) - heap_segment_plan_allocated (seg);

                if (!((old_loc == 0) || same_large_alignment_p (old_loc, heap_segment_plan_allocated (seg))))
                {
                    pad = switch_alignment_size (FALSE);
                }

                plug_size = saved_plug_size + pad;

                if (free_space_size >= (plug_size + Align (min_obj_size)) ||
                    free_space_size == plug_size)
                {
                    new_address = heap_segment_plan_allocated (seg);
                    new_free_space_size = free_space_size - plug_size;
                    heap_segment_plan_allocated (seg) = new_address + plug_size;
#ifdef SIMPLE_DPRINTF
                    dprintf (SEG_REUSE_LOG_0, ("[%d]FS: 0x%p-> 0x%p(%zd) (2^%d) -> 0x%p (2^%d)",
                                heap_num,
                                old_loc,
                                new_address,
                                (plug_size - pad),
                                index_of_highest_set_bit (free_space_size),
                                heap_segment_plan_allocated (seg),
                                index_of_highest_set_bit (new_free_space_size)));
#endif //SIMPLE_DPRINTF

                    if (pad != 0)
                        set_node_realigned (old_loc);

                    can_fit = TRUE;
                }
            }

            if (can_fit)
            {
                break;
            }
        }

        if (!can_fit)
        {
            // nothing in the chosen bucket could take the plug; this is only expected for
            // the smallest bucket, in which case we start over from the next bucket up
            assert (chosen_power2 == 0);
            chosen_power2 = 1;
            goto retry;
        }

        new_address += pad;
        assert ((chosen_power2 && (i == 0)) ||
                ((!chosen_power2) && (i < free_space_count)));

        int new_bucket_power2 = index_of_highest_set_bit (new_free_space_size);

        if (new_bucket_power2 < base_power2)
        {
            new_bucket_power2 = base_power2;
        }

        // what is left of the free space now belongs to a (possibly) smaller bucket
        move_bucket (chosen_power2, new_bucket_power2 - base_power2);

        //dump();

        return new_address;
    }

    // Releases the storage obtained by alloc(). Safe to call more than once, and safe to
    // call when alloc() was never called or failed.
    void cleanup ()
    {
        // alloc() makes exactly one allocation, as a uint8_t array, so it is released
        // exactly once and as a uint8_t array. seg_free_space_array points into the middle
        // of that same allocation (see add_buckets) and must never be passed to delete[]
        // on its own.
        if (free_space_buckets)
        {
            delete [] (uint8_t*)free_space_buckets;
            free_space_buckets = nullptr;
        }
        seg_free_space_array = nullptr;
    }
};
#endif //!USE_REGIONS

// Shorthands for querying and changing the mark and pin state of object i.
// Each one forwards to the object header obtained through header(i).
#define marked(i) header(i)->IsMarked()
#define set_marked(i) header(i)->SetMarked()
#define clear_marked(i) header(i)->ClearMarked()
#define pinned(i) header(i)->IsPinned()
#define set_pinned(i) header(i)->SetPinned()
// NOTE(review): unlike the macros above, this expansion already ends in a ';', so it can
// only be used as a statement. Callers are not visible here - confirm before changing it.
#define clear_pinned(i) header(i)->GetHeader()->ClrGCBit();

// Computes the size of an object from its method table: the base size, plus
// component count times component size for types that have a component size.
inline size_t my_get_size (Object* ob)
{
    MethodTable* mT = header(ob)->GetMethodTable();

    size_t total_size = mT->GetBaseSize();
    if (mT->HasComponentSize())
    {
        size_t component_count = (size_t)((CObjectHeader*)ob)->GetNumComponents();
        total_size += component_count * mT->RawGetComponentSize();
    }

    return total_size;
}

// Size of object i, computed by my_get_size.
#define size(i) my_get_size (header(i))

// Whether object i contains GC pointers, as reported by its header.
#define contain_pointers(i) header(i)->ContainsGCPointers()
#ifdef COLLECTIBLE_CLASS
// With collectible classes, an object also qualifies when its header reports it as collectible.
#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointersOrCollectible()

// Asks the EE for the loader allocator object associated with object i.
#define get_class_object(i) GCToEEInterface::GetLoaderAllocatorObjectForGC((Object *)i)
// Whether the method table of object i is marked collectible.
#define is_collectible(i) method_table(i)->Collectible()
#else //COLLECTIBLE_CLASS
// Without collectible classes this is the same check as contain_pointers.
#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointers()
#endif //COLLECTIBLE_CLASS

#ifdef BACKGROUND_GC
#ifdef FEATURE_BASICFREEZE
// Clears the mark array bits for the range that bgc_mark_array_range reports for seg.
// Does nothing when bgc_mark_array_range reports no range.
inline
void gc_heap::seg_clear_mark_array_bits_soh (heap_segment* seg)
{
    uint8_t* range_beg = 0;
    uint8_t* range_end = 0;
    if (!bgc_mark_array_range (seg, FALSE, &range_beg, &range_end))
    {
        return;
    }

    // the end of the range is rounded with align_on_mark_word before clearing
    clear_mark_array (range_beg, align_on_mark_word (range_end));
}

// Sets the mark array bits for the range that bgc_mark_array_range reports for seg.
// Bits in front of the first aligned mark word are set one at a time; the aligned
// mark words after that are filled in bulk.
inline
void gc_heap::seg_set_mark_array_bits_soh (heap_segment* seg)
{
    uint8_t* range_beg = 0;
    uint8_t* range_end = 0;
    if (!bgc_mark_array_range (seg, FALSE, &range_beg, &range_end))
    {
        return;
    }

    size_t first_word = mark_word_of (align_on_mark_word (range_beg));
    size_t limit_word = mark_word_of (align_on_mark_word (range_end));

    // leading part that does not start on a mark word boundary
    for (uint8_t* op = range_beg; op < mark_word_address (first_word); op += mark_bit_pitch)
    {
        mark_array_set_marked (op);
    }

    // everything from the first aligned word on is set a whole word at a time
    memset (&mark_array[first_word], 0xFF, (limit_word - first_word)*sizeof (uint32_t));
}
#endif //FEATURE_BASICFREEZE

// Clears the mark array bits covering [start, end), after clipping that range to
// [background_saved_lowest_address, background_saved_highest_address].
// Ranges that do not overlap the saved address range are ignored.
void gc_heap::bgc_clear_batch_mark_array_bits (uint8_t* start, uint8_t* end)
{
    // nothing to do when the range lies entirely outside the saved address range
    if ((start >= background_saved_highest_address) ||
        (end <= background_saved_lowest_address))
    {
        return;
    }

    start = max (start, background_saved_lowest_address);
    end = min (end, background_saved_highest_address);

    size_t start_mark_bit = mark_bit_of (start);
    size_t end_mark_bit = mark_bit_of (end);
    unsigned int startbit = mark_bit_bit (start_mark_bit);
    unsigned int endbit = mark_bit_bit (end_mark_bit);
    size_t startwrd = mark_bit_word (start_mark_bit);
    size_t endwrd = mark_bit_word (end_mark_bit);

    dprintf (3, ("Clearing all mark array bits between [%zx:%zx-[%zx:%zx",
        (size_t)start, (size_t)start_mark_bit,
        (size_t)end, (size_t)end_mark_bit));

    // masks of the bits to keep in the first and in the last mark word
    unsigned int firstwrd = lowbits (~0, startbit);
    unsigned int lastwrd = highbits (~0, endbit);

    // the whole range sits inside a single mark word
    if (startwrd == endwrd)
    {
        if (startbit == endbit)
        {
            assert (start == end);
        }
        else
        {
            mark_array[startwrd] &= (firstwrd | lastwrd);
        }
        return;
    }

    // clear the first mark word.
    if (startbit)
    {
        mark_array[startwrd] &= firstwrd;
        startwrd++;
    }

    // clear the mark words that are fully covered by the range
    size_t wrdtmp = startwrd;
    while (wrdtmp < endwrd)
    {
        mark_array[wrdtmp] = 0;
        wrdtmp++;
    }

    // clear the last mark word.
    if (endbit)
    {
        mark_array[endwrd] &= lastwrd;
    }
}
#endif //BACKGROUND_GC

// Reports whether object o currently has its mark set (thin wrapper over marked()).
inline
BOOL gc_heap::is_mark_set (uint8_t* o)
{
    BOOL mark_set_p = marked (o);
    return mark_set_p;
}

#if defined (_MSC_VER) && defined (TARGET_X86)
#pragma optimize("y", on)        // Small critical routines, don't put in EBP frame
#endif //_MSC_VER && TARGET_X86

// Returns the number of the generation that object o belongs to.
// The object is assumed to be valid.
// Note that UOH objects are reported as max_generation.
int gc_heap::object_gennum (uint8_t* o)
{
#ifdef USE_REGIONS
    return get_region_gen_num (o);
#else
    // Anything outside the ephemeral segment, or below the start of the oldest
    // ephemeral generation, is reported as max_generation.
    if (!in_range_for_segment (o, ephemeral_heap_segment) ||
        (o < generation_allocation_start (generation_of (max_generation - 1))))
    {
        return max_generation;
    }

    // in an ephemeral generation: pick the youngest generation whose
    // allocation start is at or below o.
    int gen_num = 0;
    while (gen_num < (max_generation - 1))
    {
        if (o >= generation_allocation_start (generation_of (gen_num)))
        {
            break;
        }
        gen_num++;
    }
    return gen_num;
#endif //USE_REGIONS
}

// Returns the generation number object o is planned to be in. Without regions this
// is the youngest ephemeral generation whose (non-null) plan allocation start is at
// or below o, or max_generation when there is no such generation or o is outside
// the ephemeral segment.
int gc_heap::object_gennum_plan (uint8_t* o)
{
#ifdef USE_REGIONS
    return get_region_plan_gen_num (o);
#else
    if (!in_range_for_segment (o, ephemeral_heap_segment))
    {
        return max_generation;
    }

    for (int gen_num = 0; gen_num < ephemeral_generation_count; gen_num++)
    {
        uint8_t* planned_start = generation_plan_allocation_start (generation_of (gen_num));
        if ((planned_start != nullptr) && (o >= planned_start))
        {
            return gen_num;
        }
    }

    return max_generation;
#endif //USE_REGIONS
}

#if defined(_MSC_VER) && defined(TARGET_X86)
#pragma optimize("", on)        // Go back to command line default optimizations
#endif //_MSC_VER && TARGET_X86

#ifdef USE_REGIONS
// Looks up the [start, end) pair recorded in initial_regions for heap hn and
// generation gen and hands it back through the two out parameters.
void get_initial_region(int gen, int hn, uint8_t** region_start, uint8_t** region_end)
{
    auto& bounds = initial_regions[hn][gen];
    *region_start = bounds[0];
    *region_end = bounds[1];
}

// Creates the initial region for each SOH generation (max_generation down to gen0)
// from the ranges returned by get_initial_region and makes a generation out of it.
// Returns false as soon as a region cannot be created, true otherwise.
bool gc_heap::initial_make_soh_regions (gc_heap* hp)
{
    uint32_t hn = 0;
#ifdef MULTIPLE_HEAPS
    hn = hp->heap_number;
#endif //MULTIPLE_HEAPS

    int gen_idx = max_generation;
    while (gen_idx >= 0)
    {
        uint8_t* initial_start;
        uint8_t* initial_end;
        get_initial_region(gen_idx, hn, &initial_start, &initial_end);

        size_t initial_size = initial_end - initial_start;
        heap_segment* new_region = make_heap_segment (initial_start, initial_size, hp, gen_idx);
        if (new_region == nullptr)
        {
            return false;
        }

        make_generation (gen_idx, new_region, heap_segment_mem (new_region));

        // The gen0 region becomes the ephemeral segment and seeds alloc_allocated.
        if (gen_idx == 0)
        {
            ephemeral_heap_segment = new_region;
            alloc_allocated = heap_segment_allocated (new_region);
        }

        gen_idx--;
    }

    // Log what each generation ended up with.
    int log_gen = max_generation;
    while (log_gen >= 0)
    {
        dprintf (REGIONS_LOG, ("h%d gen%d alloc seg is %p, start seg is %p (%p-%p)",
            heap_number, log_gen, generation_allocation_segment (generation_of (log_gen)),
            generation_start_segment (generation_of (log_gen)),
            heap_segment_mem (generation_start_segment (generation_of (log_gen))),
            heap_segment_allocated (generation_start_segment (generation_of (log_gen)))));
        log_gen--;
    }

    return true;
}

bool gc_heap::initial_make_uoh_regions (int gen, gc_heap* hp)
{
    uint8_t* region_start;
    uint8_t* region_end;
    uint32_t hn = 0;
#ifdef MULTIPLE_HEAPS
    hn = hp->heap_number;
#endif //MULTIPLE_HEAPS

    get_initial_region(gen, hn, &region_start, &region_end);

    size_t region_size = region_end - region_start;
    heap_segment* uoh_region = make_heap_segment (region_start, region_size, hp, gen);
    if (uoh_region == nullptr)
    {
        return false;
    }
    uoh_region->flags |=
        (gen == loh_generation) ? heap_segment_flags_loh : heap_segment_flags_poh;
    uint8_t* gen_start = heap_segment_mem (uoh_region);
    make_generation (gen, uoh_region, gen_start);
    return true;
}

// Resets the auxiliary bookkeeping that covers a region: the brick table entries
// (skipped for UOH regions) and the cards. With BACKGROUND_GC it also records the
// region as deleted via record_changed_seg and calls bgc_verify_mark_array_cleared.
void gc_heap::clear_region_info (heap_segment* region)
{
    if (!heap_segment_uoh_p (region))
    {
        //cleanup the brick table back to the empty value
        clear_brick_table (heap_segment_mem (region), heap_segment_reserved (region));
    }

    // Note the card range starts at get_region_start, not at heap_segment_mem.
    clear_card_for_addresses (get_region_start (region), heap_segment_reserved (region));

#ifdef BACKGROUND_GC
    ::record_changed_seg ((uint8_t*)region, heap_segment_reserved (region),
                        settings.gc_index, current_bgc_state,
                        seg_deleted);

    bgc_verify_mark_array_cleared (region);
#endif //BACKGROUND_GC
}

// Note that returning a region to free does not decommit.
//
// Puts `region` on this heap's free region lists. Its committed bytes are moved
// from the region's object-heap bucket to the free bucket in committed_by_oh,
// clear_region_info is run on it, and every basic region it spans gets its
// allocated pointer (and, with MULTIPLE_HEAPS, its owning heap) reset to 0.
void gc_heap::return_free_region (heap_segment* region)
{
    gc_oh_num oh = heap_segment_oh (region);
    dprintf(3, ("commit-accounting:  from %d to free [%p, %p) for heap %d", oh, get_region_start (region), heap_segment_committed (region), heap_number));
    {
        size_t committed = heap_segment_committed (region) - get_region_start (region);
        if (committed > 0)
        {
            // The commit counters are only touched while holding check_commit_cs.
            check_commit_cs.Enter();
            assert (committed_by_oh[oh] >= committed);
            committed_by_oh[oh] -= committed;
            committed_by_oh[recorded_committed_free_bucket] += committed;
#if defined(MULTIPLE_HEAPS) && defined(_DEBUG)
            // The per-heap counter is only decremented; no per-heap free bucket is updated here.
            assert (committed_by_oh_per_heap[oh] >= committed);
            committed_by_oh_per_heap[oh] -= committed;
#endif // MULTIPLE_HEAPS && _DEBUG
            check_commit_cs.Leave();
        }
    }
    clear_region_info (region);

    region_free_list::add_region_descending (region, free_regions);

    uint8_t* region_start = get_region_start (region);
    uint8_t* region_end = heap_segment_reserved (region);

    // A region may span several basic regions of (1 << min_segment_size_shr) bytes each.
    int num_basic_regions = (int)((region_end - region_start) >> min_segment_size_shr);
    dprintf (REGIONS_LOG, ("RETURNING region %p (%d basic regions) to free",
        heap_segment_mem (region), num_basic_regions));
    for (int i = 0; i < num_basic_regions; i++)
    {
        uint8_t* basic_region_start = region_start + ((size_t)i << min_segment_size_shr);
        heap_segment* basic_region = get_region_info (basic_region_start);
        heap_segment_allocated (basic_region) = 0;
#ifdef MULTIPLE_HEAPS
        heap_segment_heap (basic_region) = 0;
#endif //MULTIPLE_HEAPS

        // I'm intentionally not resetting gen_num/plan_gen_num which will show us
        // which gen/plan gen this region was and that's useful for debugging.
    }
}

// USE_REGIONS TODO: SOH should be able to get a large region and split it up into basic regions
// if needed.
// USE_REGIONS TODO: In Server GC we should allow to get a free region from another heap.
//
// Gets a region for generation gen_number.
//   - gen_number <= max_generation: size must be 0; a region is taken from the
//     basic free list.
//   - otherwise: size must be at least the large region alignment; an exact match
//     is served from the large free list, anything bigger from the local huge
//     free list and then the global huge free list.
// If no free region is found, a new one is requested from allocate_new_region.
// Returns 0 if no region could be obtained or init_table_for_region fails.
heap_segment* gc_heap::get_free_region (int gen_number, size_t size)
{
    heap_segment* region = 0;

    if (gen_number <= max_generation)
    {
        assert (size == 0);
        region = free_regions[basic_free_region].unlink_region_front();
    }
    else
    {
        const size_t LARGE_REGION_SIZE = global_region_allocator.get_large_region_alignment();

        assert (size >= LARGE_REGION_SIZE);
        if (size == LARGE_REGION_SIZE)
        {
            // get it from the local list of large free regions if possible
            region = free_regions[large_free_region].unlink_region_front();
        }
        else
        {
            // get it from the local list of huge free regions if possible
            region = free_regions[huge_free_region].unlink_smallest_region (size);
            if (region == nullptr)
            {
                // The global list is only touched after asserting the gc lock is held.
                if (settings.pause_mode == pause_no_gc)
                {
                    // In case of no-gc-region, the gc lock is being held by the thread
                    // triggering the GC.
                    assert (gc_lock.holding_thread != (Thread*)-1);
                }
                else
                {
                    ASSERT_HOLDING_SPIN_LOCK(&gc_lock);
                }

                // get it from the global list of huge free regions
                region = global_free_huge_regions.unlink_smallest_region (size);
            }
        }
    }

    if (region)
    {
        // Reusing a free region: re-initialize it for this heap and generation
        // (the trailing `true` is init_heap_segment's existing_region_p argument).
        uint8_t* region_start = get_region_start (region);
        uint8_t* region_end = heap_segment_reserved (region);
        init_heap_segment (region, __this, region_start,
                           (region_end - region_start),
                           gen_number, true);

        gc_oh_num oh = gen_to_oh (gen_number);
        dprintf(3, ("commit-accounting:  from free to %d [%p, %p) for heap %d", oh, get_region_start (region), heap_segment_committed (region), heap_number));
        {
            // Move the region's committed bytes from the free bucket to its object heap's bucket.
            size_t committed = heap_segment_committed (region) - get_region_start (region);
            if (committed > 0)
            {
                check_commit_cs.Enter();
                committed_by_oh[oh] += committed;
                assert (committed_by_oh[recorded_committed_free_bucket] >= committed);
                committed_by_oh[recorded_committed_free_bucket] -= committed;
#if defined(MULTIPLE_HEAPS) && defined(_DEBUG)
                committed_by_oh_per_heap[oh] += committed;
#endif // MULTIPLE_HEAPS && _DEBUG
                check_commit_cs.Leave();
            }
        }

        dprintf (REGIONS_LOG, ("h%d GFR get region %zx (%p-%p) for gen%d",
            heap_number, (size_t)region,
            region_start, region_end,
            gen_number));

        // Something is wrong if a free region is already filled
        assert (heap_segment_allocated(region) == heap_segment_mem (region));
    }
    else
    {
        // Nothing on the free lists; the third argument is true for gen_number > max_generation.
        region = allocate_new_region (__this, gen_number, (gen_number > max_generation), size);
    }

    if (region)
    {
        // NOTE(review): on failure the region pointer is simply dropped here; presumably
        // init_table_for_region disposes of the region itself - confirm.
        if (!init_table_for_region (gen_number, region))
        {
            region = 0;
        }
    }

    return region;
}

// Returns the seg_mapping_table entry, viewed as a heap_segment, for the basic
// region that contains obj. If obj lives in a large region, the entry returned
// may not be the one for the start of that region.
heap_segment* gc_heap::region_of (uint8_t* obj)
{
    size_t basic_region_index = (size_t)obj >> gc_heap::min_segment_size_shr;
    return (heap_segment*)(&seg_mapping_table[basic_region_index]);
}

// Returns the seg_mapping_table entry, viewed as a heap_segment, for the basic
// region at `index` counted from the basic region containing g_gc_lowest_address.
heap_segment* gc_heap::get_region_at_index (size_t index)
{
    size_t lowest_basic_region_index = (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr;
    size_t table_index = index + lowest_basic_region_index;
    return (heap_segment*)(&seg_mapping_table[table_index]);
}

// Debug-only sanity check for a region, handy to sprinkle wherever regions are
// being threaded: breaks into the debugger if the region's memory is outside the
// GC address range, then logs its gen and plan gen numbers. A no-op unless _DEBUG.
void gc_heap::check_seg_gen_num (heap_segment* seg)
{
#ifdef _DEBUG
    uint8_t* seg_mem = heap_segment_mem (seg);

    bool in_gc_range_p = (g_gc_lowest_address <= seg_mem) && (seg_mem < g_gc_highest_address);
    if (!in_gc_range_p)
    {
        GCToOSInterface::DebugBreak();
    }

    int gen_num = get_region_gen_num (seg_mem);
    int plan_gen_num = get_region_plan_gen_num (seg_mem);
    dprintf (3, ("seg %p->%p, num %d, %d",
        seg, seg_mem, gen_num, plan_gen_num));
#endif //_DEBUG
}

// Returns the generation number stored on the region itself.
int gc_heap::get_region_gen_num (heap_segment* region)
{
    int gen_num = heap_segment_gen_num (region);
    return gen_num;
}

// Returns the generation number of the region containing obj, read from the
// skewed region-to-generation map. Asserts that it is an SOH generation and that
// it agrees with the value stored on the region.
int gc_heap::get_region_gen_num (uint8_t* obj)
{
    size_t skewed_index = get_skewed_basic_region_index_for_address (obj);
    int gen_num = map_region_to_generation_skewed[skewed_index] & gc_heap::RI_GEN_MASK;

    assert ((gen_num >= soh_gen0) && (gen_num <= soh_gen2));
    assert (heap_segment_gen_num (region_of (obj)) == gen_num);

    return gen_num;
}

// Returns the planned generation number of the region containing obj, read from
// the skewed region-to-generation map. Asserts that it is an SOH generation and
// that it agrees with the plan gen stored on the region.
int gc_heap::get_region_plan_gen_num (uint8_t* obj)
{
    size_t skewed_index = get_skewed_basic_region_index_for_address (obj);
    int plan_gen_num = map_region_to_generation_skewed[skewed_index] >> gc_heap::RI_PLAN_GEN_SHR;

    assert ((plan_gen_num >= soh_gen0) && (plan_gen_num <= soh_gen2));
    assert (heap_segment_plan_gen_num (region_of (obj)) == plan_gen_num);

    return plan_gen_num;
}

// Tells whether the RI_DEMOTED bit is set in the skewed region-to-generation map
// entry for the region containing obj. Asserts agreement with the region's own
// demoted flag.
bool gc_heap::is_region_demoted (uint8_t* obj)
{
    size_t skewed_index = get_skewed_basic_region_index_for_address (obj);

    bool demoted_p = false;
    if ((map_region_to_generation_skewed[skewed_index] & gc_heap::RI_DEMOTED) != 0)
    {
        demoted_p = true;
    }

    assert (heap_segment_demoted_p (region_of (obj)) == demoted_p);
    return demoted_p;
}

// Lock taken by set_region_gen_num while it widens the ephemeral range and
// updates the write barrier. A lock value of -1 is the released state (see the
// store at the end of set_region_gen_num).
static GCSpinLock write_barrier_spin_lock;

// Records gen_num as the generation of `region`: on the region itself and, for
// every basic region it spans, in map_region_to_generation (where gen and plan gen
// are both set to gen_num). If gen_num is at most soh_gen1 and the region lies
// outside [ephemeral_low, ephemeral_high], the ephemeral range is widened to
// include it and the write barrier is updated, under write_barrier_spin_lock.
inline
void gc_heap::set_region_gen_num (heap_segment* region, int gen_num)
{
    // gen_num has to fit in the uint8_t it is stored as.
    assert (gen_num < (1 << (sizeof (uint8_t) * 8)));
    assert (gen_num >= 0);
    heap_segment_gen_num (region) = (uint8_t)gen_num;

    uint8_t* region_start = get_region_start (region);
    uint8_t* region_end = heap_segment_reserved (region);

    size_t region_index_start = get_basic_region_index_for_address (region_start);
    size_t region_index_end = get_basic_region_index_for_address (region_end);
    // Map entry with plan gen and gen both equal to gen_num and no other bits set.
    region_info entry = (region_info)((gen_num << RI_PLAN_GEN_SHR) | gen_num);
    for (size_t region_index = region_index_start; region_index < region_index_end; region_index++)
    {
        assert (gen_num <= max_generation);
        map_region_to_generation[region_index] = entry;
    }
    if (gen_num <= soh_gen1)
    {
        // Unlocked check first; it is repeated once the lock is held.
        if ((region_start < ephemeral_low) || (ephemeral_high < region_end))
        {
            while (true)
            {
                // Try to take the lock by swapping -1 -> 0; a negative previous value means we
                // got it (assuming the (destination, exchange, comparand) argument order - confirm).
                if (Interlocked::CompareExchange(&write_barrier_spin_lock.lock, 0, -1) < 0)
                    break;

                // Somebody else holds the lock. If the range has meanwhile grown to cover
                // this region there is nothing left to do, so leave without the lock.
                if ((ephemeral_low <= region_start) && (region_end <= ephemeral_high))
                    return;

                while (write_barrier_spin_lock.lock >= 0)
                {
                    YieldProcessor();           // indicate to the processor that we are spinning
                }
            }
#ifdef _DEBUG
            write_barrier_spin_lock.holding_thread = GCToEEInterface::GetThread();
#endif //_DEBUG

            // Re-check now that we hold the lock.
            if ((region_start < ephemeral_low) || (ephemeral_high < region_end))
            {
                uint8_t* new_ephemeral_low = min (region_start, (uint8_t*)ephemeral_low);
                uint8_t* new_ephemeral_high = max (region_end, (uint8_t*)ephemeral_high);

                dprintf (REGIONS_LOG, ("about to set ephemeral_low = %p ephemeral_high = %p", new_ephemeral_low, new_ephemeral_high));

                stomp_write_barrier_ephemeral (new_ephemeral_low, new_ephemeral_high,
                                               map_region_to_generation_skewed, (uint8_t)min_segment_size_shr);

                // we should only *decrease* ephemeral_low and only *increase* ephemeral_high
                if (ephemeral_low < new_ephemeral_low)
                    GCToOSInterface::DebugBreak ();
                if (new_ephemeral_high < ephemeral_high)
                    GCToOSInterface::DebugBreak ();

                // only set the globals *after* we have updated the write barrier
                ephemeral_low = new_ephemeral_low;
                ephemeral_high = new_ephemeral_high;

                dprintf (REGIONS_LOG, ("set ephemeral_low = %p ephemeral_high = %p", new_ephemeral_low, new_ephemeral_high));
            }
            else
            {
                dprintf (REGIONS_LOG, ("leaving lock - no need to update ephemeral range [%p,%p[ for region [%p,%p]", (uint8_t*)ephemeral_low, (uint8_t*)ephemeral_high, region_start, region_end));
            }
#ifdef _DEBUG
            write_barrier_spin_lock.holding_thread = (Thread*)-1;
#endif //_DEBUG
            // Release the lock.
            write_barrier_spin_lock.lock = -1;
        }
        else
        {
            dprintf (REGIONS_LOG, ("no need to update ephemeral range [%p,%p[ for region [%p,%p]", (uint8_t*)ephemeral_low, (uint8_t*)ephemeral_high, region_start, region_end));
        }
    }
}

// Sets the planned generation of `region` to plan_gen_num, on the region and in
// the map_region_to_generation entry of every basic region it spans, and bumps
// planned_regions_per_gen[plan_gen_num].
// The region is flagged as demoted when plan_gen_num is lower than what
// get_plan_gen_num gives for its current generation and it has pinned survivors;
// otherwise the demoted flag is cleared.
// replace_p: the region already had a planned generation counted, so that
// generation's planned region count is decremented first.
inline
void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num, bool replace_p)
{
    int gen_num = heap_segment_gen_num (region);
    int supposed_plan_gen_num = get_plan_gen_num (gen_num);
    dprintf (REGIONS_LOG, ("h%d setting plan gen on %p->%p(was gen%d) to %d(should be: %d) %s",
        heap_number, region,
        heap_segment_mem (region),
        gen_num, plan_gen_num,
        supposed_plan_gen_num,
        ((plan_gen_num < supposed_plan_gen_num) ? "DEMOTED" : "ND")));
    region_info region_info_bits_to_set = (region_info)(plan_gen_num << RI_PLAN_GEN_SHR);
    // Unlike the log line above, actual demotion also requires pinned survivors in the region.
    if ((plan_gen_num < supposed_plan_gen_num) && (heap_segment_pinned_survived (region) != 0))
    {
        if (!settings.demotion)
        {
            settings.demotion = TRUE;
        }
        get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit);
        region->flags |= heap_segment_flags_demoted;
        region_info_bits_to_set = (region_info)(region_info_bits_to_set | RI_DEMOTED);
    }
    else
    {
        region->flags &= ~heap_segment_flags_demoted;
    }

    // If replace_p is true, it means we need to move a region from its original planned gen to this new gen.
    if (replace_p)
    {
        int original_plan_gen_num = heap_segment_plan_gen_num (region);
        planned_regions_per_gen[original_plan_gen_num]--;
    }

    planned_regions_per_gen[plan_gen_num]++;
    dprintf (REGIONS_LOG, ("h%d g%d %zx(%zx) -> g%d (total %d region planned in g%d)",
        heap_number, heap_segment_gen_num (region), (size_t)region, heap_segment_mem (region), plan_gen_num, planned_regions_per_gen[plan_gen_num], plan_gen_num));

    heap_segment_plan_gen_num (region) = plan_gen_num;

    uint8_t* region_start = get_region_start (region);
    uint8_t* region_end = heap_segment_reserved (region);

    size_t region_index_start = get_basic_region_index_for_address (region_start);
    size_t region_index_end = get_basic_region_index_for_address (region_end);
    for (size_t region_index = region_index_start; region_index < region_index_end; region_index++)
    {
        assert (plan_gen_num <= max_generation);
        // Replace only the plan-gen and demoted bits; all other bits of the entry are preserved.
        map_region_to_generation[region_index] = (region_info)(region_info_bits_to_set | (map_region_to_generation[region_index] & ~(RI_PLAN_GEN_MASK|RI_DEMOTED)));
    }
}

// Sets the planned generation of `region` unless the region is marked as swept
// in plan, in which case nothing is changed.
inline
void gc_heap::set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_num)
{
    if (heap_segment_swept_in_plan (region))
    {
        return;
    }

    set_region_plan_gen_num (region, plan_gen_num);
}

// Marks `region` as swept in plan, both on the region and by setting RI_SIP in
// its map_region_to_generation entry. Only the entry for the region's start is
// updated, so the region is expected to be a single basic region.
void gc_heap::set_region_sweep_in_plan (heap_segment*region)
{
    heap_segment_swept_in_plan (region) = true;

    // this should be a basic region
    assert (get_region_size (region) == global_region_allocator.get_region_alignment());

    size_t basic_region_index = get_basic_region_index_for_address (get_region_start (region));
    region_info updated_entry = (region_info)(map_region_to_generation[basic_region_index] | RI_SIP);
    map_region_to_generation[basic_region_index] = updated_entry;
}

// Clears the swept-in-plan state of `region`, both on the region and by removing
// RI_SIP from its map_region_to_generation entry. Only the entry for the region's
// start is updated, so the region is expected to be a single basic region.
void gc_heap::clear_region_sweep_in_plan (heap_segment*region)
{
    heap_segment_swept_in_plan (region) = false;

    // this should be a basic region
    assert (get_region_size (region) == global_region_allocator.get_region_alignment());

    size_t basic_region_index = get_basic_region_index_for_address (get_region_start (region));
    region_info updated_entry = (region_info)(map_region_to_generation[basic_region_index] & ~RI_SIP);
    map_region_to_generation[basic_region_index] = updated_entry;
}

// Clears the demoted state of `region`, both in the region's flags and by removing
// RI_DEMOTED from its map_region_to_generation entry. Only the entry for the
// region's start is updated, so the region is expected to be a single basic region.
void gc_heap::clear_region_demoted (heap_segment* region)
{
    region->flags &= ~heap_segment_flags_demoted;

    // this should be a basic region
    assert (get_region_size (region) == global_region_allocator.get_region_alignment());

    size_t basic_region_index = get_basic_region_index_for_address (get_region_start (region));
    region_info updated_entry = (region_info)(map_region_to_generation[basic_region_index] & ~RI_DEMOTED);
    map_region_to_generation[basic_region_index] = updated_entry;
}
#endif //USE_REGIONS

// Returns the generation that gen_number is planned into for this GC: the same
// generation when not promoting, otherwise the next one up, capped at max_generation.
int gc_heap::get_plan_gen_num (int gen_number)
{
    if (!settings.promotion)
    {
        return gen_number;
    }

    return min ((gen_number + 1), (int)max_generation);
}

// Returns the address of the first object to look at in a UOH generation.
// With regions that is simply the start of the region's memory; otherwise it is
// the address right after the free "generation gap" object that sits at the
// generation's allocation start.
uint8_t* gc_heap::get_uoh_start_object (heap_segment* region, generation* gen)
{
#ifdef USE_REGIONS
    return heap_segment_mem (region);
#else
    uint8_t* gap_obj = generation_allocation_start (gen);
    assert(((CObjectHeader*)gap_obj)->IsFree());

    size_t gap_size = Align (size (gap_obj), get_alignment_constant (FALSE));
    assert (gap_size == AlignQword (min_obj_size));

    //Skip the generation gap object
    return (gap_obj + gap_size);
#endif //USE_REGIONS
}

// Returns the start object address for an SOH generation: the start of the
// region's memory with regions, the generation's allocation start otherwise.
uint8_t* gc_heap::get_soh_start_object (heap_segment* region, generation* gen)
{
#ifdef USE_REGIONS
    return heap_segment_mem (region);
#else
    return generation_allocation_start (gen);
#endif //USE_REGIONS
}

// Returns the length to skip past the SOH start object: 0 with regions, the
// aligned size of start_obj otherwise.
size_t gc_heap::get_soh_start_obj_len (uint8_t* start_obj)
{
#ifdef USE_REGIONS
    size_t start_obj_len = 0;
#else
    size_t start_obj_len = Align (size (start_obj));
#endif //USE_REGIONS
    return start_obj_len;
}

void gc_heap::clear_gen1_cards()
{
#if defined(_DEBUG) && !defined(USE_REGIONS)
    for (int x = 0; x <= max_generation; x++)
    {
        assert (generation_allocation_start (generation_of (x)));
    }
#endif //_DEBUG && !USE_REGIONS

    if (!settings.demotion && settings.promotion)
    {
        //clear card for generation 1. generation 0 is empty
#ifdef USE_REGIONS
        heap_segment* region = generation_start_segment (generation_of (1));
        while (region)
        {
            clear_card_for_addresses (get_region_start (region), heap_segment_reserved (region));
            region = heap_segment_next (region);
        }
#else //USE_REGIONS
        clear_card_for_addresses (
            generation_allocation_start (generation_of (1)),
            generation_allocation_start (generation_of (0)));
#endif //USE_REGIONS

#ifdef _DEBUG
        uint8_t* start = get_soh_start_object (ephemeral_heap_segment, youngest_generation);
        assert (heap_segment_allocated (ephemeral_heap_segment) ==
                (start + get_soh_start_obj_len (start)));
#endif //_DEBUG
    }
}

// Turns the reserved range [new_pages, new_pages + size) into a heap segment
// (a region when USE_REGIONS is defined) for heap hp and generation gen_num.
// Commits the initial part of the range, fills in the mem/used/reserved/committed
// fields and runs init_heap_segment. Returns 0 if the initial commit fails.
heap_segment* gc_heap::make_heap_segment (uint8_t* new_pages, size_t size, gc_heap* hp, int gen_num)
{
    gc_oh_num oh = gen_to_oh (gen_num);
    // With large pages the whole range is committed up front.
    size_t initial_commit = use_large_pages_p ? size : SEGMENT_INITIAL_COMMIT;
    int h_number =
#ifdef MULTIPLE_HEAPS
        hp->heap_number;
#else
        0;
#endif //MULTIPLE_HEAPS

    if (!virtual_commit (new_pages, initial_commit, oh, h_number))
    {
        log_init_error_to_host ("Committing %zd bytes for a region failed", initial_commit);
        return 0;
    }

#ifdef USE_REGIONS
    dprintf (REGIONS_LOG, ("Making region %p->%p(%zdmb)",
        new_pages, (new_pages + size), (size / 1024 / 1024)));
    // With regions the heap_segment record comes from get_region_info rather than
    // from the start of the pages; usable memory starts after an aligned_plug_and_gap.
    heap_segment* new_segment = get_region_info (new_pages);
    uint8_t* start = new_pages + sizeof (aligned_plug_and_gap);
#else
    // Without regions the heap_segment header lives at the start of the pages and
    // usable memory starts segment_info_size bytes in.
    heap_segment* new_segment = (heap_segment*)new_pages;
    uint8_t* start = new_pages + segment_info_size;
#endif //USE_REGIONS
    heap_segment_mem (new_segment) = start;
    heap_segment_used (new_segment) = start;
    heap_segment_reserved (new_segment) = new_pages + size;
    heap_segment_committed (new_segment) = new_pages + initial_commit;

    init_heap_segment (new_segment, hp
#ifdef USE_REGIONS
                       , new_pages, size, gen_num
#endif //USE_REGIONS
                       );
    dprintf (2, ("Creating heap segment %zx", (size_t)new_segment));

    return new_segment;
}

// (Re)initializes the bookkeeping fields of seg for heap hp: flags, next link and
// the various allocated pointers are reset to the segment's mem start.
// With USE_REGIONS:
//   start, size        - the range the region covers, used to walk its basic regions.
//   gen_num            - generation the region is for; clamped to max_generation
//                        before it is stored.
//   existing_region_p  - with BACKGROUND_GC, keeps the heap_segment_flags_ma_committed
//                        bit of seg->flags instead of clearing all flags.
void gc_heap::init_heap_segment (heap_segment* seg, gc_heap* hp
#ifdef USE_REGIONS
                                 , uint8_t* start, size_t size, int gen_num, bool existing_region_p
#endif //USE_REGIONS
    )
{
#ifndef USE_REGIONS
    bool existing_region_p = false;
#endif //!USE_REGIONS
#ifdef BACKGROUND_GC
    seg->flags = existing_region_p ? (seg->flags & heap_segment_flags_ma_committed) : 0;
#else
    seg->flags = 0;
#endif
    heap_segment_next (seg) = 0;
    heap_segment_plan_allocated (seg) = heap_segment_mem (seg);
    heap_segment_allocated (seg) = heap_segment_mem (seg);
    heap_segment_saved_allocated (seg) = heap_segment_mem (seg);
#if !defined(USE_REGIONS) || defined(MULTIPLE_HEAPS)
    heap_segment_decommit_target (seg) = heap_segment_reserved (seg);
#endif //!USE_REGIONS || MULTIPLE_HEAPS
#ifdef BACKGROUND_GC
    heap_segment_background_allocated (seg) = 0;
    heap_segment_saved_bg_allocated (seg) = 0;
#endif //BACKGROUND_GC

#ifdef MULTIPLE_HEAPS
    heap_segment_heap (seg) = hp;
#endif //MULTIPLE_HEAPS

#ifdef USE_REGIONS
    // Generation numbers above max_generation are stored as max_generation.
    int gen_num_for_region = min (gen_num, (int)max_generation);
    set_region_gen_num (seg, gen_num_for_region);
    heap_segment_plan_gen_num (seg) = gen_num_for_region;
    heap_segment_swept_in_plan (seg) = false;
#endif //USE_REGIONS

#ifdef USE_REGIONS
    int num_basic_regions = (int)(size >> min_segment_size_shr);
    size_t basic_region_size = (size_t)1 << min_segment_size_shr;
    dprintf (REGIONS_LOG, ("this region contains %d basic regions", num_basic_regions));
    if (num_basic_regions > 1)
    {
        // Initialize the entries of the basic regions that follow the first one.
        for (int i = 1; i < num_basic_regions; i++)
        {
            uint8_t* basic_region_start = start + (i * basic_region_size);
            heap_segment* basic_region = get_region_info (basic_region_start);
            // The allocated field of a trailing basic region holds -i, its negated index
            // within the region (presumably so readers can get back to the first basic
            // region - verify against the readers of this field).
            heap_segment_allocated (basic_region) = (uint8_t*)(ptrdiff_t)-i;
            dprintf (REGIONS_LOG, ("Initing basic region %p->%p(%zdmb) alloc to %p",
                basic_region_start, (basic_region_start + basic_region_size),
                (size_t)(basic_region_size / 1024 / 1024),
                heap_segment_allocated (basic_region)));

            heap_segment_gen_num (basic_region) = (uint8_t)gen_num_for_region;
            heap_segment_plan_gen_num (basic_region) = gen_num_for_region;

#ifdef MULTIPLE_HEAPS
            heap_segment_heap (basic_region) = hp;
#endif //MULTIPLE_HEAPS
        }
    }
#endif //USE_REGIONS
}

// Gives up a segment.
// With USE_REGIONS the region is handed to return_free_region (consider_hoarding
// is not used). Otherwise the segment is either hoarded on segment_standby_list
// (when consider_hoarding is set and it is no larger than INITIAL_ALLOC) or
// released to the OS.
// this is always called on one thread only so calling seg_table->remove is fine.
void gc_heap::delete_heap_segment (heap_segment* seg, BOOL consider_hoarding)
{
    if (!heap_segment_uoh_p (seg))
    {
        //cleanup the brick table back to the empty value
        clear_brick_table (heap_segment_mem (seg), heap_segment_reserved (seg));
    }

#ifdef USE_REGIONS
    return_free_region (seg);
#else // USE_REGIONS
    if (consider_hoarding)
    {
        assert ((heap_segment_mem (seg) - (uint8_t*)seg) <= ptrdiff_t(2*OS_PAGE_SIZE));
        size_t ss = (size_t) (heap_segment_reserved (seg) - (uint8_t*)seg);
        //Don't keep the big ones.
        if (ss <= INITIAL_ALLOC)
        {
            dprintf (2, ("Hoarding segment %zx", (size_t)seg));
#ifdef BACKGROUND_GC
            // We don't need to clear the decommitted flag because when this segment is used
            // for a new segment the flags will be cleared.
            if (!heap_segment_decommitted_p (seg))
#endif //BACKGROUND_GC
            {
                decommit_heap_segment (seg);
            }

            seg_mapping_table_remove_segment (seg);

            // Push the segment on the standby list; seg = 0 skips the release path below.
            heap_segment_next (seg) = segment_standby_list;
            segment_standby_list = seg;
            seg = 0;
        }
    }

    if (seg != 0)
    {
        dprintf (2, ("h%d: del seg: [%zx, %zx[",
                     heap_number, (size_t)seg,
                     (size_t)(heap_segment_reserved (seg))));

#ifdef BACKGROUND_GC
        ::record_changed_seg ((uint8_t*)seg, heap_segment_reserved (seg),
                            settings.gc_index, current_bgc_state,
                            seg_deleted);
        bgc_verify_mark_array_cleared (seg);

        decommit_mark_array_by_seg (seg);
#endif //BACKGROUND_GC

        seg_mapping_table_remove_segment (seg);
        release_segment (seg);
    }
#endif //USE_REGIONS
}

// Resets the committed pages beyond the allocated size so they won't be swapped
// out and back in. Covers everything from the first page boundary at or after the
// allocated pointer up to the committed end; does nothing if that span is empty.

void gc_heap::reset_heap_segment_pages (heap_segment* seg)
{
    size_t reset_start = align_on_page ((size_t)heap_segment_allocated (seg));
    size_t reset_size = (size_t)heap_segment_committed (seg) - reset_start;
    if (reset_size == 0)
    {
        return;
    }

    GCToOSInterface::VirtualReset((void*)reset_start, reset_size, false /* unlock */);
}

// Decommits the tail of seg's committed space beyond what is allocated, keeping
// at least max(extra_space rounded up to a page, 32 pages) committed past the
// allocated pointer. Nothing is decommitted when large pages are in use or when
// the committed-but-unallocated space is below
// max(extra_space + 2 pages, MIN_DECOMMIT_SIZE).
void gc_heap::decommit_heap_segment_pages (heap_segment* seg,
                                           size_t extra_space)
{
    if (use_large_pages_p)
    {
        return;
    }

    uint8_t* first_free_page = align_on_page (heap_segment_allocated(seg));
    assert (first_free_page <= heap_segment_committed (seg));

    size_t committed_past_alloc = heap_segment_committed (seg) - first_free_page;
    extra_space = align_on_page (extra_space);

    size_t min_size_to_bother = max ((extra_space + 2*OS_PAGE_SIZE), MIN_DECOMMIT_SIZE);
    if (committed_past_alloc < min_size_to_bother)
    {
        return;
    }

    uint8_t* new_committed = first_free_page + max(extra_space, 32*OS_PAGE_SIZE);
    decommit_heap_segment_pages_worker (seg, new_committed);
}

// Decommits seg's committed space from new_committed (rounded up to a page
// boundary) to the current committed end, then pulls the committed and, if
// needed, used pointers back. Must not be called when large pages are in use.
// Returns the computed size of that span converted to size_t. Note that this
// value is returned whether or not the decommit actually succeeded.
size_t gc_heap::decommit_heap_segment_pages_worker (heap_segment* seg,
                                                    uint8_t* new_committed)
{
    assert (!use_large_pages_p);

    uint8_t* decommit_start = align_on_page (new_committed);
    ptrdiff_t decommit_size = heap_segment_committed (seg) - decommit_start;
    if (decommit_size <= 0)
    {
        // Nothing beyond the requested point is committed.
        return decommit_size;
    }

    if (!virtual_decommit (decommit_start, (size_t)decommit_size, heap_segment_oh (seg), heap_number))
    {
        dprintf (3, ("Decommitting heap segment failed"));
        return decommit_size;
    }

    dprintf (3, ("Decommitting heap segment [%zx, %zx[(%zd)",
        (size_t)decommit_start,
        (size_t)(decommit_start + decommit_size),
        decommit_size));

    heap_segment_committed (seg) = decommit_start;
    // used may never point past committed.
    if (heap_segment_used (seg) > heap_segment_committed (seg))
    {
        heap_segment_used (seg) = heap_segment_committed (seg);
    }

    return decommit_size;
}

// Decommits everything in seg from the first page boundary at or after its mem
// start (one page later with BACKGROUND_GC and no regions) up to the committed
// end, then pulls the committed and used pointers back on success.
// With regions this is a no-op unless dt_high_memory_load_p() is true.
void gc_heap::decommit_heap_segment (heap_segment* seg)
{
#ifdef USE_REGIONS
    if (!dt_high_memory_load_p())
    {
        return;
    }
#endif

    uint8_t* decommit_start = align_on_page (heap_segment_mem (seg));

    dprintf (3, ("Decommitting heap segment %zx(%p)", (size_t)seg, heap_segment_mem (seg)));

#if defined(BACKGROUND_GC) && !defined(USE_REGIONS)
    decommit_start += OS_PAGE_SIZE;
#endif //BACKGROUND_GC && !USE_REGIONS

    assert (heap_segment_committed (seg) >= decommit_start);
    size_t decommit_size = heap_segment_committed (seg) - decommit_start;

    if (!virtual_decommit (decommit_start, decommit_size, heap_segment_oh (seg), heap_number))
    {
        // Leave the segment fields untouched when the decommit did not happen.
        return;
    }

    //re-init the segment object
    heap_segment_committed (seg) = decommit_start;
    if (heap_segment_used (seg) > decommit_start)
    {
        heap_segment_used (seg) = decommit_start;
    }
}

// Sets every brick covering gen0 to -1, once: the work is skipped when
// gen0_bricks_cleared is already set, and the flag is set here.
// With regions this walks every region of gen0; otherwise it covers the ephemeral
// segment from gen0's allocation start.
void gc_heap::clear_gen0_bricks()
{
    if (!gen0_bricks_cleared)
    {
        gen0_bricks_cleared = TRUE;
        //initialize brick table for gen 0
        // NOTE: the #ifdef below selects between a loop over regions and a plain
        // scope; both share the body and the closing brace that follow.
#ifdef USE_REGIONS
        heap_segment* gen0_region = generation_start_segment (generation_of (0));
        while (gen0_region)
        {
            uint8_t* clear_start = heap_segment_mem (gen0_region);
#else
        heap_segment* gen0_region = ephemeral_heap_segment;
        uint8_t* clear_start = generation_allocation_start (generation_of (0));
        {
#endif //USE_REGIONS
            // Bricks from clear_start up to the allocated end rounded up to a brick boundary.
            for (size_t b = brick_of (clear_start);
                    b < brick_of (align_on_brick
                                (heap_segment_allocated (gen0_region)));
                    b++)
            {
                set_brick (b, -1);
            }

#ifdef USE_REGIONS
            gen0_region = heap_segment_next (gen0_region);
#endif //USE_REGIONS
        }
    }
}

// Consistency check: once gen0_bricks_cleared is set, no brick covering gen0 may
// still be 0. Asserts and breaks into the debugger on the first such brick.
// The _DEBUG guard around the body is commented out, so the check is compiled
// into every build.
void gc_heap::check_gen0_bricks()
{
//#ifdef _DEBUG
    if (gen0_bricks_cleared)
    {
        // NOTE: the #ifdef below selects between a loop over regions and a plain
        // scope; both share the body and the closing brace that follow.
#ifdef USE_REGIONS
        heap_segment* gen0_region = generation_start_segment (generation_of (0));
        while (gen0_region)
        {
            uint8_t* start = heap_segment_mem (gen0_region);
#else
        heap_segment* gen0_region = ephemeral_heap_segment;
        uint8_t* start = generation_allocation_start (generation_of (0));
        {
#endif //USE_REGIONS
            // The end is not rounded up here, so the brick containing the allocated
            // pointer itself is not checked.
            size_t end_b = brick_of (heap_segment_allocated (gen0_region));
            for (size_t b = brick_of (start); b < end_b; b++)
            {
                assert (brick_table[b] != 0);
                if (brick_table[b] == 0)
                {
                    GCToOSInterface::DebugBreak();
                }
            }

#ifdef USE_REGIONS
            gen0_region = heap_segment_next (gen0_region);
#endif //USE_REGIONS
        }
    }
//#endif //_DEBUG
}

#ifdef BACKGROUND_GC
void gc_heap::rearrange_small_heap_segments()
{
    heap_segment* seg = freeable_soh_segment;
    while (seg)
    {
        heap_segment* next_seg = heap_segment_next (seg);
        // TODO: we need to consider hoarding here.
        delete_heap_segment (seg, FALSE);
        seg = next_seg;
    }
    freeable_soh_segment = 0;
}
#endif //BACKGROUND_GC

void gc_heap::rearrange_uoh_segments()
{
    dprintf (2, ("deleting empty large segments"));
    heap_segment* seg = freeable_uoh_segment;
    while (seg)
    {
        heap_segment* next_seg = heap_segment_next (seg);
        delete_heap_segment (seg, GCConfig::GetRetainVM());
        seg = next_seg;
    }
    freeable_uoh_segment = 0;
}

// Releases the segments queued for deferred deletion: the UOH list always, and
// (with BACKGROUND_GC) the background-delayed UOH segments plus, only while no
// background GC is running, the SOH list.
void gc_heap::delay_free_segments()
{
    rearrange_uoh_segments();
#ifdef BACKGROUND_GC
    background_delay_delete_uoh_segments();
    if (gc_heap::background_running_p())
    {
        // the SOH list is left alone while a background GC is in progress
        return;
    }
    rearrange_small_heap_segments();
#endif //BACKGROUND_GC
}

#ifndef USE_REGIONS
// Walks the segment chain that starts at max_generation's start segment and fixes it up:
// - ephemeral_heap_segment is unlinked from any interior position and re-linked so
//   that it ends up as the last segment of the chain;
// - a non-read-only segment whose end (plan_allocated when compacting, allocated
//   otherwise) is still at heap_segment_mem is unthreaded and deleted;
// - for each remaining non-read-only segment, allocated is set to plan_allocated when
//   compacting, and decommit_heap_segment_pages is called unless it is the ephemeral segment.
//
// compacting: selects plan_allocated vs. allocated as the segment end, see above.
void gc_heap::rearrange_heap_segments(BOOL compacting)
{
    heap_segment* seg =
        generation_start_segment (generation_of (max_generation));

    // prev_seg is the last segment that was kept in the chain (0 until one is kept)
    heap_segment* prev_seg = 0;
    heap_segment* next_seg = 0;
    while (seg)
    {
        next_seg = heap_segment_next (seg);

        //link ephemeral segment when expanding
        // (we reached the end of the chain without ending on the ephemeral segment,
        // so append it; it becomes the next segment to visit)
        if ((next_seg == 0) && (seg != ephemeral_heap_segment))
        {
            seg->next = ephemeral_heap_segment;
            next_seg = heap_segment_next (seg);
        }

        //re-used expanded heap segment
        // (the ephemeral segment sits in the middle of the chain: unlink it here,
        // the branch above appends it again once the end of the chain is reached)
        if ((seg == ephemeral_heap_segment) && next_seg)
        {
            // NOTE(review): prev_seg is dereferenced without a null check here; this
            // relies on the ephemeral segment never being first in this situation - confirm
            heap_segment_next (prev_seg) = next_seg;
            heap_segment_next (seg) = 0;
        }
        else
        {
            uint8_t* end_segment = (compacting ?
                                 heap_segment_plan_allocated (seg) :
                                 heap_segment_allocated (seg));
            // check if the segment was reached by allocation
            if ((end_segment == heap_segment_mem (seg))&&
                !heap_segment_read_only_p (seg))
            {
                //if not, unthread and delete
                assert (prev_seg);
                assert (seg != ephemeral_heap_segment);
                heap_segment_next (prev_seg) = next_seg;
                delete_heap_segment (seg, GCConfig::GetRetainVM());

                dprintf (2, ("Deleting heap segment %zx", (size_t)seg));
            }
            else
            {
                // read-only segments are kept in the chain untouched
                if (!heap_segment_read_only_p (seg))
                {
                    if (compacting)
                    {
                        // make the planned end the real end of the segment
                        heap_segment_allocated (seg) =
                            heap_segment_plan_allocated (seg);
                    }

                    // reset the pages between allocated and committed.
                    if (seg != ephemeral_heap_segment)
                    {
                        decommit_heap_segment_pages (seg, 0);
                    }
                }
                // only kept segments advance prev_seg
                prev_seg = seg;
            }
        }

        seg = next_seg;
    }
}
#endif //!USE_REGIONS

#if defined(USE_REGIONS)
// Shrinks src to at most target_count regions by moving regions, one at a time,
// from the front of src to the front of dest.
static void trim_region_list (region_free_list* dest, region_free_list* src, size_t target_count)
{
    for (;;)
    {
        if (src->get_num_free_regions() <= target_count)
        {
            break;
        }

        heap_segment* moved_region = src->unlink_region_front();
        dest->add_region_front (moved_region);
    }
}

// Moves regions from the front of src to the front of dest until dest holds
// target_count regions or src runs out. Returns the number of regions moved.
static int64_t grow_region_list (region_free_list* dest, region_free_list* src, size_t target_count)
{
    int64_t moved_count = 0;
    while ((dest->get_num_free_regions() < target_count) &&
           (src->get_num_free_regions() != 0))
    {
        heap_segment* moved_region = src->unlink_region_front();
        dest->add_region_front (moved_region);
        moved_count++;
    }
    return moved_count;
}

// Constructs an empty list: all counters and sizes are zero, and there is no
// head or tail region.
region_free_list::region_free_list()
    : num_free_regions (0),
      size_free_regions (0),
      size_committed_in_free_regions (0),
      num_free_regions_added (0),
      num_free_regions_removed (0),
      head_free_region (nullptr),
      tail_free_region (nullptr)
{
}

// Debug-only consistency check (the body is compiled only under _DEBUG).
// empty_p: whether the caller expects the list to be empty; the counters, sizes and
// head/tail pointers must all agree with it. For a non-empty list the chain is also
// walked in both directions to validate the links and the region count.
void region_free_list::verify (bool empty_p)
{
#ifdef _DEBUG
    assert ((num_free_regions == 0) == empty_p);
    assert ((size_free_regions == 0) == empty_p);
    assert ((size_committed_in_free_regions == 0) == empty_p);
    assert ((head_free_region == nullptr) == empty_p);
    assert ((tail_free_region == nullptr) == empty_p);
    assert (num_free_regions == (num_free_regions_added - num_free_regions_removed));

    if (empty_p)
    {
        return;
    }

    // the ends of the list must not point outside of it
    assert (heap_segment_next (tail_free_region) == nullptr);
    assert (heap_segment_prev_free_region (head_free_region) == nullptr);

    // forward walk: count the regions and remember where the walk ends
    size_t walk_count = 0;
    heap_segment* forward_end = nullptr;
    heap_segment* cursor = head_free_region;
    while (cursor != nullptr)
    {
        forward_end = cursor;
        walk_count++;
        cursor = heap_segment_next (cursor);
    }
    assert (num_free_regions == walk_count);
    assert (forward_end == tail_free_region);

    // backward walk: must visit exactly as many regions and end at the head
    heap_segment* backward_end = nullptr;
    cursor = tail_free_region;
    while (cursor != nullptr)
    {
        backward_end = cursor;
        walk_count--;
        cursor = heap_segment_prev_free_region (cursor);
    }
    assert (walk_count == 0);
    assert (head_free_region == backward_end);
#endif
}

// Makes the list empty without touching the regions themselves: head and tail are
// cleared and the current count and sizes are zeroed. The cumulative
// num_free_regions_added / num_free_regions_removed counters are not changed here.
void region_free_list::reset()
{
    head_free_region = nullptr;
    tail_free_region = nullptr;

    num_free_regions = 0;
    size_free_regions = 0;
    size_committed_in_free_regions = 0;
}

// Accounts for a region that has just been linked into this list: bumps the current
// and cumulative-added counts, adds the region's size and committed size to the
// totals, and then verifies the (now non-empty) list.
inline
void region_free_list::update_added_region_info (heap_segment* region)
{
    num_free_regions_added++;
    num_free_regions++;

    size_free_regions += get_region_size (region);
    size_committed_in_free_regions += get_region_committed_size (region);

    verify (false);
}

// Links region in as the new head of this list and updates the bookkeeping.
// The region must not currently be on any free list.
void region_free_list::add_region_front (heap_segment* region)
{
    assert (heap_segment_containing_free_list (region) == nullptr);
    heap_segment_containing_free_list(region) = this;

    heap_segment* old_head = head_free_region;
    if (old_head == nullptr)
    {
        // list was empty: the new region is also the tail
        tail_free_region = region;
    }
    else
    {
        heap_segment_prev_free_region(old_head) = region;
        assert (tail_free_region != nullptr);
    }

    heap_segment_prev_free_region (region) = nullptr;
    heap_segment_next (region) = old_head;
    head_free_region = region;

    update_added_region_info (region);
}

// This inserts fully committed regions at the head, otherwise it goes backward in the list till
// we find a region whose committed size is >= this region's committed or we reach the head.
//
// The region must not be on any free list yet; it is tagged as belonging to this list and
// its age in the free list is reset to 0. After linking, the list bookkeeping is updated
// via update_added_region_info.
void region_free_list::add_region_in_descending_order (heap_segment* region_to_add)
{
    assert (heap_segment_containing_free_list (region_to_add) == nullptr);
    heap_segment_containing_free_list (region_to_add) = this;
    heap_segment_age_in_free (region_to_add) = 0;
    // the new region is inserted between prev_region and region;
    // either may be nullptr, meaning "before the head" / "after the tail"
    heap_segment* prev_region = nullptr;
    heap_segment* region = nullptr;

    // if the region is fully committed, it's inserted at the front
    if (heap_segment_committed (region_to_add) == heap_segment_reserved (region_to_add))
    {
        region = head_free_region;
    }
    else
    {
        // otherwise we search backwards for a good insertion spot
        // most regions at the front are fully committed and thus boring to search

        size_t region_to_add_committed = get_region_committed_size (region_to_add);

        for (prev_region = tail_free_region; prev_region != nullptr; prev_region = heap_segment_prev_free_region (prev_region))
        {
            size_t prev_region_committed = get_region_committed_size (prev_region);

            // stop at the first region (from the back) that has at least as much committed
            if (prev_region_committed >= region_to_add_committed)
            {
                break;
            }
            // this one has less committed, so the new region goes in front of it
            region = prev_region;
        }
    }

    // hook up the link coming from the predecessor (or the head pointer)
    if (prev_region != nullptr)
    {
        heap_segment_next (prev_region) = region_to_add;
    }
    else
    {
        assert (region == head_free_region);
        head_free_region = region_to_add;
    }

    heap_segment_prev_free_region (region_to_add) = prev_region;
    heap_segment_next (region_to_add) = region;

    // hook up the link coming from the successor (or the tail pointer)
    if (region != nullptr)
    {
        heap_segment_prev_free_region (region) = region_to_add;
    }
    else
    {
        assert (prev_region == tail_free_region);
        tail_free_region = region_to_add;
    }

    update_added_region_info (region_to_add);
}

// Removes the head region from this list and returns it; returns nullptr when
// the list has no head.
heap_segment* region_free_list::unlink_region_front()
{
    heap_segment* front = head_free_region;
    if (front == nullptr)
    {
        return nullptr;
    }

    assert (heap_segment_containing_free_list (front) == this);
    unlink_region (front);
    return front;
}

// Removes region from whichever free list it is recorded as being on (looked up
// through heap_segment_containing_free_list), fixing the neighbours' links, the
// list's head/tail, and the list's counters and size totals. The region's
// containing-list pointer is cleared; its own next/prev fields are left as they were.
void region_free_list::unlink_region (heap_segment* region)
{
    region_free_list* owner = heap_segment_containing_free_list (region);
    owner->verify (false);

    heap_segment* before = heap_segment_prev_free_region (region);
    heap_segment* after = heap_segment_next (region);

    // forward direction: the predecessor (or the head pointer) skips over the region
    if (before == nullptr)
    {
        assert (region == owner->head_free_region);
        owner->head_free_region = after;
    }
    else
    {
        assert (region != owner->head_free_region);
        assert (heap_segment_next (before) == region);
        heap_segment_next (before) = after;
    }

    // backward direction: the successor (or the tail pointer) skips over the region
    if (after == nullptr)
    {
        assert (region == owner->tail_free_region);
        owner->tail_free_region = before;
    }
    else
    {
        assert (region != owner->tail_free_region);
        assert (heap_segment_prev_free_region (after) == region);
        heap_segment_prev_free_region (after) = before;
    }
    heap_segment_containing_free_list (region) = nullptr;

    owner->num_free_regions--;
    owner->num_free_regions_removed++;

    size_t removed_size = get_region_size (region);
    assert (owner->size_free_regions >= removed_size);
    owner->size_free_regions -= removed_size;

    size_t removed_committed_size = get_region_committed_size (region);
    assert (owner->size_committed_in_free_regions >= removed_committed_size);
    owner->size_committed_in_free_regions -= removed_committed_size;
}

// Classifies a region by its size: exactly the basic region alignment -> basic,
// exactly the large region alignment -> large, anything else -> huge (asserted to
// be bigger than a large region).
free_region_kind region_free_list::get_region_kind (heap_segment* region)
{
    const size_t basic_size = global_region_allocator.get_region_alignment();
    const size_t large_size = global_region_allocator.get_large_region_alignment();
    const size_t actual_size = get_region_size (region);

    if (actual_size == basic_size)
    {
        return basic_free_region;
    }

    if (actual_size == large_size)
    {
        return large_free_region;
    }

    assert(actual_size > large_size);
    return huge_free_region;
}

// Finds the smallest region on this list whose size is at least minimum_size,
// unlinks it from the list and returns it. Returns nullptr if no region on the list
// is large enough. Every region on the list is asserted to be at least twice the
// large region alignment in size, and the search stops early once a qualifying
// region of exactly that size is found, since nothing smaller can follow.
heap_segment* region_free_list::unlink_smallest_region (size_t minimum_size)
{
    verify (num_free_regions == 0);

    // smallest size a region on this list can have; loop-invariant, so query it once
    const size_t smallest_possible_size = global_region_allocator.get_large_region_alignment() * 2;

    // look for the smallest region that is large enough
    heap_segment* smallest_region = nullptr;
    size_t smallest_size = (size_t)-1;
    for (heap_segment* region = head_free_region; region != nullptr; region = heap_segment_next (region))
    {
        size_t region_size = get_region_size (region);
        assert (region_size >= smallest_possible_size);
        if (region_size >= minimum_size)
        {
            // found a region that is large enough - see if it's smaller than the smallest so far
            if (smallest_size > region_size)
            {
                smallest_size = region_size;
                smallest_region = region;
            }
            // is the region's size equal to the minimum on this list?
            if (region_size == smallest_possible_size)
            {
                // we won't find a smaller one on this list
                assert (region == smallest_region);
                break;
            }
        }
    }

    if (smallest_region != nullptr)
    {
        unlink_region (smallest_region);
        dprintf(REGIONS_LOG, ("get %p-%p-%p",
            heap_segment_mem(smallest_region), heap_segment_committed(smallest_region), heap_segment_used(smallest_region)));
    }

    return smallest_region;
}

// Moves every region of 'from' onto the end of this list. The regions are re-tagged
// as belonging to this list, the counts and sizes are carried over (they count as
// "added" here and "removed" on 'from'), and 'from' is left empty.
void region_free_list::transfer_regions (region_free_list* from)
{
    this->verify (this->num_free_regions == 0);
    from->verify (from->num_free_regions == 0);

    // nothing to do when the from list is empty
    if (from->num_free_regions == 0)
    {
        return;
    }

    heap_segment* incoming_head = from->head_free_region;
    if (num_free_regions != 0)
    {
        // both lists are non-empty: splice the incoming chain after our tail
        heap_segment* old_tail = tail_free_region;
        heap_segment_next (old_tail) = incoming_head;
        heap_segment_prev_free_region (incoming_head) = old_tail;
    }
    else
    {
        // this list is empty: the incoming chain becomes the whole list
        head_free_region = incoming_head;
    }
    // in either case the incoming tail is the new tail
    tail_free_region = from->tail_free_region;

    // re-tag every transferred region
    heap_segment* moved = incoming_head;
    while (moved != nullptr)
    {
        heap_segment_containing_free_list (moved) = this;
        moved = heap_segment_next (moved);
    }

    num_free_regions_added += from->num_free_regions;
    num_free_regions += from->num_free_regions;
    size_free_regions += from->size_free_regions;
    size_committed_in_free_regions += from->size_committed_in_free_regions;

    // record the removal on the source before reset() zeroes its current count
    from->num_free_regions_removed += from->num_free_regions;
    from->reset();

    verify (false);
}

// Returns the number of regions currently on this list. In _DEBUG builds the list
// is verified first.
size_t region_free_list::get_num_free_regions()
{
#ifdef _DEBUG
    const bool expect_empty = (num_free_regions == 0);
    verify (expect_empty);
#endif //_DEBUG
    return num_free_regions;
}

// Adds region to the front of the list in to_free_list that matches its kind
// (as determined by get_region_kind).
void region_free_list::add_region (heap_segment* region, region_free_list to_free_list[count_free_region_kinds])
{
    region_free_list& target_list = to_free_list[get_region_kind (region)];
    target_list.add_region_front (region);
}

// Adds region, via add_region_in_descending_order, to the list in to_free_list
// that matches its kind (as determined by get_region_kind).
void region_free_list::add_region_descending (heap_segment* region, region_free_list to_free_list[count_free_region_kinds])
{
    region_free_list& target_list = to_free_list[get_region_kind (region)];
    target_list.add_region_in_descending_order (region);
}

// Returns true if region is recorded as being on the list in free_list that
// corresponds to its kind.
bool region_free_list::is_on_free_list (heap_segment* region, region_free_list free_list[count_free_region_kinds])
{
    region_free_list* expected_list = &free_list[get_region_kind (region)];
    return heap_segment_containing_free_list (region) == expected_list;
}

// Increments the age of every region on this list, saturating at MAX_AGE_IN_FREE.
void region_free_list::age_free_regions()
{
    heap_segment* region = head_free_region;
    while (region != nullptr)
    {
        // regions that already reached the cap are left alone - that's old enough
        // for us to decommit them
        if (heap_segment_age_in_free (region) < MAX_AGE_IN_FREE)
        {
            heap_segment_age_in_free (region)++;
        }
        region = heap_segment_next (region);
    }
}

// Ages the regions on each of the per-kind lists in free_lists, from
// basic_free_region up to (not including) count_free_region_kinds.
void region_free_list::age_free_regions (region_free_list free_lists[count_free_region_kinds])
{
    int kind = basic_free_region;
    while (kind < count_free_region_kinds)
    {
        free_lists[kind].age_free_regions();
        kind++;
    }
}

// Logs (at dprintf level 3) one line per region on this list, bracketed by
// begin/end markers.
// hn:   heap number used in the log lines.
// msg:  tag printed in front of every region line.
// ages: optional histogram; when non-null, the slot indexed by each region's age
//       in the free list is incremented.
void region_free_list::print (int hn, const char* msg, int* ages)
{
    dprintf (3, ("h%2d PRINTING-------------------------------", hn));
    heap_segment* region = head_free_region;
    while (region != nullptr)
    {
        if (ages != nullptr)
        {
            ages[heap_segment_age_in_free (region)]++;
        }

        // "(FC)" marks a region whose committed end equals its reserved end
        dprintf (3, ("[%s] h%2d age %d region %p (%zd)%s",
            msg, hn, (int)heap_segment_age_in_free (region),
            heap_segment_mem (region), get_region_committed_size (region),
            ((heap_segment_committed (region) == heap_segment_reserved (region)) ? "(FC)" : "")));

        region = heap_segment_next (region);
    }
    dprintf (3, ("h%2d PRINTING END-------------------------------", hn));
}

// Prints each of the per-kind lists in free_lists, from basic_free_region up to
// (not including) count_free_region_kinds, passing hn, msg and ages through.
void region_free_list::print (region_free_list free_lists[count_free_region_kinds], int hn, const char* msg, int* ages)
{
    int kind = basic_free_region;
    while (kind < count_free_region_kinds)
    {
        free_lists[kind].print (hn, msg, ages);
        kind++;
    }
}

// Ordering used when sorting free regions: larger committed size sorts first;
// among regions with equal committed size, the lower age sorts first.
// Returns a negative value if l sorts before r, positive if after, 0 if equal.
static int compare_by_committed_and_age (heap_segment* l, heap_segment* r)
{
    size_t left_committed = get_region_committed_size (l);
    size_t right_committed = get_region_committed_size (r);
    if (left_committed != right_committed)
    {
        return (left_committed > right_committed) ? -1 : 1;
    }

    // same committed size: fall back to the age difference
    int left_age = heap_segment_age_in_free (l);
    int right_age = heap_segment_age_in_free (r);
    return (left_age - right_age);
}

// Recursive merge sort of a singly linked chain of regions, ordered by
// compare_by_committed_and_age.
// head:  first region of the chain to sort.
// count: number of regions in the chain; the chain is cut after count / 2 regions.
// Returns the head of the sorted chain. Only the next links are rewritten here; the
// prev_free_region links are not touched by this function.
static heap_segment* merge_sort_by_committed_and_age (heap_segment *head, size_t count)
{
    // zero or one region is already sorted
    if (count <= 1)
        return head;
    size_t half = count / 2;
    heap_segment* mid = nullptr;
    size_t i = 0;
    // walk to the half-th region and cut the chain right after it
    for (heap_segment *region = head; region != nullptr; region = heap_segment_next (region))
    {
        i++;
        if (i == half)
        {
            mid = heap_segment_next (region);
            heap_segment_next (region) = nullptr;
            break;
        }
    }
    // sort both halves independently
    head = merge_sort_by_committed_and_age (head, half);
    mid = merge_sort_by_committed_and_age (mid, count - half);

    // pick the first element of the merged chain; ties take the element from the first half
    heap_segment* new_head;
    if (compare_by_committed_and_age (head, mid) <= 0)
    {
        new_head = head;
        head = heap_segment_next (head);
    }
    else
    {
        new_head = mid;
        mid = heap_segment_next (mid);
    }
    heap_segment* new_tail = new_head;
    // merge while both halves still have elements, appending the smaller front each time
    while ((head != nullptr) && (mid != nullptr))
    {
        heap_segment* region = nullptr;
        if (compare_by_committed_and_age (head, mid) <= 0)
        {
            region = head;
            head = heap_segment_next (head);
        }
        else
        {
            region = mid;
            mid = heap_segment_next (mid);
        }

        heap_segment_next (new_tail) = region;
        new_tail = region;
    }

    // append whatever is left of the half that was not exhausted
    if (head != nullptr)
    {
        assert (mid == nullptr);
        heap_segment_next (new_tail) = head;
    }
    else
    {
        // mid may be nullptr here, which terminates the merged chain
        heap_segment_next (new_tail) = mid;
    }
    return new_head;
}

void region_free_list::sort_by_committed_and_age()
{
    if (num_free_regions <= 1)
        return;
    heap_segment* new_head = merge_sort_by_committed_and_age (head_free_region, num_free_regions);

    // need to set head, tail, and all the prev links again
    head_free_region = new_head;
    heap_segment* prev = nullptr;
    for (heap_segment* region = new_head; region != nullptr; region = heap_segment_next (region))
    {
        heap_segment_prev_free_region (region) = prev;
        assert ((prev == nullptr) || (compare_by_committed_and_age (prev, region) <= 0));
        prev = region;
    }
    tail_free_region = prev;
}

// Ages the free regions and logs them.
// All region kinds (including the global huge free list) are aged when the condemned
// generation is max_generation, or when heap 0's current_bgc_state is bgc_initialized;
// otherwise only the per-heap basic free regions are aged.
// msg: tag passed through to region_free_list::print for the log lines.
void gc_heap::age_free_regions (const char* msg)
{
    // If we are doing an ephemeral GC as a precursor to a BGC, then we will age all of the region
    // kinds during the ephemeral GC and skip the call to age_free_regions during the BGC itself.
    bool age_all_region_kinds = (settings.condemned_generation == max_generation);

    if (!age_all_region_kinds)
    {
        // not a max_generation GC: check the BGC state (read from the first/only heap)
#ifdef MULTIPLE_HEAPS
        gc_heap* hp = g_heaps[0];
#else //MULTIPLE_HEAPS
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        age_all_region_kinds = (hp->current_bgc_state == bgc_initialized);
    }

    if (age_all_region_kinds)
    {
        // huge free regions are kept on a global list rather than per heap
        global_free_huge_regions.age_free_regions();
    }

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < n_heaps; i++)
    {
        gc_heap* hp = g_heaps[i];
#else //MULTIPLE_HEAPS
    // plain scope so that the closing brace below pairs up in both builds
    {
        gc_heap* hp = pGenGCHeap;
        const int i = 0;
#endif //MULTIPLE_HEAPS

        if (age_all_region_kinds)
        {
            // age and print all kinds of free regions
            region_free_list::age_free_regions (hp->free_regions);
            region_free_list::print (hp->free_regions, i, msg);
        }
        else
        {
            // age and print only basic free regions
            hp->free_regions[basic_free_region].age_free_regions();
            hp->free_regions[basic_free_region].print (i, msg);
        }
    }
}

// distribute_free_regions is called during all blocking GCs and in the start of the BGC mark phase
// unless we already called it during an ephemeral GC right before the BGC.
//
// Free regions are stored on the following permanent lists:
// - global_regions_to_decommit
// - global_free_huge_regions
// - (per-heap) free_regions
// and the following lists that are local to distribute_free_regions:
// - aged_regions
// - surplus_regions
//
// For reason_induced_aggressive GCs, we decommit all regions.  Therefore, the below description is
// for other GC types.
//
// distribute_free_regions steps:
//
// 1. Process region ages
//    a. Move all huge regions from free_regions to global_free_huge_regions.
//       (The intention is that free_regions shouldn't contain any huge regions outside of the period
//       where a GC reclaims them and distribute_free_regions moves them to global_free_huge_regions,
//       though perhaps BGC can leave them there.  Future work could verify and assert this.)
//    b. Move any basic region in global_regions_to_decommit (which means we intended to decommit them
//       but haven't done so yet) to surplus_regions
//    c. Move all huge regions that are past the age threshold from global_free_huge_regions to aged_regions
//    d. Move all basic/large regions that are past the age threshold from free_regions to aged_regions
// 2. Move all regions from aged_regions to global_regions_to_decommit.  Note that the intention is to
//    combine this with move_highest_free_regions in a future change, which is why we don't just do this
//    in steps 1c/1d.
// 3. Compute the required per-heap budgets for SOH (basic regions) and the balance.  The budget for LOH
//    (large) is zero as we are using an entirely age-based approach.
//        balance = (number of free regions) - budget
// 4. Decide if we are going to distribute or decommit a nonzero balance.  To distribute, we adjust the
//    per-heap budgets, so after this step the LOH (large) budgets can be positive.
//    a. A negative balance (deficit) for SOH (basic) will be distributed, since it means we expect to use
//       more memory than we have on the free lists.  A negative balance for LOH (large) isn't possible
//       since the budgets start at zero.
//    b. For SOH (basic), we will decommit surplus regions unless we are in a foreground GC during BGC.
//    c. For LOH (large), we will distribute surplus regions since we are using an entirely age-based
//       approach.  However, if we are in a high-memory-usage scenario, we will decommit.  In this case,
//       we will also decommit the huge regions in global_free_huge_regions.  Note that they were not
//       originally included in the balance because they are kept in a global list.  Only basic/large
//       regions are kept in per-heap lists where they can be distributed.
// 5. Implement the distribute-or-decommit strategy.  To distribute, we simply move regions across heaps,
//    using surplus_regions as a holding space.  To decommit, for server GC we generally leave them on the
//    global_regions_to_decommit list and decommit them over time.  However, in high-memory-usage scenarios,
//    we will immediately decommit some or all of these regions.  For workstation GC, we decommit a limited
//    amount and move the rest back to the (one) heap's free_list.
void gc_heap::distribute_free_regions()
{
#ifdef MULTIPLE_HEAPS
    BOOL joined_last_gc_before_oom = FALSE;
    for (int i = 0; i < n_heaps; i++)
    {
        if (g_heaps[i]->last_gc_before_oom)
        {
            joined_last_gc_before_oom = TRUE;
            break;
        }
    }
#else
    BOOL joined_last_gc_before_oom = last_gc_before_oom;
#endif //MULTIPLE_HEAPS
    if (settings.reason == reason_induced_aggressive)
    {
        global_regions_to_decommit[huge_free_region].transfer_regions (&global_free_huge_regions);

#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];
#else //MULTIPLE_HEAPS
        {
            gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
            for (int kind = basic_free_region; kind < count_free_region_kinds; kind++)
            {
                global_regions_to_decommit[kind].transfer_regions (&hp->free_regions[kind]);
            }
        }
        while (decommit_step(DECOMMIT_TIME_STEP_MILLISECONDS))
        {
        }
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];
            int hn = i;
#else //MULTIPLE_HEAPS
        {
            gc_heap* hp = pGenGCHeap;
            int hn  = 0;
#endif //MULTIPLE_HEAPS
            for (int i = 0; i < total_generation_count; i++)
            {
                generation* generation = hp->generation_of (i);
                heap_segment* region = heap_segment_rw (generation_start_segment (generation));
                while (region != nullptr)
                {
                    uint8_t* aligned_allocated = align_on_page (heap_segment_allocated (region));
                    size_t end_space = heap_segment_committed (region) - aligned_allocated;
                    if (end_space > 0)
                    {
                        virtual_decommit (aligned_allocated, end_space, gen_to_oh (i), hn);
                        heap_segment_committed (region) = aligned_allocated;
                        heap_segment_used (region) = min (heap_segment_used (region), heap_segment_committed (region));
                        assert (heap_segment_committed (region) > heap_segment_mem (region));
                    }
                    region = heap_segment_next_rw (region);
                }
            }
        }

        return;
    }

    // first step: accumulate the number of free regions and the budget over all heaps
    //
    // The initial budget will only be calculated for basic free regions.  For large regions, the initial budget
    // is zero, and distribute-vs-decommit will be determined entirely by region ages and whether we are in a
    // high memory usage scenario.  Distributing a surplus/deficit of regions can change the budgets that are used.
    size_t total_num_free_regions[count_distributed_free_region_kinds] = { 0, 0 };
    size_t total_budget_in_region_units[count_distributed_free_region_kinds] = { 0, 0 };

    size_t heap_budget_in_region_units[count_distributed_free_region_kinds][MAX_SUPPORTED_CPUS] = {};
    size_t min_heap_budget_in_region_units[count_distributed_free_region_kinds][MAX_SUPPORTED_CPUS] = {};
    region_free_list aged_regions[count_free_region_kinds];
    region_free_list surplus_regions[count_distributed_free_region_kinds];

    // we may still have regions left on the regions_to_decommit list -
    // use these to fill the budget as well
    surplus_regions[basic_free_region].transfer_regions (&global_regions_to_decommit[basic_free_region]);

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < n_heaps; i++)
    {
        gc_heap* hp = g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        global_free_huge_regions.transfer_regions (&hp->free_regions[huge_free_region]);
    }

    move_all_aged_regions(total_num_free_regions, aged_regions, joined_last_gc_before_oom);
    // For now, we just decommit right away, but eventually these will be used in move_highest_free_regions
    move_regions_to_decommit(aged_regions);

    size_t total_basic_free_regions = total_num_free_regions[basic_free_region] + surplus_regions[basic_free_region].get_num_free_regions();
    total_budget_in_region_units[basic_free_region] = compute_basic_region_budgets(heap_budget_in_region_units[basic_free_region], min_heap_budget_in_region_units[basic_free_region], total_basic_free_regions);
    
    bool aggressive_decommit_large_p = joined_last_gc_before_oom || dt_high_memory_load_p() || near_heap_hard_limit_p();

    int region_factor[count_distributed_free_region_kinds] = { 1, LARGE_REGION_FACTOR };

#ifndef MULTIPLE_HEAPS
    // just to reduce the number of #ifdefs in the code below
    const int n_heaps = 1;
#endif //!MULTIPLE_HEAPS

    for (int kind = basic_free_region; kind < count_distributed_free_region_kinds; kind++)
    {
        dprintf(REGIONS_LOG, ("%zd %s free regions, %zd regions budget, %zd regions on surplus list",
            total_num_free_regions[kind],
            free_region_kind_name[kind],
            total_budget_in_region_units[kind],
            surplus_regions[kind].get_num_free_regions()));

        // check if the free regions exceed the budget
        // if so, put the highest free regions on the decommit list
        total_num_free_regions[kind] += surplus_regions[kind].get_num_free_regions();

        ptrdiff_t balance_to_distribute = total_num_free_regions[kind] - total_budget_in_region_units[kind];

        if (distribute_surplus_p(balance_to_distribute, kind, aggressive_decommit_large_p))
        {
#ifdef MULTIPLE_HEAPS
            // we may have a deficit or - for large regions or if background GC is going on - a surplus.
            // adjust the budget per heap accordingly
            if (balance_to_distribute != 0)
            {
                dprintf (REGIONS_LOG, ("distributing the %zd %s regions deficit", -balance_to_distribute, free_region_kind_name[kind]));

                ptrdiff_t curr_balance = 0;
                ptrdiff_t rem_balance = 0;
                for (int i = 0; i < n_heaps; i++)
                {
                    curr_balance += balance_to_distribute;
                    ptrdiff_t adjustment_per_heap = curr_balance / n_heaps;
                    curr_balance -= adjustment_per_heap * n_heaps;
                    ptrdiff_t new_budget = (ptrdiff_t)heap_budget_in_region_units[kind][i] + adjustment_per_heap;
                    ptrdiff_t min_budget = (ptrdiff_t)min_heap_budget_in_region_units[kind][i];
                    dprintf (REGIONS_LOG, ("adjusting the budget for heap %d from %zd %s regions by %zd to %zd",
                        i,
                        heap_budget_in_region_units[kind][i],
                        free_region_kind_name[kind],
                        adjustment_per_heap,
                        max (min_budget, new_budget)));
                    heap_budget_in_region_units[kind][i] = max (min_budget, new_budget);
                    rem_balance += new_budget - heap_budget_in_region_units[kind][i];
                }
                assert (rem_balance <= 0);
                dprintf (REGIONS_LOG, ("remaining balance: %zd %s regions", rem_balance, free_region_kind_name[kind]));

                // if we have a left over deficit, distribute that to the heaps that still have more than the minimum
                while (rem_balance < 0)
                {
                    for (int i = 0; i < n_heaps; i++)
                    {
                        size_t min_budget = min_heap_budget_in_region_units[kind][i];
                        if (heap_budget_in_region_units[kind][i] > min_budget)
                        {
                            dprintf (REGIONS_LOG, ("adjusting the budget for heap %d from %zd %s regions by %d to %zd",
                                i,
                                heap_budget_in_region_units[kind][i],
                                free_region_kind_name[kind],
                                -1,
                                heap_budget_in_region_units[kind][i] - 1));

                            heap_budget_in_region_units[kind][i] -= 1;
                            rem_balance += 1;
                            if (rem_balance == 0)
                                break;
                        }
                    }
                }
            }
#endif //MULTIPLE_HEAPS
        }
        else
        {
            assert (balance_to_distribute >= 0);

            ptrdiff_t balance_to_decommit = balance_to_distribute;
            if (kind == large_free_region)
            {
                // huge regions aren't part of balance_to_distribute because they are kept in a global list
                // and therefore can't be distributed across heaps
                balance_to_decommit += global_free_huge_regions.get_size_free_regions() / global_region_allocator.get_large_region_alignment();
            }

            dprintf(REGIONS_LOG, ("distributing the %zd %s regions, removing %zd regions",
                total_budget_in_region_units[kind],
                free_region_kind_name[kind],
                balance_to_decommit));

            if (balance_to_decommit > 0)
            {
                // remember how many regions we had on the decommit list already due to aging
                size_t num_regions_to_decommit_before = global_regions_to_decommit[kind].get_num_free_regions();

                // put the highest regions on the decommit list
                global_region_allocator.move_highest_free_regions (balance_to_decommit * region_factor[kind],
                                                                   kind == basic_free_region,
                                                                   global_regions_to_decommit);

                dprintf (REGIONS_LOG, ("Moved %zd %s regions to decommit list",
                         global_regions_to_decommit[kind].get_num_free_regions(), free_region_kind_name[kind]));

                if (kind == basic_free_region)
                {
                    // we should now have 'balance' regions more on the decommit list
                    assert (global_regions_to_decommit[kind].get_num_free_regions() ==
                            num_regions_to_decommit_before + (size_t)balance_to_decommit);
                }
                else
                {
                    dprintf (REGIONS_LOG, ("Moved %zd %s regions to decommit list",
                        global_regions_to_decommit[huge_free_region].get_num_free_regions(), free_region_kind_name[huge_free_region]));

                    // cannot assert we moved any regions because there may be a single huge region with more than we want to decommit
                }
            }
        }
    }

    for (int kind = basic_free_region; kind < count_distributed_free_region_kinds; kind++)
    {
#ifdef MULTIPLE_HEAPS
        // now go through all the heaps and remove any free regions above the target count
        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];

            if (hp->free_regions[kind].get_num_free_regions() > heap_budget_in_region_units[kind][i])
            {
                dprintf (REGIONS_LOG, ("removing %zd %s regions from heap %d with %zd regions, budget is %zd",
                    hp->free_regions[kind].get_num_free_regions() - heap_budget_in_region_units[kind][i],
                    free_region_kind_name[kind],
                    i,
                    hp->free_regions[kind].get_num_free_regions(),
                    heap_budget_in_region_units[kind][i]));

                trim_region_list (&surplus_regions[kind], &hp->free_regions[kind], heap_budget_in_region_units[kind][i]);
            }
        }
        // finally go through all the heaps and distribute any surplus regions to heaps having too few free regions
        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];
#else //MULTIPLE_HEAPS
        {
            gc_heap* hp = pGenGCHeap;
            const int i = 0;
#endif //MULTIPLE_HEAPS

            // second pass: fill all the regions having less than budget
            if (hp->free_regions[kind].get_num_free_regions() < heap_budget_in_region_units[kind][i])
            {
                int64_t num_added_regions = grow_region_list (&hp->free_regions[kind], &surplus_regions[kind], heap_budget_in_region_units[kind][i]);
                dprintf (REGIONS_LOG, ("added %zd %s regions to heap %d - now has %zd, budget is %zd",
                    (size_t)num_added_regions,
                    free_region_kind_name[kind],
                    i,
                    hp->free_regions[kind].get_num_free_regions(),
                    heap_budget_in_region_units[kind][i]));
            }
            hp->free_regions[kind].sort_by_committed_and_age();
        }

        if (surplus_regions[kind].get_num_free_regions() > 0)
        {
            assert (!"should have exhausted the surplus_regions");
            global_regions_to_decommit[kind].transfer_regions (&surplus_regions[kind]);
        }
    }

    decide_on_decommit_strategy(aggressive_decommit_large_p);
}

// Gathers aged free regions from every source into aged_regions: first the global list of huge
// free regions, then each heap's free lists for the distributed kinds. Once a heap's list of a
// given kind has been filtered, the number of regions left on it is added to
// total_num_free_regions[kind].
void gc_heap::move_all_aged_regions(size_t total_num_free_regions[count_distributed_free_region_kinds], region_free_list aged_regions[count_free_region_kinds], bool joined_last_gc_before_oom)
{
    // huge free regions are kept on a global list, so they are handled once, outside the per-heap walk
    move_aged_regions (aged_regions, global_free_huge_regions, huge_free_region, joined_last_gc_before_oom);

#ifdef MULTIPLE_HEAPS
    for (int heap_index = 0; heap_index < n_heaps; heap_index++)
    {
        gc_heap* heap = g_heaps[heap_index];
#else //MULTIPLE_HEAPS
    {
        gc_heap* heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        int kind = basic_free_region;
        while (kind < count_distributed_free_region_kinds)
        {
            region_free_list& heap_free_list = heap->free_regions[kind];

            move_aged_regions (aged_regions, heap_free_list, static_cast<free_region_kind>(kind), joined_last_gc_before_oom);

            // count what survived the filtering above
            total_num_free_regions[kind] += heap_free_list.get_num_free_regions();
            kind++;
        }
    }
}

// Walks the free list src and moves each qualifying region onto dest.
// A region qualifies when aged_region_p reports it as old enough for its kind, or - when
// joined_last_gc_before_oom is set - when its committed size is exactly GC_PAGE_SIZE.
void gc_heap::move_aged_regions(region_free_list dest[count_free_region_kinds], region_free_list& src, free_region_kind kind, bool joined_last_gc_before_oom)
{
    heap_segment* region = src.get_first_free_region();
    while (region != nullptr)
    {
        // read the successor up front - the region may get unlinked from this list below
        heap_segment* const following_region = heap_segment_next (region);

        bool move_p = aged_region_p (region, kind);
        if (!move_p)
        {
            // when we are about to get OOM, we'd like to discount the free regions that just have
            // the initial page commit as they are not useful
            move_p = ((get_region_committed_size (region) == GC_PAGE_SIZE) && joined_last_gc_before_oom);
        }

        if (move_p)
        {
            region_free_list::unlink_region (region);
            region_free_list::add_region (region, dest);
        }

        region = following_region;
    }
}

// Returns true when the region's age on the free list has reached the decommit threshold for its
// kind. The threshold is capped at MAX_AGE_IN_FREE; for basic regions it is at least n_heaps.
bool gc_heap::aged_region_p(heap_segment* region, free_region_kind kind)
{
#ifndef MULTIPLE_HEAPS
    const int n_heaps = 1;
#endif

    int decommit_age_threshold = 0;
    if (kind == basic_free_region)
    {
        decommit_age_threshold = max(AGE_IN_FREE_TO_DECOMMIT_BASIC, n_heaps);
    }
    else if (kind == large_free_region)
    {
        decommit_age_threshold = AGE_IN_FREE_TO_DECOMMIT_LARGE;
    }
    else if (kind == huge_free_region)
    {
        decommit_age_threshold = AGE_IN_FREE_TO_DECOMMIT_HUGE;
    }
    else
    {
        // unknown kind: the threshold stays 0, so every region counts as aged
        assert(!"unexpected kind");
    }

    decommit_age_threshold = min (decommit_age_threshold, MAX_AGE_IN_FREE);

    const bool aged_p = (heap_segment_age_in_free (region) >= decommit_age_threshold);
    return aged_p;
}

// Empties every list in regions onto global_regions_to_decommit, logging a per-kind summary
// first and each moved region as it goes. All lists in regions are expected to be empty afterwards.
void gc_heap::move_regions_to_decommit(region_free_list regions[count_free_region_kinds])
{
    // summary of what is about to be moved, one line per kind
    int kind = basic_free_region;
    while (kind < count_free_region_kinds)
    {
        dprintf (1, ("moved %2zd %s regions (%8zd) to decommit based on time",
            regions[kind].get_num_free_regions(), free_region_kind_name[kind], regions[kind].get_size_committed_in_free()));
        kind++;
    }

    // the actual move
    for (kind = basic_free_region; kind < count_free_region_kinds; kind++)
    {
        heap_segment* region = regions[kind].get_first_free_region();
        while (region != nullptr)
        {
            // fetch the successor before the region is taken off this list
            heap_segment* const following_region = heap_segment_next (region);

            dprintf (REGIONS_LOG, ("region %p age %2d, decommit",
                heap_segment_mem (region), heap_segment_age_in_free (region)));

            region_free_list::unlink_region (region);
            region_free_list::add_region (region, global_regions_to_decommit);

            region = following_region;
        }
    }

    // nothing may be left behind on the source lists
    for (kind = basic_free_region; kind < count_free_region_kinds; kind++)
    {
        assert(regions[kind].get_num_free_regions() == 0);
    }
}

// Accumulates, generation by generation starting at gen0, each heap's estimated growth (rounded
// up to whole regions) into heap_basic_budget_in_region_units. Before a generation is added, the
// heap's budget so far is recorded in min_heap_basic_budget_in_region_units. Accumulation stops
// before a generation once the running total has reached total_basic_free_regions.
// Returns the total budget, in region units, summed over all heaps and processed generations.
size_t gc_heap::compute_basic_region_budgets(
    size_t heap_basic_budget_in_region_units[MAX_SUPPORTED_CPUS],
    size_t min_heap_basic_budget_in_region_units[MAX_SUPPORTED_CPUS],
    size_t total_basic_free_regions)
{
    const size_t basic_region_size = global_region_allocator.get_region_alignment();
    size_t accumulated_budget = 0;

    for (int gen = soh_gen0; gen <= max_generation; gen++)
    {
        const bool out_of_free_regions_p = (accumulated_budget >= total_basic_free_regions);
        if (out_of_free_regions_p)
        {
            // don't accumulate budget from higher soh generations if we cannot cover lower ones
            dprintf (REGIONS_LOG, ("out of free regions - skipping gen %d budget = %zd >= avail %zd",
                gen,
                accumulated_budget,
                total_basic_free_regions));
            break;
        }

#ifdef MULTIPLE_HEAPS
        for (int heap_index = 0; heap_index < n_heaps; heap_index++)
        {
            gc_heap* heap = g_heaps[heap_index];
#else //MULTIPLE_HEAPS
        {
            gc_heap* heap = pGenGCHeap;
            // just to reduce the number of #ifdefs in the code below
            const int heap_index = 0;
#endif //MULTIPLE_HEAPS
            // negative growth estimates contribute nothing
            ptrdiff_t gen_growth = max (heap->estimate_gen_growth (gen), (ptrdiff_t)0);
            // round up to whole regions
            size_t gen_growth_in_regions = (gen_growth + (basic_region_size - 1)) / basic_region_size;
            dprintf (REGIONS_LOG, ("h%2d gen %d has an estimated growth of %zd bytes (%zd regions)", heap_index, gen, gen_growth, gen_growth_in_regions));

            size_t& heap_budget = heap_basic_budget_in_region_units[heap_index];

            // preserve the budget for the previous generation - we should not go below that
            min_heap_basic_budget_in_region_units[heap_index] = heap_budget;

            heap_budget += gen_growth_in_regions;
            accumulated_budget += gen_growth_in_regions;
        }
    }

    return accumulated_budget;
}

// Returns true when a heap hard limit is configured and the currently committed memory is at
// least 90% of it (percentage truncated to an integer). Without a hard limit this is always false.
bool gc_heap::near_heap_hard_limit_p()
{
    if (!heap_hard_limit)
    {
        return false;
    }

    int committed_percent_of_limit = (int)((float)current_total_committed * 100.0 / (float)heap_hard_limit);
    dprintf (REGIONS_LOG, ("committed %zd is %d%% of limit %zd",
        current_total_committed, committed_percent_of_limit, heap_hard_limit));

    return (committed_percent_of_limit >= 90);
}

// Decides the answer for a given balance of free regions of one kind:
//  - a negative balance always yields true;
//  - for large regions (any kind other than basic) the answer is the opposite of
//    aggressive_decommit_large_p;
//  - for basic regions the answer is true only for a foreground GC running while a BGC is in
//    progress (always false without BACKGROUND_GC).
bool gc_heap::distribute_surplus_p(ptrdiff_t balance, int kind, bool aggressive_decommit_large_p)
{
    if (balance < 0)
    {
        return true;
    }

    if (kind != basic_free_region)
    {
        return !aggressive_decommit_large_p;
    }

#ifdef BACKGROUND_GC
    // This is detecting FGCs that run during BGCs. It is not detecting ephemeral GCs that
    // (possibly) run right before a BGC as background_running_p() is not yet true at that point.
    const bool fgc_during_bgc_p = (background_running_p() && (settings.condemned_generation != max_generation));
    return fgc_during_bgc_p;
#else
    return false;
#endif
}

// Decides how much of the decommit list to process right now and how much to leave for later.
//
// With MULTIPLE_HEAPS:
//  - if joined_last_gc_before_oom or g_low_memory_status is set, decommit_step is called
//    repeatedly until it returns false, and the function returns;
//  - otherwise a byte amount to decommit is derived from the heap hard limit and from the
//    physical memory load, a single decommit_step is issued for it (if non-zero), and
//    gradual_decommit_in_progress_p is set when any decommit list is still non-empty.
// Without MULTIPLE_HEAPS: decommits in proportion to the time elapsed since the last ephemeral
// decommit and returns whatever is left on the decommit lists to this heap's free lists.
//
// NOTE(review): the call site visible in this file passes aggressive_decommit_large_p for the
// joined_last_gc_before_oom parameter - confirm that this is intended.
void gc_heap::decide_on_decommit_strategy(bool joined_last_gc_before_oom)
{
#ifdef MULTIPLE_HEAPS
    if (joined_last_gc_before_oom || g_low_memory_status)
    {
        dprintf (REGIONS_LOG, ("low memory - decommitting everything (last_gc_before_oom=%d, g_low_memory_status=%d)", joined_last_gc_before_oom, g_low_memory_status));

        // keep stepping until decommit_step reports false
        while (decommit_step(DECOMMIT_TIME_STEP_MILLISECONDS))
        {
        }
        return;
    }

    // bytes committed above MAX_ALLOWED_MEM_LOAD percent of the hard limit, clamped at zero
    ptrdiff_t size_to_decommit_for_heap_hard_limit = 0;
    if (heap_hard_limit)
    {
        size_to_decommit_for_heap_hard_limit = (ptrdiff_t)(current_total_committed - (heap_hard_limit * (MAX_ALLOWED_MEM_LOAD / 100.0f)));
        size_to_decommit_for_heap_hard_limit = max(size_to_decommit_for_heap_hard_limit, (ptrdiff_t)0);
    }

    // For the various high memory load situations, we're not using the process size at all.  In
    // particular, if we had a large process and smaller processes running in the same container,
    // then we will treat them the same if the container reaches high_memory_load_th.  In
    // the future, we could consider additional complexity to try to reclaim more memory from
    // larger processes than smaller ones.
    ptrdiff_t size_to_decommit_for_physical = 0;
    if (settings.entry_memory_load >= high_memory_load_th)
    {
        // bytes by which the physical memory in use at GC entry exceeds the
        // almost_high_memory_load_th percentage of total physical memory
        size_t entry_used_physical_mem = total_physical_mem - entry_available_physical_mem;
        size_t goal_used_physical_mem = (size_t)(((almost_high_memory_load_th) / 100.0) * total_physical_mem);
        size_to_decommit_for_physical = entry_used_physical_mem - goal_used_physical_mem;
    }

    // the larger of the two requirements wins
    size_t size_to_decommit = max(size_to_decommit_for_heap_hard_limit, size_to_decommit_for_physical);
    if (size_to_decommit > 0)
    {
        dprintf (REGIONS_LOG, ("low memory - decommitting %zd (for heap_hard_limit: %zd, for physical: %zd)", size_to_decommit, size_to_decommit_for_heap_hard_limit, size_to_decommit_for_physical));

        // the byte amount is converted via DECOMMIT_SIZE_PER_MILLISECOND into the unit decommit_step takes
        decommit_step(size_to_decommit / DECOMMIT_SIZE_PER_MILLISECOND);
    }

    // anything still on a decommit list is flagged as gradual decommit in progress
    for (int kind = basic_free_region; kind < count_free_region_kinds; kind++)
    {
        if (global_regions_to_decommit[kind].get_num_free_regions() != 0)
        {
            gradual_decommit_in_progress_p = TRUE;
            break;
        }
    }
#else //MULTIPLE_HEAPS
    // we want to limit the amount of decommit we do per time to indirectly
    // limit the amount of time spent in recommit and page faults
    // we use the elapsed time since the last GC to arrive at the desired
    // decommit size
    // we limit the elapsed time to 10 seconds to avoid spending too much time decommitting
    // if less than DECOMMIT_TIME_STEP_MILLISECONDS elapsed, we don't decommit -
    // we don't want to decommit fractions of regions here
    dynamic_data* dd0 = dynamic_data_of (0);
    size_t ephemeral_elapsed = (size_t)((dd_time_clock (dd0) - gc_last_ephemeral_decommit_time) / 1000);
    if (ephemeral_elapsed >= DECOMMIT_TIME_STEP_MILLISECONDS)
    {
        gc_last_ephemeral_decommit_time = dd_time_clock (dd0);
        size_t decommit_step_milliseconds = min (ephemeral_elapsed, (size_t)(10*1000));

        decommit_step (decommit_step_milliseconds);
    }
    // transfer any remaining regions on the decommit list back to the free list
    for (int kind = basic_free_region; kind < count_free_region_kinds; kind++)
    {
        if (global_regions_to_decommit[kind].get_num_free_regions() != 0)
        {
            free_regions[kind].transfer_regions (&global_regions_to_decommit[kind]);
        }
    }
#endif //MULTIPLE_HEAPS
}

#endif //USE_REGIONS

#ifdef WRITE_WATCH
// Buffer that receives the addresses of written pages reported by GetWriteWatch (see
// gc_heap::update_card_table_bundle, which passes array_size as the capacity). The buffer is
// declared two entries larger than that capacity.
uint8_t* g_addresses [array_size+2]; // to get around the bug in GetWriteWatch

#ifdef CARD_BUNDLE
// Debug-only check: asserts that every card bundle from the one containing first_card_word up to,
// but not including, the one containing last_card_word is set. Does nothing in non-debug builds.
inline void gc_heap::verify_card_bundle_bits_set(size_t first_card_word, size_t last_card_word)
{
#ifdef _DEBUG
    size_t bundle = cardw_card_bundle (first_card_word);
    while (bundle < cardw_card_bundle (last_card_word))
    {
        if (!card_bundle_set_p (bundle))
        {
            assert (!"Card bundle not set");
            dprintf (3, ("Card bundle %zx not set", bundle));
        }

        bundle++;
    }
#else
    UNREFERENCED_PARAMETER(first_card_word);
    UNREFERENCED_PARAMETER(last_card_word);
#endif
}

// Debug-only consistency check between card bundles and the card table: for every bundle that is
// clear, all card words it covers (clipped to the lowest/highest card word of the heap range)
// must be zero. Offending card words are logged and then asserted on.
inline void gc_heap::verify_card_bundles()
{
#ifdef _DEBUG
    size_t lowest_cardw = card_word (card_of (lowest_address));
#ifdef USE_REGIONS
    size_t highest_cardw = card_word (card_of (global_region_allocator.get_left_used_unsafe()));
#else
    size_t highest_cardw = card_word (card_of (highest_address));
#endif
    size_t end_bundle = cardw_card_bundle (align_cardw_on_bundle (highest_cardw));

    for (size_t bundle = cardw_card_bundle (lowest_cardw); bundle < end_bundle; bundle++)
    {
        // bundles that are set make no promise about their cards - nothing to verify
        if (card_bundle_set_p (bundle) != 0)
        {
            continue;
        }

        // range of card words covered by this bundle, clipped to the heap's card word range
        uint32_t* cardw_ptr = &card_table[max(card_bundle_cardw (bundle), lowest_cardw)];
        uint32_t* cardw_end = &card_table[min(card_bundle_cardw (bundle+1), highest_cardw)];

        // Verify that no card is set
        for (; cardw_ptr < cardw_end; cardw_ptr++)
        {
            if (*cardw_ptr != 0)
            {
                dprintf  (3, ("gc: %zd, Card word %zx for address %zx set, card_bundle %zx clear",
                        dd_collection_count (dynamic_data_of (0)),
                        (size_t)(cardw_ptr-&card_table[0]),
                        (size_t)(card_address ((size_t)(cardw_ptr-&card_table[0]) * card_word_width)),
                        bundle));
            }

            assert((*cardw_ptr)==0);
        }
    }
#endif
}

// If card bundles are enabled, use write watch to find pages in the card table that have
// been dirtied, and set the corresponding card bundle bits.
//
// The card table range covering [lowest_address, highest address in use] is queried in batches
// of at most array_size pages (the capacity of g_addresses). Write watch state is not reset
// while querying; it is reset once, for the whole range, after all bundle bits have been set.
void gc_heap::update_card_table_bundle()
{
    if (card_bundles_enabled())
    {
        // The address of the card word containing the card representing the lowest heap address
        uint8_t* base_address = (uint8_t*)(&card_table[card_word (card_of (lowest_address))]);

        // The address of the card word containing the card representing the highest heap address
#ifdef USE_REGIONS
        uint8_t* high_address = (uint8_t*)(&card_table[card_word (card_of (global_region_allocator.get_left_used_unsafe()))]);
#else
        uint8_t* high_address = (uint8_t*)(&card_table[card_word (card_of (highest_address))]);
#endif //USE_REGIONS

        // Remember the full range up front: base_address advances in the loop below, but the
        // final reset has to cover everything that was probed.
        uint8_t* saved_base_address = base_address;
        // In: capacity of g_addresses. Out: number of written pages GetWriteWatch reported.
        uintptr_t bcount = array_size;
        size_t saved_region_size = align_on_page (high_address) - saved_base_address;

        do
        {
            // Remaining range to probe, from the current base to the page-aligned high address
            size_t region_size = align_on_page (high_address) - base_address;

            dprintf (3,("Probing card table pages [%zx, %zx[",
                (size_t)base_address, (size_t)(base_address + region_size)));
            bool success = GCToOSInterface::GetWriteWatch(false /* resetState */,
                                                          base_address,
                                                          region_size,
                                                          (void**)g_addresses,
                                                          &bcount);
            assert (success && "GetWriteWatch failed!");

            dprintf (3,("Found %zd pages written", bcount));
            for (unsigned i = 0; i < bcount; i++)
            {
                // Offset of the dirty page from the start of the card table (clamped to base_address)
                size_t bcardw = (uint32_t*)(max(g_addresses[i],base_address)) - &card_table[0];

                // Offset of the end of the page from the start of the card table (clamped to high addr)
                size_t ecardw = (uint32_t*)(min(g_addresses[i]+OS_PAGE_SIZE, high_address)) - &card_table[0];
                assert (bcardw >= card_word (card_of (g_gc_lowest_address)));

                // Set the card bundle bits representing the dirty card table page
                card_bundles_set (cardw_card_bundle (bcardw),
                                  cardw_card_bundle (align_cardw_on_bundle (ecardw)));
                dprintf (3,("Set Card bundle [%zx, %zx[",
                    cardw_card_bundle (bcardw), cardw_card_bundle (align_cardw_on_bundle (ecardw))));

                verify_card_bundle_bits_set(bcardw, ecardw);
            }

            // A full buffer means there may be more written pages beyond the last one reported:
            // continue probing from the page after it, with the full buffer capacity again.
            if (bcount >= array_size)
            {
                base_address = g_addresses [array_size-1] + OS_PAGE_SIZE;
                bcount = array_size;
            }

        } while ((bcount >= array_size) && (base_address < high_address));

        // Now that we've updated the card bundle bits, reset the write-tracking state.
        GCToOSInterface::ResetWriteWatch (saved_base_address, saved_region_size);
    }
}
#endif //CARD_BUNDLE

#ifdef BACKGROUND_GC
// static
// Clears the write-tracking state for [base_address, base_address + region_size).
// Uses SoftwareWriteWatch::ClearDirty when FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is
// defined, and the OS facility (GCToOSInterface::ResetWriteWatch) otherwise.
void gc_heap::reset_write_watch_for_gc_heap(void* base_address, size_t region_size)
{
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    SoftwareWriteWatch::ClearDirty(base_address, region_size);
#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    GCToOSInterface::ResetWriteWatch(base_address, region_size);
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
}

// static
// Retrieves the pages written to within [base_address, base_address + region_size).
//   reset                - forwarded to the underlying query as its reset flag
//   dirty_pages          - buffer that receives the addresses of the written pages
//   dirty_page_count_ref - forwarded to the underlying query as its page-count argument
//   is_runtime_suspended - only forwarded to the software write watch implementation; unused
//                          when the OS facility is used
// With the OS facility, failure of GetWriteWatch is only checked by an assert.
void gc_heap::get_write_watch_for_gc_heap(bool reset, void *base_address, size_t region_size,
                                          void** dirty_pages, uintptr_t* dirty_page_count_ref,
                                          bool is_runtime_suspended)
{
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    SoftwareWriteWatch::GetDirty(base_address, region_size, dirty_pages, dirty_page_count_ref,
                                 reset, is_runtime_suspended);
#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    UNREFERENCED_PARAMETER(is_runtime_suspended);
    bool success = GCToOSInterface::GetWriteWatch(reset, base_address, region_size, dirty_pages,
                                                  dirty_page_count_ref);
    assert(success);
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
}

// Number of bytes (128 MB) of write watch reset after which the resetting code yields;
// used by reset_ww_by_chunk as the chunk size and by switch_on_reset as the yield threshold.
const size_t ww_reset_quantum = 128*1024*1024;

// Gives other threads a chance to run: switches to preemptive mode, sleeps via
// GCToOSInterface::Sleep (1), then switches back with disable_preemptive (true).
inline
void gc_heap::switch_one_quantum()
{
    enable_preemptive ();
    GCToOSInterface::Sleep (1);
    disable_preemptive (true);
}

// Resets the write watch state for [start_address, start_address + total_reset_size) in chunks
// of at most ww_reset_quantum bytes, yielding (switch_one_quantum) after each chunk.
void gc_heap::reset_ww_by_chunk (uint8_t* start_address, size_t total_reset_size)
{
    // number of bytes reset so far; doubles as the offset of the next chunk
    size_t reset_size = 0;

    while (reset_size != total_reset_size)
    {
        size_t remaining_reset_size = total_reset_size - reset_size;
        // non-zero here because the loop condition guarantees bytes remain
        size_t next_reset_size = ((remaining_reset_size >= ww_reset_quantum) ?
            ww_reset_quantum : remaining_reset_size);

        // each chunk starts where the previous one ended - resetting from start_address every
        // time would only ever reset the first chunk
        reset_write_watch_for_gc_heap(start_address + reset_size, next_reset_size);
        reset_size += next_reset_size;

        switch_one_quantum();
    }

    assert (reset_size == total_reset_size);
}

// Throttles concurrent write watch resets: adds last_reset_size to the caller's running total
// and, once that total exceeds ww_reset_quantum, yields via switch_one_quantum (a Sleep(1)) and
// restarts the total at zero. Does nothing - and leaves the total untouched - when the reset is
// not concurrent.
void gc_heap::switch_on_reset (BOOL concurrent_p, size_t* current_total_reset_size, size_t last_reset_size)
{
    if (!concurrent_p)
    {
        return;
    }

    size_t& running_total = *current_total_reset_size;
    running_total += last_reset_size;

    dprintf (2, ("reset %zd bytes so far", running_total));

    if (running_total > ww_reset_quantum)
    {
        switch_one_quantum();

        running_total = 0;
    }
}

// Resets the write watch state for every segment of every generation from
// get_start_generation_index() up, limited to the address range saved for the background GC
// (background_saved_lowest_address .. background_saved_highest_address).
// When concurrent_p is set, the bytes reset are accumulated across all segments and the thread
// yields (see switch_on_reset) each time more than ww_reset_quantum bytes have been reset.
void gc_heap::reset_write_watch (BOOL concurrent_p)
{
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    // Software write watch currently requires the runtime to be suspended during reset.
    // See SoftwareWriteWatch::ClearDirty().
    assert(!concurrent_p);
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

    dprintf (2, ("bgc lowest: %p, bgc highest: %p",
        background_saved_lowest_address, background_saved_highest_address));

    // Running total of bytes reset since the last yield. This must live outside the segment loop:
    // a per-segment counter would restart at zero for every segment and the yield threshold
    // would only ever trigger for a single segment larger than ww_reset_quantum.
    size_t reset_size = 0;

    for (int i = get_start_generation_index(); i < total_generation_count; i++)
    {
        heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i)));

        while (seg)
        {
            // clip the segment's in-use range to the range saved for the background GC
            uint8_t* base_address = align_lower_page (heap_segment_mem (seg));
            base_address = max (base_address, background_saved_lowest_address);

            uint8_t* high_address = ((seg == ephemeral_heap_segment) ?
                alloc_allocated : heap_segment_allocated (seg));
            high_address = min (high_address, background_saved_highest_address);

            if (base_address < high_address)
            {
                size_t region_size = high_address - base_address;
                dprintf (3, ("h%d, gen: %x, ww: [%zx(%zd)", heap_number, i, (size_t)base_address, region_size));
                //reset_ww_by_chunk (base_address, region_size);
                reset_write_watch_for_gc_heap(base_address, region_size);
                switch_on_reset (concurrent_p, &reset_size, region_size);
            }

            seg = heap_segment_next_rw (seg);

            concurrent_print_time_delta (i == max_generation ? "CRWW soh": "CRWW uoh");
        }
    }
}
#endif //BACKGROUND_GC
#endif //WRITE_WATCH

#ifdef BACKGROUND_GC
// Logs that the EE is being restarted and signals ee_proceed_event.
void gc_heap::restart_vm()
{
    //assert (generation_allocation_pointer (youngest_generation) == 0);
    dprintf (3, ("Restarting EE"));
    STRESS_LOG0(LF_GC, LL_INFO10000, "Concurrent GC: Restarting EE\n");
    ee_proceed_event.Set();
}

// Fires the BGCAllocWaitBegin (begin_p set) or BGCAllocWaitEnd (begin_p clear) event for the
// given wait reason. No event is fired when the reason is awr_ignored.
inline
void fire_alloc_wait_event (alloc_wait_reason awr, BOOL begin_p)
{
    if (awr == awr_ignored)
    {
        return;
    }

    if (!begin_p)
    {
        FIRE_EVENT(BGCAllocWaitEnd, awr);
    }
    else
    {
        FIRE_EVENT(BGCAllocWaitBegin, awr);
    }
}


// Fires the "begin" allocation wait event for the given reason (see fire_alloc_wait_event).
void gc_heap::fire_alloc_wait_event_begin (alloc_wait_reason awr)
{
    fire_alloc_wait_event (awr, TRUE);
}


// Fires the "end" allocation wait event for the given reason (see fire_alloc_wait_event).
void gc_heap::fire_alloc_wait_event_end (alloc_wait_reason awr)
{
    fire_alloc_wait_event (awr, FALSE);
}
#endif //BACKGROUND_GC

// Initializes the generation object for gen_num to an empty state.
//   gen_num - index of the generation to initialize (looked up via generation_of)
//   seg     - becomes the generation's start segment and allocation segment (and, with
//             USE_REGIONS, its tail region)
//   start   - allocation start of the generation; only used when USE_REGIONS is not defined
// All allocation context fields, allocation/free space counters and the free list allocator
// are cleared.
void gc_heap::make_generation (int gen_num, heap_segment* seg, uint8_t* start)
{
    generation* gen = generation_of (gen_num);

    gen->gen_num = gen_num;
#ifndef USE_REGIONS
    gen->allocation_start = start;
    gen->plan_allocation_start = 0;
#endif //USE_REGIONS
    // start out with an empty allocation context
    gen->allocation_context.alloc_ptr = 0;
    gen->allocation_context.alloc_limit = 0;
    gen->allocation_context.alloc_bytes = 0;
    gen->allocation_context.alloc_bytes_uoh = 0;
    gen->allocation_context_start_region = 0;
    gen->start_segment = seg;

#ifdef USE_REGIONS
    dprintf (REGIONS_LOG, ("g%d start seg is %zx-%p", gen_num, (size_t)seg, heap_segment_mem (seg)));
    gen->tail_region = seg;
    gen->tail_ro_region = 0;
#endif //USE_REGIONS
    gen->allocation_segment = seg;
    // reset all allocation and free space accounting
    gen->free_list_space = 0;
    gen->free_list_allocated = 0;
    gen->end_seg_allocated = 0;
    gen->condemned_allocated = 0;
    gen->sweep_allocated = 0;
    gen->free_obj_space = 0;
    gen->allocation_size = 0;
    gen->pinned_allocation_sweep_size = 0;
    gen->pinned_allocation_compact_size = 0;
    gen->allocate_end_seg_p = FALSE;
    gen->free_list_allocator.clear();

#ifdef DOUBLY_LINKED_FL
    gen->set_bgc_mark_bit_p = FALSE;
#endif //DOUBLY_LINKED_FL

#ifdef FREE_USAGE_STATS
    memset (gen->gen_free_spaces, 0, sizeof (gen->gen_free_spaces));
    memset (gen->gen_current_pinned_free_spaces, 0, sizeof (gen->gen_current_pinned_free_spaces));
    memset (gen->gen_plugs, 0, sizeof (gen->gen_plugs));
#endif //FREE_USAGE_STATS
}

void gc_heap::adjust_ephemeral_limits ()
{
#ifndef USE_REGIONS
    ephemeral_low = generation_allocation_start (generation_of (max_generation - 1));
    ephemeral_high = heap_segment_reserved (ephemeral_heap_segment);

    dprintf (3, ("new ephemeral low: %zx new ephemeral high: %zx",
        (size_t)ephemeral_low, (size_t)ephemeral_high))

#ifndef MULTIPLE_HEAPS
    // This updates the write barrier helpers with the new info.
    stomp_write_barrier_ephemeral(ephemeral_low, ephemeral_high);
#endif // MULTIPLE_HEAPS
#endif //USE_REGIONS
}

// Caps the heap count nhp so that each heap can get at least min_segment_size_hard_limit bytes
// out of limit (after limit has been aligned by align_on_segment_hard_limit). A limit of zero
// leaves nhp unchanged; otherwise the result is never less than 1.
uint32_t adjust_heaps_hard_limit_worker (uint32_t nhp, size_t limit)
{
    if (limit == 0)
    {
        return nhp;
    }

    size_t limit_aligned = align_on_segment_hard_limit (limit);

    // how many heaps the limit can accommodate at the minimum segment size
    uint32_t max_heaps_for_limit = (uint32_t)(limit_aligned / min_segment_size_hard_limit);

    uint32_t capped_nhp = min (max_heaps_for_limit, nhp);
    return (max (capped_nhp, 1u));
}

// Reduces the heap count nhp as needed to fit the configured hard limit(s) and returns the
// result. With MULTIPLE_HEAPS:
//  - when a per-object-heap limit is set for soh, nhp is capped against each of the first
//    (total_oh_count - 1) per-object-heap limits in turn;
//  - otherwise, when an overall heap_hard_limit is set, nhp is capped against that.
// Without MULTIPLE_HEAPS nhp is returned unchanged.
uint32_t gc_heap::adjust_heaps_hard_limit (uint32_t nhp)
{
#ifdef MULTIPLE_HEAPS
    if (heap_hard_limit_oh[soh])
    {
        int oh = 0;
        while (oh < (total_oh_count - 1))
        {
            nhp = adjust_heaps_hard_limit_worker (nhp, heap_hard_limit_oh[oh]);
            oh++;
        }
    }
    else if (heap_hard_limit)
    {
        nhp = adjust_heaps_hard_limit_worker (nhp, heap_hard_limit);
    }
#endif

    return nhp;
}

// Rounds a segment size up: via align_on_segment_hard_limit when large pages are in use, and
// to a power of 2 (round_up_power2) otherwise.
size_t gc_heap::adjust_segment_size_hard_limit_va (size_t seg_size)
{
    if (use_large_pages_p)
    {
        return align_on_segment_hard_limit (seg_size);
    }

    return round_up_power2 (seg_size);
}

// Computes the per-heap segment size for a hard limit spread over nhp heaps: the limit (or
// min_segment_size_hard_limit when limit is zero) is aligned via align_on_segment_hard_limit,
// divided by nhp and then rounded by adjust_segment_size_hard_limit_va.
size_t gc_heap::adjust_segment_size_hard_limit (size_t limit, uint32_t nhp)
{
    const size_t effective_limit = (limit != 0) ? limit : min_segment_size_hard_limit;

    const size_t per_heap_size = align_on_segment_hard_limit (effective_limit) / nhp;
    return adjust_segment_size_hard_limit_va (per_heap_size);
}

#ifdef USE_REGIONS
// Allocates the table of initial regions (start/end address per heap and generation) and fills
// it by carving regions out of global_region_allocator, in this order: one POH region per heap,
// then basic regions for max_generation down to gen0 per heap, then one LOH region per heap.
// Returns false if the table or any of the regions cannot be allocated; the failure is reported
// through log_init_error_to_host.
bool allocate_initial_regions(int number_of_heaps)
{
    initial_regions = new (nothrow) uint8_t*[number_of_heaps][total_generation_count][2];
    if (initial_regions == nullptr)
    {
        log_init_error_to_host ("allocate_initial_regions failed to allocate %zd bytes", (number_of_heaps * total_generation_count * 2 * sizeof (uint8_t*)));
        return false;
    }
    for (int i = 0; i < number_of_heaps; i++)
    {
        bool succeed = global_region_allocator.allocate_large_region(
            poh_generation,
            &initial_regions[i][poh_generation][0],
            &initial_regions[i][poh_generation][1], allocate_forward, 0, nullptr);
        assert(succeed);
        // the assert is compiled out of release builds - fail the initialization explicitly
        // instead of carrying on with an unfilled table entry
        if (!succeed)
        {
            log_init_error_to_host ("allocate_initial_regions failed to allocate the initial POH region for heap %d", i);
            return false;
        }
    }
    for (int i = 0; i < number_of_heaps; i++)
    {
        for (int gen_num = max_generation; gen_num >= 0; gen_num--)
        {
            bool succeed = global_region_allocator.allocate_basic_region(
                gen_num,
                &initial_regions[i][gen_num][0],
                &initial_regions[i][gen_num][1], nullptr);
            assert(succeed);
            if (!succeed)
            {
                log_init_error_to_host ("allocate_initial_regions failed to allocate the initial gen%d region for heap %d", gen_num, i);
                return false;
            }
        }
    }
    for (int i = 0; i < number_of_heaps; i++)
    {
        bool succeed = global_region_allocator.allocate_large_region(
            loh_generation,
            &initial_regions[i][loh_generation][0],
            &initial_regions[i][loh_generation][1], allocate_forward, 0, nullptr);
        assert(succeed);
        if (!succeed)
        {
            log_init_error_to_host ("allocate_initial_regions failed to allocate the initial LOH region for heap %d", i);
            return false;
        }
    }
    return true;
}
#endif

// Logs and asks the EE to suspend, passing SUSPEND_FOR_GC_PREP as the reason.
void
gc_heap::suspend_EE ()
{
    dprintf (2, ("suspend_EE"));
    GCToEEInterface::SuspendEE (SUSPEND_FOR_GC_PREP);
}

// Logs and asks the EE to restart.
// NOTE(review): the meaning of the FALSE argument to RestartEE is not visible here - confirm
// against GCToEEInterface.
void
gc_heap::restart_EE ()
{
    dprintf (2, ("restart_EE"));
    GCToEEInterface::RestartEE (FALSE);
}

HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
                                size_t loh_segment_size,
                                size_t poh_segment_size
#ifdef MULTIPLE_HEAPS
                                ,int number_of_heaps
#endif //MULTIPLE_HEAPS
)
{
#ifdef GC_CONFIG_DRIVEN
    if (GCConfig::GetConfigLogEnabled())
    {
        gc_config_log = CreateLogFile(GCConfig::GetConfigLogFile(), true);

        if (gc_config_log == NULL)
        {
            return E_FAIL;
        }

        gc_config_log_buffer = new (nothrow) uint8_t [gc_config_log_buffer_size];
        if (!gc_config_log_buffer)
        {
            fclose(gc_config_log);
            return E_OUTOFMEMORY;
        }

        compact_ratio = static_cast<int>(GCConfig::GetCompactRatio());

        //         h#  | GC  | gen | C   | EX   | NF  | BF  | ML  | DM  || PreS | PostS | Merge | Conv | Pre | Post | PrPo | PreP | PostP |
        cprintf (("%2s | %6s | %1s | %1s | %2s | %2s | %2s | %2s | %2s || %5s | %5s | %5s | %5s | %5s | %5s | %5s | %5s | %5s |",
                "h#", // heap index
                "GC", // GC index
                "g", // generation
                "C",  // compaction (empty means sweeping), 'M' means it was mandatory, 'W' means it was not
                "EX", // heap expansion
                "NF", // normal fit
                "BF", // best fit (if it indicates neither NF nor BF it means it had to acquire a new seg.
                "ML", // mark list
                "DM", // demotion
                "PreS", // short object before pinned plug
                "PostS", // short object after pinned plug
                "Merge", // merged pinned plugs
                "Conv", // converted to pinned plug
                "Pre", // plug before pinned plug but not after
                "Post", // plug after pinned plug but not before
                "PrPo", // plug both before and after pinned plug
                "PreP", // pre short object padded
                "PostP" // post short object padded
                ));
    }
#endif //GC_CONFIG_DRIVEN

    HRESULT hres = S_OK;

    conserve_mem_setting = (int)GCConfig::GetGCConserveMem();

#ifdef DYNAMIC_HEAP_COUNT
    dynamic_adaptation_mode = (int)GCConfig::GetGCDynamicAdaptationMode();
    if (GCConfig::GetHeapCount() != 0)
    {
        dynamic_adaptation_mode = 0;
    }

    if ((dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && (conserve_mem_setting == 0))
        conserve_mem_setting = 5;

#ifdef STRESS_DYNAMIC_HEAP_COUNT
    bgc_to_ngc2_ratio = (int)GCConfig::GetGCDBGCRatio();
    dprintf (1, ("bgc_to_ngc2_ratio is %d", bgc_to_ngc2_ratio));
#endif
#endif //DYNAMIC_HEAP_COUNT

    if (conserve_mem_setting < 0)
        conserve_mem_setting = 0;
    if (conserve_mem_setting > 9)
        conserve_mem_setting = 9;

    dprintf (1, ("conserve_mem_setting = %d", conserve_mem_setting));

#ifdef WRITE_WATCH
    hardware_write_watch_api_supported();
#ifdef BACKGROUND_GC
    if (can_use_write_watch_for_gc_heap() && GCConfig::GetConcurrentGC())
    {
        gc_can_use_concurrent = true;
#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        virtual_alloc_hardware_write_watch = true;
#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    }
    else
    {
        gc_can_use_concurrent = false;
    }

    GCConfig::SetConcurrentGC(gc_can_use_concurrent);
#else //BACKGROUND_GC
    GCConfig::SetConcurrentGC(false);
#endif //BACKGROUND_GC
#endif //WRITE_WATCH

#ifdef BACKGROUND_GC
#ifdef USE_REGIONS
    int bgc_uoh_inc_percent_alloc_wait = (int)GCConfig::GetUOHWaitBGCSizeIncPercent();
    if (bgc_uoh_inc_percent_alloc_wait != -1)
    {
        bgc_uoh_inc_ratio_alloc_wait = (float)bgc_uoh_inc_percent_alloc_wait / 100.0f;
    }
    else
    {
        bgc_uoh_inc_percent_alloc_wait = (int)(bgc_uoh_inc_ratio_alloc_wait * 100.0f);
    }

    if (bgc_uoh_inc_ratio_alloc_normal > bgc_uoh_inc_ratio_alloc_wait)
    {
        bgc_uoh_inc_ratio_alloc_normal = bgc_uoh_inc_ratio_alloc_wait;
    }
    GCConfig::SetUOHWaitBGCSizeIncPercent (bgc_uoh_inc_percent_alloc_wait);
    dprintf (1, ("UOH allocs during BGC are allowed normally when inc ratio is  < %.3f, will wait when > %.3f",
        bgc_uoh_inc_ratio_alloc_normal, bgc_uoh_inc_ratio_alloc_wait));
#endif 

    // leave the first page to contain only segment info
    // because otherwise we could need to revisit the first page frequently in
    // background GC.
    segment_info_size = OS_PAGE_SIZE;
#else
    segment_info_size = Align (sizeof (heap_segment), get_alignment_constant (FALSE));
#endif //BACKGROUND_GC

    reserved_memory = 0;
    size_t initial_heap_size = soh_segment_size + loh_segment_size + poh_segment_size;
    uint16_t* heap_no_to_numa_node = nullptr;
#ifdef MULTIPLE_HEAPS
    reserved_memory_limit = initial_heap_size * number_of_heaps;
    if (!heap_select::init(number_of_heaps))
        return E_OUTOFMEMORY;
    if (GCToOSInterface::CanEnableGCNumaAware())
        heap_no_to_numa_node = heap_select::heap_no_to_numa_node;
#else //MULTIPLE_HEAPS
    reserved_memory_limit = initial_heap_size;
    int number_of_heaps = 1;
#endif //MULTIPLE_HEAPS

    check_commit_cs.Initialize();
#ifdef COMMITTED_BYTES_SHADOW
    decommit_lock.Initialize();
#endif //COMMITTED_BYTES_SHADOW

#ifdef USE_REGIONS
    if (regions_range)
    {
        // REGIONS TODO: we should reserve enough space at the end of what we reserved that's
        // big enough to accommodate if we were to materialize all the GC bookkeeping datastructures.
        // We only need to commit what we use and just need to commit more instead of having to
        // relocate the existing table and then calling copy_brick_card_table.
        // Right now all the non mark array portions are committed since I'm calling make_card_table
        // on the whole range. This can be committed as needed.
        size_t reserve_size = regions_range;
        uint8_t* reserve_range = (uint8_t*)virtual_alloc (reserve_size, use_large_pages_p);
        if (!reserve_range)
        {
            log_init_error_to_host ("Reserving %zd bytes (%zd GiB) for the regions range failed, do you have a virtual memory limit set on this process?",
                reserve_size, gib (reserve_size));
            return E_OUTOFMEMORY;
        }

        if (!global_region_allocator.init (reserve_range, (reserve_range + reserve_size),
                                           ((size_t)1 << min_segment_size_shr),
                                           &g_gc_lowest_address, &g_gc_highest_address))
            return E_OUTOFMEMORY;

        if (!allocate_initial_regions(number_of_heaps))
            return E_OUTOFMEMORY;
    }
    else
    {
        assert (!"cannot use regions without specifying the range!!!");
        log_init_error_to_host ("Regions range is 0! unexpected");
        return E_FAIL;
    }
#else //USE_REGIONS
    bool separated_poh_p = use_large_pages_p &&
                           heap_hard_limit_oh[soh] &&
                           (GCConfig::GetGCHeapHardLimitPOH() == 0) &&
                           (GCConfig::GetGCHeapHardLimitPOHPercent() == 0);
    if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps,
                                 use_large_pages_p, separated_poh_p, heap_no_to_numa_node))
        return E_OUTOFMEMORY;
    if (use_large_pages_p)
    {
#ifndef HOST_64BIT
        // Large pages are not supported on 32bit
        assert (false);
#endif //!HOST_64BIT

        if (heap_hard_limit_oh[soh])
        {
            heap_hard_limit_oh[soh] = soh_segment_size * number_of_heaps;
            heap_hard_limit_oh[loh] = loh_segment_size * number_of_heaps;
            heap_hard_limit_oh[poh] = poh_segment_size * number_of_heaps;
            heap_hard_limit = heap_hard_limit_oh[soh] + heap_hard_limit_oh[loh] + heap_hard_limit_oh[poh];
        }
        else
        {
            assert (heap_hard_limit);
            heap_hard_limit = (soh_segment_size + loh_segment_size + poh_segment_size) * number_of_heaps;
        }
    }
#endif //USE_REGIONS

#ifdef CARD_BUNDLE
    //check if we need to turn on card_bundles.
#ifdef MULTIPLE_HEAPS
    // use INT64 arithmetic here because of possible overflow on 32p
    uint64_t th = (uint64_t)MH_TH_CARD_BUNDLE*number_of_heaps;
#else
    // use INT64 arithmetic here because of possible overflow on 32p
    uint64_t th = (uint64_t)SH_TH_CARD_BUNDLE;
#endif //MULTIPLE_HEAPS

    if (can_use_write_watch_for_card_table() && reserved_memory >= th)
    {
        settings.card_bundles = TRUE;
    }
    else
    {
        settings.card_bundles = FALSE;
    }
#endif //CARD_BUNDLE

    settings.first_init();

    int latency_level_from_config = static_cast<int>(GCConfig::GetLatencyLevel());
    if (latency_level_from_config >= latency_level_first && latency_level_from_config <= latency_level_last)
    {
        gc_heap::latency_level = static_cast<gc_latency_level>(latency_level_from_config);
    }

    init_static_data();

    g_gc_card_table = make_card_table (g_gc_lowest_address, g_gc_highest_address);

    if (!g_gc_card_table)
        return E_OUTOFMEMORY;

    gc_started = FALSE;

#ifdef MULTIPLE_HEAPS
    g_heaps = new (nothrow) gc_heap* [number_of_heaps];
    if (!g_heaps)
        return E_OUTOFMEMORY;

#if !defined(USE_REGIONS) || defined(_DEBUG)
    g_promoted = new (nothrow) size_t [number_of_heaps*16];
    if (!g_promoted)
        return E_OUTOFMEMORY;
#endif //!USE_REGIONS || _DEBUG
#ifdef BACKGROUND_GC
    g_bpromoted = new (nothrow) size_t [number_of_heaps*16];
    if (!g_bpromoted)
        return E_OUTOFMEMORY;
#endif

#ifdef MH_SC_MARK
    g_mark_stack_busy = new (nothrow) int[(number_of_heaps+2)*HS_CACHE_LINE_SIZE/sizeof(int)];
#endif //MH_SC_MARK

#ifdef MH_SC_MARK
    if (!g_mark_stack_busy)
        return E_OUTOFMEMORY;
#endif //MH_SC_MARK

    if (!create_thread_support (number_of_heaps))
        return E_OUTOFMEMORY;

#endif //MULTIPLE_HEAPS

#ifdef MULTIPLE_HEAPS
    yp_spin_count_unit = 32 * number_of_heaps;
#else
    yp_spin_count_unit = 32 * g_num_processors;
#endif //MULTIPLE_HEAPS

    // Check if the values are valid for the spin count if provided by the user
    // and if they are, set them as the yp_spin_count_unit and then ignore any updates made in SetYieldProcessorScalingFactor.
    int64_t spin_count_unit_from_config = GCConfig::GetGCSpinCountUnit();
    gc_heap::spin_count_unit_config_p = (spin_count_unit_from_config > 0) && (spin_count_unit_from_config <= MAX_YP_SPIN_COUNT_UNIT);
    if (gc_heap::spin_count_unit_config_p)
    {
        yp_spin_count_unit = static_cast<int32_t>(spin_count_unit_from_config);
    }

    original_spin_count_unit = yp_spin_count_unit;

#if (defined(MULTIPLE_HEAPS) && defined(DYNAMIC_HEAP_COUNT))
    if ((dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && (!gc_heap::spin_count_unit_config_p))
    {
        yp_spin_count_unit = 10;
    }
#endif // MULTIPLE_HEAPS && DYNAMIC_HEAP_COUNT

#if defined(__linux__)
    GCToEEInterface::UpdateGCEventStatus(static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Default)),
                                         static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Default)),
                                         static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Private)),
                                         static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Private)));
#endif // __linux__

#ifdef USE_VXSORT
    InitSupportedInstructionSet ((int32_t)GCConfig::GetGCEnabledInstructionSets());
#endif

    if (!init_semi_shared())
    {
        log_init_error_to_host ("PER_HEAP_ISOLATED data members initialization failed");
        hres = E_FAIL;
    }

    return hres;
}

// Initializes PER_HEAP_ISOLATED data members.
//
// Returns 1 when every step below succeeded and 0 otherwise. Each failure path jumps to
// the `cleanup` label with `ret` still 0. The only resources released there are the two
// full GC notification events; anything else allocated before the failure (mark lists,
// seg_table, the timing arrays, ...) is not freed by this function.
int
gc_heap::init_semi_shared()
{
    int ret = 0;

#ifdef BGC_SERVO_TUNING
    // Locals used by the BGC servo tuning section further down.
    // NOTE(review): presumably declared up here so that no `goto cleanup` jumps over
    // an initialized declaration - confirm before moving them closer to their use.
    uint32_t current_memory_load = 0;
    uint32_t sweep_flr_goal = 0;
    uint32_t sweep_flr_goal_loh = 0;
#endif //BGC_SERVO_TUNING

#ifndef USE_REGIONS
    // This is used for heap expansion - it's to fix exactly the start for gen 0
    // through (max_generation-1). When we expand the heap we allocate all these
    // gen starts at the beginning of the new ephemeral seg.
    eph_gen_starts_size = (Align (min_obj_size)) * max_generation;
#endif //!USE_REGIONS

#ifdef MULTIPLE_HEAPS
    // Per-heap mark list size: soh_segment_size/(2*10*32), clamped to [8192, 100*1024].
    mark_list_size = min ((size_t)100*1024, max ((size_t)8192, soh_segment_size/(2*10*32)));
#ifdef DYNAMIC_HEAP_COUNT
    if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
    {
        // we'll actually start with one heap in this case
        g_mark_list_total_size = mark_list_size;
    }
    else
#endif //DYNAMIC_HEAP_COUNT
    {
        g_mark_list_total_size = mark_list_size*n_heaps;
    }
    // g_mark_list is allocated here but only checked for failure after the
    // #endif below, in the check shared with the non-MULTIPLE_HEAPS build.
    g_mark_list = make_mark_list (g_mark_list_total_size);

    min_balance_threshold = alloc_quantum_balance_units * CLR_SIZE * 2;
    g_mark_list_copy = make_mark_list (g_mark_list_total_size);
    if (!g_mark_list_copy)
    {
        goto cleanup;
    }
#else //MULTIPLE_HEAPS

    // Single mark list: soh_segment_size/(64*32), clamped to [8192, 100*1024].
    mark_list_size = min((size_t)100*1024, max ((size_t)8192, soh_segment_size/(64*32)));
    g_mark_list_total_size = mark_list_size;
    g_mark_list = make_mark_list (mark_list_size);

#endif //MULTIPLE_HEAPS

    dprintf (3, ("mark_list_size: %zd", mark_list_size));

    if (!g_mark_list)
    {
        goto cleanup;
    }

#ifdef MULTIPLE_HEAPS
    // gradual decommit: set size to some reasonable value per time interval
    max_decommit_step_size = ((DECOMMIT_SIZE_PER_MILLISECOND * DECOMMIT_TIME_STEP_MILLISECONDS) / n_heaps);

    // but do at least MIN_DECOMMIT_SIZE per step to make the OS call worthwhile
    max_decommit_step_size = max (max_decommit_step_size, MIN_DECOMMIT_SIZE);
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_BASICFREEZE
    seg_table = sorted_table::make_sorted_table();

    if (!seg_table)
        goto cleanup;
#endif //FEATURE_BASICFREEZE

#ifndef USE_REGIONS
    segment_standby_list = 0;
#endif //!USE_REGIONS

    // The two full GC notification events. These are the only resources that the
    // cleanup code at the end of this function closes on failure.
    if (!full_gc_approach_event.CreateManualEventNoThrow(FALSE))
    {
        goto cleanup;
    }
    if (!full_gc_end_event.CreateManualEventNoThrow(FALSE))
    {
        goto cleanup;
    }

    fgn_loh_percent = 0;
    full_gc_approach_event_set = false;

    memset (full_gc_counts, 0, sizeof (full_gc_counts));

#ifndef USE_REGIONS
    should_expand_in_full_gc = FALSE;
#endif //!USE_REGIONS


#ifdef FEATURE_LOH_COMPACTION
    // A non-zero LOHCompactionMode config value turns on "always compact LOH".
    loh_compaction_always_p = GCConfig::GetLOHCompactionMode() != 0;
    loh_compaction_mode = loh_compaction_default;
#endif //FEATURE_LOH_COMPACTION

#ifdef BGC_SERVO_TUNING
    memset (bgc_tuning::gen_calc, 0, sizeof (bgc_tuning::gen_calc));
    memset (bgc_tuning::gen_stats, 0, sizeof (bgc_tuning::gen_stats));
    memset (bgc_tuning::current_bgc_end_data, 0, sizeof (bgc_tuning::current_bgc_end_data));

    // The config values below are read as integers and scaled down here
    // (by 100, 1000 or 1000000 depending on the setting) into doubles.

    // for the outer loop - the ML (memory load) loop
    bgc_tuning::enable_fl_tuning = (GCConfig::GetBGCFLTuningEnabled() != 0);
    bgc_tuning::memory_load_goal = (uint32_t)GCConfig::GetBGCMemGoal();
    bgc_tuning::memory_load_goal_slack = (uint32_t)GCConfig::GetBGCMemGoalSlack();
    bgc_tuning::ml_kp = (double)GCConfig::GetBGCMLkp() / 1000.0;
    bgc_tuning::ml_ki = (double)GCConfig::GetBGCMLki() / 1000.0;
    bgc_tuning::ratio_correction_step = (double)GCConfig::GetBGCG2RatioStep() / 100.0;

    // for the inner loop - the alloc loop which calculates the allocated bytes in gen2 before
    // triggering the next BGC.
    bgc_tuning::above_goal_kp = (double)GCConfig::GetBGCFLkp() / 1000000.0;
    bgc_tuning::enable_ki = (GCConfig::GetBGCFLEnableKi() != 0);
    bgc_tuning::above_goal_ki = (double)GCConfig::GetBGCFLki() / 1000000.0;
    bgc_tuning::enable_kd = (GCConfig::GetBGCFLEnableKd() != 0);
    bgc_tuning::above_goal_kd = (double)GCConfig::GetBGCFLkd() / 100.0;
    bgc_tuning::enable_smooth = (GCConfig::GetBGCFLEnableSmooth() != 0);
    bgc_tuning::num_gen1s_smooth_factor = (double)GCConfig::GetBGCFLSmoothFactor() / 100.0;
    bgc_tuning::enable_tbh = (GCConfig::GetBGCFLEnableTBH() != 0);
    bgc_tuning::enable_ff = (GCConfig::GetBGCFLEnableFF() != 0);
    bgc_tuning::above_goal_ff = (double)GCConfig::GetBGCFLff() / 100.0;
    bgc_tuning::enable_gradual_d = (GCConfig::GetBGCFLGradualD() != 0);
    sweep_flr_goal = (uint32_t)GCConfig::GetBGCFLSweepGoal();
    sweep_flr_goal_loh = (uint32_t)GCConfig::GetBGCFLSweepGoalLOH();

    // A configured sweep goal of 0 falls back to 20.0.
    bgc_tuning::gen_calc[0].sweep_flr_goal = ((sweep_flr_goal == 0) ? 20.0 : (double)sweep_flr_goal);
    bgc_tuning::gen_calc[1].sweep_flr_goal = ((sweep_flr_goal_loh == 0) ? 20.0 : (double)sweep_flr_goal_loh);

    // available_memory_goal = total_physical_mem * (100 - memory_load_goal) / 100
    bgc_tuning::available_memory_goal = (uint64_t)((double)gc_heap::total_physical_mem * (double)(100 - bgc_tuning::memory_load_goal) / 100);
    get_memory_info (&current_memory_load);

    dprintf (BGC_TUNING_LOG, ("BTL tuning %s!!!",
        (bgc_tuning::enable_fl_tuning ? "enabled" : "disabled")));

#ifdef SIMPLE_DPRINTF
    dprintf (BGC_TUNING_LOG, ("BTL tuning parameters: mem goal: %d%%(%zd), +/-%d%%, gen2 correction factor: %.2f, sweep flr goal: %d%%, smooth factor: %.3f(%s), TBH: %s, FF: %.3f(%s), ml: kp %.5f, ki %.10f",
        bgc_tuning::memory_load_goal,
        bgc_tuning::available_memory_goal,
        bgc_tuning::memory_load_goal_slack,
        bgc_tuning::ratio_correction_step,
        (int)bgc_tuning::gen_calc[0].sweep_flr_goal,
        bgc_tuning::num_gen1s_smooth_factor,
        (bgc_tuning::enable_smooth ? "enabled" : "disabled"),
        (bgc_tuning::enable_tbh ? "enabled" : "disabled"),
        bgc_tuning::above_goal_ff,
        (bgc_tuning::enable_ff ? "enabled" : "disabled"),
        bgc_tuning::ml_kp,
        bgc_tuning::ml_ki));

    dprintf (BGC_TUNING_LOG, ("BTL tuning parameters: kp: %.5f, ki: %.5f (%s), kd: %.3f (kd-%s, gd-%s), ff: %.3f",
        bgc_tuning::above_goal_kp,
        bgc_tuning::above_goal_ki,
        (bgc_tuning::enable_ki ? "enabled" : "disabled"),
        bgc_tuning::above_goal_kd,
        (bgc_tuning::enable_kd ? "enabled" : "disabled"),
        (bgc_tuning::enable_gradual_d ? "enabled" : "disabled"),
        bgc_tuning::above_goal_ff));
#endif //SIMPLE_DPRINTF

    // Stepping is only set up when tuning is enabled and the current memory load is
    // still below the goal; otherwise the stepping trigger is switched off.
    if (bgc_tuning::enable_fl_tuning && (current_memory_load < bgc_tuning::memory_load_goal))
    {
        uint32_t distance_to_goal = bgc_tuning::memory_load_goal - current_memory_load;
        // a tenth of the remaining distance, but never less than 1
        bgc_tuning::stepping_interval = max (distance_to_goal / 10, 1u);
        bgc_tuning::last_stepping_mem_load = current_memory_load;
        bgc_tuning::last_stepping_bgc_count = 0;
        dprintf (BGC_TUNING_LOG, ("current ml: %d, %d to goal, interval: %d",
            current_memory_load, distance_to_goal, bgc_tuning::stepping_interval));
    }
    else
    {
        dprintf (BGC_TUNING_LOG, ("current ml: %d, >= goal: %d, disable stepping",
            current_memory_load, bgc_tuning::memory_load_goal));
        bgc_tuning::use_stepping_trigger_p = false;
    }
#endif //BGC_SERVO_TUNING

#ifdef BACKGROUND_GC
    memset (ephemeral_fgc_counts, 0, sizeof (ephemeral_fgc_counts));
    bgc_alloc_spin_count = static_cast<uint32_t>(GCConfig::GetBGCSpinCount());
    bgc_alloc_spin = static_cast<uint32_t>(GCConfig::GetBGCSpin());

    // The local lives in its own scope so that the `goto cleanup` statements
    // elsewhere in this function do not jump over its initialization.
    {
        int number_bgc_threads = get_num_heaps();
        if (!create_bgc_threads_support (number_bgc_threads))
        {
            goto cleanup;
        }
    }
#endif //BACKGROUND_GC

    memset (&current_no_gc_region_info, 0, sizeof (current_no_gc_region_info));

#ifdef GC_CONFIG_DRIVEN
    compact_or_sweep_gcs[0] = 0;
    compact_or_sweep_gcs[1] = 0;
#endif //GC_CONFIG_DRIVEN

#if defined(SHORT_PLUGS) && !defined(USE_REGIONS)
    short_plugs_pad_ratio = (double)DESIRED_PLUG_LENGTH / (double)(DESIRED_PLUG_LENGTH - Align (min_obj_size));
#endif //SHORT_PLUGS && !USE_REGIONS

    generation_skip_ratio_threshold = (int)GCConfig::GetGCLowSkipRatio();

#ifdef FEATURE_EVENT_TRACE
    // Arrays used for event tracing; each allocation failure aborts initialization.
    gc_time_info = new (nothrow) uint64_t[max_compact_time_type];
    if (!gc_time_info)
    {
        goto cleanup;
    }
#ifdef BACKGROUND_GC
    bgc_time_info = new (nothrow) uint64_t[max_bgc_time_type];
    if (!bgc_time_info)
    {
        goto cleanup;
    }
#endif //BACKGROUND_GC

#ifdef FEATURE_LOH_COMPACTION
    // one entry per heap
    loh_compact_info = new (nothrow) etw_loh_compact_info [get_num_heaps()];
    if (!loh_compact_info)
    {
        goto cleanup;
    }
#endif //FEATURE_LOH_COMPACTION
#endif //FEATURE_EVENT_TRACE

    reset_mm_p = TRUE;

    // Everything succeeded; the cleanup code below becomes a no-op.
    ret = 1;

cleanup:

    if (!ret)
    {
        // Failure: close whichever notification events were already created.
        if (full_gc_approach_event.IsValid())
        {
            full_gc_approach_event.CloseEvent();
        }
        if (full_gc_end_event.IsValid())
        {
            full_gc_end_event.CloseEvent();
        }
    }

    return ret;
}

// Creates and initializes one GC heap.
//
// With MULTIPLE_HEAPS a gc_heap object is allocated, bound to vm_hp, and initialized
// as heap number heap_number; the new object is returned. Without MULTIPLE_HEAPS no
// object is allocated: init_gc_heap (0) is invoked through a null gc_heap pointer and
// the non-null sentinel (gc_heap*)1 is returned on success.
//
// Returns 0 on any failure. Nothing allocated before the failure is released here.
gc_heap* gc_heap::make_gc_heap (
#ifdef MULTIPLE_HEAPS
                                GCHeap* vm_hp,
                                int heap_number
#endif //MULTIPLE_HEAPS
                                )
{
    gc_heap* new_heap = 0;

#ifdef MULTIPLE_HEAPS
    const int heap_index = heap_number;

    new_heap = new (nothrow) gc_heap;
    if (new_heap == 0)
    {
        return 0;
    }

    new_heap->vm_heap = vm_hp;
    new_heap->alloc_context_count = 0;

#ifndef USE_REGIONS
    new_heap->mark_list_piece_start = new (nothrow) uint8_t**[n_heaps];
    if (new_heap->mark_list_piece_start == 0)
    {
        return 0;
    }

    // +32 is padding to reduce false sharing
    new_heap->mark_list_piece_end = new (nothrow) uint8_t**[n_heaps + 32];
    if (new_heap->mark_list_piece_end == 0)
    {
        return 0;
    }
#endif //!USE_REGIONS

#else //MULTIPLE_HEAPS
    const int heap_index = 0;
#endif //MULTIPLE_HEAPS

    // init_gc_heap reports failure by returning 0.
    if (new_heap->init_gc_heap (heap_index) == 0)
    {
        return 0;
    }

#ifdef MULTIPLE_HEAPS
    return new_heap;
#else
    // There is no heap object to hand back; any non-null value signals success.
    return (gc_heap*)1;
#endif //MULTIPLE_HEAPS
}

uint32_t
gc_heap::wait_for_gc_done(int32_t timeOut)
{
    bool cooperative_mode = enable_preemptive ();

    uint32_t dwWaitResult = NOERROR;

    gc_heap* wait_heap = NULL;
    while (gc_heap::gc_started)
    {
#ifdef MULTIPLE_HEAPS
        wait_heap = g_heaps[heap_select::select_heap(NULL)];
        dprintf(2, ("waiting for the gc_done_event on heap %d", wait_heap->heap_number));
#endif // MULTIPLE_HEAPS

        dwWaitResult = wait_heap->gc_done_event.Wait(timeOut, FALSE);
    }
    disable_preemptive (cooperative_mode);

    return dwWaitResult;
}

// Signals gc_done_event unless gc_done_event_set says it is signaled already.
// The flag check, the flag update and the Set() call all happen while holding
// the gc_done_event lock.
void
gc_heap::set_gc_done()
{
    enter_gc_done_event_lock();

    const bool already_signaled = gc_done_event_set;
    if (!already_signaled)
    {
        gc_done_event_set = true;
        dprintf (2, ("heap %d: setting gc_done_event", heap_number));
        gc_done_event.Set();
    }

    exit_gc_done_event_lock();
}

// Resets gc_done_event if gc_done_event_set says it is currently signaled.
// The flag check, the flag update and the Reset() call all happen while holding
// the gc_done_event lock.
void
gc_heap::reset_gc_done()
{
    enter_gc_done_event_lock();

    const bool currently_signaled = gc_done_event_set;
    if (currently_signaled)
    {
        gc_done_event_set = false;
        dprintf (2, ("heap %d: resetting gc_done_event", heap_number));
        gc_done_event.Reset();
    }

    exit_gc_done_event_lock();
}

// Acquires the spin lock kept in gc_done_event_lock.
//
// A negative lock value means the lock is free (exit_gc_done_event_lock stores -1).
// Each attempt tries to swap -1 for 0 with Interlocked::CompareExchange; a result >= 0
// is treated as "somebody else holds the lock". In that case we wait until the lock
// value turns negative and then attempt the exchange again.
void
gc_heap::enter_gc_done_event_lock()
{
    uint32_t yield_count = 0;

    while (Interlocked::CompareExchange(&gc_done_event_lock, 0, -1) >= 0)
    {
        // Lock is taken: wait for it to look free before retrying the exchange.
        while (gc_done_event_lock >= 0)
        {
            bool should_yield_thread = true;

            if  (g_num_processors > 1)
            {
                // On a multiprocessor machine spin for a while first.
                int spin_limit = yp_spin_count_unit;
                for (int spin = 0; spin < spin_limit; spin++)
                {
                    if  (gc_done_event_lock < 0)
                        break;
                    YieldProcessor();           // indicate to the processor that we are spinning
                }

                // Only give up the time slice if spinning did not help.
                should_yield_thread = (gc_done_event_lock >= 0);
            }

            if (should_yield_thread)
            {
                GCToOSInterface::YieldThread(++yield_count);
            }
        }
    }
}

// Releases the lock taken by enter_gc_done_event_lock() by storing -1 into
// gc_done_event_lock; enter_gc_done_event_lock() treats a negative value as "free".
void
gc_heap::exit_gc_done_event_lock()
{
    gc_done_event_lock = -1;
}

// Out-of-class definitions of gc_heap static data members. The ones inside the
// #ifndef are only defined here for builds without MULTIPLE_HEAPS.
#ifndef MULTIPLE_HEAPS

#ifdef RECORD_LOH_STATE
// Index into, and storage for, the saved LOH state history.
int gc_heap::loh_state_index = 0;
gc_heap::loh_state_info gc_heap::last_loh_states[max_saved_loh_states];
#endif //RECORD_LOH_STATE

// State behind set_gc_done / reset_gc_done / wait_for_gc_done: the spin lock word,
// the "event is currently signaled" flag and the event itself.
VOLATILE(int32_t) gc_heap::gc_done_event_lock;
VOLATILE(bool) gc_heap::gc_done_event_set;
GCEvent gc_heap::gc_done_event;
#endif //!MULTIPLE_HEAPS
// Defined for every build flavor (outside the #ifndef above).
VOLATILE(bool) gc_heap::internal_gc_done;

// Records one entry in the last_spinlock_info history buffer.
//
// Only does work when SPINLOCK_HISTORY is defined; otherwise the function is a no-op
// and every parameter is explicitly marked as unreferenced.
//
//   loh_p        - entries are only recorded when this is true.
//   enter_state  - stored in the entry; logged as "E" for me_acquire and "L" otherwise.
//   take_state   - stored in the entry; also used to index msl_take_state_str for logging.
//   msl_status   - nothing is recorded when this is msl_retry_different_heap.
//
// The buffer is used as a ring: once spinlock_info_index reaches
// max_saved_spinlock_info it wraps back to 0.
void gc_heap::add_saved_spinlock_info (
            bool loh_p,
            msl_enter_state enter_state,
            msl_take_state take_state,
            enter_msl_status msl_status)
{
#ifdef SPINLOCK_HISTORY
    if (!loh_p || (msl_status == msl_retry_different_heap))
    {
        return;
    }

    spinlock_info* current = &last_spinlock_info[spinlock_info_index];

    current->enter_state = enter_state;
    current->take_state = take_state;
    current->current_uoh_alloc_state = current_uoh_alloc_state;
    current->thread_id.SetToCurrentThread();
    current->loh_p = loh_p;
    dprintf (SPINLOCK_LOG, ("[%d]%s %s %s",
        heap_number,
        (loh_p ? "loh" : "soh"),
        ((enter_state == me_acquire) ? "E" : "L"),
        msl_take_state_str[take_state]));

    spinlock_info_index++;

    assert (spinlock_info_index <= max_saved_spinlock_info);

    // Wrap around so that the next entry overwrites the oldest one.
    if (spinlock_info_index >= max_saved_spinlock_info)
    {
        spinlock_info_index = 0;
    }
#else
    // None of the parameters is used in this configuration; mark all four of them
    // so that unused-parameter warnings stay quiet.
    UNREFERENCED_PARAMETER(loh_p);
    UNREFERENCED_PARAMETER(enter_state);
    UNREFERENCED_PARAMETER(take_state);
    UNREFERENCED_PARAMETER(msl_status);
#endif //SPINLOCK_HISTORY
}

int
gc_heap::init_gc_heap (int h_number)
{
#ifdef MULTIPLE_HEAPS
#ifdef _DEBUG
    memset (committed_by_oh_per_heap, 0, sizeof (committed_by_oh_per_heap));
#endif //_DEBUG

    g_heaps [h_number] = this;

    time_bgc_last = 0;

#ifdef SPINLOCK_HISTORY
    spinlock_info_index = 0;
    memset (last_spinlock_info, 0, sizeof(last_spinlock_info));
#endif //SPINLOCK_HISTORY

    // initialize per heap members.
#ifndef USE_REGIONS
    ephemeral_low = (uint8_t*)1;

    ephemeral_high = MAX_PTR;
#endif //!USE_REGIONS

    gc_low = 0;

    gc_high = 0;

    ephemeral_heap_segment = 0;

    oomhist_index_per_heap = 0;

    freeable_uoh_segment = 0;

    condemned_generation_num = 0;

    blocking_collection = FALSE;

    generation_skip_ratio = 100;

#ifdef FEATURE_CARD_MARKING_STEALING
    n_eph_soh = 0;
    n_gen_soh = 0;
    n_eph_loh = 0;
    n_gen_loh = 0;
#endif //FEATURE_CARD_MARKING_STEALING
    mark_stack_tos = 0;

    mark_stack_bos = 0;

    mark_stack_array_length = 0;

    mark_stack_array = 0;

#if defined (_DEBUG) && defined (VERIFY_HEAP)
    verify_pinned_queue_p = FALSE;
#endif // _DEBUG && VERIFY_HEAP

#ifdef FEATURE_LOH_COMPACTION
    loh_pinned_queue_tos = 0;

    loh_pinned_queue_bos = 0;

    loh_pinned_queue_length = 0;

    loh_pinned_queue_decay = LOH_PIN_DECAY;

    loh_pinned_queue = 0;
#endif //FEATURE_LOH_COMPACTION

    min_overflow_address = MAX_PTR;

    max_overflow_address = 0;

    gen0_bricks_cleared = FALSE;

    gen0_must_clear_bricks = 0;

    allocation_quantum = CLR_SIZE;

    more_space_lock_soh = gc_lock;

    more_space_lock_uoh = gc_lock;

    loh_alloc_since_cg = 0;

#ifndef USE_REGIONS
    new_heap_segment = NULL;

    ro_segments_in_range = FALSE;
#endif //!USE_REGIONS

    gen0_allocated_after_gc_p = false;

#ifdef RECORD_LOH_STATE
    loh_state_index = 0;
#endif //RECORD_LOH_STATE

#ifdef USE_REGIONS
    new_gen0_regions_in_plns = 0;
    new_regions_in_prr = 0;
    new_regions_in_threading = 0;

    special_sweep_p = false;
#endif //USE_REGIONS

#endif //MULTIPLE_HEAPS

#ifdef MULTIPLE_HEAPS
    if (h_number > n_heaps)
    {
        assert (!"Number of heaps exceeded");
        return 0;
    }

    heap_number = h_number;
#endif //MULTIPLE_HEAPS

    memset (etw_allocation_running_amount, 0, sizeof (etw_allocation_running_amount));
    memset (allocated_since_last_gc, 0, sizeof (allocated_since_last_gc));
    memset (&oom_info, 0, sizeof (oom_info));
    memset (&fgm_result, 0, sizeof (fgm_result));
    memset (oomhist_per_heap, 0, sizeof (oomhist_per_heap));
    if (!gc_done_event.CreateManualEventNoThrow(FALSE))
    {
        return 0;
    }
    gc_done_event_lock = -1;
    gc_done_event_set = false;

#ifdef DYNAMIC_HEAP_COUNT
    hchist_index_per_heap = 0;
    memset (hchist_per_heap, 0, sizeof (hchist_per_heap));

#ifdef BACKGROUND_GC
    bgc_hchist_index_per_heap = 0;
    memset (bgc_hchist_per_heap, 0, sizeof (bgc_hchist_per_heap));
#endif //BACKGROUND_GC

    if (h_number != 0)
    {
        if (!gc_idle_thread_event.CreateAutoEventNoThrow (FALSE))
        {
            return 0;
        }

#ifdef BACKGROUND_GC
        if (!bgc_idle_thread_event.CreateAutoEventNoThrow (FALSE))
        {
            return 0;
        }
#endif //BACKGROUND_GC

        dprintf (9999, ("creating idle events for h%d", h_number));
    }
#endif //DYNAMIC_HEAP_COUNT

    if (!init_dynamic_data())
    {
        return 0;
    }

    uint32_t* ct = &g_gc_card_table [card_word (card_of (g_gc_lowest_address))];
    own_card_table (ct);
    card_table = translate_card_table (ct);

    brick_table = card_table_brick_table (ct);
    highest_address = card_table_highest_address (ct);
    lowest_address = card_table_lowest_address (ct);

#ifdef CARD_BUNDLE
    card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct), g_gc_lowest_address);
    assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_gc_lowest_address))))] ==
            card_table_card_bundle_table (ct));
#endif //CARD_BUNDLE

#ifdef BACKGROUND_GC
    background_saved_highest_address = nullptr;
    background_saved_lowest_address = nullptr;
    if (gc_can_use_concurrent)
        mark_array = translate_mark_array (card_table_mark_array (&g_gc_card_table[card_word (card_of (g_gc_lowest_address))]));
    else
        mark_array = NULL;
#endif //BACKGROUND_GC

#ifdef USE_REGIONS
#ifdef STRESS_REGIONS
    // Handle table APIs expect coop so we temporarily switch to coop.
    disable_preemptive (true);
    pinning_handles_for_alloc = new (nothrow) (OBJECTHANDLE[PINNING_HANDLE_INITIAL_LENGTH]);

    for (int i = 0; i < PINNING_HANDLE_INITIAL_LENGTH; i++)
    {
        pinning_handles_for_alloc[i] = g_gcGlobalHandleStore->CreateHandleOfType (0, HNDTYPE_PINNED);
    }
    enable_preemptive();
    ph_index_per_heap = 0;
    pinning_seg_interval = 2;
    num_gen0_regions = 0;
    sip_seg_interval = 2;
    sip_seg_maxgen_interval = 3;
    num_condemned_regions = 0;
#endif //STRESS_REGIONS
    end_gen0_region_space = 0;
    end_gen0_region_committed_space = 0;
    gen0_pinned_free_space = 0;
    gen0_large_chunk_found = false;
    // REGIONS PERF TODO: we should really allocate the POH regions together just so that
    // they wouldn't prevent us from coalescing free regions to form a large virtual address
    // range.
    if (!initial_make_soh_regions (__this) ||
        !initial_make_uoh_regions (loh_generation, __this) ||
        !initial_make_uoh_regions (poh_generation, __this))
    {
        return 0;
    }

#else //USE_REGIONS

    heap_segment* seg = make_initial_segment (soh_gen0, h_number, __this);
    if (!seg)
        return 0;

    FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(seg),
                              (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)),
                              gc_etw_segment_small_object_heap);

    seg_mapping_table_add_segment (seg, __this);
#ifdef MULTIPLE_HEAPS
    assert (heap_segment_heap (seg) == __this);
#endif //MULTIPLE_HEAPS

    uint8_t*  start = heap_segment_mem (seg);

    for (int i = max_generation; i >= 0; i--)
    {
        make_generation (i, seg, start);
        start += Align (min_obj_size);
    }

    heap_segment_allocated (seg) = start;
    alloc_allocated = start;
    heap_segment_used (seg) = start - plug_skew;
    ephemeral_heap_segment = seg;

    // Create segments for the large and pinned generations
    heap_segment* lseg = make_initial_segment(loh_generation, h_number, __this);
    if (!lseg)
        return 0;

    lseg->flags |= heap_segment_flags_loh;

    FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(lseg),
                              (size_t)(heap_segment_reserved (lseg) - heap_segment_mem(lseg)),
                              gc_etw_segment_large_object_heap);

    heap_segment* pseg = make_initial_segment (poh_generation, h_number, __this);
    if (!pseg)
        return 0;

    pseg->flags |= heap_segment_flags_poh;

    FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(pseg),
                              (size_t)(heap_segment_reserved (pseg) - heap_segment_mem(pseg)),
                              gc_etw_segment_pinned_object_heap);

    seg_mapping_table_add_segment (lseg, __this);
    seg_mapping_table_add_segment (pseg, __this);

    make_generation (loh_generation, lseg, heap_segment_mem (lseg));
    make_generation (poh_generation, pseg, heap_segment_mem (pseg));

    heap_segment_allocated (lseg) = heap_segment_mem (lseg) + Align (min_obj_size, get_alignment_constant (FALSE));
    heap_segment_used (lseg) = heap_segment_allocated (lseg) - plug_skew;

    heap_segment_allocated (pseg) = heap_segment_mem (pseg) + Align (min_obj_size, get_alignment_constant (FALSE));
    heap_segment_used (pseg) = heap_segment_allocated (pseg) - plug_skew;

    for (int gen_num = 0; gen_num < total_generation_count; gen_num++)
    {
        generation*  gen = generation_of (gen_num);
        make_unused_array (generation_allocation_start (gen), Align (min_obj_size));
    }

#ifdef MULTIPLE_HEAPS
    assert (heap_segment_heap (lseg) == __this);
    assert (heap_segment_heap (pseg) == __this);
#endif //MULTIPLE_HEAPS
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
    //initialize the alloc context heap
    generation_alloc_context (generation_of (soh_gen0))->set_alloc_heap(vm_heap);
    generation_alloc_context (generation_of (loh_generation))->set_alloc_heap(vm_heap);
    generation_alloc_context (generation_of (poh_generation))->set_alloc_heap(vm_heap);

#endif //MULTIPLE_HEAPS

    generation_of (max_generation)->free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST_BITS, gen2_alloc_list, max_generation);
    generation_of (loh_generation)->free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST_BITS, loh_alloc_list);
    generation_of (poh_generation)->free_list_allocator = allocator(NUM_POH_ALIST, BASE_POH_ALIST_BITS, poh_alloc_list);

    total_alloc_bytes_soh = 0;
    total_alloc_bytes_uoh = 0;

    //needs to be done after the dynamic data has been initialized
#ifdef MULTIPLE_HEAPS
#ifdef STRESS_DYNAMIC_HEAP_COUNT
    uoh_msl_before_gc_p = false;
#endif //STRESS_DYNAMIC_HEAP_COUNT
#else //MULTIPLE_HEAPS
    allocation_running_amount = dd_min_size (dynamic_data_of (0));
#endif //!MULTIPLE_HEAPS

    fgn_maxgen_percent = 0;
    fgn_last_alloc = dd_min_size (dynamic_data_of (0));

    mark* arr = new (nothrow) (mark [MARK_STACK_INITIAL_LENGTH]);
    if (!arr)
        return 0;

    make_mark_stack(arr);

#ifdef BACKGROUND_GC
    for (int i = uoh_start_generation; i < total_generation_count; i++)
    {
        uoh_a_no_bgc[i - uoh_start_generation] = 0;
        uoh_a_bgc_marking[i - uoh_start_generation] = 0;
        uoh_a_bgc_planning[i - uoh_start_generation] = 0;
    }
#ifdef BGC_SERVO_TUNING
    bgc_maxgen_end_fl_size = 0;
#endif //BGC_SERVO_TUNING
    freeable_soh_segment = 0;
    gchist_index_per_heap = 0;
    if (gc_can_use_concurrent)
    {
        uint8_t** b_arr = new (nothrow) (uint8_t * [MARK_STACK_INITIAL_LENGTH]);
        if (!b_arr)
            return 0;

        make_background_mark_stack(b_arr);
    }
#endif //BACKGROUND_GC

#ifndef USE_REGIONS
    ephemeral_low = generation_allocation_start(generation_of(max_generation - 1));
    ephemeral_high = heap_segment_reserved(ephemeral_heap_segment);
#endif //!USE_REGIONS

    if (heap_number == 0)
    {
        stomp_write_barrier_initialize(
#if defined(USE_REGIONS)
            ephemeral_low, ephemeral_high,
            map_region_to_generation_skewed, (uint8_t)min_segment_size_shr
#elif defined(MULTIPLE_HEAPS)
            reinterpret_cast<uint8_t*>(1), reinterpret_cast<uint8_t*>(~0)
#else
            ephemeral_low, ephemeral_high
#endif //MULTIPLE_HEAPS || USE_REGIONS
        );
    }

#ifdef MULTIPLE_HEAPS
    if (!create_gc_thread ())
        return 0;

#endif //MULTIPLE_HEAPS

#ifdef FEATURE_PREMORTEM_FINALIZATION
    HRESULT hr = AllocateCFinalize(&finalize_queue);
    if (FAILED(hr))
        return 0;
#endif // FEATURE_PREMORTEM_FINALIZATION

#ifdef USE_REGIONS
#ifdef MULTIPLE_HEAPS
    min_fl_list = 0;
    num_fl_items_rethreaded_stage2 = 0;
    free_list_space_per_heap = nullptr;
#endif //MULTIPLE_HEAPS
#else //USE_REGIONS
    max_free_space_items = MAX_NUM_FREE_SPACES;

    bestfit_seg = new (nothrow) seg_free_spaces (heap_number);

    if (!bestfit_seg)
    {
        return 0;
    }

    if (!bestfit_seg->alloc())
    {
        return 0;
    }
#endif //USE_REGIONS

    last_gc_before_oom = FALSE;

    sufficient_gen0_space_p = FALSE;

#ifdef MULTIPLE_HEAPS

#ifdef HEAP_ANALYZE

    heap_analyze_success = TRUE;

    internal_root_array  = 0;

    internal_root_array_index = 0;

    internal_root_array_length = initial_internal_roots;

    current_obj          = 0;

    current_obj_size     = 0;

#endif //HEAP_ANALYZE

#endif // MULTIPLE_HEAPS

#ifdef BACKGROUND_GC
    bgc_thread_id.Clear();

    if (!create_bgc_thread_support())
    {
        return 0;
    }

    bgc_alloc_lock = new (nothrow) exclusive_sync;
    if (!bgc_alloc_lock)
    {
        return 0;
    }

    bgc_alloc_lock->init();
    bgc_thread_running = 0;
    bgc_thread = 0;
    bgc_threads_timeout_cs.Initialize();
    current_bgc_state = bgc_not_in_process;
    background_soh_alloc_count = 0;
    bgc_overflow_count = 0;
    for (int i = uoh_start_generation; i < total_generation_count; i++)
    {
        end_uoh_size[i - uoh_start_generation] = dd_min_size (dynamic_data_of (i));
    }

    current_sweep_pos = 0;
#ifdef DOUBLY_LINKED_FL
    current_sweep_seg = 0;
#endif //DOUBLY_LINKED_FL

#endif //BACKGROUND_GC

#ifdef GC_CONFIG_DRIVEN
    memset(interesting_data_per_heap, 0, sizeof (interesting_data_per_heap));
    memset(compact_reasons_per_heap, 0, sizeof (compact_reasons_per_heap));
    memset(expand_mechanisms_per_heap, 0, sizeof (expand_mechanisms_per_heap));
    memset(interesting_mechanism_bits_per_heap, 0, sizeof (interesting_mechanism_bits_per_heap));
#endif //GC_CONFIG_DRIVEN

    return 1;
}

// Frees g_mark_list and, when FEATURE_BASICFREEZE is defined, tears down seg_table.
void
gc_heap::destroy_semi_shared()
{
    // TODO: will need to move this to per heap
    //#ifdef BACKGROUND_GC
    //    if (c_mark_list)
    //        delete c_mark_list;
    //#endif //BACKGROUND_GC

    if (g_mark_list != nullptr)
    {
        delete[] g_mark_list;
    }

#ifdef FEATURE_BASICFREEZE
    // Destroy the segment map: let the table release what it holds first, then
    // free the table's own storage, which is deleted as a char array.
    seg_table->delete_sorted_table();
    delete[] (char*)seg_table;
#endif //FEATURE_BASICFREEZE
}

void
gc_heap::self_destroy()
{
#ifdef BACKGROUND_GC
    kill_gc_thread();
#endif //BACKGROUND_GC

    if (gc_done_event.IsValid())
    {
        gc_done_event.CloseEvent();
    }

    // destroy every segment
    for (int i = get_start_generation_index(); i < total_generation_count; i++)
    {
        heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i)));
        _ASSERTE(seg != NULL);

        while (seg)
        {
            heap_segment* next_seg = heap_segment_next_rw (seg);
            delete_heap_segment (seg);
            seg = next_seg;
        }
    }

    // get rid of the card table
    release_card_table (card_table);

    // destroy the mark stack
    delete[] mark_stack_array;

#ifdef FEATURE_PREMORTEM_FINALIZATION
    if (finalize_queue)
        delete finalize_queue;
#endif // FEATURE_PREMORTEM_FINALIZATION
}

// Tears down the given heap via self_destroy() and then deletes the heap object itself.
void gc_heap::destroy_gc_heap (gc_heap* heap)
{
    // Release what the heap holds before freeing the object.
    heap->self_destroy();

    delete heap;
}

// Destroys resources owned by gc. It is assumed that a last GC has been performed and that
// the finalizer queue has been drained.
//
// Teardown sequence: semi-shared state, then (server GC only) the heaps array
// and thread support, then the initial memory, and finally the OS interface.
void gc_heap::shutdown_gc ()
{
    destroy_semi_shared();

#ifdef MULTIPLE_HEAPS
    // Delete the heaps array, then the thread support, and record that no
    // heaps remain.
    delete[] g_heaps;
    destroy_thread_support();
    n_heaps = 0;
#endif //MULTIPLE_HEAPS

    //destroy seg_manager
    destroy_initial_memory();

    GCToOSInterface::Shutdown();
}

// Returns whether an allocation of `size` bytes fits in [alloc_pointer, alloc_limit).
//
// old_loc     - the object's current address when it is being relocated, or 0
//               when there is no such object (then size must be Align (min_obj_size)).
// use_padding - combination of USE_PADDING_FRONT / USE_PADDING_TAIL flags.
inline
BOOL gc_heap::size_fit_p (size_t size REQD_ALIGN_AND_OFFSET_DCL, uint8_t* alloc_pointer, uint8_t* alloc_limit,
                          uint8_t* old_loc, int use_padding)
{
    BOOL already_padded = FALSE;
#ifdef SHORT_PLUGS
    // Front padding consumes a min-sized object ahead of the plug.
    if ((old_loc != 0) && (use_padding & USE_PADDING_FRONT))
    {
        alloc_pointer += Align (min_obj_size);
        already_padded = TRUE;
    }
#endif //SHORT_PLUGS

    // A relocated object whose large alignment would change needs extra room.
    if ((old_loc != 0) && !same_large_alignment_p (old_loc, alloc_pointer))
    {
        size += switch_alignment_size (already_padded);
    }

#ifdef FEATURE_STRUCTALIGN
    alloc_pointer = StructAlign(alloc_pointer, requiredAlignment, alignmentOffset);
#endif // FEATURE_STRUCTALIGN

    // in allocate_in_condemned_generation we can have this when we
    // set the alloc_limit to plan_allocated which could be less than
    // alloc_ptr
    if (alloc_limit < alloc_pointer)
    {
        return FALSE;
    }

    const size_t room = (size_t)(alloc_limit - alloc_pointer);

    if (old_loc == 0)
    {
        assert (size == Align (min_obj_size));
        return (room >= size);
    }

    // Enough room for the object plus the optional tail padding.
    const size_t tail_padding = ((use_padding & USE_PADDING_TAIL) ? Align (min_obj_size) : 0);
    if (room >= (size + tail_padding))
    {
        return TRUE;
    }

#ifdef SHORT_PLUGS
    // With front padding requested an exact fit is not accepted.
    if (use_padding & USE_PADDING_FRONT)
    {
        return FALSE;
    }
#endif //SHORT_PLUGS

    // Otherwise only an exact fit up to the limit is acceptable.
    return ((alloc_pointer + size) == alloc_limit);
}

// Returns whether [alloc_pointer, alloc_limit) can hold `size` bytes plus one
// min-sized object aligned with align_const.
inline
BOOL gc_heap::a_size_fit_p (size_t size, uint8_t* alloc_pointer, uint8_t* alloc_limit,
                            int align_const)
{
    // We could have run into cases where this is true when alloc_allocated is
    // the same as the seg committed.
    if (alloc_limit < alloc_pointer)
    {
        return FALSE;
    }

    const size_t room = (size_t)(alloc_limit - alloc_pointer);
    const size_t required = size + Align(min_obj_size, align_const);

    return (room >= required);
}

// Grow by committing more pages so that the segment's committed range reaches
// at least high_address.
//
// Returns TRUE if high_address is already committed or the commit succeeded;
// FALSE if the page-aligned address lies beyond the reserved range, nothing is
// left to commit, or virtual_commit fails. When hard_limit_exceeded_p is
// non-null it is reset to false up front and then handed to virtual_commit.
BOOL gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* high_address, bool* hard_limit_exceeded_p)
{
    assert (high_address <= heap_segment_reserved (seg));

    if (hard_limit_exceeded_p != nullptr)
    {
        *hard_limit_exceeded_p = false;
    }

    // We are at the end of the segment - nothing more can be committed.
    if (align_on_page (high_address) > heap_segment_reserved (seg))
    {
        return FALSE;
    }

    // The requested address is already within the committed range.
    if (high_address <= heap_segment_committed (seg))
    {
        return TRUE;
    }

    // Commit at least commit_min_th, but never past the reserved end.
    size_t commit_size = align_on_page ((size_t)(high_address - heap_segment_committed (seg)));
    commit_size = max (commit_size, commit_min_th);
    commit_size = min (commit_size, (size_t)(heap_segment_reserved (seg) - heap_segment_committed (seg)));

    if (commit_size == 0)
    {
        return FALSE;
    }

    STRESS_LOG2(LF_GC, LL_INFO10000,
                "Growing heap_segment: %zx high address: %zx\n",
                (size_t)seg, (size_t)high_address);

    bool committed_p = virtual_commit (heap_segment_committed (seg), commit_size, heap_segment_oh (seg), heap_number, hard_limit_exceeded_p);
    if (!committed_p)
    {
        return FALSE;
    }

    heap_segment_committed (seg) += commit_size;

    STRESS_LOG1(LF_GC, LL_INFO10000, "New commit: %zx\n",
                (size_t)heap_segment_committed (seg));

    assert (heap_segment_committed (seg) <= heap_segment_reserved (seg));
    assert (high_address <= heap_segment_committed (seg));

#if defined(MULTIPLE_HEAPS) && !defined(USE_REGIONS)
    // we should never increase committed beyond decommit target when gradual
    // decommit is in progress - if we do, this means commit and decommit are
    // going on at the same time.
    assert (!gradual_decommit_in_progress_p ||
            (seg != ephemeral_heap_segment) ||
            (heap_segment_committed (seg) <= heap_segment_decommit_target (seg)));
#endif //MULTIPLE_HEAPS && !USE_REGIONS

    return TRUE;
}

// Computes the end address needed to place an object of `size` bytes at
// `allocated` (accounting for front padding, large-alignment switching and
// struct alignment where those features are compiled in) and grows the
// segment's committed range up to that address.
inline
int gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* allocated, uint8_t* old_loc, size_t size,
                                BOOL pad_front_p  REQD_ALIGN_AND_OFFSET_DCL)
{
    BOOL already_padded = FALSE;
#ifdef SHORT_PLUGS
    // A front pad takes a min-sized object ahead of the plug.
    if ((old_loc != 0) && pad_front_p)
    {
        allocated += Align (min_obj_size);
        already_padded = TRUE;
    }
#endif //SHORT_PLUGS

    if ((old_loc != 0) && !same_large_alignment_p (old_loc, allocated))
    {
        size = size + switch_alignment_size (already_padded);
    }

#ifdef FEATURE_STRUCTALIGN
    size_t pad = ComputeStructAlignPad(allocated, requiredAlignment, alignmentOffset);
    uint8_t* needed_end = allocated + pad + size;
#else // FEATURE_STRUCTALIGN
    uint8_t* needed_end = allocated + size;
#endif // FEATURE_STRUCTALIGN

    return grow_heap_segment (seg, needed_end);
}

// Turns [free_start, free_start + free_size) into a free object, threads it to
// the front of gen's free list and updates the free list stats.
void gc_heap::thread_free_item_front (generation* gen, uint8_t* free_start, size_t free_size)
{
    make_unused_array (free_start, free_size);

    // Account for the space before the item is put on the list.
    generation_free_list_space (gen) += free_size;
    generation_allocator(gen)->thread_item_front (free_start, free_size);

    const int gen_number = gen->gen_num;
    add_gen_free (gen_number, free_size);

    if (gen_number == max_generation)
    {
        dprintf (2, ("AO h%d: gen2F+: %p(%zd)->%zd, FO: %zd",
            heap_number, free_start, free_size,
            generation_free_list_space (gen), generation_free_obj_space (gen)));
    }
}

#ifdef DOUBLY_LINKED_FL
// Turns [free_start, free_start + free_size) into a free object, threads it to
// the front of the allocator's "added" list for gen and updates the free list
// stats.
void gc_heap::thread_item_front_added (generation* gen, uint8_t* free_start, size_t free_size)
{
    make_unused_array (free_start, free_size);
    generation_free_list_space (gen) += free_size;

    // The allocator returns the bucket index it used; nothing here needs it.
    generation_allocator(gen)->thread_item_front_added (free_start, free_size);

    if (gen->gen_num == max_generation)
    {
        dprintf (2, ("AO [h%d] gen2FL+: %p(%zd)->%zd",
            heap_number, free_start, free_size, generation_free_list_space (gen)));
    }

    add_gen_free (gen->gen_num, free_size);
}
#endif //DOUBLY_LINKED_FL

// Makes a free object at free_start that is NOT put on the free list, and adds
// its size to gen's free object space.
void gc_heap::make_free_obj (generation* gen, uint8_t* free_start, size_t free_size)
{
    make_unused_array (free_start, free_size);
    generation_free_obj_space (gen) += free_size;

    const bool is_max_gen = (gen->gen_num == max_generation);
    if (is_max_gen)
    {
        dprintf (2, ("AO [h%d] gen2FO+: %p(%zd)->%zd",
            heap_number, free_start, free_size, generation_free_obj_space (gen)));
    }
}

//used only in older generation allocation (i.e during gc).
//
// Points gen's allocation context at [start, start + limit_size).
//
// If the new range does not simply continue the current context at the
// segment's plan_allocated, the unused tail of the current context
// [allocation_pointer, allocation_limit) is dealt with first:
//  - if the current limit is the segment's plan_allocated, plan_allocated is
//    pulled back to the allocation pointer;
//  - otherwise the leftover "hole" is turned into a free object and/or threaded
//    onto gen's free list, depending on its size and on how much was allocated
//    in the current context.
// The allocation pointer and context start region are then moved to start.
void gc_heap::adjust_limit (uint8_t* start, size_t limit_size, generation* gen)
{
    dprintf (3, ("gc Expanding segment allocation"));
    heap_segment* seg = generation_allocation_segment (gen);
    // Skipped only when start equals both the current limit and plan_allocated,
    // in which case just the limit is extended below.
    if ((generation_allocation_limit (gen) != start) || (start != heap_segment_plan_allocated (seg)))
    {
        if (generation_allocation_limit (gen) == heap_segment_plan_allocated (seg))
        {
            // The context ends at plan_allocated: give the unused tail back by
            // moving plan_allocated down to the allocation pointer.
            assert (generation_allocation_pointer (gen) >= heap_segment_mem (seg));
            assert (generation_allocation_pointer (gen) <= heap_segment_committed (seg));
            heap_segment_plan_allocated (generation_allocation_segment (gen)) = generation_allocation_pointer (gen);
        }
        else
        {
            // The unused part of the current context.
            uint8_t*  hole = generation_allocation_pointer (gen);
            size_t  size = (generation_allocation_limit (gen) - generation_allocation_pointer (gen));

            if (size != 0)
            {
                dprintf (3, ("filling up hole: %p, size %zx", hole, size));
                // How much has been allocated in the current context so far.
                size_t allocated_size = generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen);
#ifdef DOUBLY_LINKED_FL
                if (gen->gen_num == max_generation)
                {
                    // For BGC since we need to thread the max_gen's free list as a doubly linked list we need to
                    // preserve 5 ptr-sized words: SB | MT | Len | Next | Prev
                    // This means we cannot simply make a filler free object right after what's allocated in this
                    // alloc context if that's < 5-ptr sized.
                    //
                    // NOTE(review): this path tests allocated_size with <= while the
                    // non-DOUBLY_LINKED_FL path below uses < against the same constant -
                    // confirm the difference is intentional.
                    if (allocated_size <= min_free_item_no_prev)
                    {
                        // We can't make the free object just yet. Need to record the size.
                        // The size is stored min_free_item_no_prev bytes past the start of
                        // the context.
                        size_t* filler_free_obj_size_location = (size_t*)(generation_allocation_context_start_region (gen) +
                                                                          min_free_item_no_prev);
                        size_t filler_free_obj_size = 0;
                        if (size >= (Align (min_free_list) + Align (min_obj_size)))
                        {

                            // Big enough to split: a min-sized filler is recorded and the
                            // rest goes on the added free list.
                            filler_free_obj_size = Align (min_obj_size);
                            size_t fl_size = size - filler_free_obj_size;
                            thread_item_front_added (gen, (hole + filler_free_obj_size), fl_size);
                        }
                        else
                        {
                            // Too small to split: the whole hole is the filler.
                            filler_free_obj_size = size;
                        }

                        generation_free_obj_space (gen) += filler_free_obj_size;
                        *filler_free_obj_size_location = filler_free_obj_size;
                        uint8_t* old_loc = generation_last_free_list_allocated (gen);

                        // check if old_loc happens to be in a saved plug_and_gap with a pinned plug after it
                        uint8_t* saved_plug_and_gap = nullptr;
                        if (saved_pinned_plug_index != INVALID_SAVED_PINNED_PLUG_INDEX)
                        {
                            saved_plug_and_gap = pinned_plug (pinned_plug_of (saved_pinned_plug_index)) - sizeof(plug_and_gap);

                            dprintf (3333, ("[h%d] sppi: %zd mtos: %zd old_loc: %p pp: %p(%zd) offs: %zd",
                                heap_number,
                                saved_pinned_plug_index,
                                mark_stack_tos,
                                old_loc,
                                pinned_plug (pinned_plug_of (saved_pinned_plug_index)),
                                pinned_len (pinned_plug_of (saved_pinned_plug_index)),
                                old_loc - saved_plug_and_gap));
                        }
                        // NOTE(review): when no saved pinned plug index is set,
                        // saved_plug_and_gap is still nullptr here, so offset is old_loc's
                        // address value and the else branch below is presumably always
                        // taken - confirm, since subtracting a null pointer is not
                        // well-defined C++.
                        size_t offset = old_loc - saved_plug_and_gap;
                        if (offset < sizeof(gap_reloc_pair))
                        {
                            // the object at old_loc must be at least min_obj_size
                            assert (offset <= sizeof(plug_and_gap) - min_obj_size);

                            // if so, set the bit in the saved info instead
                            set_free_obj_in_compact_bit ((uint8_t*)(&pinned_plug_of (saved_pinned_plug_index)->saved_pre_plug_reloc) + offset);
                        }
                        else
                        {
#ifdef _DEBUG
                            // check this looks like an object
                            header(old_loc)->Validate();
#endif //_DEBUG
                            set_free_obj_in_compact_bit (old_loc);
                        }

                        dprintf (3333, ("[h%d] ac: %p->%p((%zd < %zd), Pset %p s->%zd", heap_number,
                            generation_allocation_context_start_region (gen), generation_allocation_pointer (gen),
                            allocated_size, min_free_item_no_prev, filler_free_obj_size_location, filler_free_obj_size));
                    }
                    else
                    {
                        // Enough was allocated in this context: the hole is handled
                        // directly, on the added free list if large enough, otherwise
                        // as a plain free object.
                        if (size >= Align (min_free_list))
                        {
                            thread_item_front_added (gen, hole, size);
                        }
                        else
                        {
                            make_free_obj (gen, hole, size);
                        }
                    }
                }
                else
#endif //DOUBLY_LINKED_FL
                {
                    // TODO: this should be written the same way as the above, ie, it should check
                    // allocated_size first, but it doesn't need to do MAKE_FREE_OBJ_IN_COMPACT
                    // related things.
                    if (size >= Align (min_free_list))
                    {
                        if (allocated_size < min_free_item_no_prev)
                        {
                            if (size >= (Align (min_free_list) + Align (min_obj_size)))
                            {
                                //split hole into min obj + threadable free item
                                // NOTE(review): the filler is sized min_obj_size while the
                                // threaded item starts at hole + Align (min_obj_size); these
                                // agree only if min_obj_size is already aligned - confirm.
                                make_free_obj (gen, hole, min_obj_size);
                                thread_free_item_front (gen, (hole + Align (min_obj_size)),
                                    (size - Align (min_obj_size)));
                            }
                            else
                            {
                                dprintf (3, ("allocated size too small, can't put back rest on free list %zx",
                                    allocated_size));
                                make_free_obj (gen, hole, size);
                            }
                        }
                        else
                        {
                            dprintf (3, ("threading hole in front of free list"));
                            thread_free_item_front (gen, hole, size);
                        }
                    }
                    else
                    {
                        // Too small for the free list; leave it as a free object.
                        make_free_obj (gen, hole, size);
                    }
                }
            }
        }
        // Start a fresh context at the new location.
        generation_allocation_pointer (gen) = start;
        generation_allocation_context_start_region (gen) = start;
    }
    generation_allocation_limit (gen) = (start + limit_size);
}

// Verifies that the `size` bytes at `start` are all zero, checking one
// pointer-sized slot at a time. Raises a fatal GC error if size is not aligned
// or if any slot is non-zero.
void verify_mem_cleared (uint8_t* start, size_t size)
{
    if (!Aligned (size))
    {
        FATAL_GC_ERROR();
    }

    const size_t slot_count = size / sizeof(PTR_PTR);
    PTR_PTR slots = (PTR_PTR) start;

    for (size_t slot_idx = 0; slot_idx < slot_count; slot_idx++)
    {
        if (slots[slot_idx] != 0)
        {
            FATAL_GC_ERROR();
        }
    }
}

#if defined (VERIFY_HEAP) && defined (BACKGROUND_GC)
// Sets the mark array bits between start and end. A partial mask is ORed into
// the first and last mark words so that bits outside the range are left
// alone; the words in between are set entirely.
void gc_heap::set_batch_mark_array_bits (uint8_t* start, uint8_t* end)
{
    size_t first_mark_bit = mark_bit_of (start);
    size_t last_mark_bit = mark_bit_of (end);
    unsigned int first_bit_in_word = mark_bit_bit (first_mark_bit);
    unsigned int last_bit_in_word = mark_bit_bit (last_mark_bit);
    size_t cur_word = mark_bit_word (first_mark_bit);
    size_t last_word = mark_bit_word (last_mark_bit);

    dprintf (3, ("Setting all mark array bits between [%zx:%zx-[%zx:%zx",
        (size_t)start, (size_t)first_mark_bit,
        (size_t)end, (size_t)last_mark_bit));

    // Masks selecting the part of the first / last word that is in range.
    unsigned int head_mask = ~(lowbits (~0, first_bit_in_word));
    unsigned int tail_mask = ~(highbits (~0, last_bit_in_word));

    // The whole range lives in one mark word.
    if (cur_word == last_word)
    {
        mark_array[cur_word] |= (head_mask & tail_mask);
        return;
    }

    // Partial first word, if the range does not start on a word boundary.
    if (first_bit_in_word != 0)
    {
        mark_array[cur_word] |= head_mask;
        cur_word++;
    }

    // Full words in the middle.
    while (cur_word < last_word)
    {
        mark_array[cur_word] = ~(unsigned int)0;
        cur_word++;
    }

    // Partial last word, if the range does not end on a word boundary.
    if (last_bit_in_word != 0)
    {
        mark_array[last_word] |= tail_mask;
    }
}

// makes sure that the mark array bits between start and end are 0.
//
// The first and last mark words are checked through a mask so that bits
// outside the range are ignored; the words in between must be entirely zero.
// Any set bit is logged and raises a fatal GC error.
void gc_heap::check_batch_mark_array_bits (uint8_t* start, uint8_t* end)
{
    size_t start_mark_bit = mark_bit_of (start);
    size_t end_mark_bit = mark_bit_of (end);
    unsigned int startbit = mark_bit_bit (start_mark_bit);
    unsigned int endbit = mark_bit_bit (end_mark_bit);
    size_t startwrd = mark_bit_word (start_mark_bit);
    size_t endwrd = mark_bit_word (end_mark_bit);

    //dprintf (3, ("Setting all mark array bits between [%zx:%zx-[%zx:%zx",
    //    (size_t)start, (size_t)start_mark_bit,
    //    (size_t)end, (size_t)end_mark_bit));

    // Masks selecting the in-range part of the first / last mark word.
    unsigned int firstwrd = ~(lowbits (~0, startbit));
    unsigned int lastwrd = ~(highbits (~0, endbit));

    // The whole range lives in one mark word.
    if (startwrd == endwrd)
    {
        unsigned int wrd = firstwrd & lastwrd;
        if (mark_array[startwrd] & wrd)
        {
            dprintf  (1, ("The %x portion of mark bits at 0x%zx:0x%x(addr: 0x%p) were not cleared",
                            wrd, startwrd,
                            mark_array [startwrd], mark_word_address (startwrd)));
            FATAL_GC_ERROR();
        }
        return;
    }

    // check the first mark word.
    if (startbit)
    {
        if (mark_array[startwrd] & firstwrd)
        {
            dprintf  (1, ("The %x portion of mark bits at 0x%zx:0x%x(addr: 0x%p) were not cleared",
                            firstwrd, startwrd,
                            mark_array [startwrd], mark_word_address (startwrd)));
            FATAL_GC_ERROR();
        }

        startwrd++;
    }

    // check the full words in the middle.
    for (size_t wrdtmp = startwrd; wrdtmp < endwrd; wrdtmp++)
    {
        if (mark_array[wrdtmp])
        {
            dprintf  (1, ("The mark bits at 0x%zx:0x%x(addr: 0x%p) were not cleared",
                            wrdtmp,
                            mark_array [wrdtmp], mark_word_address (wrdtmp)));
            FATAL_GC_ERROR();
        }
    }

    // check the last mark word.
    if (endbit)
    {
        if (mark_array[endwrd] & lastwrd)
        {
            // Report the word index (endwrd), not the mask (lastwrd): indexing
            // mark_array with the mask would read an unrelated, possibly
            // out-of-range, entry.
            dprintf  (1, ("The %x portion of mark bits at 0x%zx:0x%x(addr: 0x%p) were not cleared",
                            lastwrd, endwrd,
                            mark_array [endwrd], mark_word_address (endwrd)));
            FATAL_GC_ERROR();
        }
    }
}
#endif //VERIFY_HEAP && BACKGROUND_GC

// Sets up an allocator with num_b buckets for generation `gen`.
//   fbb - stored as first_bucket_bits.
//   b   - storage for the buckets after the first one (bucket 0 is the
//         embedded first_bucket, see alloc_list_of).
allocator::allocator (unsigned int num_b, int fbb, alloc_list* b, int gen)
{
    assert (num_b < MAX_BUCKET_COUNT);

    gen_number = gen;
    buckets = b;
    num_buckets = num_b;
    first_bucket_bits = fbb;
}

// Returns bucket bn. Bucket 0 is the embedded first_bucket; bucket n (n > 0)
// is buckets[n-1].
alloc_list& allocator::alloc_list_of (unsigned int bn)
{
    assert (bn < num_buckets);

    return ((bn == 0) ? first_bucket : buckets [bn-1]);
}

// Returns a reference to the damage count of bucket bn, using the same bucket
// indexing as alloc_list_of (bucket 0 is first_bucket).
size_t& allocator::alloc_list_damage_count_of (unsigned int bn)
{
    assert (bn < num_buckets);

    alloc_list& bucket = ((bn == 0) ? first_bucket : buckets [bn-1]);
    return bucket.alloc_list_damage_count();
}

void allocator::unlink_item (unsigned int bn, uint8_t* item, uint8_t* prev_item, BOOL use_undo_p)
{
    alloc_list* al = &alloc_list_of (bn);
    uint8_t* next_item = free_list_slot(item);

#ifdef DOUBLY_LINKED_FL
    // if repair_list is TRUE yet use_undo_p is FALSE, it means we do need to make sure
    // this item does not look like it's on the free list as we will not have a chance to
    // do that later.
    BOOL repair_list = !discard_if_no_fit_p ();
#endif //DOUBLY_LINKED_FL

    if (prev_item)
    {
        if (use_undo_p && (free_list_undo (prev_item) == UNDO_EMPTY))
        {
            assert (item == free_list_slot (prev_item));
            free_list_undo (prev_item) = item;
            alloc_list_damage_count_of (bn)++;
        }

        free_list_slot (prev_item) = next_item;
    }
    else
    {
        al->alloc_list_head() = next_item;
    }
    if (al->alloc_list_tail() == item)
    {
        al->alloc_list_tail() = prev_item;
    }

#ifdef DOUBLY_LINKED_FL
    if (repair_list)
    {
        if (!use_undo_p)
        {
            free_list_prev (item) = PREV_EMPTY;
        }
    }

    if (gen_number == max_generation)
    {
        dprintf (3, ("[g%2d, b%2d]UL: %p->%p->%p (h: %p, t: %p)",
            gen_number, bn, free_list_prev (item), item, free_list_slot (item),
            al->alloc_list_head(), al->alloc_list_tail()));
        dprintf (3, ("[g%2d, b%2d]UL: exit, h->N: %p, h->P: %p, t->N: %p, t->P: %p",
            gen_number, bn,
            (al->alloc_list_head() ? free_list_slot (al->alloc_list_head()) : 0),
            (al->alloc_list_head() ? free_list_prev (al->alloc_list_head()) : 0),
            (al->alloc_list_tail() ? free_list_slot (al->alloc_list_tail()) : 0),
            (al->alloc_list_tail() ? free_list_prev (al->alloc_list_tail()) : 0)));
    }
#endif //DOUBLY_LINKED_FL

    if (al->alloc_list_head() == 0)
    {
        assert (al->alloc_list_tail() == 0);
    }
}

#ifdef DOUBLY_LINKED_FL
// Removes item from bucket bn's doubly linked list without recording any undo
// info. The predecessor is taken from the item's own prev slot; neighbours,
// head and tail are patched up and the item's prev slot is reset to
// PREV_EMPTY.
void allocator::unlink_item_no_undo (unsigned int bn, uint8_t* item)
{
    alloc_list* al = &alloc_list_of (bn);

    uint8_t* next_item = free_list_slot (item);
    uint8_t* prev_item = free_list_prev (item);

#ifdef FL_VERIFICATION
    {
        // Walk the bucket to make sure the item really is on this list.
        uint8_t* start = al->alloc_list_head();
        BOOL found_p = FALSE;
        while (start)
        {
            if (start == item)
            {
                found_p = TRUE;
                break;
            }

            start = free_list_slot (start);
        }

        if (!found_p)
        {
            // bn is the bucket being searched.
            dprintf (1, ("could not find %p in b%d!!!", item, bn));
            FATAL_GC_ERROR();
        }
    }
#endif //FL_VERIFICATION

    // Detach from the predecessor (or from the head if there is none).
    if (prev_item)
    {
        free_list_slot (prev_item) = next_item;
    }
    else
    {
        al->alloc_list_head() = next_item;
    }

    // Detach from the successor.
    if (next_item)
    {
        free_list_prev (next_item) = prev_item;
    }

    if (al->alloc_list_tail() == item)
    {
        al->alloc_list_tail() = prev_item;
    }

    free_list_prev (item) = PREV_EMPTY;

    if (gen_number == max_generation)
    {
        dprintf (3333, ("[g%2d, b%2d]ULN: %p->%p->%p (h: %p, t: %p)",
            gen_number, bn, free_list_prev (item), item, free_list_slot (item),
            al->alloc_list_head(), al->alloc_list_tail()));
        dprintf (3333, ("[g%2d, b%2d]ULN: exit: h->N: %p, h->P: %p, t->N: %p, t->P: %p",
            gen_number, bn,
            (al->alloc_list_head() ? free_list_slot (al->alloc_list_head()) : 0),
            (al->alloc_list_head() ? free_list_prev (al->alloc_list_head()) : 0),
            (al->alloc_list_tail() ? free_list_slot (al->alloc_list_tail()) : 0),
            (al->alloc_list_tail() ? free_list_prev (al->alloc_list_tail()) : 0)));
    }
}

// Convenience overload: picks the bucket from the item's size via
// first_suitable_bucket and unlinks the item from that bucket.
void allocator::unlink_item_no_undo (uint8_t* item, size_t size)
{
    unlink_item_no_undo (first_suitable_bucket (size), item);
}

void allocator::unlink_item_no_undo_added (unsigned int bn, uint8_t* item, uint8_t* previous_item)
{
    alloc_list* al = &alloc_list_of (bn);

    uint8_t* next_item = free_list_slot (item);
    uint8_t* prev_item = free_list_prev (item);

    assert (prev_item == previous_item);

    if (prev_item)
    {
        free_list_slot (prev_item) = next_item;
    }
    else
    {
        al->added_alloc_list_head() = next_item;
    }

    if (next_item)
    {
        free_list_prev (next_item) = prev_item;
    }

    if (al->added_alloc_list_tail() == item)
    {
        al->added_alloc_list_tail() = prev_item;
    }

    free_list_prev (item) = PREV_EMPTY;

    if (gen_number == max_generation)
    {
        dprintf (3333, ("[g%2d, b%2d]ULNA: %p->%p->%p (h: %p, t: %p)",
            gen_number, bn, free_list_prev (item), item, free_list_slot (item),
            al->added_alloc_list_head(), al->added_alloc_list_tail()));
        dprintf (3333, ("[g%2d, b%2d]ULNA: exit: h->N: %p, h->P: %p, t->N: %p, t->P: %p",
            gen_number, bn,
            (al->added_alloc_list_head() ? free_list_slot (al->added_alloc_list_head()) : 0),
            (al->added_alloc_list_head() ? free_list_prev (al->added_alloc_list_head()) : 0),
            (al->added_alloc_list_tail() ? free_list_slot (al->added_alloc_list_tail()) : 0),
            (al->added_alloc_list_tail() ? free_list_prev (al->added_alloc_list_tail()) : 0)));
    }
}

// Threads item to the front of the "added" list of the bucket that suits
// `size`, and returns that bucket's index.
int allocator::thread_item_front_added (uint8_t* item, size_t size)
{
    unsigned int bucket_number = first_suitable_bucket (size);
    alloc_list* bucket = &alloc_list_of (bucket_number);

    uint8_t* old_head = bucket->added_alloc_list_head();

    // The new item becomes the head: no predecessor, old head as successor.
    free_list_slot (item) = old_head;
    free_list_prev (item) = 0;
    // this list's UNDO is not useful.
    free_list_undo (item) = UNDO_EMPTY;

    if (old_head != 0)
    {
        free_list_prev (old_head) = item;
    }

    bucket->added_alloc_list_head() = item;

    // First item on the list is also its tail.
    if (bucket->added_alloc_list_tail() == 0)
    {
        bucket->added_alloc_list_tail() = item;
    }

    if (gen_number == max_generation)
    {
        dprintf (3333, ("[g%2d, b%2d]TFFA: exit: %p->%p->%p (h: %p, t: %p)",
            gen_number, bucket_number,
            free_list_prev (item), item, free_list_slot (item),
            bucket->added_alloc_list_head(), bucket->added_alloc_list_tail()));
        dprintf (3333, ("[g%2d, b%2d]TFFA: h->N: %p, h->P: %p, t->N: %p, t->P: %p",
            gen_number, bucket_number,
            (bucket->added_alloc_list_head() ? free_list_slot (bucket->added_alloc_list_head()) : 0),
            (bucket->added_alloc_list_head() ? free_list_prev (bucket->added_alloc_list_head()) : 0),
            (bucket->added_alloc_list_tail() ? free_list_slot (bucket->added_alloc_list_tail()) : 0),
            (bucket->added_alloc_list_tail() ? free_list_prev (bucket->added_alloc_list_tail()) : 0)));
    }

    return bucket_number;
}
#endif //DOUBLY_LINKED_FL

#ifdef DYNAMIC_HEAP_COUNT
// Counts the items on all of this allocator's bucket lists and logs each one
// along with the heap its region belongs to.
//
//   this_hp               - the heap the items are compared against.
//   fl_items_count        - receives the total number of free list items.
//   fl_items_for_oh_count - receives the number of items whose region belongs
//                           to a heap other than this_hp.
void allocator::count_items (gc_heap* this_hp, size_t* fl_items_count, size_t* fl_items_for_oh_count)
{
    uint64_t start_us = GetHighPrecisionTimeStamp();
    uint64_t end_us = 0;

    size_t num_fl_items = 0;
    // items whose heap != this_hp
    size_t num_fl_items_for_oh = 0;

    for (unsigned int i = 0; i < num_buckets; i++)
    {
        uint8_t* free_item = alloc_list_head_of (i);
        while (free_item)
        {
            assert (((CObjectHeader*)free_item)->IsFree());

            num_fl_items++;
            // Get the heap its region belongs to see if we need to put it back.
            heap_segment* region = gc_heap::region_of (free_item);
            dprintf (3, ("b#%2d FL %p region %zx heap %d -> %d",
                i, free_item, (size_t)region, this_hp->heap_number, region->heap->heap_number));
            if (region->heap != this_hp)
            {
                num_fl_items_for_oh++;
            }

            free_item = free_list_slot (free_item);
        }
    }

    end_us = GetHighPrecisionTimeStamp();
    dprintf (3, ("total - %zd items out of %zd items are from a different heap in %I64d us",
        num_fl_items_for_oh, num_fl_items, (end_us - start_us)));

    *fl_items_count = num_fl_items;
    *fl_items_for_oh_count = num_fl_items_for_oh;
}

#ifdef DOUBLY_LINKED_FL
// Appends item to the end of this list, recording the previous tail as its prev link.
void min_fl_list_info::thread_item (uint8_t* item)
{
    // The item becomes the last element: it has no successor and carries no undo info.
    free_list_slot (item) = 0;
    free_list_undo (item) = UNDO_EMPTY;
    assert (item != head);

    // tail still refers to the old last element here (0 when the list is empty).
    free_list_prev (item) = tail;

    if (head != 0)
    {
        // Non-empty list: hook the item behind the current tail.
        assert ((free_list_slot(head) != 0) || (tail == head));
        assert (item != tail);
        assert (free_list_slot(tail) == 0);

        free_list_slot (tail) = item;
    }
    else
    {
        // Empty list: the item is also the head.
        head = item;
    }

    tail = item;
}
#endif //DOUBLY_LINKED_FL

// Appends item to the end of this list without touching its prev link.
void min_fl_list_info::thread_item_no_prev (uint8_t* item)
{
    // The item becomes the last element: it has no successor and carries no undo info.
    free_list_slot (item) = 0;
    free_list_undo (item) = UNDO_EMPTY;
    assert (item != head);

    if (head != 0)
    {
        // Non-empty list: hook the item behind the current tail.
        assert ((free_list_slot(head) != 0) || (tail == head));
        assert (item != tail);
        assert (free_list_slot(tail) == 0);

        free_list_slot (tail) = item;
    }
    else
    {
        // Empty list: the item is also the head.
        head = item;
    }

    tail = item;
}

// Walks every bucket of this free list and moves each item whose region is owned by a heap
// other than current_heap from the bucket onto the min_fl_list entry for the owning heap.
//
// The min_fl_list array is arranged as chunks of num_heaps min_fl_list_info entries: the 1st
// chunk corresponds to the 1st bucket, the 2nd chunk to the 2nd bucket, and so on.
//
//   num_total_fl_items            - incremented by the number of items visited.
//   num_total_fl_items_rethreaded - incremented by the number of items moved.
//   free_list_space_per_heap      - indexed by heap number; incremented by the aligned size
//                                   of every item moved to that heap.
void allocator::rethread_items (size_t* num_total_fl_items, size_t* num_total_fl_items_rethreaded, gc_heap* current_heap,
                                min_fl_list_info* min_fl_list, size_t *free_list_space_per_heap, int num_heaps)
{
    uint64_t start_us = GetHighPrecisionTimeStamp();

    int align_const = get_alignment_constant (gen_number == max_generation);
    size_t visited_count = 0;
    size_t rethreaded_count = 0;

    assert (num_buckets <= MAX_BUCKET_COUNT);

    for (unsigned int bucket_index = 0; bucket_index < num_buckets; bucket_index++)
    {
        // Start of the chunk that belongs to this bucket. Entries get filled in for the
        // heaps we find FL items for.
        min_fl_list_info* bucket_min_fl_list = min_fl_list + (bucket_index * num_heaps);

        // prev_item trails free_item and only advances over items that stay on this list.
        uint8_t* prev_item = nullptr;
        uint8_t* free_item = alloc_list_head_of (bucket_index);
        while (free_item)
        {
            assert (((CObjectHeader*)free_item)->IsFree());

            visited_count++;
            // The region records which heap owns the item.
            heap_segment* region = gc_heap::region_of (free_item);
            dprintf (3, ("b#%2d FL %Ix region %Ix heap %d -> %d",
                bucket_index, free_item, (size_t)region, current_heap->heap_number, region->heap->heap_number));

            if (region->heap == current_heap)
            {
                // Already on the right heap; leave it where it is.
                prev_item = free_item;
                free_item = free_list_slot (free_item);
                continue;
            }

            rethreaded_count++;

            // Capture the size and the successor before unlinking/threading rewrites the links.
            size_t item_size = Align(size (free_item), align_const);
            uint8_t* next_item = free_list_slot (free_item);

            int owner_hn = region->heap->heap_number;
#ifdef DOUBLY_LINKED_FL
            if (is_doubly_linked_p())
            {
                unlink_item_no_undo (free_item, item_size);
                bucket_min_fl_list[owner_hn].thread_item (free_item);
            }
            else
#endif //DOUBLY_LINKED_FL
            {
                unlink_item (bucket_index, free_item, prev_item, FALSE);
                bucket_min_fl_list[owner_hn].thread_item_no_prev (free_item);
            }
            free_list_space_per_heap[owner_hn] += item_size;

            free_item = next_item;
        }
    }

    uint64_t end_us = GetHighPrecisionTimeStamp();
    dprintf (8888, ("h%d total %Id items rethreaded out of %Id items in %I64d us (%I64dms)",
        current_heap->heap_number, rethreaded_count, visited_count, (end_us - start_us), ((end_us - start_us) / 1000)));

    (*num_total_fl_items) += visited_count;
    (*num_total_fl_items_rethreaded) += rethreaded_count;
}

// Merges buckets from min_fl_list onto their corresponding buckets of this FL.
//
// For every bucket i and every heap other_hn in [0, from_num_heaps), the list that heap
// collected for current_heap (entry current_heap->heap_number of the i-th chunk of its
// min_fl_list, chunks being to_num_heaps entries wide) is appended to the end of bucket i.
// The min_fl_list entries themselves are left unchanged.
void allocator::merge_items (gc_heap* current_heap, int to_num_heaps, int from_num_heaps)
{
    int this_hn = current_heap->heap_number;

    for (unsigned int i = 0; i < num_buckets; i++)
    {
        alloc_list* al = &alloc_list_of (i);
        // References, so the assignments below update the bucket in place.
        uint8_t*& head = al->alloc_list_head ();
        uint8_t*& tail = al->alloc_list_tail ();

        for (int other_hn = 0; other_hn < from_num_heaps; other_hn++)
        {
            min_fl_list_info* current_bucket_min_fl_list = gc_heap::g_heaps[other_hn]->min_fl_list + (i * to_num_heaps);

            // get the fl corresponding to the heap we want to merge it onto.
            min_fl_list_info* current_heap_bucket_min_fl_list = &current_bucket_min_fl_list[this_hn];

            uint8_t* head_other_heap = current_heap_bucket_min_fl_list->head;

            if (head_other_heap)
            {
#ifdef DOUBLY_LINKED_FL
                if (is_doubly_linked_p())
                {
                    // The first incoming item follows our current tail (0 if this bucket is empty).
                    free_list_prev (head_other_heap) = tail;
                }
#endif //DOUBLY_LINKED_FL

                if (head)
                {
                    free_list_slot (tail) = head_other_heap;
                }
                else
                {
                    head = head_other_heap;
                }

                tail = current_heap_bucket_min_fl_list->tail;
            }
        }
    }
}
#endif //DYNAMIC_HEAP_COUNT

// Empties every bucket by resetting its head and tail; the items themselves are not touched.
void allocator::clear()
{
    unsigned int bucket_index = 0;
    while (bucket_index < num_buckets)
    {
        alloc_list_head_of (bucket_index) = 0;
        alloc_list_tail_of (bucket_index) = 0;
        bucket_index++;
    }
}

// Threads item onto the end of the bucket chosen by first_suitable_bucket (size).
// With DOUBLY_LINKED_FL, items of a max_generation allocator also get their prev link
// set to the previous tail.
void allocator::thread_item (uint8_t* item, size_t size)
{
    unsigned int bucket_index = first_suitable_bucket (size);
    alloc_list* bucket = &alloc_list_of (bucket_index);
    uint8_t*& list_head = bucket->alloc_list_head();
    uint8_t*& list_tail = bucket->alloc_list_tail();

    // A bucket without a head must not have a tail either.
    assert ((list_head != 0) || (list_tail == 0));

    // The item becomes the last element: no successor, no undo info.
    free_list_slot (item) = 0;
    free_list_undo (item) = UNDO_EMPTY;
    assert (item != list_head);

#ifdef DOUBLY_LINKED_FL
    const bool doubly_linked = (gen_number == max_generation);
    if (doubly_linked)
    {
        free_list_prev (item) = list_tail;
    }
#endif //DOUBLY_LINKED_FL

    if (list_head != 0)
    {
        assert ((free_list_slot(list_head) != 0) || (list_tail == list_head));
        assert (item != list_tail);
        assert (free_list_slot(list_tail) == 0);

        free_list_slot (list_tail) = item;
    }
    else
    {
        list_head = item;
    }

    list_tail = item;

#ifdef DOUBLY_LINKED_FL
    if (doubly_linked)
    {
        dprintf (3333, ("[g%2d, b%2d]TFE: %p->%p->%p (h: %p, t: %p)",
            gen_number, bucket_index,
            free_list_prev (item), item, free_list_slot (item),
            list_head, list_tail));
        dprintf (3333, ("[g%2d, b%2d]TFE: exit: h->N: %p, h->P: %p, t->N: %p, t->P: %p",
            gen_number, bucket_index,
            (list_head ? free_list_slot (list_head) : 0),
            (list_head ? free_list_prev (list_head) : 0),
            (list_tail ? free_list_slot (list_tail) : 0),
            (list_tail ? free_list_prev (list_tail) : 0)));
    }
#endif //DOUBLY_LINKED_FL
}

// Threads item onto the front of the bucket chosen by first_suitable_bucket (size).
// With DOUBLY_LINKED_FL, a max_generation allocator also maintains the prev links:
// the old head points back at item and item's prev is cleared.
void allocator::thread_item_front (uint8_t* item, size_t size)
{
    unsigned int bucket_index = first_suitable_bucket (size);
    alloc_list* bucket = &alloc_list_of (bucket_index);
    uint8_t*& list_head = bucket->alloc_list_head();
    uint8_t*& list_tail = bucket->alloc_list_tail();

    // A bucket without a head must not have a tail either.
    assert ((list_head != 0) || (list_tail == 0));

    // The item precedes the current head.
    free_list_slot (item) = list_head;
    free_list_undo (item) = UNDO_EMPTY;

    if (list_tail == 0)
    {
        // With the asserted invariant (head is 0 too) this assignment leaves tail at 0.
        assert (list_head == 0);
        list_tail = list_head;
    }

#ifdef DOUBLY_LINKED_FL
    const bool doubly_linked = (gen_number == max_generation);
    if (doubly_linked && (list_head != 0))
    {
        free_list_prev (list_head) = item;
    }
#endif //DOUBLY_LINKED_FL

    list_head = item;
    // On a previously empty bucket the item is the tail as well.
    if (list_tail == 0)
    {
        list_tail = item;
    }

#ifdef DOUBLY_LINKED_FL
    if (doubly_linked)
    {
        free_list_prev (item) = 0;

        dprintf (3333, ("[g%2d, b%2d]TFF: exit: %p->%p->%p (h: %p, t: %p)",
            gen_number, bucket_index,
            free_list_prev (item), item, free_list_slot (item),
            list_head, list_tail));
        dprintf (3333, ("[g%2d, b%2d]TFF: h->N: %p, h->P: %p, t->N: %p, t->P: %p",
            gen_number, bucket_index,
            (list_head ? free_list_slot (list_head) : 0),
            (list_head ? free_list_prev (list_head) : 0),
            (list_tail ? free_list_slot (list_tail) : 0),
            (list_tail ? free_list_prev (list_tail) : 0)));
    }
#endif //DOUBLY_LINKED_FL
}

// Copies every bucket of this allocator into toalist, which must have room for num_buckets
// entries. Under FL_VERIFICATION the number of items on each bucket is recorded in the copy.
void allocator::copy_to_alloc_list (alloc_list* toalist)
{
    for (unsigned int bucket_index = 0; bucket_index < num_buckets; bucket_index++)
    {
        toalist [bucket_index] = alloc_list_of (bucket_index);
#ifdef FL_VERIFICATION
        // We are only calling this method to copy to an empty list
        // so damage count is always 0
        assert (alloc_list_damage_count_of (bucket_index) == 0);

        // Walk the bucket to count its items.
        size_t count = 0;
        for (uint8_t* free_item = alloc_list_head_of (bucket_index);
             free_item != 0;
             free_item = free_list_slot (free_item))
        {
            count++;
        }

        toalist[bucket_index].item_count = count;
#endif //FL_VERIFICATION
    }
}

// Restores the buckets of this allocator from fromalist (one alloc_list per bucket) and,
// unless discard_if_no_fit_p() is set, repairs the restored lists by putting back the
// next links that were saved in the items' undo slots.
//
// With DOUBLY_LINKED_FL and a max_generation allocator, bucket 0 is not restored from
// fromalist: only its head's prev link and its added list are reset, and the loop below
// starts at bucket 1.
void allocator::copy_from_alloc_list (alloc_list* fromalist)
{
    BOOL repair_list = !discard_if_no_fit_p ();
#ifdef DOUBLY_LINKED_FL
    BOOL bgc_repair_p = FALSE;
    if (gen_number == max_generation)
    {
        bgc_repair_p = TRUE;

        // Bucket 0 is expected to have no recorded damage here; break into the debugger if it does.
        if (alloc_list_damage_count_of (0) != 0)
        {
            GCToOSInterface::DebugBreak();
        }

        // The head of a list has no predecessor.
        uint8_t* b0_head = alloc_list_head_of (0);
        if (b0_head)
        {
            free_list_prev (b0_head) = 0;
        }

        // Drop whatever was put on bucket 0's added list.
        added_alloc_list_head_of (0) = 0;
        added_alloc_list_tail_of (0) = 0;
    }

    unsigned int start_index = (bgc_repair_p ? 1 : 0);
#else
    unsigned int start_index = 0;

#endif //DOUBLY_LINKED_FL

    for (unsigned int i = start_index; i < num_buckets; i++)
    {
        // Remember how many items were damaged before the bucket is overwritten;
        // the saved copy is expected to carry a damage count of 0.
        size_t count = alloc_list_damage_count_of (i);

        alloc_list_of (i) = fromalist [i];
        assert (alloc_list_damage_count_of (i) == 0);

        if (repair_list)
        {
            //repair the list
            //new items may have been added during the plan phase
            //items may have been unlinked.
            uint8_t* free_item = alloc_list_head_of (i);

            // Stop as soon as all damaged items have been repaired (or the list ends).
            while (free_item && count)
            {
                assert (((CObjectHeader*)free_item)->IsFree());
                if ((free_list_undo (free_item) != UNDO_EMPTY))
                {
                    count--;

                    // Restore the saved next link and mark the undo slot as unused again.
                    free_list_slot (free_item) = free_list_undo (free_item);
                    free_list_undo (free_item) = UNDO_EMPTY;
                }

                free_item = free_list_slot (free_item);
            }

#ifdef DOUBLY_LINKED_FL
            if (bgc_repair_p)
            {
                // Drop whatever was put on this bucket's added list.
                added_alloc_list_head_of (i) = 0;
                added_alloc_list_tail_of (i) = 0;
            }
#endif //DOUBLY_LINKED_FL

#ifdef FL_VERIFICATION
            // The repaired list must contain as many items as were recorded in the saved copy.
            free_item = alloc_list_head_of (i);
            size_t item_count = 0;
            while (free_item)
            {
                item_count++;
                free_item = free_list_slot (free_item);
            }

            assert (item_count == alloc_list_of (i).item_count);
#endif //FL_VERIFICATION
        }

#ifdef DEBUG
        // The tail, if any, must be the last item.
        uint8_t* tail_item = alloc_list_tail_of (i);
        assert ((tail_item == 0) || (free_list_slot (tail_item) == 0));
#endif
    }
}

// Makes the current state of the free lists permanent. Does nothing when
// discard_if_no_fit_p() is set. Otherwise, for every bucket:
//  - clears the undo slot of each damaged item (up to the bucket's damage count) and
//    resets the damage count to 0;
//  - with DOUBLY_LINKED_FL and a max_generation allocator, also fixes up prev links and
//    splices the bucket's added list in front of the regular list, then empties the added list.
void allocator::commit_alloc_list_changes()
{
    BOOL repair_list = !discard_if_no_fit_p ();
#ifdef DOUBLY_LINKED_FL
    BOOL bgc_repair_p = FALSE;
    if (gen_number == max_generation)
    {
        bgc_repair_p = TRUE;
    }
#endif //DOUBLY_LINKED_FL

    if (repair_list)
    {
        for (unsigned int i = 0; i < num_buckets; i++)
        {
            //remove the undo info from list.
            uint8_t* free_item = alloc_list_head_of (i);

#ifdef DOUBLY_LINKED_FL
            if (bgc_repair_p)
            {
                dprintf (3, ("C[b%2d] ENTRY: h: %p t: %p", i,
                    alloc_list_head_of (i), alloc_list_tail_of (i)));
            }

            // The head of a list has no predecessor; only write when the link is actually set.
            if (free_item && bgc_repair_p)
            {
                if (free_list_prev (free_item) != 0)
                    free_list_prev (free_item) = 0;
            }
#endif //DOUBLY_LINKED_FL

            size_t count = alloc_list_damage_count_of (i);

            // Stop as soon as all damaged items have been handled (or the list ends).
            while (free_item && count)
            {
                assert (((CObjectHeader*)free_item)->IsFree());

                if (free_list_undo (free_item) != UNDO_EMPTY)
                {
                    free_list_undo (free_item) = UNDO_EMPTY;

#ifdef DOUBLY_LINKED_FL
                    if (bgc_repair_p)
                    {
                        // Make the successor point back at this item if it does not already.
                        uint8_t* next_item = free_list_slot (free_item);
                        if (next_item && (free_list_prev (next_item) != free_item))
                            free_list_prev (next_item) = free_item;
                    }
#endif //DOUBLY_LINKED_FL

                    count--;
                }

                free_item = free_list_slot (free_item);
            }

            alloc_list_damage_count_of (i) = 0;

#ifdef DOUBLY_LINKED_FL
            if (bgc_repair_p)
            {
                uint8_t* head = alloc_list_head_of (i);
                uint8_t* tail_added = added_alloc_list_tail_of (i);

                // Link the end of the added list to the start of the regular list.
                if (tail_added)
                {
                    assert (free_list_slot (tail_added) == 0);

                    if (head)
                    {
                        free_list_slot (tail_added) = head;
                        free_list_prev (head) = tail_added;
                    }
                }

                uint8_t* head_added = added_alloc_list_head_of (i);

                // The added list's head becomes the bucket's head; if the regular list was
                // empty, the added list's tail becomes the bucket's tail as well.
                if (head_added)
                {
                    alloc_list_head_of (i) = head_added;

                    if (alloc_list_tail_of (i) == 0)
                    {
                        alloc_list_tail_of (i) = tail_added;
                    }
                }

                added_alloc_list_head_of (i) = 0;
                added_alloc_list_tail_of (i) = 0;
            }
#endif //DOUBLY_LINKED_FL
        }
    }
}

#ifdef USE_REGIONS
// Threads the free list recorded on region (region->free_list_head .. free_list_tail) onto
// this allocator. With a single bucket the whole list is appended in one step; with several
// buckets each item is threaded individually into the bucket that fits its size.
void allocator::thread_sip_fl (heap_segment* region)
{
    uint8_t* region_fl_head = region->free_list_head;
    uint8_t* region_fl_tail = region->free_list_tail;

    // Nothing to do for a region without free list items.
    if (!region_fl_head)
    {
        assert (!region_fl_tail);
        assert (region->free_list_size == 0);
        return;
    }

    if (num_buckets != 1)
    {
        dprintf (REGIONS_LOG, ("threading gen%d region %p onto gen%d bucketed FL",
            heap_segment_gen_num (region), heap_segment_mem (region), gen_number));
        // If we have a bucketed free list we'd need to go through the region's free list.
        size_t total_free_size = 0;
        uint8_t* next_fl_item = 0;
        for (uint8_t* region_fl_item = region_fl_head; region_fl_item != 0; region_fl_item = next_fl_item)
        {
            // Read the successor first: thread_item resets the item's next link.
            next_fl_item = free_list_slot (region_fl_item);
            size_t item_size = size (region_fl_item);
            thread_item (region_fl_item, item_size);
            total_free_size += item_size;
        }
        assert (total_free_size == region->free_list_size);
        return;
    }

    dprintf (REGIONS_LOG, ("threading gen%d region %p onto gen%d FL",
        heap_segment_gen_num (region), heap_segment_mem (region), gen_number));
    alloc_list* bucket = &alloc_list_of (0);
    uint8_t*& list_head = bucket->alloc_list_head();
    uint8_t*& list_tail = bucket->alloc_list_tail();

    if (list_tail != 0)
    {
        // Append the region's list behind the current tail.
        free_list_slot (list_tail) = region_fl_head;
    }
    else
    {
        // Empty bucket: the region's list becomes the whole list.
        assert (list_head == 0);
        list_head = region_fl_head;
    }

    list_tail = region_fl_tail;
}
#endif //USE_REGIONS

#ifdef FEATURE_EVENT_TRACE
// Walks the buckets from the last one down to the first, recording per bucket the number of
// items and their total aligned size into bucket_info (one entry per non-empty bucket).
//   bucket_info           - receives (bucket index, item count, size) entries.
//   max_size              - the walk stops once the running total size exceeds this.
//   max_item_count        - the walk stops once a bucket's item count exceeds this.
//   recorded_fl_info_size - receives the total aligned size of all items visited.
// Returns the number of bucket_info entries written.
// NOTE(review): items_counted_total is maintained but never read; the limit check uses the
// per-bucket counter. Confirm whether max_item_count was meant to bound the overall total.
uint16_t allocator::count_largest_items (etw_bucket_info* bucket_info,
                                         size_t max_size,
                                         size_t max_item_count,
                                         size_t* recorded_fl_info_size)
{
    assert (gen_number == max_generation);

    size_t size_counted_total = 0;
    size_t items_counted_total = 0;
    uint16_t entries_written = 0;

    for (int bucket = (num_buckets - 1); bucket >= 0; bucket--)
    {
        uint32_t bucket_items = 0;
        size_t bucket_size = 0;

        for (uint8_t* free_item = alloc_list_head_of ((unsigned int)bucket);
             free_item != 0;
             free_item = free_list_slot (free_item))
        {
            assert (((CObjectHeader*)free_item)->IsFree());

            size_t free_item_size = Align (size (free_item));
            size_counted_total += free_item_size;
            bucket_size += free_item_size;
            items_counted_total++;
            bucket_items++;

            // Limit reached: record the partially walked bucket and stop.
            if ((size_counted_total > max_size) || (bucket_items > max_item_count))
            {
                bucket_info[entries_written++].set ((uint16_t)bucket, bucket_items, bucket_size);
                *recorded_fl_info_size = size_counted_total;
                return entries_written;
            }
        }

        // Empty buckets do not get an entry.
        if (bucket_items)
        {
            bucket_info[entries_written++].set ((uint16_t)bucket, bucket_items, bucket_size);
        }
    }

    *recorded_fl_info_size = size_counted_total;
    return entries_written;
}
#endif //FEATURE_EVENT_TRACE

// Points the allocation context at a new allocation area starting at start and spanning
// limit_size bytes, updates the allocation accounting, releases the more space lock and
// clears the memory that needs clearing.
//
//   start       - start of the new allocation area.
//   limit_size  - size of the new area; alloc_limit is set to
//                 start + limit_size - Align (min_obj_size, align_const).
//   size        - size of the object being allocated; used to skip clearing the object when
//                 GC_ALLOC_ZEROING_OPTIONAL is set and passed to the ETW allocation event.
//   acontext    - the allocation context to update.
//   flags       - allocation flags; only GC_ALLOC_ZEROING_OPTIONAL is examined here.
//   seg         - segment the area lives in; may be null.
//   align_const - alignment constant passed to Align.
//   gen_number  - generation being allocated in; gen_number > 0 selects the UOH lock and counters.
//
// This function calls leave_spin_lock on the selected more space lock, so presumably the
// caller must hold it on entry - confirm against the callers.
void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, size_t size,
                                alloc_context* acontext, uint32_t flags,
                                heap_segment* seg, int align_const, int gen_number)
{
    // Pick the lock and the byte counter that match the kind of allocation.
    bool uoh_p = (gen_number > 0);
    GCSpinLock* msl = uoh_p ? &more_space_lock_uoh : &more_space_lock_soh;
    uint64_t& total_alloc_bytes = uoh_p ? total_alloc_bytes_uoh : total_alloc_bytes_soh;

    size_t aligned_min_obj_size = Align(min_obj_size, align_const);

#ifdef USE_REGIONS
    if (seg)
    {
        assert (heap_segment_used (seg) <= heap_segment_committed (seg));
    }
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
    // Record that gen0 has been allocated into on this heap.
    if (gen_number == 0)
    {
        if (!gen0_allocated_after_gc_p)
        {
            gen0_allocated_after_gc_p = true;
        }
    }
#endif //MULTIPLE_HEAPS

    dprintf (3, ("Expanding segment allocation [%zx, %zx[", (size_t)start,
               (size_t)start + limit_size - aligned_min_obj_size));

    // The new area is not contiguous with the context's current area if it starts neither
    // at alloc_limit nor at alloc_limit + aligned_min_obj_size.
    if ((acontext->alloc_limit != start) &&
        (acontext->alloc_limit + aligned_min_obj_size)!= start)
    {
        // Turn what is left of the old area into a free object and give back the bytes
        // that were accounted for but never used.
        uint8_t*  hole = acontext->alloc_ptr;
        if (hole != 0)
        {
            size_t  ac_size = (acontext->alloc_limit - acontext->alloc_ptr);
            dprintf (3, ("filling up hole [%zx, %zx[", (size_t)hole, (size_t)hole + ac_size + aligned_min_obj_size));
            // when we are finishing an allocation from a free list
            // we know that the free area was Align(min_obj_size) larger
            acontext->alloc_bytes -= ac_size;
            total_alloc_bytes -= ac_size;
            size_t free_obj_size = ac_size + aligned_min_obj_size;
            make_unused_array (hole, free_obj_size);
            generation_free_obj_space (generation_of (gen_number)) += free_obj_size;
        }
        acontext->alloc_ptr = start;
    }
    else
    {
        // The new area continues the old one; alloc_ptr stays where it is except for gen0.
        if (gen_number == 0)
        {
#ifdef USE_REGIONS
            if (acontext->alloc_ptr == 0)
            {
                acontext->alloc_ptr = start;
            }
            else
#endif //USE_REGIONS
            {
                // Place a min-object-sized unused array at alloc_ptr and allocate after it.
                size_t pad_size = aligned_min_obj_size;
                dprintf (3, ("contiguous ac: making min obj gap %p->%p(%zd)",
                    acontext->alloc_ptr, (acontext->alloc_ptr + pad_size), pad_size));
                make_unused_array (acontext->alloc_ptr, pad_size);
                acontext->alloc_ptr += pad_size;
            }
        }
    }
    acontext->alloc_limit = (start + limit_size - aligned_min_obj_size);
    // For generations up to max_generation the min object sized pad is not counted as allocated.
    size_t added_bytes = limit_size - ((gen_number <= max_generation) ? aligned_min_obj_size : 0);
    acontext->alloc_bytes += added_bytes;
    total_alloc_bytes     += added_bytes;

    size_t etw_allocation_amount = 0;
    bool fire_event_p = update_alloc_info (gen_number, added_bytes, &etw_allocation_amount);

    // Snapshot of the segment's used pointer, checked again after the lock is released.
    uint8_t* saved_used = 0;

    if (seg)
    {
        saved_used = heap_segment_used (seg);
    }

    if (seg == ephemeral_heap_segment)
    {
        //Sometimes the allocated size is advanced without clearing the
        //memory. Let's catch up here
        if (heap_segment_used (seg) < (alloc_allocated - plug_skew))
        {
            heap_segment_used (seg) = alloc_allocated - plug_skew;
            assert (heap_segment_mem (seg) <= heap_segment_used (seg));
            assert (heap_segment_used (seg) <= heap_segment_reserved (seg));
        }
    }
#ifdef BACKGROUND_GC
    else if (seg)
    {
        // Sanity check only: used must not be below where allocated was before this allocation.
        uint8_t* old_allocated = heap_segment_allocated (seg) - plug_skew - limit_size;
#ifdef FEATURE_LOH_COMPACTION
        if (gen_number == loh_generation)
        {
            old_allocated -= Align (loh_padding_obj_size, align_const);
        }
#endif //FEATURE_LOH_COMPACTION

        assert (heap_segment_used (seg) >= old_allocated);
    }
#endif //BACKGROUND_GC

    // we are going to clear a right-edge exclusive span [clear_start, clear_limit)
    // but will adjust for cases when object is ok to stay dirty or the space has not seen any use yet
    // NB: the size and limit_size include syncblock, which is to the -1 of the object start
    //     that effectively shifts the allocation by `plug_skew`
    uint8_t* clear_start = start - plug_skew;
    uint8_t* clear_limit = start + limit_size - plug_skew;

    if (flags & GC_ALLOC_ZEROING_OPTIONAL)
    {
        uint8_t* obj_start = acontext->alloc_ptr;
        assert(start >= obj_start);
        uint8_t* obj_end = obj_start + size - plug_skew;
        assert(obj_end >= clear_start);

        // if clearing at the object start, clear the syncblock.
        if(obj_start == start)
        {
            *(PTR_PTR)clear_start = 0;
        }
        // skip the rest of the object
        dprintf(3, ("zeroing optional: skipping object at %p->%p(%zd)",
            clear_start, obj_end, obj_end - clear_start));
        clear_start = obj_end;
    }

    // fetch the ephemeral_heap_segment *before* we release the msl
    // - ephemeral_heap_segment may change due to other threads allocating
    heap_segment* gen0_segment = ephemeral_heap_segment;

#ifdef BACKGROUND_GC
    {
        // UOH allocation while a background GC is running.
        if (uoh_p && gc_heap::background_running_p())
        {
            uint8_t* obj = acontext->alloc_ptr;
            uint8_t* result = obj;
            uint8_t* current_lowest_address = background_saved_lowest_address;
            uint8_t* current_highest_address = background_saved_highest_address;

            if (current_c_gc_state == c_gc_state_planning)
            {
                dprintf (3, ("Concurrent allocation of a large object %zx",
                            (size_t)obj));
                //mark the new block specially so we know it is a new object
                if ((result < current_highest_address) && (result >= current_lowest_address))
                {
#ifdef DOUBLY_LINKED_FL
                    // Note: this local shadows the seg parameter within this scope.
                    heap_segment* seg = seg_mapping_table_segment_of (result);
                    // if bgc_allocated is 0 it means it was allocated during bgc sweep,
                    // and since sweep does not look at this seg we cannot set the mark array bit.
                    uint8_t* background_allocated = heap_segment_background_allocated(seg);
                    if (background_allocated != 0)
#endif //DOUBLY_LINKED_FL
                    {
                        dprintf(3, ("Setting mark bit at address %zx",
                            (size_t)(&mark_array[mark_word_of(result)])));

                        mark_array_set_marked(result);
                    }
                }
            }
        }
    }
#endif //BACKGROUND_GC

    // check if space to clear is all dirty from prior use or only partially
    if ((seg == 0) || (clear_limit <= heap_segment_used (seg)))
    {
        // Everything up to clear_limit may be dirty: release the lock, then clear the whole span.
        add_saved_spinlock_info (uoh_p, me_release, mt_clr_mem, msl_entered);
        leave_spin_lock (msl);

        if (clear_start < clear_limit)
        {
            dprintf(3, ("clearing memory at %p for %zd bytes", clear_start, clear_limit - clear_start));
            memclr(clear_start, clear_limit - clear_start);
        }
    }
    else
    {
        // we only need to clear [clear_start, used) and only if clear_start < used
        uint8_t* used = heap_segment_used (seg);
        // Advance used to cover the new area while the lock is still held.
        heap_segment_used (seg) = clear_limit;

        add_saved_spinlock_info (uoh_p, me_release, mt_clr_mem, msl_entered);
        leave_spin_lock (msl);

        if (clear_start < used)
        {
            // used was read after the possible catch-up above; a mismatch with the earlier
            // snapshot is treated as a fatal error.
            if (used != saved_used)
            {
                FATAL_GC_ERROR();
            }

            dprintf (2, ("clearing memory before used at %p for %zd bytes", clear_start, used - clear_start));
            memclr (clear_start, used - clear_start);
        }
    }

#ifdef FEATURE_EVENT_TRACE
    if (fire_event_p)
    {
        fire_etw_allocation_event (etw_allocation_amount, gen_number, acontext->alloc_ptr, size);
    }
#endif //FEATURE_EVENT_TRACE

    //this portion can be done after we release the lock
    if (seg == gen0_segment ||
       ((seg == nullptr) && (gen_number == 0) && (limit_size >= CLR_SIZE / 2)))
    {
        if (gen0_must_clear_bricks > 0)
        {
            //set the brick table to speed up find_object
            // The first brick records the offset of alloc_ptr within it; every following
            // brick up to the end of the area is set to -1.
            size_t b = brick_of (acontext->alloc_ptr);
            set_brick (b, acontext->alloc_ptr - brick_address (b));
            b++;
            dprintf (3, ("Allocation Clearing bricks [%zx, %zx[",
                         b, brick_of (align_on_brick (start + limit_size))));
            volatile short* x = &brick_table [b];
            short* end_x = &brick_table [brick_of (align_on_brick (start + limit_size))];

            for (;x < end_x;x++)
                *x = -1;
        }
        else
        {
            gen0_bricks_cleared = FALSE;
        }
    }

    // verifying the memory is completely cleared.
    //if (!(flags & GC_ALLOC_ZEROING_OPTIONAL))
    //{
    //    verify_mem_cleared(start - plug_skew, limit_size);
    //}
}

// Computes how much may be handed out for an allocation in gen_number: the generation's
// remaining new-allocation budget, but at least size and at most physical_limit.
size_t gc_heap::new_allocation_limit (size_t size, size_t physical_limit, int gen_number)
{
    ptrdiff_t remaining_budget = dd_new_allocation (dynamic_data_of (gen_number));
    assert (remaining_budget == (ptrdiff_t)Align (remaining_budget, get_alignment_constant (gen_number < uoh_start_generation)));

    // Never go below the requested size, even when the budget is smaller than that.
    ptrdiff_t wanted = max (remaining_budget, (ptrdiff_t)size);
    // Never go above what physically fits.
    size_t limit = min (wanted, (ptrdiff_t)physical_limit);
    assert (limit == Align (limit, get_alignment_constant (gen_number <= max_generation)));

    return limit;
}

// Decides how large an allocation area to hand out for a request of size bytes.
// The request is padded by Align (min_obj_size, align_const); for gen0 requests that must be
// zeroed, at least allocation_quantum is asked for. The result is capped by physical_limit
// and finally run through new_allocation_limit.
size_t gc_heap::limit_from_size (size_t size, uint32_t flags, size_t physical_limit, int gen_number,
                                 int align_const)
{
    size_t padded_size = size + Align (min_obj_size, align_const);
    // for LOH this is not true...we could select a physical_limit that's exactly the same
    // as size.
    assert ((gen_number != 0) || (physical_limit >= padded_size));

    // For SOH if the size asked for is very small, we want to allocate more than just what's asked for if possible.
    // Unless we were told not to clean, then we will not force it.
    size_t min_size_to_allocate = 0;
    if ((gen_number == 0) && !(flags & GC_ALLOC_ZEROING_OPTIONAL))
    {
        min_size_to_allocate = allocation_quantum;
    }

    size_t desired_size_to_allocate = max (padded_size, min_size_to_allocate);
    size_t capped_physical_limit = min (physical_limit, desired_size_to_allocate);

    size_t new_limit = new_allocation_limit (padded_size, capped_physical_limit, gen_number);
    assert (new_limit >= (size + Align (min_obj_size, align_const)));
    dprintf (3, ("h%d requested to allocate %zd bytes, actual size is %zd, phy limit: %zd",
        heap_number, size, new_limit, physical_limit));
    return new_limit;
}

// Copies the current oom_info into the next slot of the per-heap OOM history, wrapping the
// slot index back to 0 once it reaches max_oom_history_count.
void gc_heap::add_to_oom_history_per_heap()
{
    memcpy (&oomhist_per_heap[oomhist_index_per_heap], &oom_info, sizeof (oom_info));

    if (++oomhist_index_per_heap == max_oom_history_count)
    {
        oomhist_index_per_heap = 0;
    }
}

// Records an out-of-memory occurrence in oom_info and the per-heap OOM history, resets the
// recorded failure-to-get-memory state and optionally breaks into the debugger.
//   reason     - why the OOM happened; oom_budget may be turned into oom_low_mem (see below).
//   alloc_size - size that was being allocated; replaced for oom_budget.
//   allocated / reserved - recorded as-is in oom_info.
void gc_heap::handle_oom (oom_reason reason, size_t alloc_size,
                          uint8_t* allocated, uint8_t* reserved)
{
    if (reason == oom_budget)
    {
        alloc_size = dd_min_size (dynamic_data_of (0)) / 2;

        // A recorded non-LOH failure means that during the last GC we needed to reserve
        // and/or commit more memory but couldn't. We proceeded with the GC and ended up not
        // having enough memory at the end. This is a legitimate OOM situation. Otherwise we
        // probably made a mistake and didn't expand the heap when we should have.
        if (!fgm_result.loh_p && (fgm_result.fgm != fgm_no_failure))
        {
            reason = oom_low_mem;
        }
    }

    // What was requested and why it failed.
    oom_info.reason = reason;
    oom_info.alloc_size = alloc_size;
    oom_info.allocated = allocated;
    oom_info.reserved = reserved;
    oom_info.gc_index = settings.gc_index;

    // Details of the last recorded failure to get memory.
    oom_info.fgm = fgm_result.fgm;
    oom_info.size = fgm_result.size;
    oom_info.available_pagefile_mb = fgm_result.available_pagefile_mb;
    oom_info.loh_p = fgm_result.loh_p;

    add_to_oom_history_per_heap();
    fgm_result.fgm = fgm_no_failure;

    // Break early - before the more_space_lock is released - so no other threads
    // could have allocated on the same heap when OOM happened.
    if (GCConfig::GetBreakOnOOM())
    {
        GCToOSInterface::DebugBreak();
    }
}

#ifdef BACKGROUND_GC
// Returns whether a background GC may be used: concurrent GC must be available and the
// pause mode must be interactive or sustained low latency.
BOOL gc_heap::background_allowed_p()
{
    if (!gc_can_use_concurrent)
    {
        return FALSE;
    }

    return ((settings.pause_mode == pause_interactive) ||
            (settings.pause_mode == pause_sustained_low_latency));
}
#endif //BACKGROUND_GC

// Full GC notification (FGN) check. Estimates whether the next GC will be a
// blocking full GC and, if so, sends the "full GC approaching" notification
// through send_full_gc_notification.
//
// Two kinds of evidence are considered:
//   1. the allocation budget: the percentage of budget left in the generation
//      being checked has dropped to the configured threshold (alloc_factor stays TRUE);
//   2. "other factors": generation_to_condemn predicts max_generation will be
//      condemned by a blocking GC (alloc_factor is set to FALSE).
//
// gen_num - generation the allocation is for. Values below max_generation are
//           evaluated against max_generation's budget; the original value is
//           what gets reported in the notification.
// size    - size of the allocation; it is ignored (zeroed) when gen_num is 0.
void gc_heap::check_for_full_gc (int gen_num, size_t size)
{
    BOOL should_notify = FALSE;
    // if we detect full gc because of the allocation budget specified this is TRUE;
    // it's FALSE if it's due to other factors.
    BOOL alloc_factor = TRUE;
    // Keep the caller's generation; gen_num itself may be raised below.
    int n_initial = gen_num;
    BOOL local_blocking_collection = FALSE;
    BOOL local_elevation_requested = FALSE;
    int new_alloc_remain_percent = 0;

    // A notification has already been sent; do not evaluate again.
    if (full_gc_approach_event_set)
    {
        return;
    }

    // SOH generations are all judged against max_generation's budget.
    if (gen_num < max_generation)
    {
        gen_num = max_generation;
    }

    dynamic_data* dd_full = dynamic_data_of (gen_num);
    ptrdiff_t new_alloc_remain = 0;
    // UOH generations use fgn_loh_percent as threshold, everything else fgn_maxgen_percent.
    uint32_t pct = (gen_num >= uoh_start_generation) ? fgn_loh_percent : fgn_maxgen_percent;

    // Diagnostic only: log remaining and desired budget for every generation.
    for (int gen_index = 0; gen_index < total_generation_count; gen_index++)
    {
        dprintf (2, ("FGN: h#%d: gen%d: %zd(%zd)",
                     heap_number, gen_index,
                     dd_new_allocation (dynamic_data_of (gen_index)),
                     dd_desired_allocation (dynamic_data_of (gen_index))));
    }

    // For small object allocations we only check every fgn_check_quantum bytes.
    if (n_initial == 0)
    {
        dprintf (2, ("FGN: gen0 last recorded alloc: %zd", fgn_last_alloc));
        dynamic_data* dd_0 = dynamic_data_of (n_initial);
        // Skip the check while less than fgn_check_quantum of gen0 budget has been
        // consumed since the last recorded value and the budget is not negative.
        if (((fgn_last_alloc - dd_new_allocation (dd_0)) < fgn_check_quantum) &&
            (dd_new_allocation (dd_0) >= 0))
        {
            return;
        }
        else
        {
            fgn_last_alloc = dd_new_allocation (dd_0);
            dprintf (2, ("FGN: gen0 last recorded alloc is now: %zd", fgn_last_alloc));
        }

        // We don't consider the size that came from soh 'cause it doesn't contribute to the
        // gen2 budget.
        size = 0;
    }

    // n ends up as the highest generation such that gen1..gen n have all exhausted
    // their budget (0 if gen1 still has budget left).
    int n = 0;
    for (int i = 1; i <= max_generation; i++)
    {
            if (get_new_allocation (i) <= 0)
            {
                n = i;
            }
            else
                break;
    }

    dprintf (2, ("FGN: h#%d: gen%d budget exceeded", heap_number, n));
    if (gen_num == max_generation)
    {
        // If it's small object heap we should first see if we will even be looking at gen2 budget
        // in the next GC or not. If not we should go directly to checking other factors.
        if (n < (max_generation - 1))
        {
            goto check_other_factors;
        }
    }

    // Budget that would remain in the checked generation after this allocation,
    // expressed as a percentage of its desired allocation.
    new_alloc_remain = dd_new_allocation (dd_full) - size;

    new_alloc_remain_percent = (int)(((float)(new_alloc_remain) / (float)dd_desired_allocation (dd_full)) * 100);

    dprintf (2, ("FGN: alloc threshold for gen%d is %d%%, current threshold is %d%%",
                 gen_num, pct, new_alloc_remain_percent));

    if (new_alloc_remain_percent <= (int)pct)
    {
#ifdef BACKGROUND_GC
        // If background GC is enabled, we still want to check whether this will
        // be a blocking GC or not because we only want to notify when it's a
        // blocking full GC.
        if (background_allowed_p())
        {
            goto check_other_factors;
        }
#endif //BACKGROUND_GC

        should_notify = TRUE;
        goto done;
    }

check_other_factors:

    dprintf (2, ("FGC: checking other factors"));
    // Ask generation_to_condemn which generation it would pick starting from n; it also
    // reports whether the GC would be blocking and whether elevation was requested.
    // NOTE(review): the trailing TRUE presumably selects a "check only" mode - confirm
    // against generation_to_condemn's signature.
    n = generation_to_condemn (n,
                               &local_blocking_collection,
                               &local_elevation_requested,
                               TRUE);

    if (local_elevation_requested && (n == max_generation))
    {
        if (settings.should_lock_elevation)
        {
            // While elevation is locked, predict max_generation-1 unless the lock count
            // would reach 6 on the next GC.
            // NOTE(review): 6 presumably mirrors the threshold used where
            // elevation_locked_count is actually updated - confirm.
            int local_elevation_locked_count = settings.elevation_locked_count + 1;
            if (local_elevation_locked_count != 6)
            {
                dprintf (2, ("FGN: lock count is %d - Condemning max_generation-1",
                    local_elevation_locked_count));
                n = max_generation - 1;
            }
        }
    }

    dprintf (2, ("FGN: we estimate gen%d will be collected", n));

#ifdef BACKGROUND_GC
    // When background GC is enabled it decreases the accuracy of our predictability -
    // by the time the GC happens, we may not be under BGC anymore. If we try to
    // predict often enough it should be ok.
    if ((n == max_generation) &&
        (gc_heap::background_running_p()))
    {
        n = max_generation - 1;
        dprintf (2, ("FGN: bgc - 1 instead of 2"));
    }

    // A predicted max_generation GC is treated as blocking when background GC
    // is not allowed.
    if ((n == max_generation) && !local_blocking_collection)
    {
        if (!background_allowed_p())
        {
            local_blocking_collection = TRUE;
        }
    }
#endif //BACKGROUND_GC

    dprintf (2, ("FGN: we estimate gen%d will be collected: %s",
                       n,
                       (local_blocking_collection ? "blocking" : "background")));

    // Only a blocking max_generation GC warrants a notification here; it is
    // attributed to "other" factors rather than the allocation budget.
    if ((n == max_generation) && local_blocking_collection)
    {
        alloc_factor = FALSE;
        should_notify = TRUE;
        goto done;
    }

done:

    if (should_notify)
    {
        dprintf (2, ("FGN: gen%d detecting full GC approaching(%s) (GC#%zd) (%d%% left in gen%d)",
                     n_initial,
                     (alloc_factor ? "alloc" : "other"),
                     dd_collection_count (dynamic_data_of (0)),
                     new_alloc_remain_percent,
                     gen_num));

        // Report the generation the caller originally asked about.
        send_full_gc_notification (n_initial, alloc_factor);
    }
}

// Raises the "full GC approaching" notification unless it has been raised
// already: fires the GCFullNotify_V1 event, resets full_gc_end_event, sets
// full_gc_approach_event and remembers that the notification was sent.
//
// gen_num        - generation reported in the event.
// due_to_alloc_p - reported in the event; the caller passes whether the
//                  prediction was based on the allocation budget.
void gc_heap::send_full_gc_notification (int gen_num, BOOL due_to_alloc_p)
{
    if (full_gc_approach_event_set)
    {
        // Already signalled; nothing to do.
        return;
    }

    assert (full_gc_approach_event.IsValid());
    FIRE_EVENT(GCFullNotify_V1, gen_num, due_to_alloc_p);

    full_gc_end_event.Reset();
    full_gc_approach_event.Set();
    full_gc_approach_event_set = true;
}

// Waits on one of the full GC notification events and translates the outcome
// into a wait_full_gc_status.
//
// event       - the event to wait on.
// time_out_ms - timeout handed to user_thread_wait.
//
// Returns:
//   wait_full_gc_na        - fgn_maxgen_percent is 0 before waiting, or (with
//                            BACKGROUND_GC) the event was signalled while
//                            fgn_last_gc_was_concurrent was set;
//   wait_full_gc_cancelled - fgn_maxgen_percent became 0 by the time the wait returned;
//   wait_full_gc_success   - the event was signalled;
//   wait_full_gc_timeout   - the wait timed out;
//   wait_full_gc_failed    - the wait returned anything else.
wait_full_gc_status gc_heap::full_gc_wait (GCEvent *event, int time_out_ms)
{
#ifdef MULTIPLE_HEAPS
    gc_heap* hp = gc_heap::g_heaps[0];
#else
    gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

    if (hp->fgn_maxgen_percent == 0)
    {
        return wait_full_gc_na;
    }

    const uint32_t wait_result = user_thread_wait(event, FALSE, time_out_ms);
    const bool signalled_p = (wait_result == WAIT_OBJECT_0);

    if (!signalled_p && (wait_result != WAIT_TIMEOUT))
    {
        return wait_full_gc_failed;
    }

    // Re-check the setting after the wait: a zero value now takes precedence
    // over both the signalled and the timed-out outcome.
    if (hp->fgn_maxgen_percent == 0)
    {
        return wait_full_gc_cancelled;
    }

    if (!signalled_p)
    {
        return wait_full_gc_timeout;
    }

#ifdef BACKGROUND_GC
    if (fgn_last_gc_was_concurrent)
    {
        // Consume the flag and report "not applicable" instead of success.
        fgn_last_gc_was_concurrent = FALSE;
        return wait_full_gc_na;
    }
#endif //BACKGROUND_GC

    return wait_full_gc_success;
}

// Returns the current value of the gc_type_compacting entry of full_gc_counts.
size_t gc_heap::get_full_compact_gc_count()
{
    const size_t compacting_gc_count = full_gc_counts[gc_type_compacting];
    return compacting_gc_count;
}

// Tells whether we are short on space at the end of seg.
//
// Returns FALSE when the end-of-segment (or, with regions, end-of-gen0-region)
// space check passes, or when that check fails but sufficient_gen0_space_p is
// set; returns TRUE otherwise.
//
// DTREVIEW - we should check this in dt_low_ephemeral_space_p
// as well.
inline
BOOL gc_heap::short_on_end_of_seg (heap_segment* seg)
{
    uint8_t* allocated = heap_segment_allocated (seg);

#ifdef USE_REGIONS
    assert (end_gen0_region_space != uninitialized_end_gen0_region_space);
    BOOL enough_room_p = sufficient_space_regions_for_allocation (end_gen0_region_space,
                                                                  end_space_after_gc());
#else
    BOOL enough_room_p = sufficient_space_end_seg (allocated,
                                                   heap_segment_committed (seg),
                                                   heap_segment_reserved (seg),
                                                   end_space_after_gc());
#endif //USE_REGIONS

    if (enough_room_p)
    {
        return FALSE;
    }

    // Not enough room at the end; fall back on what is known about gen0's free space.
    if (sufficient_gen0_space_p)
    {
        dprintf (GTC_LOG, ("gen0 has enough free space"));
        return FALSE;
    }

    return TRUE;
}

// Tries to satisfy an allocation from the free list of generation gen_number.
//
// Buckets are scanned starting at the first one suitable for `size`. The first
// free item that can hold `size` plus one aligned min_obj_size is unlinked and
// handed to acontext through adjust_limit_clr. Items that do not fit are either
// discarded (when the allocator says discard_if_no_fit_p) or skipped.
//
// gen_number  - generation whose free list is searched.
// size        - requested allocation size.
// acontext    - allocation context that receives the memory.
// flags       - allocation flags, forwarded to limit_from_size/adjust_limit_clr.
// align_const - alignment constant used with Align.
//
// Returns TRUE if a fit was found and set up in acontext, FALSE otherwise.
inline
BOOL gc_heap::a_fit_free_list_p (int gen_number,
                                 size_t size,
                                 alloc_context* acontext,
                                 uint32_t flags,
                                 int align_const)
{
    generation* gen = generation_of (gen_number);
    allocator* gen_allocator = generation_allocator (gen);

    unsigned int bucket = gen_allocator->first_suitable_bucket(size);
    while (bucket < gen_allocator->number_of_buckets())
    {
        uint8_t* prev_item = 0;

        for (uint8_t* item = gen_allocator->alloc_list_head_of (bucket);
             item != 0;
             item = free_list_slot (item))
        {
            dprintf (3, ("considering free list %zx", (size_t)item));
            size_t item_size = unused_array_size (item);

            if ((size + Align (min_obj_size, align_const)) > item_size)
            {
                // Too small for this request.
                if (gen_allocator->discard_if_no_fit_p())
                {
                    // Drop the item from the free list and account for it as
                    // free object space instead.
                    assert (prev_item == 0);
                    dprintf (3, ("couldn't use this free area, discarding"));
                    generation_free_obj_space (gen) += item_size;

                    gen_allocator->unlink_item (bucket, item, prev_item, FALSE);
                    generation_free_list_space (gen) -= item_size;
                    assert ((ptrdiff_t)generation_free_list_space (gen) >= 0);
                }
                else
                {
                    prev_item = item;
                }
                continue;
            }

            dprintf (3, ("Found adequate unused area: [%zx, size: %zd",
                            (size_t)item, item_size));

            gen_allocator->unlink_item (bucket, item, prev_item, FALSE);

            // The fit test above asked for an extra Align (min_obj_size) so that
            // there is room to insert a free object; adjust_limit_clr will set
            // the limit lower.
            size_t limit = limit_from_size (size, flags, item_size, gen_number, align_const);
            dd_new_allocation (dynamic_data_of (gen_number)) -= limit;

            uint8_t* leftover = item + limit;
            size_t leftover_size = item_size - limit;
            if (leftover_size < Align(min_free_list, align_const))
            {
                // Too small to go back on the free list: absorb the entire item.
                limit += leftover_size;
            }
            else
            {
                // Put what is left back at the front of the free list.
                make_unused_array (leftover, leftover_size);
                gen_allocator->thread_item_front (leftover, leftover_size);
                assert (leftover_size >= Align (min_obj_size, align_const));
            }

            generation_free_list_space (gen) -= limit;
            assert ((ptrdiff_t)generation_free_list_space (gen) >= 0);

            adjust_limit_clr (item, limit, size, acontext, flags, 0, align_const, gen_number);

            return TRUE;
        }

        bucket++;
    }

    return FALSE;
}


#ifdef BACKGROUND_GC
// Completes a UOH allocation of `size` bytes at alloc_start for builds with
// BACKGROUND_GC: formats the range as an unused array, updates allocation
// accounting, releases more_space_lock_uoh, clears the memory outside the lock
// and finally publishes the range through acontext.
//
// alloc_start  - start of the memory being handed out.
// size         - number of bytes in the range.
// acontext     - allocation context whose alloc_ptr/alloc_limit are set.
// flags        - allocation flags; GC_ALLOC_ZEROING_OPTIONAL skips the memclr.
// gen_number   - generation the allocation belongs to (used for accounting/events).
// align_const  - alignment constant used with Align.
// lock_index   - index handed to bgc_alloc_lock->uoh_alloc_done_with_index.
// check_used_p - when TRUE, heap_segment_used (seg) is consulted and advanced so
//                that only memory below the previous `used` mark is cleared.
// seg          - segment containing the allocation; only read when check_used_p
//                is TRUE.
//
// The function leaves more_space_lock_uoh, so the callers in this file invoke it
// while holding that lock.
void gc_heap::bgc_uoh_alloc_clr (uint8_t* alloc_start,
                                 size_t size,
                                 alloc_context* acontext,
                                 uint32_t flags,
                                 int gen_number,
                                 int align_const,
                                 int lock_index,
                                 BOOL check_used_p,
                                 heap_segment* seg)
{
    // Format the whole range as a free object first.
    make_unused_array (alloc_start, size);
#ifdef DOUBLY_LINKED_FL
    clear_prev_bit (alloc_start, size);
#endif //DOUBLY_LINKED_FL

    size_t size_of_array_base = sizeof(ArrayBase);

    bgc_alloc_lock->uoh_alloc_done_with_index (lock_index);

    // clear memory while not holding the lock.
    // The first sizeof(ArrayBase) bytes are skipped by the memclr below.
    size_t size_to_skip = size_of_array_base;
    size_t size_to_clear = size - size_to_skip - plug_skew;
    size_t saved_size_to_clear = size_to_clear;
    if (check_used_p)
    {
        uint8_t* end = alloc_start + size - plug_skew;
        uint8_t* used = heap_segment_used (seg);
        if (used < end)
        {
            // Part (or all) of the range lies beyond the segment's `used` mark;
            // restrict clearing to the part below it.
            // NOTE(review): presumably memory past `used` is already zero - confirm.
            if ((alloc_start + size_to_skip) < used)
            {
                size_to_clear = used - (alloc_start + size_to_skip);
            }
            else
            {
                size_to_clear = 0;
            }
            dprintf (2, ("bgc uoh: setting used to %p", end));
            heap_segment_used (seg) = end;
        }

        dprintf (2, ("bgc uoh: used: %p, alloc: %p, end of alloc: %p, clear %zd bytes",
                     used, alloc_start, end, size_to_clear));
    }
    else
    {
        dprintf (2, ("bgc uoh: [%p-[%p(%zd)", alloc_start, alloc_start+size, size));
    }

#ifdef VERIFY_HEAP
    // since we filled in 0xcc for free object when we verify heap,
    // we need to make sure we clear those bytes.
    if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)
    {
        if (size_to_clear < saved_size_to_clear)
        {
            size_to_clear = saved_size_to_clear;
        }
    }
#endif //VERIFY_HEAP

    // Accounting is done while still holding more_space_lock_uoh.
    size_t allocated_size = size - Align (min_obj_size, align_const);
    total_alloc_bytes_uoh += allocated_size;
    size_t etw_allocation_amount = 0;
    bool fire_event_p = update_alloc_info (gen_number, allocated_size, &etw_allocation_amount);

    dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear uoh obj", heap_number));
    add_saved_spinlock_info (true, me_release, mt_clr_large_mem, msl_entered);
    leave_spin_lock (&more_space_lock_uoh);

    // From here on more_space_lock_uoh is no longer held.
#ifdef FEATURE_EVENT_TRACE
    if (fire_event_p)
    {
        fire_etw_allocation_event (etw_allocation_amount, gen_number, alloc_start, size);
    }
#endif //FEATURE_EVENT_TRACE

    ((void**) alloc_start)[-1] = 0;     //clear the sync block
    if (!(flags & GC_ALLOC_ZEROING_OPTIONAL))
    {
        memclr(alloc_start + size_to_skip, size_to_clear);
    }

#ifdef MULTIPLE_HEAPS
    assert (heap_of (alloc_start) == this);
#endif // MULTIPLE_HEAPS

    bgc_alloc_lock->uoh_alloc_set (alloc_start);

    // Hand the range to the allocation context, keeping one aligned min_obj_size
    // back at the end.
    acontext->alloc_ptr = alloc_start;
    acontext->alloc_limit = (alloc_start + size - Align (min_obj_size, align_const));

    // need to clear the rest of the object before we hand it out.
    clear_unused_array(alloc_start, size);
}
#endif //BACKGROUND_GC

// Tries to satisfy a UOH allocation from the free list of generation gen_number.
//
// A free item is usable when, after subtracting `size` (and the LOH padding
// object when FEATURE_LOH_COMPACTION applies), what remains is either exactly 0
// or at least one aligned min_obj_size, so the remainder can always be formatted
// as a free object.
//
// size        - requested allocation size.
// acontext    - allocation context that receives the memory.
// flags       - allocation flags, forwarded to the limit/clear helpers.
// align_const - alignment constant used with Align.
// gen_number  - UOH generation to allocate in.
//
// Returns TRUE if a fit was found and set up in acontext, FALSE otherwise.
BOOL gc_heap::a_fit_free_list_uoh_p (size_t size,
                                       alloc_context* acontext,
                                       uint32_t flags,
                                       int align_const,
                                       int gen_number)
{
    BOOL can_fit = FALSE;
    generation* gen = generation_of (gen_number);
    allocator* allocator = generation_allocator (gen);

#ifdef FEATURE_LOH_COMPACTION
    // Only the LOH generation gets a padding object in front of each allocation.
    size_t loh_pad = (gen_number == loh_generation) ? Align (loh_padding_obj_size, align_const) : 0;
#endif //FEATURE_LOH_COMPACTION

#ifdef BACKGROUND_GC
    // Value returned by bgc_alloc_lock->uoh_alloc_set; -1 selects the
    // adjust_limit_clr path below instead of bgc_uoh_alloc_clr.
    int cookie = -1;
#endif //BACKGROUND_GC

    for (unsigned int a_l_idx = allocator->first_suitable_bucket(size); a_l_idx < allocator->number_of_buckets(); a_l_idx++)
    {
        uint8_t* free_list = allocator->alloc_list_head_of (a_l_idx);
        uint8_t* prev_free_item = 0;
        while (free_list != 0)
        {
            dprintf (3, ("considering free list %zx", (size_t)free_list));

            size_t free_list_size = unused_array_size(free_list);

            // Space left over in this item once the request (and padding) is carved out.
            ptrdiff_t diff = free_list_size - size;

#ifdef FEATURE_LOH_COMPACTION
            diff -= loh_pad;
#endif //FEATURE_LOH_COMPACTION

            // must fit exactly or leave formattable space
            if ((diff == 0) || (diff >= (ptrdiff_t)Align (min_obj_size, align_const)))
            {
#ifdef BACKGROUND_GC
#ifdef MULTIPLE_HEAPS
                assert (heap_of (free_list) == this);
#endif // MULTIPLE_HEAPS

                cookie = bgc_alloc_lock->uoh_alloc_set (free_list);
                bgc_track_uoh_alloc();
#endif //BACKGROUND_GC

                allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
                remove_gen_free (gen_number, free_list_size);

                // Subtract min obj size because limit_from_size adds it. Not needed for LOH
                size_t limit = limit_from_size (size - Align(min_obj_size, align_const), flags, free_list_size,
                                                gen_number, align_const);
                dd_new_allocation (dynamic_data_of (gen_number)) -= limit;

                // Remember the full item size; free_list_size is reduced by the pad below.
                size_t saved_free_list_size = free_list_size;
#ifdef FEATURE_LOH_COMPACTION
                if (loh_pad)
                {
                    // Turn the front of the item into the padding object and
                    // start the actual allocation right after it.
                    make_unused_array (free_list, loh_pad);
                    generation_free_obj_space (gen) += loh_pad;
                    limit -= loh_pad;
                    free_list += loh_pad;
                    free_list_size -= loh_pad;
                }
#endif //FEATURE_LOH_COMPACTION

                uint8_t*  remain = (free_list + limit);
                size_t remain_size = (free_list_size - limit);
                if (remain_size != 0)
                {
                    assert (remain_size >= Align (min_obj_size, align_const));
                    make_unused_array (remain, remain_size);
                }
                // A big enough remainder goes back on the free list; otherwise it is
                // only accounted for as free object space.
                if (remain_size >= Align(min_free_list, align_const))
                {
                    uoh_thread_gap_front (remain, remain_size, gen);
                    add_gen_free (gen_number, remain_size);
                    assert (remain_size >= Align (min_obj_size, align_const));
                }
                else
                {
                    generation_free_obj_space (gen) += remain_size;
                }
                generation_free_list_space (gen) -= saved_free_list_size;
                assert ((ptrdiff_t)generation_free_list_space (gen) >= 0);
                generation_free_list_allocated (gen) += limit;

                dprintf (3, ("found fit on loh at %p", free_list));
#ifdef BACKGROUND_GC
                if (cookie != -1)
                {
                    bgc_uoh_alloc_clr (free_list, limit, acontext, flags, gen_number, align_const, cookie, FALSE, 0);
                }
                else
#endif //BACKGROUND_GC
                {
                    adjust_limit_clr (free_list, limit, size, acontext, flags, 0, align_const, gen_number);
                }

                //fix the limit to compensate for adjust_limit_clr making it too short
                acontext->alloc_limit += Align (min_obj_size, align_const);
                can_fit = TRUE;
                goto exit;
            }
            // No fit: move on to the next item in this bucket.
            prev_free_item = free_list;
            free_list = free_list_slot (free_list);
        }
    }
exit:
    return can_fit;
}

// Tries to satisfy an allocation from the space at the end of seg.
//
// The committed part of the segment is tried first; if the request does not fit
// there but fits within the reserved range, the segment is grown with
// grow_heap_segment.
//
// gen_number      - generation being allocated in. For gen0 the heap's
//                   alloc_allocated is used as the allocation pointer, otherwise
//                   heap_segment_allocated (seg).
// seg             - segment to allocate at the end of.
// size            - requested allocation size.
// acontext        - allocation context that receives the memory.
// flags           - allocation flags.
// align_const     - alignment constant used with Align.
// commit_failed_p - [out] reset to FALSE on entry; set to TRUE when growing the
//                   segment failed (without USE_REGIONS, only when the failure
//                   was not flagged via hard_limit_short_seg_end_p).
//
// Returns TRUE if the allocation was set up in acontext, FALSE otherwise.
BOOL gc_heap::a_fit_segment_end_p (int gen_number,
                                   heap_segment* seg,
                                   size_t size,
                                   alloc_context* acontext,
                                   uint32_t flags,
                                   int align_const,
                                   BOOL* commit_failed_p)
{
    *commit_failed_p = FALSE;
    size_t limit = 0;
    bool hard_limit_short_seg_end_p = false;
#ifdef BACKGROUND_GC
    // Value returned by bgc_alloc_lock->uoh_alloc_set; stays -1 for gen0.
    int cookie = -1;
#endif //BACKGROUND_GC

    // `allocated` is a reference: advancing it below moves alloc_allocated (gen0)
    // or the segment's allocated pointer (other generations) directly.
    uint8_t*& allocated = ((gen_number == 0) ?
                                    alloc_allocated :
                                    heap_segment_allocated(seg));

    // Space kept back at the end: one min object, plus the LOH padding object for LOH.
    size_t pad = Align (min_obj_size, align_const);

#ifdef FEATURE_LOH_COMPACTION
    size_t loh_pad = Align (loh_padding_obj_size, align_const);
    if (gen_number == loh_generation)
    {
        pad += loh_pad;
    }
#endif //FEATURE_LOH_COMPACTION

    // First attempt: fit within already committed memory.
    uint8_t* end = heap_segment_committed (seg) - pad;

    if (a_size_fit_p (size, allocated, end, align_const))
    {
        limit = limit_from_size (size,
                                 flags,
                                 (end - allocated),
                                 gen_number, align_const);
        goto found_fit;
    }

    // Second attempt: fit within reserved memory, committing more as needed.
    end = heap_segment_reserved (seg) - pad;

    if ((heap_segment_reserved (seg) != heap_segment_committed (seg)) && (a_size_fit_p (size, allocated, end, align_const)))
    {
        limit = limit_from_size (size,
                                 flags,
                                 (end - allocated),
                                 gen_number, align_const);

        if (grow_heap_segment (seg, (allocated + limit), &hard_limit_short_seg_end_p))
        {
            goto found_fit;
        }

        else
        {
#ifdef USE_REGIONS
            *commit_failed_p = TRUE;
#else
            // A failure flagged through hard_limit_short_seg_end_p is not reported
            // as a commit failure.
            if (!hard_limit_short_seg_end_p)
            {
                dprintf (2, ("can't grow segment, doing a full gc"));
                *commit_failed_p = TRUE;
            }
            else
            {
                assert (heap_hard_limit);
            }
#endif // USE_REGIONS
        }
    }

    goto found_no_fit;

found_fit:
    // Charge the allocation against the generation's budget.
    dd_new_allocation (dynamic_data_of (gen_number)) -= limit;

#ifdef BACKGROUND_GC
    if (gen_number != 0)
    {
#ifdef MULTIPLE_HEAPS
        assert (heap_of (allocated) == this);
#endif // MULTIPLE_HEAPS

        cookie = bgc_alloc_lock->uoh_alloc_set (allocated);
        bgc_track_uoh_alloc();
    }
#endif //BACKGROUND_GC

#ifdef FEATURE_LOH_COMPACTION
    if (gen_number == loh_generation)
    {
        // Place the padding object in front of the LOH allocation.
        make_unused_array (allocated, loh_pad);
        generation_free_obj_space (generation_of (gen_number)) += loh_pad;
        allocated += loh_pad;
        limit -= loh_pad;
    }
#endif //FEATURE_LOH_COMPACTION

#if defined (VERIFY_HEAP) && defined (_DEBUG)
    // we are responsible for cleaning the syncblock and we will do it later
    // as a part of cleanup routine and when not holding the heap lock.
    // However, once we move "allocated" forward and if another thread initiate verification of
    // the previous object, it may consider the syncblock in the "next" eligible for validation.
    // (see also: object.cpp/Object::ValidateInner)
    // Make sure it will see cleaned up state to prevent triggering occasional verification failures.
    // And make sure the write happens before updating "allocated"
    ((void**)allocated)[-1] = 0;    // clear the sync block
    VOLATILE_MEMORY_BARRIER();
#endif //VERIFY_HEAP && _DEBUG

    // Start of the memory being handed out; `allocated` is advanced past it below.
    uint8_t* old_alloc;
    old_alloc = allocated;
    dprintf (3, ("found fit at end of seg: %p", old_alloc));

#ifdef BACKGROUND_GC
    if (cookie != -1)
    {
        bgc_record_uoh_end_seg_allocation (gen_number, limit);
        allocated += limit;
        bgc_uoh_alloc_clr (old_alloc, limit, acontext, flags, gen_number, align_const, cookie, TRUE, seg);
    }
    else
#endif //BACKGROUND_GC
    {
        // In a contiguous AC case with GC_ALLOC_ZEROING_OPTIONAL, deduct unspent space from the limit to
        // clear only what is necessary.
        if ((flags & GC_ALLOC_ZEROING_OPTIONAL) &&
            ((allocated == acontext->alloc_limit) ||
             (allocated == (acontext->alloc_limit + Align (min_obj_size, align_const)))))
        {
            assert(gen_number == 0);
            assert(allocated > acontext->alloc_ptr);

            // Bytes between the context's alloc_ptr and the current allocation pointer.
            size_t extra = allocated - acontext->alloc_ptr;
            limit -= extra;

            // Since we are not consuming all the memory we already deducted from the budget,
            // we should put the extra back.
            dynamic_data* dd = dynamic_data_of (0);
            dd_new_allocation (dd) += extra;

            // add space for an AC continuity divider
            limit += Align(min_obj_size, align_const);
        }

#ifdef BACKGROUND_GC
        bgc_record_uoh_end_seg_allocation (gen_number, limit);
#endif

        allocated += limit;
        adjust_limit_clr (old_alloc, limit, size, acontext, flags, seg, align_const, gen_number);
    }

    return TRUE;

found_no_fit:

    return FALSE;
}

// Tries to fit a UOH allocation at the end of one of the generation's segments,
// walking them from generation_allocation_segment via heap_segment_next_rw.
//
// gen_number      - UOH generation to allocate in.
// size            - requested allocation size.
// acontext        - allocation context that receives the memory.
// flags           - allocation flags.
// align_const     - alignment constant used with Align.
// commit_failed_p - [out] reset to FALSE on entry; set by a_fit_segment_end_p.
// oom_r           - [out] set to oom_cant_commit when a commit failure ends the
//                   search; otherwise left untouched.
//
// Returns TRUE if the allocation was set up in acontext, FALSE otherwise.
BOOL gc_heap::uoh_a_fit_segment_end_p (int gen_number,
                                       size_t size,
                                       alloc_context* acontext,
                                       uint32_t flags,
                                       int align_const,
                                       BOOL* commit_failed_p,
                                       oom_reason* oom_r)
{
    *commit_failed_p = FALSE;

    generation* gen = generation_of (gen_number);
    const size_t min_obj_pad = Align (min_obj_size, align_const);

    for (heap_segment* seg = generation_allocation_segment (gen);
         seg;
         seg = heap_segment_next_rw (seg))
    {
#ifdef BACKGROUND_GC
        if (seg->flags & heap_segment_flags_uoh_delete)
        {
            dprintf (3, ("h%d skipping seg %zx to be deleted", heap_number, (size_t)seg));
            continue;
        }
#endif //BACKGROUND_GC

        // a_fit_segment_end_p is asked for min_obj_pad less than `size`; the
        // same amount is added back to the context's limit on success.
        if (a_fit_segment_end_p (gen_number, seg, (size - min_obj_pad),
                                 acontext, flags, align_const, commit_failed_p))
        {
            acontext->alloc_limit += min_obj_pad;
            generation_end_seg_allocated (gen) += size;
            return TRUE;
        }

        // A commit failure stops the search; the remaining segments are not tried.
        if (*commit_failed_p)
        {
            *oom_r = oom_cant_commit;
            return FALSE;
        }
    }

    return FALSE;
}

#ifdef BACKGROUND_GC
// Releases the relevant more-space lock, waits for the background GC through
// background_gc_wait and then re-enters the lock.
//
// awr   - wait reason handed to background_gc_wait.
// loh_p - true to operate on more_space_lock_uoh, false for more_space_lock_soh.
//
// Returns the status reported by enter_spin_lock_msl when re-entering the lock.
inline
enter_msl_status gc_heap::wait_for_background (alloc_wait_reason awr, bool loh_p)
{
    GCSpinLock* msl = &more_space_lock_soh;
    if (loh_p)
    {
        msl = &more_space_lock_uoh;
    }

    dprintf (2, ("BGC is already in progress, waiting for it to finish"));

    // Drop the lock for the duration of the wait.
    add_saved_spinlock_info (loh_p, me_release, mt_wait_bgc, msl_entered);
    leave_spin_lock (msl);

    background_gc_wait (awr);

    enter_msl_status reacquire_status = enter_spin_lock_msl (msl);
    add_saved_spinlock_info (loh_p, me_acquire, mt_wait_bgc, reacquire_status);

    return reacquire_status;
}

// If a background GC is running and the memory load is at or above
// m_high_memory_load_th, waits for the background GC via wait_for_background.
//
// awr        - wait reason forwarded to wait_for_background.
// loh_p      - forwarded to wait_for_background to select the more-space lock.
// msl_status - [out] receives wait_for_background's result; only written when a
//              wait actually took place.
//
// Returns true if we waited, false otherwise.
bool gc_heap::wait_for_bgc_high_memory (alloc_wait_reason awr, bool loh_p, enter_msl_status* msl_status)
{
    if (!gc_heap::background_running_p())
    {
        return false;
    }

    uint32_t memory_load;
    get_memory_info (&memory_load);
    if (memory_load < m_high_memory_load_th)
    {
        return false;
    }

    dprintf (GTC_LOG, ("high mem - wait for BGC to finish, wait reason: %d", awr));
    *msl_status = wait_for_background (awr, loh_p);
    return true;
}

#endif //BACKGROUND_GC

// Requests a GC of generation max_generation - 1. The GC that actually runs may
// turn out to be a full compacting GC; this is detected by comparing the full
// compacting GC count before and after.
//
// gr         - reason passed to GarbageCollectGeneration.
// msl_status - [in/out] updated by the lock operations performed here. When it
//              becomes msl_retry_different_heap the function returns FALSE
//              right away.
//
// Returns TRUE if a full compacting GC happened, FALSE otherwise.
BOOL gc_heap::trigger_ephemeral_gc (gc_reason gr, enter_msl_status* msl_status)
{
#ifdef BACKGROUND_GC
    wait_for_bgc_high_memory (awr_loh_oos_bgc, false, msl_status);
    if (*msl_status == msl_retry_different_heap)
    {
        return FALSE;
    }
#endif //BACKGROUND_GC

    dprintf (1, ("h%d triggering a gen1 GC", heap_number));
    const size_t full_compact_count_before = get_full_compact_gc_count();
    vm_heap->GarbageCollectGeneration(max_generation - 1, gr);

#ifdef MULTIPLE_HEAPS
    // Re-enter the SOH more-space lock after the GC.
    *msl_status = enter_spin_lock_msl (&more_space_lock_soh);
    if (*msl_status == msl_retry_different_heap)
    {
        return FALSE;
    }
    add_saved_spinlock_info (false, me_acquire, mt_t_eph_gc, *msl_status);
#endif //MULTIPLE_HEAPS

    if (get_full_compact_gc_count() <= full_compact_count_before)
    {
        return FALSE;
    }

    dprintf (2, ("attempted to trigger an ephemeral GC and got a full compacting GC"));
    return TRUE;
}

// Tries to satisfy an SOH allocation from existing space: first from the
// generation's free list, then from the end of the ephemeral segment. With
// USE_REGIONS the end-of-segment attempt moves on to the next region (getting a
// new one if at the tail) until the allocation fits or no region is available.
//
// gen_number      - generation to allocate in.
// size            - requested allocation size.
// acontext        - allocation context that receives the memory.
// flags           - allocation flags.
// align_const     - alignment constant used with Align.
// commit_failed_p - [out] set by a_fit_segment_end_p, or to TRUE here when a new
//                   region could not be obtained. Not written if the free list
//                   satisfies the request or the end-of-segment attempt is skipped.
// short_seg_end_p - [out, optional] when non-NULL, receives whether we are short
//                   on end-of-segment space; in that case the end of the segment
//                   is only tried when we are not short. Pass NULL to always try.
//
// Returns TRUE if the allocation was set up in acontext, FALSE otherwise.
BOOL gc_heap::soh_try_fit (int gen_number,
                           size_t size,
                           alloc_context* acontext,
                           uint32_t flags,
                           int align_const,
                           BOOL* commit_failed_p,
                           BOOL* short_seg_end_p)
{
    BOOL can_allocate = TRUE;
    if (short_seg_end_p)
    {
        *short_seg_end_p = FALSE;
    }

    // Free list first.
    can_allocate = a_fit_free_list_p (gen_number, size, acontext, flags, align_const);
    if (!can_allocate)
    {
        if (short_seg_end_p)
        {
            *short_seg_end_p = short_on_end_of_seg (ephemeral_heap_segment);
        }
        // If the caller doesn't care, we always try to fit at the end of seg;
        // otherwise we would only try if we are actually not short at end of seg.
        if (!short_seg_end_p || !(*short_seg_end_p))
        {
            // Without USE_REGIONS the block below runs exactly once; with
            // USE_REGIONS it is the body of the while loop.
#ifdef USE_REGIONS
            while (ephemeral_heap_segment)
#endif //USE_REGIONS
            {
                can_allocate = a_fit_segment_end_p (gen_number, ephemeral_heap_segment, size,
                                                    acontext, flags, align_const, commit_failed_p);
#ifdef USE_REGIONS
                if (can_allocate)
                {
                    break;
                }

                dprintf (REGIONS_LOG, ("h%d fixing region %p end to alloc ptr: %p, alloc_allocated %p",
                    heap_number, heap_segment_mem (ephemeral_heap_segment), acontext->alloc_ptr,
                    alloc_allocated));

                // The current region could not fit the request: fix up the
                // allocation context and the youngest allocation area before
                // switching regions.
                fix_allocation_context (acontext, TRUE, FALSE);
                fix_youngest_allocation_area();

                heap_segment* next_seg = heap_segment_next (ephemeral_heap_segment);
                bool new_seg = false;

                // At the tail of the generation: ask for a brand new region.
                if (!next_seg)
                {
                    assert (ephemeral_heap_segment == generation_tail_region (generation_of (gen_number)));
                    next_seg = get_new_region (gen_number);
                    new_seg = true;
                }

                if (next_seg)
                {
                    dprintf (REGIONS_LOG, ("eph seg %p -> next %p",
                        heap_segment_mem (ephemeral_heap_segment), heap_segment_mem (next_seg)));
                    ephemeral_heap_segment = next_seg;
                    // Only newly acquired regions are reported to the EE.
                    if (new_seg)
                    {
                        GCToEEInterface::DiagAddNewRegion(
                            heap_segment_gen_num (next_seg),
                            heap_segment_mem (next_seg),
                            heap_segment_allocated (next_seg),
                            heap_segment_reserved (next_seg)
                        );
                    }
                }
                else
                {
                    // No region available; this is reported as a commit failure.
                    *commit_failed_p = TRUE;
                    dprintf (REGIONS_LOG, ("couldn't get a new ephemeral region"));
                    return FALSE;
                }

                // Continue allocating from the start of the new ephemeral region.
                alloc_allocated = heap_segment_allocated (ephemeral_heap_segment);
                dprintf (REGIONS_LOG, ("h%d alloc_allocated is now %p", heap_number, alloc_allocated));
#endif //USE_REGIONS
            }
        }
    }

    return can_allocate;
}

// Slow-path allocation driver for the small object heap, written as a state
// machine over allocation_state.
//
// Starting from a_state_start, each loop iteration runs one state, which either
// tries to fit the request (soh_try_fit), waits for a background GC, or triggers
// a GC, and then picks the next state. The loop ends when the state becomes
// a_state_can_allocate or a_state_cant_allocate.
//
// Returns:
//   a_state_can_allocate   - soh_try_fit reported success.
//   a_state_cant_allocate  - every avenue failed; handle_oom has been called and
//                            more_space_lock_soh has been released here.
//   a_state_retry_allocate - returned directly (bypassing the exit label)
//                            whenever a lock re-acquisition reports
//                            msl_retry_different_heap.
//
// NOTE(review): the code releases more_space_lock_soh without acquiring it
// first, so it presumably expects to be entered with that lock held - confirm
// against the caller. On a_state_can_allocate the lock is not released in this
// function.
allocation_state gc_heap::allocate_soh (int gen_number,
                                          size_t size,
                                          alloc_context* acontext,
                                          uint32_t flags,
                                          int align_const)
{
    enter_msl_status msl_status = msl_entered;

#if defined (BACKGROUND_GC) && !defined (MULTIPLE_HEAPS)
    // While a background GC is running, every bgc_alloc_spin_count-th SOH
    // allocation drops the lock and sleeps for bgc_alloc_spin before taking the
    // lock again.
    if (gc_heap::background_running_p())
    {
        background_soh_alloc_count++;
        if ((background_soh_alloc_count % bgc_alloc_spin_count) == 0)
        {
            add_saved_spinlock_info (false, me_release, mt_alloc_small, msl_status);
            leave_spin_lock (&more_space_lock_soh);
            // The sleep is bracketed by enable_preemptive/disable_preemptive;
            // the value returned by the former is handed back to the latter.
            bool cooperative_mode = enable_preemptive();
            GCToOSInterface::Sleep (bgc_alloc_spin);
            disable_preemptive (cooperative_mode);

            msl_status = enter_spin_lock_msl (&more_space_lock_soh);
            if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;

            add_saved_spinlock_info (false, me_acquire, mt_alloc_small, msl_status);
        }
        else
        {
            //GCToOSInterface::YieldThread (0);
        }
    }
#endif //BACKGROUND_GC && !MULTIPLE_HEAPS

    gc_reason gr = reason_oos_soh;
    oom_reason oom_r = oom_no_failure;

    // No variable values should be "carried over" from one state to the other.
    // That's why there are local variables for each state.
    // (Only soh_alloc_state, oom_r and msl_status live across iterations.)

    allocation_state soh_alloc_state = a_state_start;

    // If we can get a new seg it means allocation will succeed.
    while (1)
    {
        dprintf (3, ("[h%d]soh state is %s", heap_number, allocation_state_str[soh_alloc_state]));

        switch (soh_alloc_state)
        {
            // Terminal states: leave the loop and finish at the exit label.
            case a_state_can_allocate:
            case a_state_cant_allocate:
            {
                goto exit;
            }
            case a_state_start:
            {
                soh_alloc_state = a_state_try_fit;
                break;
            }
            // First attempt, before any GC has been triggered from here.
            // A commit failure goes straight to a full compacting GC; any other
            // failure tries an ephemeral GC first.
            case a_state_try_fit:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;

                can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags,
                                                  align_const, &commit_failed_p,
                                                  NULL);
                soh_alloc_state = (can_use_existing_p ?
                                        a_state_can_allocate :
                                        (commit_failed_p ?
                                            a_state_trigger_full_compact_gc :
                                            a_state_trigger_ephemeral_gc));
                break;
            }
            // Retry after having waited for a background GC. Only
            // short_seg_end_p is consulted on failure: if set, a second
            // ephemeral GC is tried, otherwise a full compacting GC.
            case a_state_try_fit_after_bgc:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;
                BOOL short_seg_end_p = FALSE;

                can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags,
                                                  align_const, &commit_failed_p,
                                                  &short_seg_end_p);
                soh_alloc_state = (can_use_existing_p ?
                                        a_state_can_allocate :
                                        (short_seg_end_p ?
                                            a_state_trigger_2nd_ephemeral_gc :
                                            a_state_trigger_full_compact_gc));
                break;
            }
            // Retry after a full compacting GC. This is the last resort: a
            // failure here ends in a_state_cant_allocate (except for the
            // MULTIPLE_HEAPS case below) and is where oom_r gets its
            // oom_budget / oom_cant_commit value.
            case a_state_try_fit_after_cg:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;
                BOOL short_seg_end_p = FALSE;

                can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags,
                                                  align_const, &commit_failed_p,
                                                  &short_seg_end_p);

                if (can_use_existing_p)
                {
                    soh_alloc_state = a_state_can_allocate;
                }
#ifdef MULTIPLE_HEAPS
                else if (gen0_allocated_after_gc_p)
                {
                    // some other threads already grabbed the more space lock and allocated
                    // so we should attempt an ephemeral GC again.
                    soh_alloc_state = a_state_trigger_ephemeral_gc;
                }
#endif //MULTIPLE_HEAPS
                else if (short_seg_end_p)
                {
                    soh_alloc_state = a_state_cant_allocate;
                    oom_r = oom_budget;
                }
                else
                {
                    assert (commit_failed_p || heap_hard_limit);
                    soh_alloc_state = a_state_cant_allocate;
                    oom_r = oom_cant_commit;
                }
                break;
            }
            // Wait for a running background GC (if any). The next state depends
            // only on whether a full compacting GC happened during the wait;
            // bgc_in_progress_p is not consulted here.
            case a_state_check_and_wait_for_bgc:
            {
                BOOL bgc_in_progress_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                bgc_in_progress_p = check_and_wait_for_bgc (awr_gen0_oos_bgc, &did_full_compacting_gc, false, &msl_status);
                if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;

                soh_alloc_state = (did_full_compacting_gc ?
                                        a_state_try_fit_after_cg :
                                        a_state_try_fit_after_bgc);
                break;
            }
            // Trigger an ephemeral GC and, unless it turned out to be a full
            // compacting GC, retry the fit right here.
            case a_state_trigger_ephemeral_gc:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;
                BOOL short_seg_end_p = FALSE;
                BOOL bgc_in_progress_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                did_full_compacting_gc = trigger_ephemeral_gc (gr, &msl_status);
                if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;

                if (did_full_compacting_gc)
                {
                    soh_alloc_state = a_state_try_fit_after_cg;
                }
                else
                {
                    can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags,
                                                      align_const, &commit_failed_p,
                                                      &short_seg_end_p);
#ifdef BACKGROUND_GC
                    // Sampled after the fit attempt; used below to decide
                    // between waiting for the BGC and a full compacting GC.
                    bgc_in_progress_p = gc_heap::background_running_p();
#endif //BACKGROUND_GC

                    if (can_use_existing_p)
                    {
                        soh_alloc_state = a_state_can_allocate;
                    }
                    else
                    {
                        if (short_seg_end_p)
                        {
#ifndef USE_REGIONS
                            if (should_expand_in_full_gc)
                            {
                                dprintf (2, ("gen1 GC wanted to expand!"));
                                soh_alloc_state = a_state_trigger_full_compact_gc;
                            }
                            else
#endif //!USE_REGIONS
                            {
                                soh_alloc_state = (bgc_in_progress_p ?
                                                        a_state_check_and_wait_for_bgc :
                                                        a_state_trigger_full_compact_gc);
                            }
                        }
                        else if (commit_failed_p)
                        {
                            soh_alloc_state = a_state_trigger_full_compact_gc;
                        }
                        else
                        {
#ifdef MULTIPLE_HEAPS
                            // some other threads already grabbed the more space lock and allocated
                            // so we should attempt an ephemeral GC again.
                            assert (gen0_allocated_after_gc_p);
                            soh_alloc_state = a_state_trigger_ephemeral_gc;
#else //MULTIPLE_HEAPS
                            // NOTE(review): soh_alloc_state is left unchanged on
                            // this path, so with asserts compiled out the loop
                            // would re-enter this same state - confirm this is
                            // truly unreachable.
                            assert (!"shouldn't get here");
#endif //MULTIPLE_HEAPS
                        }
                    }
                }
                break;
            }
            // Second ephemeral GC, reached only from a_state_try_fit_after_bgc.
            // Any fit failure afterwards escalates to a full compacting GC.
            case a_state_trigger_2nd_ephemeral_gc:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;
                BOOL short_seg_end_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                did_full_compacting_gc = trigger_ephemeral_gc (gr, &msl_status);
                if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;

                if (did_full_compacting_gc)
                {
                    soh_alloc_state = a_state_try_fit_after_cg;
                }
                else
                {
                    can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags,
                                                      align_const, &commit_failed_p,
                                                      &short_seg_end_p);
                    if (short_seg_end_p || commit_failed_p)
                    {
                        soh_alloc_state = a_state_trigger_full_compact_gc;
                    }
                    else
                    {
                        assert (can_use_existing_p);
                        soh_alloc_state = a_state_can_allocate;
                    }
                }
                break;
            }
            // Request a full compacting GC. If one did not happen,
            // trigger_full_compact_gc is the call that receives &oom_r and the
            // allocation fails.
            case a_state_trigger_full_compact_gc:
            {
                // Sent only when fgn_maxgen_percent is non-zero.
                if (fgn_maxgen_percent)
                {
                    dprintf (2, ("FGN: SOH doing last GC before we throw OOM"));
                    send_full_gc_notification (max_generation, FALSE);
                }

                BOOL got_full_compacting_gc = FALSE;

                got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r, false, &msl_status);
                if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;

                soh_alloc_state = (got_full_compacting_gc ? a_state_try_fit_after_cg : a_state_cant_allocate);
                break;
            }
            default:
            {
                assert (!"Invalid state!");
                break;
            }
        }
    }

exit:
    // Only the two terminal states reach this label. On failure, report the OOM
    // using the ephemeral segment's allocated/reserved pointers and release the
    // SOH more space lock.
    if (soh_alloc_state == a_state_cant_allocate)
    {
        assert (oom_r != oom_no_failure);
        handle_oom (oom_r,
                    size,
                    heap_segment_allocated (ephemeral_heap_segment),
                    heap_segment_reserved (ephemeral_heap_segment));

        add_saved_spinlock_info (false, me_release, mt_alloc_small_cant, msl_entered);
        leave_spin_lock (&more_space_lock_soh);
    }

    assert ((soh_alloc_state == a_state_can_allocate) ||
            (soh_alloc_state == a_state_cant_allocate) ||
            (soh_alloc_state == a_state_retry_allocate));

    return soh_alloc_state;
}

#ifdef BACKGROUND_GC
// Counts the calling thread in uoh_alloc_thread_count, but only while the
// concurrent GC state is c_gc_state_planning; otherwise does nothing.
inline
void gc_heap::bgc_track_uoh_alloc()
{
    const bool bgc_planning_p = (current_c_gc_state == c_gc_state_planning);
    if (!bgc_planning_p)
    {
        return;
    }

    Interlocked::Increment (&uoh_alloc_thread_count);
    dprintf (3, ("h%d: inc lc: %d", heap_number, (int32_t)uoh_alloc_thread_count));
}

// Counterpart of bgc_track_uoh_alloc: decrements uoh_alloc_thread_count, but
// only while the concurrent GC state is c_gc_state_planning.
inline
void gc_heap::bgc_untrack_uoh_alloc()
{
    const bool bgc_planning_p = (current_c_gc_state == c_gc_state_planning);
    if (!bgc_planning_p)
    {
        return;
    }

    Interlocked::Decrement (&uoh_alloc_thread_count);
    dprintf (3, ("h%d: dec lc: %d", heap_number, (int32_t)uoh_alloc_thread_count));
}
#endif //BACKGROUND_GC

// Computes the size of the UOH segment/region to request for an allocation of
// `size` bytes.
//
// The request is padded with two aligned minimum-size objects, one OS page and
// one alignment unit, rounded down to a multiple of the alignment unit, and
// then clamped from below by the default segment size. The result is passed
// through align_on_page.
size_t gc_heap::get_uoh_seg_size (size_t size)
{
    // The default size doubles as the alignment unit.
#ifdef USE_REGIONS
    size_t default_seg_size = global_region_allocator.get_large_region_alignment();
#else
    size_t default_seg_size = min_uoh_segment_size;
#endif //USE_REGIONS

    size_t align_size = default_seg_size;
    int align_const = get_alignment_constant (FALSE);

    size_t padded_size = size + 2 * Align(min_obj_size, align_const) + OS_PAGE_SIZE + align_size;
    size_t rounded_size = padded_size / align_size * align_size;

    return align_on_page (max (default_seg_size, rounded_size));
}

// Tries to obtain a new UOH segment large enough for an allocation of `size`
// bytes in generation `gen_number`.
//
// Parameters:
//   gen_number          - UOH generation the segment is for.
//   size                - size of the allocation that needs the segment.
//   did_full_compact_gc - [out] reset to FALSE, then passed on to
//                         get_uoh_segment which may set it.
//   oom_r               - [out] set to oom_loh only when no segment was
//                         obtained; untouched on success.
//   msl_status          - [out] passed on to get_uoh_segment; if it comes back
//                         as msl_retry_different_heap this returns FALSE
//                         immediately without touching *oom_r.
//
// Returns TRUE iff a segment was obtained. For loh_generation a successful
// call also adds the segment size to loh_alloc_since_cg.
BOOL gc_heap::uoh_get_new_seg (int gen_number,
                               size_t size,
                               BOOL* did_full_compact_gc,
                               oom_reason* oom_r,
                               enter_msl_status* msl_status)
{
    *did_full_compact_gc = FALSE;

    size_t seg_size = get_uoh_seg_size (size);

    heap_segment* new_seg = get_uoh_segment (gen_number, seg_size, did_full_compact_gc, msl_status);
    if (*msl_status == msl_retry_different_heap) return FALSE;

    if (!new_seg)
    {
        // Only a real failure records an OOM reason. Previously this was the
        // `else` of the combined (new_seg && LOH) test, so a successful
        // acquisition for a non-LOH generation also wrote oom_loh.
        *oom_r = oom_loh;
    }
    else if (gen_number == loh_generation)
    {
        loh_alloc_since_cg += seg_size;
    }

    return (new_seg != 0);
}

// Decides whether another full compacting GC is worth requesting for an
// allocation of `size` bytes: TRUE once loh_alloc_since_cg (on this heap, or
// summed over all heaps with MULTIPLE_HEAPS) has reached twice the segment
// size that get_uoh_seg_size computes for the request.
//
// PERF TODO: this is too aggressive; and in hard limit we should
// count the actual allocated bytes instead of only updating it during
// getting a new seg.
BOOL gc_heap::retry_full_compact_gc (size_t size)
{
    const uint64_t retry_threshold = 2 * (uint64_t)get_uoh_seg_size (size);

    BOOL retry_p = FALSE;

    if (loh_alloc_since_cg >= retry_threshold)
    {
        retry_p = TRUE;
    }
#ifdef MULTIPLE_HEAPS
    else
    {
        // This heap alone is below the threshold; look at all heaps together.
        uint64_t alloc_on_all_heaps = 0;
        for (int hn = 0; hn < n_heaps; hn++)
        {
            alloc_on_all_heaps += g_heaps[hn]->loh_alloc_since_cg;
        }

        if (alloc_on_all_heaps >= retry_threshold)
        {
            retry_p = TRUE;
        }
    }
#endif //MULTIPLE_HEAPS

    return retry_p;
}

// If a background GC is running, waits for it via wait_for_background and
// reports whether the full compacting GC count went up across the wait.
//
// Parameters:
//   awr                 - wait reason forwarded to wait_for_background.
//   did_full_compact_gc - [out] TRUE iff get_full_compact_gc_count() increased
//                         while waiting; FALSE otherwise.
//   loh_p               - forwarded to wait_for_background.
//   msl_status          - [out] receives wait_for_background's result; left
//                         untouched when no background GC was running.
//
// Returns TRUE iff a background GC was running on entry.
BOOL gc_heap::check_and_wait_for_bgc (alloc_wait_reason awr,
                                      BOOL* did_full_compact_gc,
                                      bool loh_p,
                                      enter_msl_status* msl_status)
{
    *did_full_compact_gc = FALSE;

#ifdef BACKGROUND_GC
    if (!gc_heap::background_running_p())
    {
        return FALSE;
    }

    const size_t count_before_wait = get_full_compact_gc_count();
    *msl_status = wait_for_background (awr, loh_p);
    const size_t count_after_wait = get_full_compact_gc_count();

    if (count_after_wait > count_before_wait)
    {
        *did_full_compact_gc = TRUE;
    }

    return TRUE;
#else //BACKGROUND_GC
    return FALSE;
#endif //BACKGROUND_GC
}

// Tries to satisfy a UOH allocation from existing space: first the free list
// (a_fit_free_list_uoh_p), and only if that fails the end of a segment
// (uoh_a_fit_segment_end_p).
//
// commit_failed_p and oom_r are passed to the segment-end attempt only; they
// are not touched when the free list succeeds.
//
// Returns TRUE iff either attempt succeeded.
BOOL gc_heap::uoh_try_fit (int gen_number,
                           size_t size,
                           alloc_context* acontext,
                           uint32_t flags,
                           int align_const,
                           BOOL* commit_failed_p,
                           oom_reason* oom_r)
{
    if (a_fit_free_list_uoh_p (size, acontext, flags, align_const, gen_number))
    {
        return TRUE;
    }

    return uoh_a_fit_segment_end_p (gen_number, size,
                                    acontext, flags, align_const,
                                    commit_failed_p, oom_r);
}

// Makes sure a full compacting GC happens on behalf of a failing allocation.
//
// Sets last_gc_before_oom, waits for any running background GC, and - unless a
// full compacting GC already happened during that wait - triggers a
// max_generation GC through trigger_gc_for_alloc.
//
// Parameters:
//   gr         - GC reason; also selects the wait reason for the BGC wait.
//   oom_r      - [out] set to oom_unproductive_full_gc when the requested GC
//                did not raise the full compacting GC count.
//   loh_p      - selects more_space_lock_uoh (true) or more_space_lock_soh.
//   msl_status - [out] receives the lock status from the BGC wait and from
//                trigger_gc_for_alloc.
//
// Returns TRUE iff the full compacting GC count increased since entry. Returns
// FALSE right away if the BGC wait reports msl_retry_different_heap.
BOOL gc_heap::trigger_full_compact_gc (gc_reason gr,
                                       oom_reason* oom_r,
                                       bool loh_p,
                                       enter_msl_status* msl_status)
{
    const size_t count_on_entry = get_full_compact_gc_count();

    // Set this so the next GC will be a full compacting GC.
    if (!last_gc_before_oom)
    {
        last_gc_before_oom = TRUE;
    }

#ifdef BACKGROUND_GC
    if (gc_heap::background_running_p())
    {
        alloc_wait_reason bgc_wait_reason = ((gr == reason_oos_soh) ? awr_gen0_oos_bgc : awr_loh_oos_bgc);
        *msl_status = wait_for_background (bgc_wait_reason, loh_p);
        dprintf (2, ("waited for BGC - done"));
        if (*msl_status == msl_retry_different_heap) return FALSE;
    }
#endif //BACKGROUND_GC

    size_t count_now = get_full_compact_gc_count();
    if (count_now > count_on_entry)
    {
        // Somebody else's full compacting GC ran while we were waiting.
        dprintf (3, ("a full compacting GC triggered while waiting for BGC (%zd->%zd)", count_on_entry, count_now));
        assert (count_now > count_on_entry);
        return TRUE;
    }

    dprintf (3, ("h%d full GC", heap_number));

    GCSpinLock* lock_for_gc = (loh_p ? &more_space_lock_uoh : &more_space_lock_soh);
    *msl_status = trigger_gc_for_alloc (max_generation, gr, lock_for_gc, loh_p, mt_t_full_gc);

    count_now = get_full_compact_gc_count();
    if (count_now != count_on_entry)
    {
        dprintf (3, ("h%d: T full compacting GC (%zd->%zd)",
            heap_number,
            count_on_entry,
            count_now));

        assert (count_now > count_on_entry);
        return TRUE;
    }

    dprintf (2, ("attempted to trigger a full compacting GC but didn't get it"));
    // We requested a full GC but didn't get one because of the elevation logic,
    // which means we should fail.
    *oom_r = oom_unproductive_full_gc;
    return FALSE;
}

#ifdef RECORD_LOH_STATE
// Appends one entry (state, current GC index, thread id) to the circular
// last_loh_states history, wrapping loh_state_index at max_saved_loh_states.
void gc_heap::add_saved_loh_state (allocation_state loh_state_to_save, EEThreadId thread_id)
{
    // When the state is can_allocate we already have released the more
    // space lock, and this code is not thread safe, so that state is
    // deliberately not logged.
    if (loh_state_to_save == a_state_can_allocate)
    {
        return;
    }

    last_loh_states[loh_state_index].alloc_state = loh_state_to_save;
    last_loh_states[loh_state_index].gc_index = VolatileLoadWithoutBarrier (&settings.gc_index);
    last_loh_states[loh_state_index].thread_id = thread_id;

    // Advance the cursor, wrapping to the start of the buffer.
    if (++loh_state_index == max_saved_loh_states)
    {
        loh_state_index = 0;
    }

    assert (loh_state_index < max_saved_loh_states);
}
#endif //RECORD_LOH_STATE

// Tells whether a failed allocation of `size` bytes is worth retrying on a
// different heap.
//
// Always false unless built with MULTIPLE_HEAPS and heap_hard_limit is set. In
// that case the answer is whether current_total_committed + size stays below
// heap_hard_limit minus a slack of max (commit_min_th, heap 0's dd_min_size
// for gen_number).
bool gc_heap::should_retry_other_heap (int gen_number, size_t size)
{
#ifdef MULTIPLE_HEAPS
    if (heap_hard_limit)
    {
        size_t gen_min_size = dd_min_size (g_heaps[0]->dynamic_data_of (gen_number));
        size_t slack_space = max (commit_min_th, gen_min_size);
        bool room_left_p = ((current_total_committed + size) < (heap_hard_limit - slack_space));
        dprintf (1, ("%zd - %zd - total committed %zd - size %zd = %zd, %s",
            heap_hard_limit, slack_space, current_total_committed, size,
            (heap_hard_limit - slack_space - current_total_committed - size),
            (room_left_p ? "retry" : "no retry")));
        return room_left_p;
    }
#endif //MULTIPLE_HEAPS

    return false;
}

#ifdef BACKGROUND_GC
// Picks how a UOH allocation in `gen_number` should proceed (normally, after a
// yield, or after waiting), based on how much the generation has grown
// relative to its recorded bgc_begin_uoh_size.
uoh_allocation_action gc_heap::get_bgc_allocate_action (int gen_number)
{
    const int idx = gen_number - uoh_start_generation;

    // We always allocate normally if the total size is small enough.
    const size_t small_size_limit = dd_min_size (dynamic_data_of (gen_number)) * 10;
    if (bgc_uoh_current_size[idx] < small_size_limit)
    {
        return uoh_alloc_normal;
    }

#ifndef USE_REGIONS
    // Legacy behavior for segments, whose sizes are usually very stable. With
    // regions a bunch of them could have been released into the free pool
    // during the last gen2 GC, so comparing against the last UOH size doesn't
    // make sense there.
    if (bgc_begin_uoh_size[idx] >= (2 * end_uoh_size[idx]))
    {
        dprintf (3, ("h%d alloc-ed too much before bgc started, last end %Id, this start %Id, wait",
            heap_number, end_uoh_size[idx], bgc_begin_uoh_size[idx]));
        return uoh_alloc_wait;
    }
#endif //USE_REGIONS

    // Growth relative to the begin size.
    size_t growth = bgc_uoh_current_size[idx] - bgc_begin_uoh_size[idx];
    float growth_ratio = (float)growth / (float)bgc_begin_uoh_size[idx];

    if (growth_ratio < bgc_uoh_inc_ratio_alloc_normal)
    {
        return uoh_alloc_normal;
    }

    if (growth_ratio > bgc_uoh_inc_ratio_alloc_wait)
    {
        return uoh_alloc_wait;
    }

    return uoh_alloc_yield;
}

// Adds `size` to one of three per-UOH-generation counters, chosen by the
// background GC state: uoh_a_no_bgc when no BGC is running, uoh_a_bgc_planning
// while the BGC is in c_gc_state_planning, uoh_a_bgc_marking otherwise.
void gc_heap::bgc_record_uoh_allocation(int gen_number, size_t size)
{
    assert((gen_number >= uoh_start_generation) && (gen_number < total_generation_count));

    const int idx = gen_number - uoh_start_generation;

    if (!gc_heap::background_running_p())
    {
        uoh_a_no_bgc[idx] += size;
        return;
    }

    if (current_c_gc_state == c_gc_state_planning)
    {
        uoh_a_bgc_planning[idx] += size;
    }
    else
    {
        uoh_a_bgc_marking[idx] += size;
    }
}

// While a background GC is running, adds `size` to bgc_uoh_current_size for the
// given UOH generation. Does nothing for generations below
// uoh_start_generation or when no background GC is running.
void gc_heap::bgc_record_uoh_end_seg_allocation (int gen_number, size_t size)
{
    if (gen_number < uoh_start_generation)
    {
        return;
    }

    if (!gc_heap::background_running_p())
    {
        return;
    }

    const int idx = gen_number - uoh_start_generation;
    bgc_uoh_current_size[idx] += size;

#ifdef SIMPLE_DPRINTF
    dynamic_data* dd_gen = dynamic_data_of (gen_number);
    size_t total_gen_size = generation_size (gen_number);
    dprintf (3, ("h%d g%d size is now %Id (inc-ed %Id), size is %Id (gen size is %Id), budget %.3fmb, new alloc %.3fmb",
        heap_number, gen_number, bgc_uoh_current_size[idx],
        (bgc_uoh_current_size[idx] - bgc_begin_uoh_size[idx]), size, total_gen_size,
        mb (dd_desired_allocation (dd_gen)), (dd_new_allocation (dd_gen) / 1000.0 / 1000.0)));
#endif //SIMPLE_DPRINTF
}
#endif //BACKGROUND_GC

// Slow-path allocation driver for the user old heaps (gen_number is a UOH
// generation), written as a state machine over allocation_state.
//
// Starting from a_state_start, each loop iteration runs one state, which tries
// to fit the request (uoh_try_fit), acquires a new segment (uoh_get_new_seg),
// waits for a background GC, or triggers a full compacting GC, and then picks
// the next state. The loop ends when the state becomes a_state_can_allocate or
// a_state_cant_allocate.
//
// Returns:
//   a_state_can_allocate   - uoh_try_fit reported success.
//   a_state_cant_allocate  - every avenue failed; handle_oom has been called
//                            and more_space_lock_uoh has been released here.
//   a_state_retry_allocate - either a lock re-acquisition reported
//                            msl_retry_different_heap, or the allocation
//                            failed for a reason other than oom_cant_commit
//                            and should_retry_other_heap said to try another
//                            heap (the lock is released in that case as well).
//
// NOTE(review): check_msl_status is defined outside this chunk; it is used
// here wherever allocate_soh spells out
// `if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;`,
// so it presumably does the same - confirm.
// NOTE(review): the code releases more_space_lock_uoh without acquiring it
// first, so it presumably expects to be entered with that lock held - confirm
// against the caller.
allocation_state gc_heap::allocate_uoh (int gen_number,
                                          size_t size,
                                          alloc_context* acontext,
                                          uint32_t flags,
                                          int align_const)
{
    enter_msl_status msl_status = msl_entered;

    // No variable values should be "carried over" from one state to the other.
    // That's why there are local variables for each state.
    // (Exceptions visible below: uoh_alloc_state, oom_r, msl_status and
    // current_full_compact_gc_count live across iterations.)
    allocation_state uoh_alloc_state = a_state_start;

#ifdef SPINLOCK_HISTORY
    current_uoh_alloc_state = uoh_alloc_state;
#endif //SPINLOCK_HISTORY

#ifdef RECORD_LOH_STATE
    EEThreadId current_thread_id;
    current_thread_id.SetToCurrentThread ();
#endif //RECORD_LOH_STATE

#ifdef BACKGROUND_GC
    bgc_record_uoh_allocation(gen_number, size);

    // While a background GC is running, get_bgc_allocate_action decides whether
    // this allocation proceeds as is, yields once first, or waits for the BGC.
    if (gc_heap::background_running_p())
    {
        uoh_allocation_action action = get_bgc_allocate_action (gen_number);

        if (action == uoh_alloc_yield)
        {
            // Drop the lock, yield once, and take the lock again.
            add_saved_spinlock_info (true, me_release, mt_alloc_large, msl_status);
            leave_spin_lock (&more_space_lock_uoh);
            bool cooperative_mode = enable_preemptive();
            GCToOSInterface::YieldThread (0);
            disable_preemptive (cooperative_mode);

            msl_status = enter_spin_lock_msl (&more_space_lock_uoh);
            if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;

            add_saved_spinlock_info (true, me_acquire, mt_alloc_large, msl_status);
            dprintf (SPINLOCK_LOG, ("[%d]spin Emsl uoh", heap_number));
        }
        else if (action == uoh_alloc_wait)
        {
            // NOTE(review): this log line always reads loh_generation's dynamic
            // data and index 0 of the bgc size arrays, regardless of
            // gen_number - confirm that is intended for other UOH generations.
            dynamic_data* dd_uoh = dynamic_data_of (loh_generation);
            dprintf (3, ("h%d WAIT loh begin %.3fmb, current size recorded is %.3fmb(begin+%.3fmb), budget %.3fmb, new alloc %.3fmb (alloc-ed %.3fmb)",
                heap_number, mb (bgc_begin_uoh_size[0]), mb (bgc_uoh_current_size[0]),
                mb (bgc_uoh_current_size[0] - bgc_begin_uoh_size[0]),
                mb (dd_desired_allocation (dd_uoh)), (dd_new_allocation (dd_uoh) / 1000.0 / 1000.0),
                mb (dd_desired_allocation (dd_uoh) - dd_new_allocation (dd_uoh))));

            msl_status = wait_for_background (awr_uoh_alloc_during_bgc, true);
            // NOTE(review): the label says "a_state_acquire_seg" although this
            // is the pre-loop BGC wait - looks like a copy/paste; confirm.
            check_msl_status ("uoh a_state_acquire_seg", size);
        }
    }
#endif //BACKGROUND_GC

    gc_reason gr = reason_oos_loh;
    // NOTE(review): `gen` is not referenced again in this function.
    generation* gen = generation_of (gen_number);
    oom_reason oom_r = oom_no_failure;
    // Snapshot of the full compacting GC count, refreshed by each
    // a_state_acquire_seg* state and compared in a_state_check_retry_seg.
    size_t current_full_compact_gc_count = 0;

    // If we can get a new seg it means allocation will succeed.
    while (1)
    {
        dprintf (3, ("[h%d]loh state is %s", heap_number, allocation_state_str[uoh_alloc_state]));

#ifdef SPINLOCK_HISTORY
        current_uoh_alloc_state = uoh_alloc_state;
#endif //SPINLOCK_HISTORY

#ifdef RECORD_LOH_STATE
        current_uoh_alloc_state = uoh_alloc_state;
        add_saved_loh_state (uoh_alloc_state, current_thread_id);
#endif //RECORD_LOH_STATE
        switch (uoh_alloc_state)
        {
            // Terminal states: leave the loop and finish at the exit label.
            case a_state_can_allocate:
            case a_state_cant_allocate:
            {
                goto exit;
            }
            case a_state_start:
            {
                uoh_alloc_state = a_state_try_fit;
                break;
            }
            // First attempt with existing space. A commit failure goes to a
            // full compacting GC; any other failure tries to get a new segment.
            case a_state_try_fit:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;

                can_use_existing_p = uoh_try_fit (gen_number, size, acontext, flags,
                                                  align_const, &commit_failed_p, &oom_r);
                uoh_alloc_state = (can_use_existing_p ?
                                        a_state_can_allocate :
                                        (commit_failed_p ?
                                            a_state_trigger_full_compact_gc :
                                            a_state_acquire_seg));
                // Success and a non-null alloc_ptr are expected to coincide.
                assert ((uoh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0));
                break;
            }
            // Retry right after a new segment was obtained.
            case a_state_try_fit_new_seg:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;

                can_use_existing_p = uoh_try_fit (gen_number, size, acontext, flags,
                                                  align_const, &commit_failed_p, &oom_r);
                // Even after we got a new seg it doesn't necessarily mean we can allocate,
                // another LOH allocating thread could have beat us to acquire the msl so
                // we need to try again.
                uoh_alloc_state = (can_use_existing_p ? a_state_can_allocate : a_state_try_fit);
                assert ((uoh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0));
                break;
            }
            // Retry after a full compacting GC.
            case a_state_try_fit_after_cg:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;

                can_use_existing_p = uoh_try_fit (gen_number, size, acontext, flags,
                                                  align_const, &commit_failed_p, &oom_r);
                // If we failed to commit, we bail right away 'cause we already did a
                // full compacting GC.
                uoh_alloc_state = (can_use_existing_p ?
                                        a_state_can_allocate :
                                        (commit_failed_p ?
                                            a_state_cant_allocate :
                                            a_state_acquire_seg_after_cg));
                assert ((uoh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0));
                break;
            }
            // Retry after having waited for a background GC.
            case a_state_try_fit_after_bgc:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;

                can_use_existing_p = uoh_try_fit (gen_number, size, acontext, flags,
                                                  align_const, &commit_failed_p, &oom_r);
                uoh_alloc_state = (can_use_existing_p ?
                                        a_state_can_allocate :
                                        (commit_failed_p ?
                                            a_state_trigger_full_compact_gc :
                                            a_state_acquire_seg_after_bgc));
                assert ((uoh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0));
                break;
            }
            // Try to get a new segment before any GC was triggered from here.
            // On failure: if a full compacting GC happened in the meantime go
            // to the retry check, otherwise see whether a BGC is in the way.
            case a_state_acquire_seg:
            {
                BOOL can_get_new_seg_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                current_full_compact_gc_count = get_full_compact_gc_count();

                can_get_new_seg_p = uoh_get_new_seg (gen_number, size, &did_full_compacting_gc, &oom_r, &msl_status);
                check_msl_status ("uoh a_state_acquire_seg", size);

                uoh_alloc_state = (can_get_new_seg_p ?
                                        a_state_try_fit_new_seg :
                                        (did_full_compacting_gc ?
                                            a_state_check_retry_seg :
                                            a_state_check_and_wait_for_bgc));
                break;
            }
            // Try to get a new segment after a full compacting GC.
            case a_state_acquire_seg_after_cg:
            {
                BOOL can_get_new_seg_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                current_full_compact_gc_count = get_full_compact_gc_count();

                can_get_new_seg_p = uoh_get_new_seg (gen_number, size, &did_full_compacting_gc, &oom_r, &msl_status);
                check_msl_status ("uoh a_state_acquire_seg_after_cg", size);

                // Since we release the msl before we try to allocate a seg, other
                // threads could have allocated a bunch of segments before us so
                // we might need to retry.
                uoh_alloc_state = (can_get_new_seg_p ?
                                        a_state_try_fit_after_cg :
                                        a_state_check_retry_seg);
                break;
            }
            // Try to get a new segment after having waited for a background GC.
            // Unlike a_state_acquire_seg, a plain failure goes straight to a
            // full compacting GC.
            case a_state_acquire_seg_after_bgc:
            {
                BOOL can_get_new_seg_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                current_full_compact_gc_count = get_full_compact_gc_count();

                can_get_new_seg_p = uoh_get_new_seg (gen_number, size, &did_full_compacting_gc, &oom_r, &msl_status);
                check_msl_status ("uoh a_state_acquire_seg_after_bgc", size);

                uoh_alloc_state = (can_get_new_seg_p ?
                                        a_state_try_fit_new_seg :
                                        (did_full_compacting_gc ?
                                            a_state_check_retry_seg :
                                            a_state_trigger_full_compact_gc));
                assert ((uoh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure));
                break;
            }
            // Wait for a running background GC, if any. With no BGC in progress
            // there is nothing to wait for, so go to a full compacting GC.
            case a_state_check_and_wait_for_bgc:
            {
                BOOL bgc_in_progress_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                bgc_in_progress_p = check_and_wait_for_bgc (awr_loh_oos_bgc, &did_full_compacting_gc, true, &msl_status);
                check_msl_status ("uoh a_state_check_and_wait_for_bgc", size);

                uoh_alloc_state = (!bgc_in_progress_p ?
                                        a_state_trigger_full_compact_gc :
                                        (did_full_compacting_gc ?
                                            a_state_try_fit_after_cg :
                                            a_state_try_fit_after_bgc));
                break;
            }
            // Request a full compacting GC; if one did not happen the
            // allocation fails (trigger_full_compact_gc receives &oom_r).
            case a_state_trigger_full_compact_gc:
            {
                // Sent only when fgn_maxgen_percent is non-zero.
                if (fgn_maxgen_percent)
                {
                    dprintf (2, ("FGN: LOH doing last GC before we throw OOM"));
                    send_full_gc_notification (max_generation, FALSE);
                }

                BOOL got_full_compacting_gc = FALSE;

                got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r, true, &msl_status);
                check_msl_status ("uoh a_state_trigger_full_compact_gc", size);

                uoh_alloc_state = (got_full_compacting_gc ? a_state_try_fit_after_cg : a_state_cant_allocate);
                assert ((uoh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure));
                break;
            }
            // A segment could not be obtained. Decide between another full
            // compacting GC (retry_full_compact_gc), another fit attempt (the
            // full compacting GC count moved since the last snapshot), or
            // giving up.
            case a_state_check_retry_seg:
            {
                BOOL should_retry_gc = retry_full_compact_gc (size);
                BOOL should_retry_get_seg = FALSE;
                if (!should_retry_gc)
                {
                    size_t last_full_compact_gc_count = current_full_compact_gc_count;
                    current_full_compact_gc_count = get_full_compact_gc_count();
                    if (current_full_compact_gc_count > last_full_compact_gc_count)
                    {
                        should_retry_get_seg = TRUE;
                    }
                }

                uoh_alloc_state = (should_retry_gc ?
                                        a_state_trigger_full_compact_gc :
                                        (should_retry_get_seg ?
                                            a_state_try_fit_after_cg :
                                            a_state_cant_allocate));
                assert ((uoh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure));
                break;
            }
            default:
            {
                assert (!"Invalid state!");
                break;
            }
        }
    }

exit:
    // Only the two terminal states reach this label.
    if (uoh_alloc_state == a_state_cant_allocate)
    {
        assert (oom_r != oom_no_failure);

        // A failure other than a commit failure may be retried on another
        // heap; otherwise report the OOM. Either way the UOH more space lock
        // is released below.
        if ((oom_r != oom_cant_commit) && should_retry_other_heap (gen_number, size))
        {
            uoh_alloc_state = a_state_retry_allocate;
        }
        else
        {
            handle_oom (oom_r,
                        size,
                        0,
                        0);
        }
        add_saved_spinlock_info (true, me_release, mt_alloc_large_cant, msl_entered);
        leave_spin_lock (&more_space_lock_uoh);
    }

    assert ((uoh_alloc_state == a_state_can_allocate) ||
            (uoh_alloc_state == a_state_cant_allocate) ||
            (uoh_alloc_state == a_state_retry_allocate));
    return uoh_alloc_state;
}

// Triggers a GC of gen_number (for reason gr) on behalf of an allocating thread that
// currently holds the more-space lock `msl`.
//
// BGC's final mark phase will acquire the msl, so release it here and re-acquire.
// Concretely, the lock is dropped before GarbageCollectGeneration and taken again afterwards
// in these configurations:
//   - UOH allocations (loh_p == true) when BACKGROUND_GC is defined;
//   - SOH allocations (loh_p == false) when MULTIPLE_HEAPS is defined.
// In every other configuration the lock is held across the GC.
//
// take_state is only passed through to add_saved_spinlock_info for bookkeeping.
//
// Returns the status reported by enter_spin_lock_msl when the lock was re-acquired, or
// msl_entered if the lock was never released. Callers in this file treat
// msl_retry_different_heap as "give up on this heap and retry the allocation".
enter_msl_status gc_heap::trigger_gc_for_alloc (int gen_number, gc_reason gr,
                                    GCSpinLock* msl, bool loh_p,
                                    msl_take_state take_state)
{
    enter_msl_status msl_status = msl_entered;

#ifdef BACKGROUND_GC
    if (loh_p)
    {
#ifdef MULTIPLE_HEAPS
#ifdef STRESS_DYNAMIC_HEAP_COUNT
        uoh_msl_before_gc_p = true;
#endif //STRESS_DYNAMIC_HEAP_COUNT
        dprintf (5555, ("h%d uoh alloc before GC", heap_number));
#endif //MULTIPLE_HEAPS
        // Record the release before actually dropping the UOH lock.
        add_saved_spinlock_info (loh_p, me_release, take_state, msl_status);
        leave_spin_lock (msl);
    }
#endif //BACKGROUND_GC

#ifdef MULTIPLE_HEAPS
    if (!loh_p)
    {
        // SOH lock is only dropped around the GC when there are multiple heaps.
        add_saved_spinlock_info (loh_p, me_release, take_state, msl_status);
        leave_spin_lock (msl);
    }
#endif //MULTIPLE_HEAPS

    vm_heap->GarbageCollectGeneration (gen_number, gr);

    // Re-acquire in the same configurations in which the lock was released above.
#ifdef MULTIPLE_HEAPS
    if (!loh_p)
    {
        msl_status = enter_spin_lock_msl (msl);
        add_saved_spinlock_info (loh_p, me_acquire, take_state, msl_status);
    }
#endif //MULTIPLE_HEAPS

#ifdef BACKGROUND_GC
    if (loh_p)
    {
        msl_status = enter_spin_lock_msl (msl);
        add_saved_spinlock_info (loh_p, me_acquire, take_state, msl_status);
    }
#endif //BACKGROUND_GC

    return msl_status;
}

inline
bool gc_heap::update_alloc_info (int gen_number, size_t allocated_size, size_t* etw_allocation_amount)
{
    bool exceeded_p = false;
    int oh_index = gen_to_oh (gen_number);
    allocated_since_last_gc[oh_index] += allocated_size;

    size_t& etw_allocated = etw_allocation_running_amount[oh_index];
    etw_allocated += allocated_size;
    if (etw_allocated > etw_allocation_tick)
    {
        *etw_allocation_amount = etw_allocated;
        exceeded_p = true;
        etw_allocated = 0;
    }

    return exceeded_p;
}

// Attempts to get more allocation space for `acontext` on this heap.
//
// Steps:
//   1. If a GC is already in progress, wait for it and ask the caller to retry.
//   2. Take the more-space lock for the requested generation (SOH lock for gen0,
//      UOH lock for anything else).
//   3. Optionally check the full-GC-notification thresholds.
//   4. If the allocation budget for the generation is exhausted (or BGC servo tuning
//      asks for it), trigger a GC via trigger_gc_for_alloc.
//   5. Hand off to allocate_soh / allocate_uoh and return their result.
//
// Parameters:
//   acontext   - allocation context being refilled.
//   size       - requested size in bytes.
//   flags      - allocation flags, forwarded to allocate_soh / allocate_uoh.
//   gen_number - 0 for SOH; any value > 0 is treated as a UOH generation.
//
// Returns a_state_retry_allocate whenever the caller should start over (GC was in
// progress, or a lock acquisition reported msl_retry_different_heap); otherwise whatever
// allocate_soh / allocate_uoh returns.
// NOTE(review): the more-space lock taken here is presumably released inside
// allocate_soh / allocate_uoh - it is not released in this function on the normal path.
allocation_state gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size,
                                    uint32_t flags, int gen_number)
{
    enter_msl_status msl_status = msl_entered;

    if (gc_heap::gc_started)
    {
        wait_for_gc_done();
        //dprintf (5555, ("h%d TAMS g%d %Id returning a_state_retry_allocate!", heap_number, gen_number, size));

        return a_state_retry_allocate;
    }

    // Anything other than gen0 uses the UOH lock.
    bool loh_p = (gen_number > 0);
    GCSpinLock* msl = loh_p ? &more_space_lock_uoh : &more_space_lock_soh;

#ifdef SYNCHRONIZATION_STATS
    int64_t msl_acquire_start = GCToOSInterface::QueryPerformanceCounter();
#endif //SYNCHRONIZATION_STATS

    msl_status = enter_spin_lock_msl (msl);
    // check_msl_status is defined elsewhere; presumably it returns a_state_retry_allocate
    // from this function when msl_status is msl_retry_different_heap (see the
    // commented-out equivalent below) - TODO confirm against the macro definition.
    check_msl_status ("TAMS", size);
    //if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;
    add_saved_spinlock_info (loh_p, me_acquire, mt_try_alloc, msl_status);
    dprintf (SPINLOCK_LOG, ("[%d]Emsl for alloc", heap_number));
#ifdef SYNCHRONIZATION_STATS
    // Bucket lock acquisition times into "high" (> 200 counter ticks) and "low".
    int64_t msl_acquire = GCToOSInterface::QueryPerformanceCounter() - msl_acquire_start;
    total_msl_acquire += msl_acquire;
    num_msl_acquired++;
    if (msl_acquire > 200)
    {
        num_high_msl_acquire++;
    }
    else
    {
        num_low_msl_acquire++;
    }
#endif //SYNCHRONIZATION_STATS

    dprintf (3, ("requested to allocate %zd bytes on gen%d", size, gen_number));

    int align_const = get_alignment_constant (gen_number <= max_generation);

    // Full GC notification is only checked when it has been configured.
    if (fgn_maxgen_percent)
    {
        check_for_full_gc (gen_number, size);
    }

#ifdef BGC_SERVO_TUNING
    // With servo tuning, a UOH allocation can directly request a max_generation GC.
    if ((gen_number != 0) && bgc_tuning::should_trigger_bgc_loh())
    {
        msl_status = trigger_gc_for_alloc (max_generation, reason_bgc_tuning_loh, msl, loh_p, mt_try_servo_budget);
        if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;
    }
    else
#endif //BGC_SERVO_TUNING
    {
        // For UOH the budget check is skipped when free-list tuning is enabled.
        bool trigger_on_budget_loh_p =
#ifdef BGC_SERVO_TUNING
            !bgc_tuning::enable_fl_tuning;
#else
            true;
#endif //BGC_SERVO_TUNING

        // gen0 always checks its budget; UOH only if trigger_on_budget_loh_p.
        bool check_budget_p = true;
        if (gen_number != 0)
        {
            check_budget_p = trigger_on_budget_loh_p;
        }

        if (check_budget_p && !(new_allocation_allowed (gen_number)))
        {
            if (fgn_maxgen_percent && (gen_number == 0))
            {
                // We only check gen0 every so often, so take this opportunity to check again.
                check_for_full_gc (gen_number, size);
            }

#ifdef BACKGROUND_GC
            // May update msl_status through the out parameter; recheck_p tells us whether
            // the budget needs to be re-evaluated before deciding to trigger a GC.
            bool recheck_p = wait_for_bgc_high_memory (awr_gen0_alloc, loh_p, &msl_status);
            if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;
#endif //BACKGROUND_GC

#ifdef SYNCHRONIZATION_STATS
            bad_suspension++;
#endif //SYNCHRONIZATION_STATS
            dprintf (2, ("h%d running out of budget on gen%d, gc", heap_number, gen_number));

#ifdef BACKGROUND_GC
            bool trigger_gc_p = true;
            if (recheck_p)
                trigger_gc_p = !(new_allocation_allowed (gen_number));

            if (trigger_gc_p)
#endif //BACKGROUND_GC
            {
                // While a concurrent GC is running, only gen0 allocations trigger a GC here.
                if (!settings.concurrent || (gen_number == 0))
                {
                    // Always requests a gen0 GC; only the reason differs between SOH and UOH.
                    msl_status = trigger_gc_for_alloc (0, ((gen_number == 0) ? reason_alloc_soh : reason_alloc_loh),
                                                       msl, loh_p, mt_try_budget);
                    if (msl_status == msl_retry_different_heap) return a_state_retry_allocate;
                }
            }
        }
    }

    allocation_state can_allocate = ((gen_number == 0) ?
        allocate_soh (gen_number, size, acontext, flags, align_const) :
        allocate_uoh (gen_number, size, acontext, flags, align_const));

    return can_allocate;
}

#ifdef MULTIPLE_HEAPS
// Picks the heap that `acontext` should use for gen0 allocations and keeps the per-heap
// alloc_context_count bookkeeping up to date.
//
// - While the context's alloc count is below 4, only the very first call does work: it
//   binds the context's home heap and alloc heap to the heap returned by
//   heap_select::select_heap and bumps that heap's alloc_context_count.
// - Afterwards a rebalance is attempted
//     * immediately if can_find_heap_fast() and the selected heap differs from the home heap,
//     * every 16th call if can_find_heap_fast() and it still matches,
//     * every 4th call otherwise.
// - A rebalance compares the remaining gen0 budget (dd_new_allocation), scaled by the number
//   of alloc contexts, of the current alloc heap against the new home heap and a handful of
//   other heaps - first within the range returned by get_heap_range_for_heap, then (with a
//   larger delta) the remaining heaps. If a better heap is found the context is moved to it
//   and, unless gc_thread_no_affinitize_p is set, the thread's ideal affinity is updated.
//
// The context's alloc count is incremented on every exit path.
void gc_heap::balance_heaps (alloc_context* acontext)
{
    if (acontext->get_alloc_count() < 4)
    {
        if (acontext->get_alloc_count() == 0)
        {
            // First allocation on this context: home heap and alloc heap start out identical.
            int home_hp_num = heap_select::select_heap (acontext);
            acontext->set_home_heap (GCHeap::GetHeap (home_hp_num));
            gc_heap* hp = acontext->get_home_heap ()->pGenGCHeap;
            acontext->set_alloc_heap (acontext->get_home_heap ());
            hp->alloc_context_count++;

#ifdef HEAP_BALANCE_INSTRUMENTATION
            uint16_t ideal_proc_no = 0;
            GCToOSInterface::GetCurrentThreadIdealProc (&ideal_proc_no);

            uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber ();

            add_to_hb_numa (proc_no, ideal_proc_no,
                home_hp_num, false, true, false);

            dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPafter GC: 1st alloc on p%3d, h%d, ip: %d",
                proc_no, home_hp_num, ideal_proc_no));
#endif //HEAP_BALANCE_INSTRUMENTATION
        }
    }
    else
    {
        BOOL set_home_heap = FALSE;
        gc_heap* home_hp = NULL;
        int proc_hp_num = 0;

#ifdef HEAP_BALANCE_INSTRUMENTATION
        bool alloc_count_p = true;
        bool multiple_procs_p = false;
        bool set_ideal_p = false;
        uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber ();
        uint32_t last_proc_no = proc_no;
#endif //HEAP_BALANCE_INSTRUMENTATION

        // Decide whether this call should attempt a rebalance at all.
        if (heap_select::can_find_heap_fast ())
        {
            assert (acontext->get_home_heap () != NULL);
            home_hp = acontext->get_home_heap ()->pGenGCHeap;
            proc_hp_num = heap_select::select_heap (acontext);

            if (home_hp != gc_heap::g_heaps[proc_hp_num])
            {
#ifdef HEAP_BALANCE_INSTRUMENTATION
                alloc_count_p = false;
#endif //HEAP_BALANCE_INSTRUMENTATION
                set_home_heap = TRUE;
            }
            else if ((acontext->get_alloc_count() & 15) == 0)
                set_home_heap = TRUE;
        }
        else
        {
            if ((acontext->get_alloc_count() & 3) == 0)
                set_home_heap = TRUE;
        }

        if (set_home_heap)
        {
            /*
                        // Since we are balancing up to MAX_SUPPORTED_CPUS, no need for this.
                        if (n_heaps > MAX_SUPPORTED_CPUS)
                        {
                            // on machines with many processors cache affinity is really king, so don't even try
                            // to balance on these.
                            acontext->home_heap = GCHeap::GetHeap( heap_select::select_heap(acontext));
                            acontext->alloc_heap = acontext->home_heap;
                        }
                        else
            */
            {
                gc_heap* org_hp = acontext->get_alloc_heap ()->pGenGCHeap;
                int org_hp_num = org_hp->heap_number;
                int final_alloc_hp_num = org_hp_num;

                // org_size: remaining gen0 budget on the current alloc heap;
                // total_size: that heap's full gen0 budget.
                dynamic_data* dd = org_hp->dynamic_data_of (0);
                ptrdiff_t org_size = dd_new_allocation (dd);
                ptrdiff_t total_size = (ptrdiff_t)dd_desired_allocation (dd);

#ifdef HEAP_BALANCE_INSTRUMENTATION
                dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMP[p%3d] ph h%3d, hh: %3d, ah: %3d (%dmb-%dmb), ac: %5d(%s)",
                    proc_no, proc_hp_num, home_hp->heap_number,
                    org_hp_num, (total_size / 1024 / 1024), (org_size / 1024 / 1024),
                    acontext->get_alloc_count(),
                    ((proc_hp_num == home_hp->heap_number) ? "AC" : "H")));
#endif //HEAP_BALANCE_INSTRUMENTATION

                int org_alloc_context_count;
                int max_alloc_context_count;
                gc_heap* max_hp;
                int max_hp_num = 0;
                ptrdiff_t max_size;
                // Handicap applied in favor of staying on the current heap: 1/64th of the
                // remaining budget, but at least min_gen0_balance_delta.
                size_t local_delta = max (((size_t)org_size >> 6), min_gen0_balance_delta);
                size_t delta = local_delta;

                // If the current heap has (almost) its whole budget left, don't bother balancing.
                if (((size_t)org_size + 2 * delta) >= (size_t)total_size)
                {
                    acontext->inc_alloc_count();
                    return;
                }

#ifdef HEAP_BALANCE_INSTRUMENTATION
                proc_no = GCToOSInterface::GetCurrentProcessorNumber ();
                if (proc_no != last_proc_no)
                {
                    dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSP: %d->%d", last_proc_no, proc_no));
                    multiple_procs_p = true;
                    last_proc_no = proc_no;
                }

                int new_home_hp_num = heap_select::proc_no_to_heap_no[proc_no];
#else
                int new_home_hp_num = heap_select::select_heap(acontext);
#endif //HEAP_BALANCE_INSTRUMENTATION
                gc_heap* new_home_hp = gc_heap::g_heaps[new_home_hp_num];
                acontext->set_home_heap (new_home_hp->vm_heap);

                // [start, end) is the heap range associated with the new home heap;
                // [end, finish) covers the remaining heaps (indices wrap modulo n_heaps below).
                int start, end, finish;
                heap_select::get_heap_range_for_heap (new_home_hp_num, &start, &end);
                finish = start + n_heaps;

                // The whole search is repeated if the alloc_context_count of either the
                // original heap or the chosen heap changed while we were searching.
                // NOTE(review): start, end and delta are not reset when the search is
                // repeated, so a repeat after the remote pass only rescans the remote range
                // with the doubled delta - confirm this is intended.
                do
                {
                    // Start with the current alloc heap as the best candidate.
                    max_hp = org_hp;
                    max_hp_num = org_hp_num;
                    max_size = org_size + delta;
                    org_alloc_context_count = org_hp->alloc_context_count;
                    max_alloc_context_count = org_alloc_context_count;
                    if (org_hp == new_home_hp)
                        max_size = max_size + delta;

                    // Budget is shared by all alloc contexts currently on the heap.
                    if (max_alloc_context_count > 1)
                        max_size /= max_alloc_context_count;

                    // check if the new home heap has more space
                    if (org_hp != new_home_hp)
                    {
                        dd = new_home_hp->dynamic_data_of(0);
                        ptrdiff_t size = dd_new_allocation(dd);

                        // favor new home heap over org heap
                        size += delta * 2;

                        // +1 accounts for this context joining the candidate heap.
                        int new_home_hp_alloc_context_count = new_home_hp->alloc_context_count;
                        if (new_home_hp_alloc_context_count > 0)
                            size /= (new_home_hp_alloc_context_count + 1);

                        if (size > max_size)
                        {
#ifdef HEAP_BALANCE_INSTRUMENTATION
                            dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPorg h%d(%dmb), m h%d(%dmb)",
                                org_hp_num, (max_size / 1024 / 1024),
                                new_home_hp_num, (size / 1024 / 1024)));
#endif //HEAP_BALANCE_INSTRUMENTATION

                            max_hp = new_home_hp;
                            max_size = size;
                            max_hp_num = new_home_hp_num;
                            max_alloc_context_count = new_home_hp_alloc_context_count;
                        }
                    }

                    // consider heaps both inside our local NUMA node,
                    // and outside, but with different thresholds
                    enum
                    {
                        LOCAL_NUMA_NODE,
                        REMOTE_NUMA_NODE
                    };

                    for (int pass = LOCAL_NUMA_NODE; pass <= REMOTE_NUMA_NODE; pass++)
                    {
                        int count = end - start;
                        int max_tries = min(count, 4);

                        // we will consider max_tries consecutive (in a circular sense)
                        // other heaps from a semi random starting point

                        // alloc_count often increases by multiples of 16 (due to logic at top of routine),
                        // and we want to advance the starting point by 4 between successive calls,
                        // therefore the shift right by 2 bits
                        int heap_num = start + ((acontext->get_alloc_count() >> 2) + new_home_hp_num) % count;

#ifdef HEAP_BALANCE_INSTRUMENTATION
                        dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMP starting at h%d (home_heap_num = %d, alloc_count = %d)", heap_num, new_home_hp_num, acontext->get_alloc_count()));
#endif //HEAP_BALANCE_INSTRUMENTATION

                        for (int tries = max_tries; --tries >= 0; heap_num++)
                        {
                            // wrap around if we hit the end of our range
                            if (heap_num >= end)
                                heap_num -= count;
                            // wrap around if we hit the end of the heap numbers
                            while (heap_num >= n_heaps)
                                heap_num -= n_heaps;

                            assert (heap_num < n_heaps);
                            gc_heap* hp = gc_heap::g_heaps[heap_num];
                            dd = hp->dynamic_data_of(0);
                            ptrdiff_t size = dd_new_allocation(dd);

#ifdef HEAP_BALANCE_INSTRUMENTATION
                            dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMP looking at h%d(%dmb)",
                                heap_num, (size / 1024 / 1024)));
#endif //HEAP_BALANCE_INSTRUMENTATION
                            // if the size is not bigger than what we already have,
                            // give up immediately, as it can't be a winner...
                            // this is a micro-optimization to avoid fetching the
                            // alloc_context_count and possibly dividing by it
                            if (size <= max_size)
                                continue;

                            int hp_alloc_context_count = hp->alloc_context_count;

                            if (hp_alloc_context_count > 0)
                            {
                                size /= (hp_alloc_context_count + 1);
                            }

                            if (size > max_size)
                            {
#ifdef HEAP_BALANCE_INSTRUMENTATION
                                dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPorg h%d(%dmb), m h%d(%dmb)",
                                    org_hp_num, (max_size / 1024 / 1024),
                                    hp->heap_number, (size / 1024 / 1024)));
#endif //HEAP_BALANCE_INSTRUMENTATION

                                max_hp = hp;
                                max_size = size;
                                max_hp_num = max_hp->heap_number;
                                max_alloc_context_count = hp_alloc_context_count;
                            }
                        }

                        // Nothing better found locally and there are heaps outside the local
                        // range: widen the search to them for the second pass.
                        if ((max_hp == org_hp) && (end < finish))
                        {
                            start = end; end = finish;
                            delta = local_delta * 2; // Make it twice as hard to balance to remote nodes on NUMA.
                        }
                        else
                        {
                            // we already found a better heap, or there are no remote NUMA nodes
                            break;
                        }
                    }
                }
                while (org_alloc_context_count != org_hp->alloc_context_count ||
                       max_alloc_context_count != max_hp->alloc_context_count);

#ifdef HEAP_BALANCE_INSTRUMENTATION
                uint16_t ideal_proc_no_before_set_ideal = 0;
                GCToOSInterface::GetCurrentThreadIdealProc (&ideal_proc_no_before_set_ideal);
#endif //HEAP_BALANCE_INSTRUMENTATION

                if (max_hp != org_hp)
                {
                    final_alloc_hp_num = max_hp->heap_number;

                    // Move this context from the original heap to the chosen heap.
                    org_hp->alloc_context_count--;
                    max_hp->alloc_context_count++;

                    acontext->set_alloc_heap (GCHeap::GetHeap (final_alloc_hp_num));
                    if (!gc_thread_no_affinitize_p)
                    {
                        uint16_t src_proc_no = heap_select::find_proc_no_from_heap_no (org_hp->heap_number);
                        uint16_t dst_proc_no = heap_select::find_proc_no_from_heap_no (max_hp->heap_number);

                        dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSW! h%d(p%d)->h%d(p%d)",
                            org_hp_num, src_proc_no, final_alloc_hp_num, dst_proc_no));

#ifdef HEAP_BALANCE_INSTRUMENTATION
                        int current_proc_no_before_set_ideal = GCToOSInterface::GetCurrentProcessorNumber ();
                        if ((uint16_t)current_proc_no_before_set_ideal != last_proc_no)
                        {
                            dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSPa: %d->%d", last_proc_no, current_proc_no_before_set_ideal));
                            multiple_procs_p = true;
                        }
#endif //HEAP_BALANCE_INSTRUMENTATION

                        // Failure to set the ideal processor is only logged, not treated as an error.
                        if (!GCToOSInterface::SetCurrentThreadIdealAffinity (src_proc_no, dst_proc_no))
                        {
                            dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPFailed to set the ideal processor for heap %d %d->%d",
                                org_hp->heap_number, (int)src_proc_no, (int)dst_proc_no));
                        }
#ifdef HEAP_BALANCE_INSTRUMENTATION
                        else
                        {
                            set_ideal_p = true;
                        }
#endif //HEAP_BALANCE_INSTRUMENTATION
                    }
                }

#ifdef HEAP_BALANCE_INSTRUMENTATION
                add_to_hb_numa (proc_no, ideal_proc_no_before_set_ideal,
                    final_alloc_hp_num, multiple_procs_p, alloc_count_p, set_ideal_p);
#endif //HEAP_BALANCE_INSTRUMENTATION
            }
        }
    }
    acontext->inc_alloc_count();
}

// Returns the value used to compare heaps against each other when balancing UOH
// allocations for generation_num; a larger value means a more attractive heap.
//
// Without regions and with a hard limit configured, the value is the generation's free list
// space minus the bytes already allocated on its (single) segment. In every other
// configuration it is the generation's remaining allocation budget (dd_new_allocation).
ptrdiff_t gc_heap::get_balance_heaps_uoh_effective_budget (int generation_num)
{
#ifndef USE_REGIONS
    if (heap_hard_limit)
    {
        generation* gen = generation_of (generation_num);
        const ptrdiff_t free_list_space = generation_free_list_space (gen);

        heap_segment* seg = generation_start_segment (gen);
        assert (heap_segment_next (seg) == nullptr);
        const ptrdiff_t allocated = heap_segment_allocated (seg) - seg->mem;

        // We could calculate the actual end_of_seg_space by taking reserved - allocated,
        // but all heaps have the same reserved memory and this value is only used for comparison.
        return free_list_space - allocated;
    }
#endif // !USE_REGIONS

    return dd_new_allocation (dynamic_data_of (generation_num));
}

// Chooses the heap on which a UOH allocation for generation_num should be attempted.
//
// The home heap (as picked by heap_select::select_heap) wins unless another heap's effective
// budget beats the home heap's by more than a delta. Heaps in the range returned by
// get_heap_range_for_heap are examined first; only if none of them beats the home heap are
// the remaining heaps examined, with a larger delta.
//
// alloc_size is only used for logging. Never returns null.
gc_heap* gc_heap::balance_heaps_uoh (alloc_context* acontext, size_t alloc_size, int generation_num)
{
    const int home_hp_num = heap_select::select_heap(acontext);
    dprintf (3, ("[h%d] LA: %zd", home_hp_num, alloc_size));
    gc_heap* home_hp = GCHeap::GetHeap(home_hp_num)->pGenGCHeap;
    dynamic_data* dd = home_hp->dynamic_data_of (generation_num);
    const ptrdiff_t home_hp_size = home_hp->get_balance_heaps_uoh_effective_budget (generation_num);

    // Handicap in favor of the home heap for the first (local) scan.
    size_t delta = dd_min_size (dd) / 2;
    int start, end;
    heap_select::get_heap_range_for_heap(home_hp_num, &start, &end);
    const int finish = start + n_heaps;

    gc_heap* max_hp;
    for (;;)
    {
        // Every scan starts over with the home heap as the candidate to beat.
        max_hp = home_hp;
        ptrdiff_t max_size = home_hp_size + delta;

        dprintf (3, ("home hp: %d, max size: %zd",
            home_hp_num,
            max_size));

        for (int heap_index = start; heap_index < end; heap_index++)
        {
            gc_heap* candidate = GCHeap::GetHeap(heap_index%n_heaps)->pGenGCHeap;
            const ptrdiff_t candidate_size = candidate->get_balance_heaps_uoh_effective_budget (generation_num);

            dprintf (3, ("hp: %d, size: %zd", candidate->heap_number, candidate_size));
            if (candidate_size > max_size)
            {
                max_hp = candidate;
                max_size = candidate_size;
                dprintf (3, ("max hp: %d, max size: %zd",
                    max_hp->heap_number,
                    max_size));
            }
        }

        // Done once a better heap was found or every heap has been looked at.
        if ((max_hp != home_hp) || (end >= finish))
        {
            break;
        }

        // Widen the search to the remaining heaps.
        start = end;
        end = finish;
        delta = dd_min_size (dd) * 3 / 2; // Make it harder to balance to remote nodes on NUMA.
    }

    if (max_hp != home_hp)
    {
        dprintf (3, ("uoh: %d(%zd)->%d(%zd)",
            home_hp->heap_number, dd_new_allocation (home_hp->dynamic_data_of (generation_num)),
            max_hp->heap_number, dd_new_allocation (max_hp->dynamic_data_of (generation_num))));
    }

    return max_hp;
}

// Heap selection used when a UOH allocation is being retried under a hard limit.
//
// With regions this simply defers to balance_heaps_uoh. Without regions it looks for the
// heap whose (single) segment for generation_num has the most space left between allocated
// and reserved, requiring at least alloc_size bytes. Heaps in the range returned by
// get_heap_range_for_heap are examined first and the remaining heaps only if none of those
// qualifies.
//
// Returns nullptr (segments build only) when no heap has enough end-of-segment space.
gc_heap* gc_heap::balance_heaps_uoh_hard_limit_retry (alloc_context* acontext, size_t alloc_size, int generation_num)
{
    assert (heap_hard_limit);
#ifdef USE_REGIONS
    return balance_heaps_uoh (acontext, alloc_size, generation_num);
#else //USE_REGIONS
    const int home_heap = heap_select::select_heap(acontext);
    dprintf (3, ("[h%d] balance_heaps_loh_hard_limit_retry alloc_size: %zd", home_heap, alloc_size));
    int start, end;
    heap_select::get_heap_range_for_heap (home_heap, &start, &end);
    const int finish = start + n_heaps;

    gc_heap* best_hp = nullptr;
    size_t best_end_of_seg_space = alloc_size; // Must be more than this much, or return NULL

    for (;;)
    {
        for (int heap_index = start; heap_index < end; heap_index++)
        {
            gc_heap* hp = GCHeap::GetHeap (heap_index%n_heaps)->pGenGCHeap;
            heap_segment* seg = generation_start_segment (hp->generation_of (generation_num));
            // With a hard limit, there is only one segment.
            assert (heap_segment_next (seg) == nullptr);
            const size_t end_of_seg_space = heap_segment_reserved (seg) - heap_segment_allocated (seg);
            if (end_of_seg_space < best_end_of_seg_space)
            {
                continue;
            }

            dprintf (3, ("Switching heaps in hard_limit_retry! To: [h%d], New end_of_seg_space: %zd", hp->heap_number, end_of_seg_space));
            best_end_of_seg_space = end_of_seg_space;
            best_hp = hp;
        }

        // Only switch to a remote NUMA node if we didn't find space on this one.
        if ((best_hp != nullptr) || (end >= finish))
        {
            break;
        }

        start = end;
        end = finish;
    }

    return best_hp;
#endif //USE_REGIONS
}
#endif //MULTIPLE_HEAPS

// Obtains more allocation space for `acontext`, retrying for as long as
// try_allocate_more_space answers a_state_retry_allocate.
//
// With MULTIPLE_HEAPS:
//   - gen0 requests rebalance the context via balance_heaps and allocate on the context's
//     alloc heap;
//   - UOH requests pick a heap with balance_heaps_uoh. When a hard limit is configured and
//     the previous attempt asked for a retry, balance_heaps_uoh_hard_limit_retry is used
//     instead, and the allocation fails if it finds no heap or the retry limit
//     (UOH_ALLOCATION_RETRY_MAX_COUNT) has been reached.
// Without MULTIPLE_HEAPS the request goes straight to try_allocate_more_space on this heap.
//
// Parameters:
//   acontext                - allocation context to refill.
//   size                    - requested size in bytes.
//   flags                   - allocation flags, forwarded unchanged.
//   alloc_generation_number - 0 for SOH, otherwise the UOH generation to allocate in.
//
// Returns nonzero iff the final state is a_state_can_allocate.
BOOL gc_heap::allocate_more_space(alloc_context* acontext, size_t size,
                                   uint32_t flags, int alloc_generation_number)
{
    allocation_state status = a_state_start;
    int retry_count = 0;

    do
    {
#ifdef MULTIPLE_HEAPS
        if (alloc_generation_number == 0)
        {
            balance_heaps (acontext);
            status = acontext->get_alloc_heap ()->pGenGCHeap->try_allocate_more_space (acontext, size, flags, alloc_generation_number);
        }
        else
        {
            // Timestamps are only consumed by the logging below.
            uint64_t start_us = GetHighPrecisionTimeStamp ();

            gc_heap* alloc_heap;
            if (heap_hard_limit && (status == a_state_retry_allocate))
            {
                alloc_heap = balance_heaps_uoh_hard_limit_retry (acontext, size, alloc_generation_number);
                // Give up if no heap has room or we have retried too many times.
                if (alloc_heap == nullptr || (retry_count++ == UOH_ALLOCATION_RETRY_MAX_COUNT))
                {
                    return false;
                }
            }
            else
            {
                alloc_heap = balance_heaps_uoh (acontext, size, alloc_generation_number);
                dprintf (3, ("uoh alloc %Id on h%d", size, alloc_heap->heap_number));
            }

            // Remember whether this attempt is itself a retry before status is overwritten.
            bool alloced_on_retry = (status == a_state_retry_allocate);

            status = alloc_heap->try_allocate_more_space (acontext, size, flags, alloc_generation_number);
            dprintf (3, ("UOH h%d %Id returned from TAMS, s %d", alloc_heap->heap_number, size, status));

            uint64_t end_us = GetHighPrecisionTimeStamp ();

            if (status == a_state_retry_allocate)
            {
                // This records that we had to retry due to decommissioned heaps or GC in progress
                dprintf (5555, ("UOH h%d alloc %Id retry!", alloc_heap->heap_number, size));
            }
            else
            {
                if (alloced_on_retry)
                {
                    dprintf (5555, ("UOH h%d allocated %Id on retry (%I64dus)", alloc_heap->heap_number, size, (end_us - start_us)));
                }
            }
        }
#else
        status = try_allocate_more_space (acontext, size, flags, alloc_generation_number);
#endif //MULTIPLE_HEAPS
    }
    while (status == a_state_retry_allocate);

    return (status == a_state_can_allocate);
}

// Allocates an object of jsize bytes (rounded up with Align) from the allocation context.
//
// Fast path: bump acontext->alloc_ptr and, if it is still within alloc_limit, return the
// old alloc_ptr as the new object. Slow path: undo the bump, ask allocate_more_space for
// gen0 space and try again.
//
// Returns the address of the new object, or 0 if allocate_more_space fails.
inline
CObjectHeader* gc_heap::allocate (size_t jsize, alloc_context* acontext, uint32_t flags)
{
    size_t size = Align (jsize);
    assert (size >= Align (min_obj_size));
    {
    retry:
        // Optimistically bump the pointer first and validate against the limit afterwards.
        uint8_t*  result = acontext->alloc_ptr;
        acontext->alloc_ptr+=size;
        if (acontext->alloc_ptr <= acontext->alloc_limit)
        {
            CObjectHeader* obj = (CObjectHeader*)result;
            assert (obj != 0);
            return obj;
        }
        else
        {
            // Didn't fit: roll the bump back before refilling the context.
            acontext->alloc_ptr -= size;

            // MSVC only: inline_depth(0) around the call keeps the slow path from being
            // inlined here; the depth is raised again right after.
#ifdef _MSC_VER
#pragma inline_depth(0)
#endif //_MSC_VER

            if (! allocate_more_space (acontext, size, flags, 0))
                return 0;

#ifdef _MSC_VER
#pragma inline_depth(20)
#endif //_MSC_VER

            goto retry;
        }
    }
}

// Stops allocating in gen's current allocation area by calling adjust_limit with a null
// start and a zero size.
// NOTE(review): presumably this makes adjust_limit retire the current allocation
// pointer/limit of gen - confirm against adjust_limit, which is defined elsewhere.
void  gc_heap::leave_allocation_segment (generation* gen)
{
    adjust_limit (0, 0, gen);
}

// Resets the per-generation free space / plug histograms at the start of a GC.
// Does nothing unless FREE_USAGE_STATS is defined.
//
// For the condemned generations (only max_generation when the GC is concurrent, otherwise
// gen0 up to the condemned generation) the plug and pinned-free-space histograms are
// cleared. The free-space histogram is cleared too, except with DOUBLY_LINKED_FL where it
// is printed via print_free_and_plug instead. For generations older than the condemned one
// only the plug histogram is cleared.
void gc_heap::init_free_and_plug()
{
#ifdef FREE_USAGE_STATS
    const int first_gen = (settings.concurrent ? max_generation : 0);

    for (int gen_idx = first_gen; gen_idx <= settings.condemned_generation; gen_idx++)
    {
        generation* condemned_gen = generation_of (gen_idx);
#ifdef DOUBLY_LINKED_FL
        print_free_and_plug ("BGC");
#else
        memset (condemned_gen->gen_free_spaces, 0, sizeof (condemned_gen->gen_free_spaces));
#endif //DOUBLY_LINKED_FL
        memset (condemned_gen->gen_plugs, 0, sizeof (condemned_gen->gen_plugs));
        memset (condemned_gen->gen_current_pinned_free_spaces, 0, sizeof (condemned_gen->gen_current_pinned_free_spaces));
    }

    // Older generations only need their plug histogram reset. When max_generation itself is
    // condemned this loop has nothing to iterate over.
    for (int gen_idx = (settings.condemned_generation + 1); gen_idx <= max_generation; gen_idx++)
    {
        generation* older_gen = generation_of (gen_idx);
        memset (older_gen->gen_plugs, 0, sizeof (older_gen->gen_plugs));
    }
#endif //FREE_USAGE_STATS
}

// Logs every non-empty bucket of the free-space / plug histograms for gen0 through the
// generation just older than the condemned one (capped at max_generation). `msg` is a tag
// that is included in each log line. Does nothing unless FREE_USAGE_STATS is defined.
void gc_heap::print_free_and_plug (const char* msg)
{
#ifdef FREE_USAGE_STATS
    int last_gen = max_generation;
    if (settings.condemned_generation != max_generation)
    {
        last_gen = settings.condemned_generation + 1;
    }

    for (int gen_idx = 0; gen_idx <= last_gen; gen_idx++)
    {
        generation* gen = generation_of (gen_idx);
        for (int bucket = 0; bucket < NUM_GEN_POWER2; bucket++)
        {
            // Skip buckets that have neither free spaces nor plugs recorded.
            if ((gen->gen_free_spaces[bucket] == 0) && (gen->gen_plugs[bucket] == 0))
            {
                continue;
            }

            dprintf (2, ("[%s][h%d][%s#%d]gen%d: 2^%d: F: %zd, P: %zd",
                msg,
                heap_number,
                (settings.concurrent ? "BGC" : "GC"),
                settings.gc_index,
                gen_idx,
                (bucket + 9), gen->gen_free_spaces[bucket], gen->gen_plugs[bucket]));
        }
    }
#else
    UNREFERENCED_PARAMETER(msg);
#endif //FREE_USAGE_STATS
}

// Maps a size to the index of its power-of-two histogram bucket.
//
// Bucket 0 holds sizes below BASE_GEN_SIZE and each following bucket doubles the upper
// bound; everything that does not fit an earlier bucket lands in the last one
// (NUM_GEN_POWER2 - 1).
//
// TODO: replace with allocator::first_suitable_bucket
int gc_heap::find_bucket (size_t size)
{
    int bucket = 0;
    size_t upper_bound = BASE_GEN_SIZE;

    while ((bucket < (NUM_GEN_POWER2 - 1)) && (size >= upper_bound))
    {
        upper_bound = upper_bound * 2;
        bucket++;
    }

    return bucket;
}

// Records a plug of plug_size bytes in gen_number's plug histogram by incrementing the
// bucket selected by find_bucket. Does nothing unless FREE_USAGE_STATS is defined.
void gc_heap::add_gen_plug (int gen_number, size_t plug_size)
{
#ifdef FREE_USAGE_STATS
    dprintf (3, ("adding plug size %zd to gen%d", plug_size, gen_number));
    generation* gen = generation_of (gen_number);
    int i = find_bucket (plug_size);

    (gen->gen_plugs[i])++;
#else
    UNREFERENCED_PARAMETER(gen_number);
    UNREFERENCED_PARAMETER(plug_size);
#endif //FREE_USAGE_STATS
}

// Records a free space of free_size bytes left in front of a pinned plug in gen_number:
// increments the matching bucket of the generation's pinned-free-space histogram and adds
// free_size to generation_pinned_free_obj_space. Does nothing unless FREE_USAGE_STATS is
// defined.
void gc_heap::add_item_to_current_pinned_free (int gen_number, size_t free_size)
{
#ifdef FREE_USAGE_STATS
    generation* gen = generation_of (gen_number);
    int i = find_bucket (free_size);

    (gen->gen_current_pinned_free_spaces[i])++;
    generation_pinned_free_obj_space (gen) += free_size;
    // NOTE(review): the exponent logged here is (i + 10) while print_free_and_plug logs
    // (j + 9) for the same bucket index - confirm which one is intended.
    dprintf (3, ("left pin free %zd(2^%d) to gen%d, total %zd bytes (%zd)",
        free_size, (i + 10), gen_number,
        generation_pinned_free_obj_space (gen),
        gen->gen_current_pinned_free_spaces[i]));
#else
    UNREFERENCED_PARAMETER(gen_number);
    UNREFERENCED_PARAMETER(free_size);
#endif //FREE_USAGE_STATS
}

// Records a free space of free_size bytes in gen_number's free-space histogram.
// Does nothing unless FREE_USAGE_STATS is defined.
//
// This is only for items large enough to be on the FL (free_size >= min_free_list).
// Ideally we should keep track of smaller ones too but for now
// it's easier to make the accounting right.
void gc_heap::add_gen_free (int gen_number, size_t free_size)
{
#ifdef FREE_USAGE_STATS
    dprintf (3, ("adding free size %zd to gen%d", free_size, gen_number));
    if (free_size < min_free_list)
        return;

    generation* gen = generation_of (gen_number);
    int i = find_bucket (free_size);

    (gen->gen_free_spaces[i])++;
    if (gen_number == max_generation)
    {
        dprintf (3, ("Mb b%d: f+ %zd (%zd)",
            i, free_size, gen->gen_free_spaces[i]));
    }
#else
    UNREFERENCED_PARAMETER(gen_number);
    UNREFERENCED_PARAMETER(free_size);
#endif //FREE_USAGE_STATS
}

// Removes a free space of free_size bytes from gen_number's free-space histogram by
// decrementing the bucket selected by find_bucket. Sizes below min_free_list are ignored.
// Does nothing unless FREE_USAGE_STATS is defined.
void gc_heap::remove_gen_free (int gen_number, size_t free_size)
{
#ifdef FREE_USAGE_STATS
    dprintf (3, ("removing free %zd from gen%d", free_size, gen_number));
    if (free_size < min_free_list)
        return;

    generation* gen = generation_of (gen_number);
    int i = find_bucket (free_size);

    (gen->gen_free_spaces[i])--;
    if (gen_number == max_generation)
    {
        dprintf (3, ("Mb b%d: f- %zd (%zd)",
            i, free_size, gen->gen_free_spaces[i]));
    }
#else
    UNREFERENCED_PARAMETER(gen_number);
    UNREFERENCED_PARAMETER(free_size);
#endif //FREE_USAGE_STATS
}

#ifdef DOUBLY_LINKED_FL
// Decides whether the background GC mark bit should be set for the free space at `o`.
// This is only called on free spaces.
//
// Returns FALSE when no sweep is in progress (current_sweep_seg is null). Otherwise the
// answer depends on where `o` lies relative to the segment currently being swept and to the
// address range saved for the background GC.
BOOL gc_heap::should_set_bgc_mark_bit (uint8_t* o)
{
    if (!current_sweep_seg)
    {
        assert (current_bgc_state == bgc_not_in_process);
        return FALSE;
    }

    // This is cheaper so I am doing this comparison first before having to get the seg for o.
    if (in_range_for_segment (o, current_sweep_seg))
    {
        // The current sweep seg could have free spaces beyond its background_allocated so we need
        // to check for that.
        if ((o < current_sweep_pos) || (o >= heap_segment_background_allocated (current_sweep_seg)))
        {
            return FALSE;
        }

#ifndef USE_REGIONS
        if (current_sweep_seg == saved_sweep_ephemeral_seg)
        {
            return (o < saved_sweep_ephemeral_start);
        }
#endif //!USE_REGIONS

        return TRUE;
    }

    // We can have segments outside the BGC range that were allocated during mark - and we
    // wouldn't have committed the mark array for them and their background_allocated would be
    // non-zero. Don't set mark bits for those.
    // The ones allocated during BGC sweep would have their background_allocated as 0.
    if ((o < background_saved_lowest_address) || (o >= background_saved_highest_address))
    {
        return FALSE;
    }

    heap_segment* seg = seg_mapping_table_segment_of (o);
    uint8_t* bgc_allocated = heap_segment_background_allocated (seg);

    // if bgc_allocated is 0 it means it was allocated during bgc sweep,
    // and everything on it should be considered live.
    if (bgc_allocated == 0)
    {
        return FALSE;
    }

    // During BGC sweep gen1 GCs could add some free spaces in gen2.
    // If we use those, we should not set the mark bits on them.
    // They could either be a newly allocated seg which is covered by the
    // above case; or they are on a seg that's seen but beyond what BGC mark
    // saw.
    if (o >= bgc_allocated)
    {
        return FALSE;
    }

    return (!heap_segment_swept_p (seg));
}
#endif //DOUBLY_LINKED_FL

// Plans an allocation of size bytes in gen for a plug that currently lives at
// old_loc in generation from_gen_number. gen must be the generation directly
// above from_gen_number (asserted below).
//
// Space is searched for in this order:
//   1. the current allocation context of gen
//      [generation_allocation_pointer, generation_allocation_limit);
//   2. the free lists of gen's allocator, starting at the bucket returned by
//      first_suitable_bucket (real_size * 2). With DOUBLY_LINKED_FL, when gen is
//      max_generation the "added" list of each bucket is searched before the
//      regular list;
//   3. the end of the segments of gen ([plan_allocated, committed), growing the
//      segment with grow_heap_segment when the reserved range would fit).
//
// Free list items that do not fit are unlinked and accounted as
// generation_free_obj_space when they are in bucket 0, or (for the regular
// list) when the allocator reports discard_if_no_fit_p ().
//
// Returns the planned address (after any padding), or 0 if no space was found.
uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size,
                                                int from_gen_number,
                                                uint8_t* old_loc REQD_ALIGN_AND_OFFSET_DCL)
{
    size = Align (size);
    assert (size >= Align (min_obj_size));
    assert (from_gen_number < max_generation);
    assert (from_gen_number >= 0);
    assert (generation_of (from_gen_number + 1) == gen);

#ifdef DOUBLY_LINKED_FL
    // The added list is only consulted for max_generation.
    BOOL try_added_list_p = (gen->gen_num == max_generation);
    // Whether we need to decide if the BGC mark bit has to be set for what we
    // allocate out of a free list item.
    BOOL record_free_list_allocated_p = ((gen->gen_num == max_generation) &&
                                         (current_c_gc_state == c_gc_state_planning));
#endif //DOUBLY_LINKED_FL

    allocator* gen_allocator = generation_allocator (gen);
    BOOL discard_p = gen_allocator->discard_if_no_fit_p ();
#ifdef SHORT_PLUGS
    int pad_in_front = ((old_loc != 0) && ((from_gen_number+1) != max_generation)) ? USE_PADDING_FRONT : 0;
#else //SHORT_PLUGS
    int pad_in_front = 0;
#endif //SHORT_PLUGS

    // real_size is only used to pick the first free list bucket to look at:
    // the requested size plus room for a min size object at the tail, plus one
    // in front when front padding is requested.
    size_t real_size = size + Align (min_obj_size);
    if (pad_in_front)
        real_size += Align (min_obj_size);

#ifdef RESPECT_LARGE_ALIGNMENT
    real_size += switch_alignment_size (pad_in_front);
#endif //RESPECT_LARGE_ALIGNMENT

    if (! (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen),
                       generation_allocation_limit (gen), old_loc, USE_PADDING_TAIL | pad_in_front)))
    {
        // The current allocation context can't hold it - go through the free lists.
        for (unsigned int a_l_idx = gen_allocator->first_suitable_bucket(real_size * 2);
             a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++)
        {
            uint8_t* free_list = 0;
            uint8_t* prev_free_item = 0;

            BOOL use_undo_p = !discard_p;

#ifdef DOUBLY_LINKED_FL
            if (a_l_idx == 0)
            {
                use_undo_p = FALSE;
            }

            if (try_added_list_p)
            {
                free_list = gen_allocator->added_alloc_list_head_of (a_l_idx);
                while (free_list != 0)
                {
                    dprintf (3, ("considering free list in added list%zx", (size_t)free_list));

                    size_t free_list_size = unused_array_size (free_list);

                    if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size),
                                    old_loc, USE_PADDING_TAIL | pad_in_front))
                    {
                        dprintf (4, ("F:%zx-%zd",
                                    (size_t)free_list, free_list_size));

                        gen_allocator->unlink_item_no_undo_added (a_l_idx, free_list, prev_free_item);
                        generation_free_list_space (gen) -= free_list_size;
                        assert ((ptrdiff_t)generation_free_list_space (gen) >= 0);

                        remove_gen_free (gen->gen_num, free_list_size);

                        if (record_free_list_allocated_p)
                        {
                            generation_set_bgc_mark_bit_p (gen) = should_set_bgc_mark_bit (free_list);
                            dprintf (3333, ("SFA: %p->%p(%d)", free_list, (free_list + free_list_size),
                                (generation_set_bgc_mark_bit_p (gen) ? 1 : 0)));
                        }
                        // The free item becomes the new allocation context.
                        adjust_limit (free_list, free_list_size, gen);
                        generation_allocate_end_seg_p (gen) = FALSE;

                        goto finished;
                    }
                    // We do first fit on bucket 0 because we are not guaranteed to find a fit there.
                    else if (a_l_idx == 0)
                    {
                        dprintf (3, ("couldn't use this free area, discarding"));
                        generation_free_obj_space (gen) += free_list_size;

                        gen_allocator->unlink_item_no_undo_added (a_l_idx, free_list, prev_free_item);
                        generation_free_list_space (gen) -= free_list_size;
                        assert ((ptrdiff_t)generation_free_list_space (gen) >= 0);

                        remove_gen_free (gen->gen_num, free_list_size);
                    }
                    else
                    {
                        // Item stays on the list, so it becomes the predecessor
                        // of whatever we look at next.
                        prev_free_item = free_list;
                    }
                    free_list = free_list_slot (free_list);
                }
            }
#endif //DOUBLY_LINKED_FL

            free_list = gen_allocator->alloc_list_head_of (a_l_idx);
            prev_free_item = 0;

            while (free_list != 0)
            {
                dprintf (3, ("considering free list %zx", (size_t)free_list));

                size_t free_list_size = unused_array_size (free_list);

                if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size),
                                old_loc, USE_PADDING_TAIL | pad_in_front))
                {
                    dprintf (4, ("F:%zx-%zd",
                                    (size_t)free_list, free_list_size));

                    gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, use_undo_p);
                    generation_free_list_space (gen) -= free_list_size;
                    assert ((ptrdiff_t)generation_free_list_space (gen) >= 0);
                    remove_gen_free (gen->gen_num, free_list_size);

#ifdef DOUBLY_LINKED_FL
                    // Keep track of how much was unlinked without undo info
                    // although the allocator doesn't discard on no fit.
                    if (!discard_p && !use_undo_p)
                    {
                        gen2_removed_no_undo += free_list_size;
                        dprintf (3, ("h%d: remove with no undo %zd = %zd",
                            heap_number, free_list_size, gen2_removed_no_undo));
                    }

                    if (record_free_list_allocated_p)
                    {
                        generation_set_bgc_mark_bit_p (gen) = should_set_bgc_mark_bit (free_list);
                        dprintf (3333, ("SF: %p(%d)", free_list, (generation_set_bgc_mark_bit_p (gen) ? 1 : 0)));
                    }
#endif //DOUBLY_LINKED_FL

                    // The free item becomes the new allocation context.
                    adjust_limit (free_list, free_list_size, gen);
                    generation_allocate_end_seg_p (gen) = FALSE;
                    goto finished;
                }
                // We do first fit on bucket 0 because we are not guaranteed to find a fit there.
                else if (discard_p || (a_l_idx == 0))
                {
                    dprintf (3, ("couldn't use this free area, discarding"));
                    generation_free_obj_space (gen) += free_list_size;

                    gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
                    generation_free_list_space (gen) -= free_list_size;
                    assert ((ptrdiff_t)generation_free_list_space (gen) >= 0);
                    remove_gen_free (gen->gen_num, free_list_size);

#ifdef DOUBLY_LINKED_FL
                    if (!discard_p)
                    {
                        gen2_removed_no_undo += free_list_size;
                        dprintf (3, ("h%d: b0 remove with no undo %zd = %zd",
                            heap_number, free_list_size, gen2_removed_no_undo));
                    }
#endif //DOUBLY_LINKED_FL
                }
                else
                {
                    prev_free_item = free_list;
                }
                free_list = free_list_slot (free_list);
            }
        }

        // Nothing on the free lists fit - try the space at the end of the segments.
#ifdef USE_REGIONS
        // We don't want to always go back to the first region since there might be many.
        heap_segment* seg = generation_allocation_segment (gen);
        dprintf (3, ("end of seg, starting from alloc seg %p", heap_segment_mem (seg)));
        assert (seg != ephemeral_heap_segment);
        while (true)
#else
        //go back to the beginning of the segment list
        heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
        if (seg != generation_allocation_segment (gen))
        {
            leave_allocation_segment (gen);
            generation_allocation_segment (gen) = seg;
        }
        while (seg != ephemeral_heap_segment)
#endif //USE_REGIONS
        {
            if (size_fit_p(size REQD_ALIGN_AND_OFFSET_ARG, heap_segment_plan_allocated (seg),
                           heap_segment_committed (seg), old_loc, USE_PADDING_TAIL | pad_in_front))
            {
                // Fits in what's already committed; the whole committed tail
                // becomes the allocation context.
                adjust_limit (heap_segment_plan_allocated (seg),
                              (heap_segment_committed (seg) - heap_segment_plan_allocated (seg)),
                              gen);
                generation_allocate_end_seg_p (gen) = TRUE;
                heap_segment_plan_allocated (seg) =
                    heap_segment_committed (seg);
                dprintf (3, ("seg %p is used for end of seg alloc", heap_segment_mem (seg)));
                goto finished;
            }
            else
            {
                // Would fit in the reserved range - see if the segment can be grown.
                if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, heap_segment_plan_allocated (seg),
                                heap_segment_reserved (seg), old_loc, USE_PADDING_TAIL | pad_in_front) &&
                    grow_heap_segment (seg, heap_segment_plan_allocated (seg), old_loc, size, pad_in_front REQD_ALIGN_AND_OFFSET_ARG))
                {
                    adjust_limit (heap_segment_plan_allocated (seg),
                                  (heap_segment_committed (seg) - heap_segment_plan_allocated (seg)),
                                  gen);
                    generation_allocate_end_seg_p (gen) = TRUE;
                    heap_segment_plan_allocated (seg) =
                        heap_segment_committed (seg);
                    dprintf (3, ("seg %p is used for end of seg alloc after grow, %p",
                        heap_segment_mem (seg), heap_segment_committed (seg)));

                    goto finished;
                }
                else
                {
                    // This segment can't be used, move on to the next one.
                    leave_allocation_segment (gen);
                    heap_segment*   next_seg = heap_segment_next_rw (seg);

#ifdef USE_REGIONS
                    assert (next_seg != ephemeral_heap_segment);
#endif //USE_REGIONS

                    if (next_seg)
                    {
                        generation_allocation_segment (gen) = next_seg;
                        generation_allocation_pointer (gen) = heap_segment_mem (next_seg);
                        generation_allocation_limit (gen) = generation_allocation_pointer (gen);
                        dprintf (3, ("alloc region advanced to %p", heap_segment_mem (next_seg)));
                    }
                    else
                    {
                        // Ran out of segments; size 0 signals failure below.
                        size = 0;
                        goto finished;
                    }
                }
            }
            seg = generation_allocation_segment (gen);
        }
        //No need to fix the last region. Will be done later
        size = 0;
        goto finished;
    }

finished:
    if (0 == size)
    {
        return 0;
    }
    else
    {
        uint8_t*  result = generation_allocation_pointer (gen);
        size_t pad = 0;

#ifdef SHORT_PLUGS
        // Front padding is only applied when we are at the very start of the
        // current start region or at least DESIRED_PLUG_LENGTH beyond it.
        if ((pad_in_front & USE_PADDING_FRONT) &&
            (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) ||
             ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH)))
        {
            pad = Align (min_obj_size);
            set_plug_padded (old_loc);
        }
#endif //SHORT_PLUGS

#ifdef FEATURE_STRUCTALIGN
        _ASSERTE(!old_loc || alignmentOffset != 0);
        _ASSERTE(old_loc || requiredAlignment == DATA_ALIGNMENT);
        if (old_loc != 0)
        {
            size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset);
            set_node_aligninfo (old_loc, requiredAlignment, pad1);
            pad += pad1;
        }
#else // FEATURE_STRUCTALIGN
        // Add padding if the new location doesn't have the same large
        // alignment as the old one.
        if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad)))
        {
            pad += switch_alignment_size (pad != 0);
            set_node_realigned (old_loc);
            dprintf (3, ("Allocation realignment old_loc: %zx, new_loc:%zx",
                         (size_t)old_loc, (size_t)(result+pad)));
            assert (same_large_alignment_p (result + pad, old_loc));
        }
#endif // FEATURE_STRUCTALIGN
        dprintf (3, ("Allocate %zd bytes", size));

        if ((old_loc == 0) || (pad != 0))
        {
            //allocating a non plug or a gap, so reset the start region
            generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen);
        }

        generation_allocation_pointer (gen) += size + pad;
        assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen));

        // Padding is accounted as free object space.
        generation_free_obj_space (gen) += pad;

        if (generation_allocate_end_seg_p (gen))
        {
            generation_end_seg_allocated (gen) += size;
        }
        else
        {
#ifdef DOUBLY_LINKED_FL
            if (generation_set_bgc_mark_bit_p (gen))
            {
                dprintf (2, ("IOM: %p(->%p(%zd) (%zx-%zx)", old_loc, result, pad,
                        (size_t)(&mark_array [mark_word_of (result)]),
                        (size_t)(mark_array [mark_word_of (result)])));

                set_plug_bgc_mark_bit (old_loc);
            }

            generation_last_free_list_allocated (gen) = old_loc;
#endif //DOUBLY_LINKED_FL

            generation_free_list_allocated (gen) += size;
        }
        generation_allocation_size (gen) += size;

        dprintf (3, ("aio: ptr: %p, limit: %p, sr: %p",
            generation_allocation_pointer (gen), generation_allocation_limit (gen),
            generation_allocation_context_start_region (gen)));

        return (result + pad);
    }
}

#ifndef USE_REGIONS
// Gives every generation below max_generation a planned allocation start if
// it doesn't have one yet, then records what is left unused in consing_gen's
// allocation context: either by pulling back the segment's plan_allocated or
// by storing it as the length in front of the pinned plug at the limit.
void gc_heap::repair_allocation_in_expanded_heap (generation* consing_gen)
{
    // Walk the generations from (max_generation - 1) down to 0.
    for (int gen_idx = max_generation - 1; gen_idx >= 0; gen_idx--)
    {
        generation* cur_gen = generation_of (gen_idx);
        if (generation_plan_allocation_start (cur_gen) != 0)
        {
            continue;
        }

        realloc_plan_generation_start (cur_gen, consing_gen);

        assert (generation_plan_allocation_start (cur_gen));
    }

    // Only now is the planned allocation size known.
    size_t  unused_size = (generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen));
    heap_segment* alloc_seg = generation_allocation_segment (consing_gen);

    if (generation_allocation_limit (consing_gen) != heap_segment_plan_allocated (alloc_seg))
    {
        assert (settings.condemned_generation == max_generation);
        uint8_t* limit_address = generation_allocation_limit (consing_gen);

        // The limit is not the end of the planned space, so look for the
        // pinned plug that starts exactly at the limit.
        size_t pin_idx = 0;
        mark* pin_entry = 0;
        for (; pin_idx != mark_stack_tos; pin_idx++)
        {
            pin_entry = pinned_plug_of (pin_idx);
            if ((pinned_plug (pin_entry) == limit_address))
            {
                break;
            }
        }
        assert (pin_idx != mark_stack_tos);
        pinned_len (pin_entry) = unused_size;
    }
    else if (unused_size != 0)
    {
        // The context ends at plan_allocated; give back what wasn't used.
        heap_segment_plan_allocated (alloc_seg) = generation_allocation_pointer (consing_gen);
    }
}

// Plans a location for a plug of size bytes (currently at old_loc) on the
// allocation segment of gen.
//
// - When both consider_bestfit and use_bestfit are set, the decision is
//   delegated to bestfit_seg->fit.
// - Otherwise the current allocation context of gen is tried first; if the
//   plug fits there adjacentp is set to TRUE.
// - If it doesn't fit, adjacentp is set to FALSE and the free spaces in front
//   of the pinned plugs on the segment are tried, and finally the space
//   between plan_allocated and committed at the end of the segment.
//
// adjacentp is also set to FALSE when alignment padding had to be inserted.
// Returns the planned address (after padding), or 0 when nothing fits (which
// is asserted against).
//tododefrag optimize for new segment (plan_allocated == mem)
uint8_t* gc_heap::allocate_in_expanded_heap (generation* gen,
                                          size_t size,
                                          BOOL& adjacentp,
                                          uint8_t* old_loc,
#ifdef SHORT_PLUGS
                                          BOOL set_padding_on_saved_p,
                                          mark* pinned_plug_entry,
#endif //SHORT_PLUGS
                                          BOOL consider_bestfit,
                                          int active_new_gen_number
                                          REQD_ALIGN_AND_OFFSET_DCL)
{
    dprintf (3, ("aie: P: %p, size: %zx", old_loc, size));

    size = Align (size);
    assert (size >= Align (min_obj_size));
#ifdef SHORT_PLUGS
    // Front padding is only requested for real plugs that don't go into max_generation.
    int pad_in_front = ((old_loc != 0) && (active_new_gen_number != max_generation)) ? USE_PADDING_FRONT : 0;
#else //SHORT_PLUGS
    int pad_in_front = 0;
#endif //SHORT_PLUGS

    if (consider_bestfit && use_bestfit)
    {
        assert (bestfit_seg);
        dprintf (SEG_REUSE_LOG_1, ("reallocating 0x%p in expanded heap, size: %zd",
                    old_loc, size));
        return bestfit_seg->fit (old_loc,
                                 size REQD_ALIGN_AND_OFFSET_ARG);
    }

    heap_segment* seg = generation_allocation_segment (gen);

    // Tail padding is not requested when the allocation limit is the
    // segment's plan_allocated.
    if (! (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen),
                       generation_allocation_limit (gen), old_loc,
                       ((generation_allocation_limit (gen) !=
                          heap_segment_plan_allocated (seg))? USE_PADDING_TAIL : 0) | pad_in_front)))
    {
        dprintf (3, ("aie: can't fit: ptr: %p, limit: %p", generation_allocation_pointer (gen),
            generation_allocation_limit (gen)));

        adjacentp = FALSE;
        // Start looking from the current limit, or from the beginning of the
        // segment if no limit has been set.
        uint8_t* first_address = (generation_allocation_limit (gen) ?
                               generation_allocation_limit (gen) :
                               heap_segment_mem (seg));
        assert (in_range_for_segment (first_address, seg));

        uint8_t* end_address   = heap_segment_reserved (seg);

        dprintf (3, ("aie: first_addr: %p, gen alloc limit: %p, end_address: %p",
            first_address, generation_allocation_limit (gen), end_address));

        // mi indexes the pinned plug entries; m is the entry at mi.
        size_t mi = 0;
        mark* m = 0;

        if (heap_segment_allocated (seg) != heap_segment_mem (seg))
        {
            assert (settings.condemned_generation == max_generation);
            //look through the pinned plugs for relevant ones.
            //Look for the right pinned plug to start from.
            while (mi != mark_stack_tos)
            {
                m = pinned_plug_of (mi);
                if ((pinned_plug (m) >= first_address) && (pinned_plug (m) < end_address))
                {
                    dprintf (3, ("aie: found pin: %p", pinned_plug (m)));
                    break;
                }
                else
                    mi++;
            }
            if (mi != mark_stack_tos)
            {
                //fix old free list.
                // hsize is what's left unused in the current allocation context.
                size_t  hsize = (generation_allocation_limit (gen) - generation_allocation_pointer (gen));
                {
                    dprintf(3,("gc filling up hole"));
                    // Search backwards from mi for the pinned plug that
                    // starts at the current allocation limit.
                    ptrdiff_t mi1 = (ptrdiff_t)mi;
                    while ((mi1 >= 0) &&
                           (pinned_plug (pinned_plug_of(mi1)) != generation_allocation_limit (gen)))
                    {
                        dprintf (3, ("aie: checking pin %p", pinned_plug (pinned_plug_of(mi1))));
                        mi1--;
                    }
                    if (mi1 >= 0)
                    {
                        // Found it - the length in front of that pin becomes
                        // the unused remainder.
                        size_t saved_pinned_len = pinned_len (pinned_plug_of(mi1));
                        pinned_len (pinned_plug_of(mi1)) = hsize;
                        dprintf (3, ("changing %p len %zx->%zx",
                            pinned_plug (pinned_plug_of(mi1)),
                            saved_pinned_len, pinned_len (pinned_plug_of(mi1))));
                    }
                }
            }
        }
        else
        {
            // Nothing is allocated on this segment; skip the pinned plug
            // search below.
            assert (generation_allocation_limit (gen) ==
                    generation_allocation_pointer (gen));
            mi = mark_stack_tos;
        }

        // Try the free space in front of each remaining pinned plug on this segment.
        while ((mi != mark_stack_tos) && in_range_for_segment (pinned_plug (m), seg))
        {
            size_t len = pinned_len (m);
            uint8_t*  free_list = (pinned_plug (m) - len);
            dprintf (3, ("aie: testing free item: %p->%p(%zx)",
                free_list, (free_list + len), len));
            if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + len), old_loc, USE_PADDING_TAIL | pad_in_front))
            {
                dprintf (3, ("aie: Found adequate unused area: %zx, size: %zd",
                            (size_t)free_list, len));
                {
                    // This free space becomes the new allocation context.
                    generation_allocation_pointer (gen) = free_list;
                    generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen);
                    generation_allocation_limit (gen) = (free_list + len);
                }
                goto allocate_in_free;
            }
            mi++;
            m = pinned_plug_of (mi);
        }

        //switch to the end of the segment.
        // The new context is [plan_allocated, committed) and plan_allocated
        // is moved up to committed.
        generation_allocation_pointer (gen) = heap_segment_plan_allocated (seg);
        generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen);
        heap_segment_plan_allocated (seg) = heap_segment_committed (seg);
        generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
        dprintf (3, ("aie: switching to end of seg: %p->%p(%zx)",
            generation_allocation_pointer (gen), generation_allocation_limit (gen),
            (generation_allocation_limit (gen) - generation_allocation_pointer (gen))));

        if (!size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen),
                         generation_allocation_limit (gen), old_loc, USE_PADDING_TAIL | pad_in_front))
        {
            dprintf (3, ("aie: ptr: %p, limit: %p, can't alloc", generation_allocation_pointer (gen),
                generation_allocation_limit (gen)));
            assert (!"Can't allocate if no free space");
            return 0;
        }
    }
    else
    {
        // Fits in the current allocation context.
        adjacentp = TRUE;
    }

allocate_in_free:
    {
        // Allocate at the current allocation pointer, after any padding.
        uint8_t*  result = generation_allocation_pointer (gen);
        size_t pad = 0;

#ifdef SHORT_PLUGS
        // Front padding is only applied when we are at the very start of the
        // current start region or at least DESIRED_PLUG_LENGTH beyond it.
        if ((pad_in_front & USE_PADDING_FRONT) &&
            (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) ||
             ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH)))

        {
            pad = Align (min_obj_size);
            set_padding_in_expand (old_loc, set_padding_on_saved_p, pinned_plug_entry);
        }
#endif //SHORT_PLUGS

#ifdef FEATURE_STRUCTALIGN
        _ASSERTE(!old_loc || alignmentOffset != 0);
        _ASSERTE(old_loc || requiredAlignment == DATA_ALIGNMENT);
        if (old_loc != 0)
        {
            size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset);
            set_node_aligninfo (old_loc, requiredAlignment, pad1);
            pad += pad1;
            adjacentp = FALSE;
        }
#else // FEATURE_STRUCTALIGN
        // Add padding if the new location doesn't have the same large
        // alignment as the old one.
        if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad)))
        {
            pad += switch_alignment_size (pad != 0);
            set_node_realigned (old_loc);
            dprintf (3, ("Allocation realignment old_loc: %zx, new_loc:%zx",
                         (size_t)old_loc, (size_t)(result+pad)));
            assert (same_large_alignment_p (result + pad, old_loc));
            adjacentp = FALSE;
        }
#endif // FEATURE_STRUCTALIGN

        if ((old_loc == 0) || (pad != 0))
        {
            //allocating a non plug or a gap, so reset the start region
            generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen);
        }

        generation_allocation_pointer (gen) += size + pad;
        assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen));
        dprintf (3, ("Allocated in expanded heap %zx:%zd", (size_t)(result+pad), size));

        dprintf (3, ("aie: ptr: %p, limit: %p, sr: %p",
            generation_allocation_pointer (gen), generation_allocation_limit (gen),
            generation_allocation_context_start_region (gen)));

        return result + pad;
    }
}

// Makes sure the consing generation allocates on ephemeral_heap_segment.
// If consing_gen already does, it is returned as is. Otherwise the planned
// allocation of its current segment is closed off at the allocation pointer
// and generation (max_generation - 1) is returned with its allocation context
// reset to the start of the ephemeral segment.
generation*  gc_heap::ensure_ephemeral_heap_segment (generation* consing_gen)
{
    heap_segment* cur_seg = generation_allocation_segment (consing_gen);
    if (cur_seg == ephemeral_heap_segment)
    {
        return consing_gen;
    }

    assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (cur_seg));
    assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (cur_seg));

    // Fix the allocated size of the segment we are leaving.
    heap_segment_plan_allocated (cur_seg) = generation_allocation_pointer (consing_gen);

    // Start an empty allocation context at the beginning of the ephemeral segment.
    generation* ephemeral_consing_gen = generation_of (max_generation - 1);
    uint8_t* ephemeral_start = heap_segment_mem (ephemeral_heap_segment);
    generation_allocation_pointer (ephemeral_consing_gen) = ephemeral_start;
    generation_allocation_limit (ephemeral_consing_gen) = ephemeral_start;
    generation_allocation_context_start_region (ephemeral_consing_gen) = ephemeral_start;
    generation_allocation_segment (ephemeral_consing_gen) = ephemeral_heap_segment;

    return ephemeral_consing_gen;
}
#endif //!USE_REGIONS

// Points gen's allocation context at seg, with the allocation pointer, limit
// and context start region all set to the start of the segment's memory.
inline
void gc_heap::init_alloc_info (generation* gen, heap_segment* seg)
{
    uint8_t* seg_start = heap_segment_mem (seg);

    generation_allocation_segment (gen) = seg;
    generation_allocation_pointer (gen) = seg_start;
    generation_allocation_limit (gen) = seg_start;
    generation_allocation_context_start_region (gen) = seg_start;
}

// Returns the segment gen should allocate in next.
// Without USE_REGIONS this is simply gen's current allocation segment.
// With USE_REGIONS, starts at the current allocation region and takes what
// heap_segment_non_sip returns for it; when that yields nothing, continues
// with the start region of the next lower generation. If the region changed,
// gen's allocation context is re-initialized on the new region.
inline
heap_segment* gc_heap::get_next_alloc_seg (generation* gen)
{
#ifndef USE_REGIONS
    return generation_allocation_segment (gen);
#else
    heap_segment* const starting_region = generation_allocation_segment (gen);
    int cur_gen_num = heap_segment_gen_num (starting_region);

    heap_segment* candidate = starting_region;

    for (;;)
    {
        candidate = heap_segment_non_sip (candidate);
        if (candidate)
        {
            break;
        }

        if (cur_gen_num <= 0)
        {
            assert (!"ran out regions when getting the next alloc seg!");
            continue;
        }

        // Nothing usable left in this generation, move on to the next lower one.
        cur_gen_num--;
        candidate = generation_start_segment (generation_of (cur_gen_num));
        dprintf (REGIONS_LOG, ("h%d next alloc region: switching to next gen%d start %zx(%p)",
            heap_number, heap_segment_gen_num (candidate), (size_t)candidate,
            heap_segment_mem (candidate)));
    }

    if (candidate != starting_region)
    {
        dprintf (REGIONS_LOG, ("init allocate region for gen%d to %p(%d)",
            gen->gen_num, heap_segment_mem (candidate), heap_segment_gen_num (candidate)));
        init_alloc_info (gen, candidate);
    }

    return candidate;
#endif //!USE_REGIONS
}

// Returns true only when both ratios are above their thresholds:
// pin_frag_ratio above 0.15 and pin_surv_ratio above 0.30.
bool gc_heap::decide_on_gen1_pin_promotion (float pin_frag_ratio, float pin_surv_ratio)
{
    if (!(pin_frag_ratio > 0.15))
    {
        return false;
    }

    return (pin_surv_ratio > 0.30);
}

// Attributes the size (len) of the pinned plug to the pinned allocation
// counters of the higher generation. Nothing is recorded when the plug is
// already in max_generation or when this GC is not promoting.
void gc_heap::attribute_pin_higher_gen_alloc (
#ifdef USE_REGIONS
                                              heap_segment* seg, int to_gen_number,
#endif
                                              uint8_t* plug, size_t len)
{
    // The generation this pinned plug came from.
    int from_gen = object_gennum (plug);
    if ((from_gen == (int)max_generation) || !settings.promotion)
    {
        return;
    }

    generation_pinned_allocation_sweep_size (generation_of (from_gen + 1)) += len;

    int planned_gen;
#ifdef USE_REGIONS
    // With regions it's a bit more complicated since we only set the plan_gen_num
    // of a region after we've planned it. This means if the pinned plug is in
    // the same seg we are planning, we haven't set its plan_gen_num yet. So we
    // need to check for that first.
    if (in_range_for_segment (plug, seg))
    {
        planned_gen = to_gen_number;
    }
    else
    {
        planned_gen = object_gennum_plan (plug);
    }
#else
    planned_gen = object_gennum_plan (plug);
#endif //USE_REGIONS

    if (from_gen < planned_gen)
    {
        generation_pinned_allocation_compact_size (generation_of (planned_gen)) += len;
    }
}

#ifdef USE_REGIONS
// Overload for callers that already know the generation the pinned plug came
// from (frgn) and the generation it is planned into (togn). Nothing is
// recorded when frgn is max_generation or when this GC is not promoting.
void gc_heap::attribute_pin_higher_gen_alloc (int frgn, int togn, size_t len)
{
    if ((frgn == (int)max_generation) || !settings.promotion)
    {
        return;
    }

    generation_pinned_allocation_sweep_size (generation_of (frgn + 1)) += len;

    if (togn > frgn)
    {
        generation_pinned_allocation_compact_size (generation_of (togn)) += len;
    }
}
#endif //USE_REGIONS

// Finds room for a plug of `size` bytes (rounded up with Align) in the allocation
// context of `gen` and returns the address the plug is planned at, including any
// padding that was inserted in front of it.
//
// Parameters:
//   gen                  - generation whose allocation pointer/limit/segment are used and advanced.
//   size                 - requested size in bytes; aligned before use.
//   from_gen_number      - generation the plug comes from. -1 is accepted, in which case
//                          the per-generation allocation counters are not updated.
//   convert_to_pinned_p  - (SHORT_PLUGS only) set to TRUE when the plug would end up too
//                          close to next_pinned_plug; the function then returns 0.
//   next_pinned_plug     - (SHORT_PLUGS only) next pinned plug after the one being allocated, or 0.
//   current_seg          - (SHORT_PLUGS only) the next_pinned_plug check only applies while the
//                          allocation segment is this segment.
//   old_loc              - current address of the plug, or 0 when the allocation is not for a plug.
//   REQD_ALIGN_AND_OFFSET_DCL - presumably declares requiredAlignment/alignmentOffset under
//                          FEATURE_STRUCTALIGN (both are used below) - confirm against the macro.
//
// Returns:
//   result + pad on success. 0 when, without USE_REGIONS, there is no further segment to
//   allocate in, or when (SHORT_PLUGS) *convert_to_pinned_p was set to TRUE.
uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen,
                                                  size_t size,
                                                  int from_gen_number,
#ifdef SHORT_PLUGS
                                                  BOOL* convert_to_pinned_p,
                                                  uint8_t* next_pinned_plug,
                                                  heap_segment* current_seg,
#endif //SHORT_PLUGS
                                                  uint8_t* old_loc
                                                  REQD_ALIGN_AND_OFFSET_DCL)
{
#ifndef USE_REGIONS
    // Make sure that the youngest generation gap hasn't been allocated
    if (settings.promotion)
    {
        assert (generation_plan_allocation_start (youngest_generation) == 0);
    }
#endif //!USE_REGIONS

    size = Align (size);
    assert (size >= Align (min_obj_size));
    // When promoting, anything not already in max_generation goes to the next older generation.
    int to_gen_number = from_gen_number;
    if (from_gen_number != (int)max_generation)
    {
        to_gen_number = from_gen_number + (settings.promotion ? 1 : 0);
    }

    dprintf (3, ("aic gen%d: s: %zd, ac: %p-%p", gen->gen_num, size,
            generation_allocation_pointer (gen), generation_allocation_limit (gen)));

#ifdef SHORT_PLUGS
    // Front padding is only requested for real plugs (old_loc != 0) that do not go to max_generation.
    int pad_in_front = ((old_loc != 0) && (to_gen_number != max_generation)) ? USE_PADDING_FRONT : 0;
#else //SHORT_PLUGS
    int pad_in_front = 0;
#endif //SHORT_PLUGS

    // Account for the allocation in the generation being promoted into.
    if ((from_gen_number != -1) && (from_gen_number != (int)max_generation) && settings.promotion)
    {
        generation_condemned_allocated (generation_of (from_gen_number + (settings.promotion ? 1 : 0))) += size;
        generation_allocation_size (generation_of (from_gen_number + (settings.promotion ? 1 : 0))) += size;
    }
    // Each pass through retry either finds that the request fits between the allocation
    // pointer and limit (and falls through to the allocation below), or changes the
    // allocation context in one of the ways handled here and jumps back.
retry:
    {
        heap_segment* seg = get_next_alloc_seg (gen);
        // The tail padding flag is only passed while the limit is not the segment's plan_allocated.
        if (! (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen),
                           generation_allocation_limit (gen), old_loc,
                           ((generation_allocation_limit (gen) != heap_segment_plan_allocated (seg))?USE_PADDING_TAIL:0)|pad_in_front)))
        {
            // Case 1: the limit is the oldest pinned plug - dequeue that pin and continue
            // allocating right after the pinned plug.
            if ((! (pinned_plug_que_empty_p()) &&
                 (generation_allocation_limit (gen) ==
                  pinned_plug (oldest_pin()))))
            {
                size_t entry = deque_pinned_plug();
                mark* pinned_plug_entry = pinned_plug_of (entry);
                // NOTE(review): len is captured before set_new_pin_info; the log below prints it
                // next to the post-call pinned_len, so the call presumably rewrites that field - confirm.
                size_t len = pinned_len (pinned_plug_entry);
                uint8_t* plug = pinned_plug (pinned_plug_entry);
                set_new_pin_info (pinned_plug_entry, generation_allocation_pointer (gen));

#ifdef USE_REGIONS
                if (to_gen_number == 0)
                {
                    update_planned_gen0_free_space (pinned_len (pinned_plug_entry), plug);
                    dprintf (REGIONS_LOG, ("aic: not promotion, gen0 added free space %zd at %p",
                                    pinned_len (pinned_plug_entry), plug));
                }
#endif //USE_REGIONS

#ifdef FREE_USAGE_STATS
                generation_allocated_in_pinned_free (gen) += generation_allocated_since_last_pin (gen);
                dprintf (3, ("allocated %zd so far within pin %zx, total->%zd",
                    generation_allocated_since_last_pin (gen),
                    plug,
                    generation_allocated_in_pinned_free (gen)));
                generation_allocated_since_last_pin (gen) = 0;

                add_item_to_current_pinned_free (gen->gen_num, pinned_len (pinned_plug_of (entry)));
#endif //FREE_USAGE_STATS

                dprintf (3, ("mark stack bos: %zd, tos: %zd, aic: p %p len: %zx->%zx",
                    mark_stack_bos, mark_stack_tos, plug, len, pinned_len (pinned_plug_of (entry))));

                assert(mark_stack_array[entry].len == 0 ||
                       mark_stack_array[entry].len >= Align(min_obj_size));
                // Resume right after the pinned plug; the limit is reset to plan_allocated and
                // set_allocator_next_pin then gets a chance to adjust it for the next pin.
                generation_allocation_pointer (gen) = plug + len;
                generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen);
                generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
                set_allocator_next_pin (gen);
                attribute_pin_higher_gen_alloc (
#ifdef USE_REGIONS
                                                seg, to_gen_number,
#endif
                                                plug, len);

                goto retry;
            }

            // Case 2: the limit is short of the segment's plan_allocated - extend it to there.
            if (generation_allocation_limit (gen) != heap_segment_plan_allocated (seg))
            {
                generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
                dprintf (3, ("changed limit to plan alloc: %p", generation_allocation_limit (gen)));
            }
            else
            {
                // Case 3: plan_allocated is short of committed - extend both to committed.
                if (heap_segment_plan_allocated (seg) != heap_segment_committed (seg))
                {
                    heap_segment_plan_allocated (seg) = heap_segment_committed (seg);
                    generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
                    dprintf (3, ("changed limit to commit: %p", generation_allocation_limit (gen)));
                }
                else
                {
#if !defined(RESPECT_LARGE_ALIGNMENT) && !defined(USE_REGIONS)
                    assert (gen != youngest_generation);
#endif //!RESPECT_LARGE_ALIGNMENT && !USE_REGIONS

                    // Case 4: all committed space is used. If the request would fit within the
                    // reserved range, try to grow the segment.
                    if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen),
                                    heap_segment_reserved (seg), old_loc, USE_PADDING_TAIL | pad_in_front) &&
                        (grow_heap_segment (seg, generation_allocation_pointer (gen), old_loc,
                                            size, pad_in_front REQD_ALIGN_AND_OFFSET_ARG)))
                    {
                        dprintf (3, ("Expanded segment allocation by committing more memory"));
                        heap_segment_plan_allocated (seg) = heap_segment_committed (seg);
                        generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
                    }
                    else
                    {
                        // Case 5: this segment cannot take the request - close it out at the
                        // current allocation pointer and move on to the next segment.
                        heap_segment*   next_seg = heap_segment_next (seg);
                        dprintf (REGIONS_LOG, ("aic next: %p(%p,%p) -> %p(%p,%p)",
                            heap_segment_mem (seg), heap_segment_allocated (seg), heap_segment_plan_allocated (seg),
                            (next_seg ? heap_segment_mem (next_seg) : 0),
                            (next_seg ? heap_segment_allocated (next_seg) : 0),
                            (next_seg ? heap_segment_plan_allocated (next_seg) : 0)));
                        assert (generation_allocation_pointer (gen)>=
                                heap_segment_mem (seg));
                        // Verify that all pinned plugs for this segment are consumed
                        if (!pinned_plug_que_empty_p() &&
                            ((pinned_plug (oldest_pin()) < heap_segment_allocated (seg)) &&
                             (pinned_plug (oldest_pin()) >= generation_allocation_pointer (gen))))
                        {
                            LOG((LF_GC, LL_INFO10, "remaining pinned plug %zx while leaving segment on allocation",
                                         pinned_plug (oldest_pin())));
                            FATAL_GC_ERROR();
                        }
                        assert (generation_allocation_pointer (gen)>=
                                heap_segment_mem (seg));
                        assert (generation_allocation_pointer (gen)<=
                                heap_segment_committed (seg));
                        heap_segment_plan_allocated (seg) = generation_allocation_pointer (gen);

#ifdef USE_REGIONS
                        set_region_plan_gen_num (seg, to_gen_number);
                        if ((next_seg == 0) && (heap_segment_gen_num (seg) > 0))
                        {
                            // We need to switch to a younger gen's segments so the allocate seg will be in
                            // sync with the pins.
                            next_seg = generation_start_segment (generation_of (heap_segment_gen_num (seg) - 1));
                            dprintf (REGIONS_LOG, ("h%d aic: switching to next gen%d start %zx(%p)",
                                heap_number, heap_segment_gen_num (next_seg), (size_t)next_seg,
                                heap_segment_mem (next_seg)));
                        }
#endif //USE_REGIONS

                        if (next_seg)
                        {
                            init_alloc_info (gen, next_seg);
                        }
                        else
                        {
#ifdef USE_REGIONS
                            assert (!"should not happen for regions!");
#else
                            return 0; //should only happen during allocation of generation 0 gap
                            // in that case we are going to grow the heap anyway
#endif //USE_REGIONS
                        }
                    }
                }
            }
            set_allocator_next_pin (gen);

            goto retry;
        }
    }

    // The request fits at the current allocation pointer. Work out how much padding goes
    // in front of it, then advance the pointer.
    {
        assert (generation_allocation_pointer (gen)>=
                heap_segment_mem (generation_allocation_segment (gen)));
        uint8_t* result = generation_allocation_pointer (gen);
        size_t pad = 0;
#ifdef SHORT_PLUGS
        // Front padding is applied when the allocation is at the very start of the context's
        // start region, or at least DESIRED_PLUG_LENGTH bytes past it.
        if ((pad_in_front & USE_PADDING_FRONT) &&
            (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) ||
             ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH)))
        {
            ptrdiff_t dist = old_loc - result;
            if (dist == 0)
            {
                dprintf (3, ("old alloc: %p, same as new alloc, not padding", old_loc));
                pad = 0;
            }
            else
            {
                // A plug that would move forward by less than a minimum object is treated as fatal.
                if ((dist > 0) && (dist < (ptrdiff_t)Align (min_obj_size)))
                {
                    dprintf (1, ("old alloc: %p, only %zd bytes > new alloc! Shouldn't happen", old_loc, dist));
                    FATAL_GC_ERROR();
                }

                pad = Align (min_obj_size);
                set_plug_padded (old_loc);
            }
        }
#endif //SHORT_PLUGS
#ifdef FEATURE_STRUCTALIGN
        _ASSERTE(!old_loc || alignmentOffset != 0);
        _ASSERTE(old_loc || requiredAlignment == DATA_ALIGNMENT);
        if ((old_loc != 0))
        {
            size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset);
            set_node_aligninfo (old_loc, requiredAlignment, pad1);
            pad += pad1;
        }
#else // FEATURE_STRUCTALIGN
        // Add extra padding when the new location does not have the same large alignment as old_loc.
        if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad)))
        {
            pad += switch_alignment_size (pad != 0);
            set_node_realigned(old_loc);
            dprintf (3, ("Allocation realignment old_loc: %zx, new_loc:%zx",
                         (size_t)old_loc, (size_t)(result+pad)));
            assert (same_large_alignment_p (result + pad, old_loc));
        }
#endif // FEATURE_STRUCTALIGN

#ifdef SHORT_PLUGS
        // If padding would leave less than a minimum object between the end of this plug and
        // the next pinned plug, undo the padding and tell the caller to convert the plug to a
        // pinned one instead of allocating it.
        if ((next_pinned_plug != 0) && (pad != 0) && (generation_allocation_segment (gen) == current_seg))
        {
            assert (old_loc != 0);
            ptrdiff_t dist_to_next_pin = (ptrdiff_t)(next_pinned_plug - (generation_allocation_pointer (gen) + size + pad));
            assert (dist_to_next_pin >= 0);

            if ((dist_to_next_pin >= 0) && (dist_to_next_pin < (ptrdiff_t)Align (min_obj_size)))
            {
                dprintf (3, ("%p->(%p,%p),%p(%zx)(%zx),NP->PP",
                    old_loc,
                    generation_allocation_pointer (gen),
                    generation_allocation_limit (gen),
                    next_pinned_plug,
                    size,
                    dist_to_next_pin));
                clear_plug_padded (old_loc);
                pad = 0;
                *convert_to_pinned_p = TRUE;
                record_interesting_data_point (idp_converted_pin);

                return 0;
            }
        }
#endif //SHORT_PLUGS

        if ((old_loc == 0) || (pad != 0))
        {
            //allocating a non plug or a gap, so reset the start region
            generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen);
        }

        generation_allocation_pointer (gen) += size + pad;
        assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen));

        // Padding is counted as free object space in the destination generation.
        if ((pad > 0) && (to_gen_number >= 0))
        {
            generation_free_obj_space (generation_of (to_gen_number)) += pad;
        }

#ifdef FREE_USAGE_STATS
        generation_allocated_since_last_pin (gen) += size;
#endif //FREE_USAGE_STATS

        dprintf (3, ("aic: old: %p ptr: %p, limit: %p, sr: %p, res: %p, pad: %zd",
            old_loc,
            generation_allocation_pointer (gen), generation_allocation_limit (gen),
            generation_allocation_context_start_region (gen),
            result, (size_t)pad));

        assert (result + pad);
        return result + pad;
    }
}

// Makes the final decision on which generation to condemn, starting from current_gen
// and applying a sequence of policies in order. Each policy that changes the outcome
// records its reason in gc_data_global.gen_to_condemn_reasons.
//
// Parameters:
//   should_evaluate_elevation - when TRUE and the candidate is max_generation, the elevation
//                               lock logic below is applied; otherwise the lock state is reset.
//   initial_gen               - generation originally asked for; consulted in provisional mode.
//   current_gen               - candidate generation on entry.
//   blocking_collection_p     - in/out; set to TRUE by policies that require a blocking GC and
//                               to FALSE by the DATAS initial gen2 request.
//   n_original                - (STRESS_HEAP only) compared against max_generation to decide
//                               whether concurrent GC stress may raise the generation.
//
// Returns the generation number to condemn.
int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation,
                                           int initial_gen,
                                           int current_gen,
                                           BOOL* blocking_collection_p
                                           STRESS_HEAP_ARG(int n_original))
{
    gc_data_global.gen_to_condemn_reasons.init();
#ifdef BGC_SERVO_TUNING
    // Fill in the entry memory load if it has not been recorded yet.
    if (settings.entry_memory_load == 0)
    {
        uint32_t current_memory_load = 0;
        uint64_t current_available_physical = 0;
        get_memory_info (&current_memory_load, &current_available_physical);

        settings.entry_memory_load = current_memory_load;
        settings.entry_available_physical_mem = current_available_physical;
    }
#endif //BGC_SERVO_TUNING

    int n = current_gen;
    // joined_last_gc_before_oom is TRUE if any heap has last_gc_before_oom set.
#ifdef MULTIPLE_HEAPS
    BOOL joined_last_gc_before_oom = FALSE;
    for (int i = 0; i < n_heaps; i++)
    {
        if (g_heaps[i]->last_gc_before_oom)
        {
            dprintf (GTC_LOG, ("h%d is setting blocking to TRUE", i));
            joined_last_gc_before_oom = TRUE;
            break;
        }
    }
#else
    BOOL joined_last_gc_before_oom = last_gc_before_oom;
#endif //MULTIPLE_HEAPS

    if (joined_last_gc_before_oom && settings.pause_mode != pause_low_latency)
    {
        assert (*blocking_collection_p);
    }

    // Elevation lock: while should_lock_elevation is set, a max_generation request is
    // reduced to max_generation - 1, except for every 6th request, which is let through
    // and resets the count.
    if (should_evaluate_elevation && (n == max_generation))
    {
        dprintf (GTC_LOG, ("lock: %d(%d)",
            (settings.should_lock_elevation ? 1 : 0),
            settings.elevation_locked_count));

        if (settings.should_lock_elevation)
        {
            settings.elevation_locked_count++;
            if (settings.elevation_locked_count == 6)
            {
                settings.elevation_locked_count = 0;
            }
            else
            {
                n = max_generation - 1;
                gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_avoid_unproductive);
                settings.elevation_reduced = TRUE;
            }
        }
        else
        {
            settings.elevation_locked_count = 0;
        }
    }
    else
    {
        settings.should_lock_elevation = FALSE;
        settings.elevation_locked_count = 0;
    }

    // Provisional mode: a max_generation request is reduced to max_generation - 1 unless
    // one of the exceptions below applies.
    if (provisional_mode_triggered && (n == max_generation))
    {
        // There are a few cases where we should not reduce the generation.
        if ((initial_gen == max_generation) || (settings.reason == reason_alloc_loh))
        {
            // If we are doing a full GC in the provisional mode, we always
            // make it blocking because we don't want to get into a situation
            // where foreground GCs are asking for a compacting full GC right away
            // and not getting it.
            dprintf (GTC_LOG, ("full GC induced, not reducing gen"));
            if (initial_gen == max_generation)
            {
                gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_pm_induced_fullgc_p);
            }
            else
            {
                gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_pm_alloc_loh);
            }
            *blocking_collection_p = TRUE;
        }
        else if (
#ifndef USE_REGIONS
                 should_expand_in_full_gc ||
#endif //!USE_REGIONS
                 joined_last_gc_before_oom)
        {
            dprintf (GTC_LOG, ("need full blocking GCs to expand heap or avoid OOM, not reducing gen"));
            assert (*blocking_collection_p);
        }
        else
        {
            dprintf (GTC_LOG, ("reducing gen in PM: %d->%d->%d", initial_gen, n, (max_generation - 1)));
            gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_gen1_in_pm);
            n = max_generation - 1;
        }
    }

#ifndef USE_REGIONS
    // should_expand_in_full_gc has been consulted above; clear it for the next GC.
    if (should_expand_in_full_gc)
    {
        should_expand_in_full_gc = FALSE;
    }
#endif //!USE_REGIONS

    // Hard limit: force a full blocking GC with LOH compaction if this is the last GC
    // before OOM, or if committed memory is at 90% of the limit and LOH looks worth it.
    if (heap_hard_limit)
    {
        // If we have already consumed 90% of the limit, we should check to see if we should compact LOH.
        // TODO: should unify this with gen2.
        dprintf (GTC_LOG, ("committed %zd is %d%% of limit %zd",
            current_total_committed, (int)((float)current_total_committed * 100.0 / (float)heap_hard_limit),
            heap_hard_limit));

        bool full_compact_gc_p = false;

        if (joined_last_gc_before_oom)
        {
            gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_limit_before_oom);
            full_compact_gc_p = true;
        }
        // committed * 10 >= limit * 9 is the 90% test, done in 64-bit integer arithmetic.
        else if (((uint64_t)current_total_committed * (uint64_t)10) >= ((uint64_t)heap_hard_limit * (uint64_t)9))
        {
            size_t loh_frag = get_total_gen_fragmentation (loh_generation);

            // If the LOH frag is >= 1/8 it's worth compacting it
            if (loh_frag >= heap_hard_limit / 8)
            {
                dprintf (GTC_LOG, ("loh frag: %zd > 1/8 of limit %zd", loh_frag, (heap_hard_limit / 8)));
                gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_limit_loh_frag);
                full_compact_gc_p = true;
            }
            else
            {
                // If there's not much fragmentation but it looks like it'll be productive to
                // collect LOH, do that.
                size_t est_loh_reclaim = get_total_gen_estimated_reclaim (loh_generation);
                if (est_loh_reclaim >= heap_hard_limit / 8)
                {
                    gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_limit_loh_reclaim);
                    full_compact_gc_p = true;
                }
                dprintf (GTC_LOG, ("loh est reclaim: %zd, 1/8 of limit %zd", est_loh_reclaim, (heap_hard_limit / 8)));
            }
        }

        if (full_compact_gc_p)
        {
            n = max_generation;
            *blocking_collection_p = TRUE;
            settings.loh_compaction = TRUE;
            dprintf (GTC_LOG, ("compacting LOH due to hard limit"));
        }
    }

    // Conserve-memory setting: when the combined gen2 + LOH fragmentation ratio exceeds
    // 1 - conserve_mem_setting / 10, make the full GC blocking, and also compact LOH if
    // the LOH ratio alone exceeds that limit.
    if ((conserve_mem_setting != 0) && (n == max_generation))
    {
        float frag_limit = 1.0f - conserve_mem_setting / 10.0f;

        size_t loh_size = get_total_gen_size (loh_generation);
        size_t gen2_size = get_total_gen_size (max_generation);
        float loh_frag_ratio = 0.0f;
        float combined_frag_ratio = 0.0f;
        // Both ratios stay 0 when the LOH is empty.
        if (loh_size != 0)
        {
            size_t loh_frag  = get_total_gen_fragmentation (loh_generation);
            size_t gen2_frag = get_total_gen_fragmentation (max_generation);
            loh_frag_ratio = (float)loh_frag / (float)loh_size;
            combined_frag_ratio = (float)(gen2_frag + loh_frag) / (float)(gen2_size + loh_size);
        }
        if (combined_frag_ratio > frag_limit)
        {
            dprintf (GTC_LOG, ("combined frag: %f > limit %f, loh frag: %f", combined_frag_ratio, frag_limit, loh_frag_ratio));
            gc_data_global.gen_to_condemn_reasons.set_condition (gen_max_high_frag_p);

            n = max_generation;
            *blocking_collection_p = TRUE;
            if (loh_frag_ratio > frag_limit)
            {
                settings.loh_compaction = TRUE;

                dprintf (GTC_LOG, ("compacting LOH due to GCConserveMem setting"));
            }
        }
    }

    // An aggressive induced GC always compacts LOH.
    if (settings.reason == reason_induced_aggressive)
    {
        gc_data_global.gen_to_condemn_reasons.set_condition (gen_joined_aggressive);
        settings.loh_compaction = TRUE;
    }

#ifdef BGC_SERVO_TUNING
    // Servo tuning may raise the generation to max_generation (blocking or not), or
    // lower a max_generation - 1 GC by one generation.
    if (bgc_tuning::should_trigger_ngc2())
    {
        gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_servo_ngc);
        n = max_generation;
        *blocking_collection_p = TRUE;
    }

    if ((n < max_generation) && !gc_heap::background_running_p() &&
        bgc_tuning::stepping_trigger (settings.entry_memory_load, get_current_gc_index (max_generation)))
    {
        gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_servo_initial);
        n = max_generation;
        saved_bgc_tuning_reason = reason_bgc_stepping;
    }

    if ((n < max_generation) && bgc_tuning::should_trigger_bgc())
    {
        gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_servo_bgc);
        n = max_generation;
    }

    if (n == (max_generation - 1))
    {
        if (bgc_tuning::should_delay_alloc (max_generation))
        {
            gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_servo_postpone);
            n -= 1;
        }
    }
#endif //BGC_SERVO_TUNING

    if ((n == max_generation) && (*blocking_collection_p == FALSE))
    {
        // If we are doing a gen2 we should reset elevation regardless and let the gen2
        // decide if we should lock again or in the bgc case by design we will not retract
        // gen1 start.
        settings.should_lock_elevation = FALSE;
        settings.elevation_locked_count = 0;
        dprintf (GTC_LOG, ("doing bgc, reset elevation"));
    }

#ifdef STRESS_HEAP
#ifdef BACKGROUND_GC
    // We can only do Concurrent GC Stress if the caller did not explicitly ask for all
    // generations to be collected,
    //
    // [LOCALGC TODO] STRESS_HEAP is not defined for a standalone GC so there are multiple
    // things that need to be fixed in this code block.
    if (n_original != max_generation &&
        g_pConfig->GetGCStressLevel() && gc_can_use_concurrent)
    {
#ifndef FEATURE_NATIVEAOT
        if (*blocking_collection_p)
        {
            // We call StressHeap() a lot for Concurrent GC Stress. However,
            // if we can not do a concurrent collection, no need to stress anymore.
            // @TODO: Enable stress when the memory pressure goes down again
            GCStressPolicy::GlobalDisable();
        }
        else
#endif // !FEATURE_NATIVEAOT
        {
            gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_stress);
            n = max_generation;
        }
    }
#endif //BACKGROUND_GC
#endif //STRESS_HEAP

#ifdef BACKGROUND_GC
#ifdef DYNAMIC_HEAP_COUNT
    // When DYNAMIC_HEAP_COUNT is defined, the rethreading branch below takes precedence over
    // the "BGC already running" reduction that follows the #endif (they are joined by the else).
    if (trigger_bgc_for_rethreading_p)
    {
        if (background_running_p())
        {
            // trigger_bgc_for_rethreading_p being true indicates we did not change gen2 FL items when we changed HC.
            // So some heaps could have no FL at all which means if we did a gen1 GC during this BGC we would increase
            // gen2 size. We chose to prioritize not increasing gen2 size so we disallow gen1 GCs.
            if (n != 0)
            {
                n = 0;
            }
        }
        else
        {
            dprintf (6666, ("was going to be g%d %s GC, HC change request this GC to be a BGC unless it's an NGC2",
                n, (*blocking_collection_p ? "blocking" : "non blocking")));

            // If we already decided to do a blocking gen2 which would also achieve the purpose of building up a new
            // gen2 FL, let it happen; otherwise we want to trigger a BGC.
            if (!((n == max_generation) && *blocking_collection_p))
            {
                n = max_generation;

#ifdef STRESS_DYNAMIC_HEAP_COUNT
                // Under stress, randomly turn some of these full GCs into blocking ones
                // (when the random value drawn below is < 10).
                if (bgc_to_ngc2_ratio)
                {
                    int r = (int)gc_rand::get_rand ((bgc_to_ngc2_ratio + 1) * 10);
                    dprintf (6666, ("%d - making this full GC %s", r, ((r < 10) ? "NGC2" : "BGC")));
                    if (r < 10)
                    {
                        *blocking_collection_p = TRUE;
                    }
                }
#endif //STRESS_DYNAMIC_HEAP_COUNT
            }
        }
    }
    else
#endif //DYNAMIC_HEAP_COUNT
    if ((n == max_generation) && background_running_p())
    {
        n = max_generation - 1;
        dprintf (GTC_LOG, ("bgc in progress - 1 instead of 2"));
    }
#endif //BACKGROUND_GC

#ifdef DYNAMIC_HEAP_COUNT
    // One-shot request: force a non-blocking max_generation GC if one was not already
    // chosen, then clear the flag.
    if (trigger_initial_gen2_p)
    {
#ifdef BACKGROUND_GC
        assert (!trigger_bgc_for_rethreading_p);
        assert (!background_running_p());
#endif //BACKGROUND_GC

        if (n != max_generation)
        {
            n = max_generation;
            *blocking_collection_p = FALSE;

            dprintf (6666, ("doing the 1st gen2 GC requested by DATAS"));
        }

        trigger_initial_gen2_p = false;
    }
#endif //DYNAMIC_HEAP_COUNT

    return n;
}

// Adds up, over every generation recorded in hist, the size after the GC minus the
// free list space and the free object space after the GC.
inline
size_t get_survived_size (gc_history_per_heap* hist)
{
    size_t survived_total = 0;

    for (int gen_idx = 0; gen_idx < total_generation_count; ++gen_idx)
    {
        const gc_generation_data& data = hist->gen_data[gen_idx];

        size_t gen_survived = data.size_after;
        gen_survived -= data.free_list_space_after;
        gen_survived -= data.free_obj_space_after;

        survived_total += gen_survived;
    }

    return survived_total;
}

// Returns the survived size computed from the current per-heap GC history; with
// MULTIPLE_HEAPS the result is the sum over all heaps.
size_t gc_heap::get_total_survived_size()
{
#ifdef MULTIPLE_HEAPS
    size_t survived_sum = 0;
    int heap_idx = 0;
    while (heap_idx < gc_heap::n_heaps)
    {
        gc_heap* heap = gc_heap::g_heaps[heap_idx];
        survived_sum += get_survived_size (heap->get_gc_data_per_heap());
        heap_idx++;
    }
    return survived_sum;
#else
    return get_survived_size (get_gc_data_per_heap());
#endif //MULTIPLE_HEAPS
}

// Fills oh_allocated, which must have room for total_oh_count entries, with each heap's
// allocated_since_last_gc counters summed per index, and resets those per-heap counters to 0.
void gc_heap::get_total_allocated_since_last_gc (size_t* oh_allocated)
{
    // Start from zero since the loop below accumulates into the caller's array.
    memset (oh_allocated, 0, (total_oh_count * sizeof (size_t)));

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        for (int oh_idx = 0; oh_idx < total_oh_count; oh_idx++)
        {
            oh_allocated[oh_idx] += hp->allocated_since_last_gc[oh_idx];
            // Reading the counter consumes it.
            hp->allocated_since_last_gc[oh_idx] = 0;
        }
    }
}

// Gets what's allocated on both SOH, LOH, etc that hasn't been collected.
size_t gc_heap::get_current_allocated()
{
    dynamic_data* dd = dynamic_data_of (0);
    size_t current_alloc = dd_desired_allocation (dd) - dd_new_allocation (dd);
    for (int i = uoh_start_generation; i < total_generation_count; i++)
    {
        dynamic_data* dd = dynamic_data_of (i);
        current_alloc += dd_desired_allocation (dd) - dd_new_allocation (dd);
    }
    return current_alloc;
}

size_t gc_heap::get_total_allocated()
{
    size_t total_current_allocated = 0;
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
        total_current_allocated += hp->get_current_allocated();
    }
#else
    total_current_allocated = get_current_allocated();
#endif //MULTIPLE_HEAPS
    return total_current_allocated;
}

size_t gc_heap::get_total_promoted()
{
    size_t total_promoted_size = 0;
    int highest_gen = ((settings.condemned_generation == max_generation) ?
                       (total_generation_count - 1) : settings.condemned_generation);
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        for (int gen_number = 0; gen_number <= highest_gen; gen_number++)
        {
            total_promoted_size += dd_promoted_size (hp->dynamic_data_of (gen_number));
        }
    }
    return total_promoted_size;
}

#ifdef BGC_SERVO_TUNING
size_t gc_heap::get_total_generation_size (int gen_number)
{
    size_t total_generation_size = 0;
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        total_generation_size += hp->generation_size (gen_number);
    }
    return total_generation_size;
}

// gets all that's allocated into the gen. This is only used for gen2/3
// for servo tuning.
size_t gc_heap::get_total_servo_alloc (int gen_number)
{
    size_t total_alloc = 0;

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        generation* gen = hp->generation_of (gen_number);
        total_alloc += generation_free_list_allocated (gen);
        total_alloc += generation_end_seg_allocated (gen);
        total_alloc += generation_condemned_allocated (gen);
        total_alloc += generation_sweep_allocated (gen);
    }

    return total_alloc;
}

// Returns bpromoted_bytes summed over heap indices 0 .. heap count - 1 (a single
// index, 0, without MULTIPLE_HEAPS).
size_t gc_heap::get_total_bgc_promoted()
{
#ifdef MULTIPLE_HEAPS
    const int heap_count = gc_heap::n_heaps;
#else //MULTIPLE_HEAPS
    const int heap_count = 1;
#endif //MULTIPLE_HEAPS

    size_t promoted_sum = 0;
    int heap_idx = 0;
    while (heap_idx < heap_count)
    {
        promoted_sum += bpromoted_bytes (heap_idx);
        heap_idx++;
    }
    return promoted_sum;
}

// This is called after compute_new_dynamic_data is called, at which point
// dd_current_size is calculated.
size_t gc_heap::get_total_surv_size (int gen_number)
{
    size_t total_surv_size = 0;
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        total_surv_size += dd_current_size (hp->dynamic_data_of (gen_number));
    }
    return total_surv_size;
}

size_t gc_heap::get_total_begin_data_size (int gen_number)
{
    size_t total_begin_data_size = 0;
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        total_begin_data_size += dd_begin_data_size (hp->dynamic_data_of (gen_number));
    }
    return total_begin_data_size;
}

size_t gc_heap::get_total_generation_fl_size (int gen_number)
{
    size_t total_generation_fl_size = 0;
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        total_generation_fl_size += generation_free_list_space (hp->generation_of (gen_number));
    }
    return total_generation_fl_size;
}

size_t gc_heap::get_current_gc_index (int gen_number)
{
#ifdef MULTIPLE_HEAPS
    gc_heap* hp = gc_heap::g_heaps[0];
    return dd_collection_count (hp->dynamic_data_of (gen_number));
#else
    return dd_collection_count (dynamic_data_of (gen_number));
#endif //MULTIPLE_HEAPS
}
#endif //BGC_SERVO_TUNING

size_t gc_heap::current_generation_size (int gen_number)
{
    dynamic_data* dd = dynamic_data_of (gen_number);
    size_t gen_size = (dd_current_size (dd) + dd_desired_allocation (dd)
                        - dd_new_allocation (dd));

    return gen_size;
}

#ifdef USE_REGIONS
// We may need a new empty region while doing a GC so try to get one now, if we don't have any
// reserve in the free region list.
bool gc_heap::try_get_new_free_region()
{
    heap_segment* region = 0;
    if (free_regions[basic_free_region].get_num_free_regions() > 0)
    {
        dprintf (REGIONS_LOG, ("h%d has %zd free regions %p", heap_number, free_regions[basic_free_region].get_num_free_regions(),
            heap_segment_mem (free_regions[basic_free_region].get_first_free_region())));
        return true;
    }
    else
    {
        region = allocate_new_region (__this, 0, false);
        if (region)
        {
            if (init_table_for_region (0, region))
            {
                return_free_region (region);
                dprintf (REGIONS_LOG, ("h%d got a new empty region %p", heap_number, region));
            }
            else
            {
                region = 0;
            }
        }
    }

    return (region != 0);
}

// Prepares the GC tables for a region that is about to be used by generation gen_number.
//
// With BACKGROUND_GC, the mark array for the region is committed if it isn't already;
// if that fails the region is decommitted and false is returned. For generations up to
// max_generation the region's first brick is set to -1; for other generations the brick
// entry is only asserted to be 0.
//
// Returns true when the region is ready to use, false when it had to be given up.
bool gc_heap::init_table_for_region (int gen_number, heap_segment* region)
{
#ifdef BACKGROUND_GC
    // Addresses are logged as %zx with a size_t cast, the portable spelling used by the
    // other log statements in this file.
    dprintf (GC_TABLE_LOG, ("new seg %zx, mark_array is %zx",
        (size_t)heap_segment_mem (region), (size_t)mark_array));
    if (((region->flags & heap_segment_flags_ma_committed) == 0) &&
        !commit_mark_array_new_seg (__this, region))
    {
        dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new region %zx-%zx",
            (size_t)get_region_start (region), (size_t)heap_segment_reserved (region)));

        // We don't have memory to commit the mark array so we cannot use the new region.
        decommit_region (region, gen_to_oh (gen_number), heap_number);
        return false;
    }
    if ((region->flags & heap_segment_flags_ma_committed) != 0)
    {
        bgc_verify_mark_array_cleared (region, true);
    }
#endif //BACKGROUND_GC

    if (gen_number <= max_generation)
    {
        size_t first_brick = brick_of (heap_segment_mem (region));
        set_brick (first_brick, -1);
    }
    else
    {
        assert (brick_table[brick_of (heap_segment_mem (region))] == 0);
    }

    return true;
}
#endif //USE_REGIONS

// The following 2 methods use integer division to prevent a potential floating point exception (FPE).
// An FPE may occur if we use floating point division because of speculative execution.
//
// Return the percentage of efficiency (between 0 and 100) of the allocator.
inline
size_t gc_heap::generation_allocator_efficiency_percent (generation* inst)
{
#ifdef DYNAMIC_HEAP_COUNT
    if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
    {
        // Share of the planned allocations that were not condemned allocations.
        uint64_t planned_total = generation_total_plan_allocated (inst);
        uint64_t condemned = generation_condemned_allocated (inst);
        if (planned_total == 0)
        {
            return 0;
        }
        return (size_t)(100 * (planned_total - condemned) / planned_total);
    }
#endif //DYNAMIC_HEAP_COUNT

    // Share of (free list allocated + free object space) that was free list allocated.
    uint64_t fo_space = generation_free_obj_space (inst);
    uint64_t fl_allocated = generation_free_list_allocated (inst);
    uint64_t denominator = fl_allocated + fo_space;

    // Nothing recorded yet: report 0 rather than dividing by zero.
    return ((denominator == 0) ? 0 : (size_t)((100 * fl_allocated) / denominator));
}

// Estimates, with integer arithmetic only, how much of the generation's
// fragmentation is unusable: the free object space plus a proportional share
// of the free list space. hn is only used for logging.
inline
size_t gc_heap::generation_unusable_fragmentation (generation* inst, int hn)
{
#ifdef DYNAMIC_HEAP_COUNT
    if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
    {
        uint64_t planned_total = generation_total_plan_allocated (inst);
        uint64_t condemned = generation_condemned_allocated (inst);

        // A free object space that reads as negative when viewed as signed is treated as 0.
        size_t fo_space = generation_free_obj_space (inst);
        if ((ptrdiff_t)fo_space < 0)
        {
            fo_space = 0;
        }

        // With no planned allocations the estimate stays at 0.
        uint64_t unusable = 0;
        if (planned_total != 0)
        {
            unusable = fo_space + (condemned * generation_free_list_space (inst) / planned_total);
        }

        dprintf (3, ("h%d g%d FLa: %Id, ESa: %Id, Ca: %Id | FO: %Id, FL %Id, fl effi %.3f, unusable fl is %Id",
            hn, inst->gen_num,
            generation_free_list_allocated (inst), generation_end_seg_allocated (inst), (size_t)condemned,
            fo_space, generation_free_list_space (inst),
            ((planned_total == 0) ? 1.0 : ((float)(planned_total - condemned) / (float)planned_total)),
            (size_t)unusable));

        return (size_t)unusable;
    }
#endif //DYNAMIC_HEAP_COUNT

    uint64_t fo_total = generation_free_obj_space (inst);
    uint64_t fl_allocated = generation_free_list_allocated (inst);
    uint64_t fl_space = generation_free_list_space (inst);
    uint64_t denominator = fl_allocated + fo_total;

    if (denominator == 0)
    {
        return 0;
    }

    // Free object space, plus the free list space scaled by the fraction
    // fo_total / (fl_allocated + fo_total).
    return (size_t)(fo_total + (fo_total * fl_space) / denominator);
}

/*
    This is called when we are actually doing a GC, or when we are just checking whether
    we would do a full blocking GC, in which case check_only_p is TRUE.

    The difference between calling this with check_only_p TRUE and FALSE is that when it's
    TRUE:
            settings.reason is ignored
            budgets are not checked (since they are checked before this is called)
            it doesn't change anything non local like generation_skip_ratio

    Parameters:
            n_initial             - the generation the caller initially wants to condemn.
            blocking_collection_p - [out] set to TRUE when the decision requires a blocking GC.
            elevation_requested_p - [out] set to TRUE when low ephemeral space or high memory
                                    load caused an elevation to be considered.
            check_only_p          - see above; when TRUE, settings and condemn reasons are
                                    recorded into local temporaries instead of the heap's state.

    Returns the generation number this heap decided to condemn.
*/
int gc_heap::generation_to_condemn (int n_initial,
                                    BOOL* blocking_collection_p,
                                    BOOL* elevation_requested_p,
                                    BOOL check_only_p)
{
    // In check-only mode all writes go to these temporaries so that the real
    // settings/condemn reasons are left untouched.
    gc_mechanisms temp_settings = settings;
    gen_to_condemn_tuning temp_condemn_reasons;
    gc_mechanisms* local_settings = (check_only_p ? &temp_settings : &settings);
    gen_to_condemn_tuning* local_condemn_reasons = (check_only_p ? &temp_condemn_reasons : &gen_to_condemn_reasons);
    if (!check_only_p)
    {
        if ((local_settings->reason == reason_oos_soh) || (local_settings->reason == reason_oos_loh))
        {
            assert (n_initial >= 1);
        }

        assert (settings.reason != reason_empty);
    }

    local_condemn_reasons->init();

    int n = n_initial;
    int n_alloc = n;
    if (heap_number == 0)
    {
        dprintf (6666, ("init: %d(%d)", n_initial, settings.reason));
    }
    int i = 0;
    int temp_gen = 0;
    BOOL low_memory_detected = g_low_memory_status;
    uint32_t memory_load = 0;
    uint64_t available_physical = 0;
    uint64_t available_page_file = 0;
    BOOL check_memory = FALSE;
    BOOL high_fragmentation  = FALSE;
    BOOL v_high_memory_load  = FALSE;
    BOOL high_memory_load    = FALSE;
    BOOL low_ephemeral_space = FALSE;
    BOOL evaluate_elevation  = TRUE;
    *elevation_requested_p   = FALSE;
    *blocking_collection_p   = FALSE;

    BOOL check_max_gen_alloc = TRUE;

#ifdef STRESS_HEAP
    int orig_gen = n;
#endif //STRESS_HEAP

    if (!check_only_p)
    {
        // Refresh the recorded fragmentation (free list + free object space) for gen0
        // and for every UOH generation.
        dd_fragmentation (dynamic_data_of (0)) =
            generation_free_list_space (youngest_generation) +
            generation_free_obj_space (youngest_generation);

        for (int i = uoh_start_generation; i < total_generation_count; i++)
        {
            dd_fragmentation (dynamic_data_of (i)) =
                generation_free_list_space (generation_of (i)) +
                generation_free_obj_space (generation_of (i));
        }

        //save new_allocation
        for (i = 0; i < total_generation_count; i++)
        {
            dynamic_data* dd = dynamic_data_of (i);
            if ((dd_new_allocation (dd) < 0) && (i >= 2))
            {
                dprintf (6666, ("h%d: g%d: l: %zd (%zd)",
                    heap_number, i,
                    dd_new_allocation (dd),
                    dd_desired_allocation (dd)));
            }
            dd_gc_new_allocation (dd) = dd_new_allocation (dd);
        }

        local_condemn_reasons->set_gen (gen_initial, n);
        temp_gen = n;

#ifdef BACKGROUND_GC
        // While a background GC is running (or servo tuning is driving the trigger)
        // the max_generation/UOH budgets are not used to escalate here.
        if (gc_heap::background_running_p()
#ifdef BGC_SERVO_TUNING
            || bgc_tuning::fl_tuning_triggered
            || (bgc_tuning::enable_fl_tuning && bgc_tuning::use_stepping_trigger_p)
#endif //BGC_SERVO_TUNING
            )
        {
            check_max_gen_alloc = FALSE;
        }
#endif //BACKGROUND_GC

        if (check_max_gen_alloc)
        {
            //figure out if UOH objects need to be collected.
            for (int i = uoh_start_generation; i < total_generation_count; i++)
            {
                if (get_new_allocation (i) <= 0)
                {
                    n = max_generation;
                    local_condemn_reasons->set_gen (gen_alloc_budget, n);
                    dprintf (BGC_TUNING_LOG, ("BTL[GTC]: trigger based on gen%d b: %zd",
                             (i),
                             get_new_allocation (i)));
                    break;
                }
            }
        }

        //figure out which generation ran out of allocation
        for (i = n+1; i <= (check_max_gen_alloc ? max_generation : (max_generation - 1)); i++)
        {
            if (get_new_allocation (i) <= 0)
            {
                n = i;
                if (n == max_generation)
                {
                    dprintf (BGC_TUNING_LOG, ("BTL[GTC]: trigger based on gen2 b: %zd",
                            get_new_allocation (max_generation)));
                }
            }
            else
                break;
        }
    }

    if (n > temp_gen)
    {
        local_condemn_reasons->set_gen (gen_alloc_budget, n);
    }

    if (n > 0)
    {
        dprintf (6666, ("h%d: g%d budget", heap_number, ((get_new_allocation (loh_generation) <= 0) ? 3 : n)));
    }

    // Remember the budget-based decision; later stages compare against it.
    n_alloc = n;

#if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS)
    //time based tuning
    // if enough time has elapsed since the last gc
    // and the number of gc is too low (1/10 of lower gen) then collect
    // This should also be enabled if we have memory concerns
    int n_time_max = max_generation;

    if (!check_only_p)
    {
        if (!check_max_gen_alloc)
        {
            n_time_max = max_generation - 1;
        }
    }

    if ((local_settings->pause_mode == pause_interactive) ||
        (local_settings->pause_mode == pause_sustained_low_latency))
    {
        dynamic_data* dd0 = dynamic_data_of (0);
        uint64_t now = GetHighPrecisionTimeStamp();
        temp_gen = n;
        for (i = (temp_gen+1); i <= n_time_max; i++)
        {
            dynamic_data* dd = dynamic_data_of (i);
            if ((now > dd_time_clock(dd) + dd_time_clock_interval(dd)) &&
                (dd_gc_clock (dd0) > (dd_gc_clock (dd) + dd_gc_clock_interval(dd))) &&
                ((n < max_generation) || ((dd_current_size (dd) < dd_max_size (dd0)))))
            {
                n = min (i, n_time_max);
                dprintf (GTC_LOG, ("time %d", n));
            }
        }
        if (n > temp_gen)
        {
            local_condemn_reasons->set_gen (gen_time_tuning, n);
            if (n == max_generation)
            {
                dprintf (BGC_TUNING_LOG, ("BTL[GTC]: trigger based on time"));
            }
        }
    }

    if (n != n_alloc)
    {
        dprintf (GTC_LOG, ("Condemning %d based on time tuning and fragmentation", n));
    }
#endif //BACKGROUND_GC && !MULTIPLE_HEAPS

    if (n < (max_generation - 1))
    {
        dprintf (6666, ("h%d: skip %d", heap_number, generation_skip_ratio));

        if (dt_low_card_table_efficiency_p (tuning_deciding_condemned_gen))
        {
            n = max (n, max_generation - 1);
            local_settings->promotion = TRUE;
            dprintf (2, ("h%d: skip %d, c %d",
                        heap_number, generation_skip_ratio, n));
            local_condemn_reasons->set_condition (gen_low_card_p);
        }
    }

    if (!check_only_p)
    {
        generation_skip_ratio = 100;
    }

    if (dt_low_ephemeral_space_p (check_only_p ?
                                  tuning_deciding_full_gc :
                                  tuning_deciding_condemned_gen))
    {
        low_ephemeral_space = TRUE;

        n = max (n, max_generation - 1);
        local_condemn_reasons->set_condition (gen_low_ephemeral_p);
        dprintf (GTC_LOG, ("h%d: low eph", heap_number));

        if (!provisional_mode_triggered)
        {
#ifdef BACKGROUND_GC
            if (!gc_can_use_concurrent || (generation_free_list_space (generation_of (max_generation)) == 0))
#endif //BACKGROUND_GC
            {
                //It is better to defragment first if we are running out of space for
                //the ephemeral generation but we have enough fragmentation to make up for it
                //in the non ephemeral generation. Essentially we are trading a gen2 for
                // having to expand heap in ephemeral collections.
                if (dt_high_frag_p (tuning_deciding_condemned_gen,
                                    max_generation - 1,
                                    TRUE))
                {
                    high_fragmentation = TRUE;
                    local_condemn_reasons->set_condition (gen_max_high_frag_e_p);
                    dprintf (6666, ("heap%d: gen1 frag", heap_number));
                }
            }
        }
    }

#ifdef USE_REGIONS
    if (!check_only_p)
    {
        // Without a spare empty region we flag last_gc_before_oom, which is handled
        // further down by condemning max_generation with a blocking collection.
        if (!try_get_new_free_region())
        {
            dprintf (GTC_LOG, ("can't get an empty region -> full compacting"));
            last_gc_before_oom = TRUE;
        }
    }
#endif //USE_REGIONS

    //figure out which ephemeral generation is too fragmented
    temp_gen = n;
    for (i = n+1; i < max_generation; i++)
    {
        if (dt_high_frag_p (tuning_deciding_condemned_gen, i))
        {
            dprintf (6666, ("h%d g%d too frag", heap_number, i));
            n = i;
        }
        else
            break;
    }

    if (low_ephemeral_space)
    {
        //enable promotion
        local_settings->promotion = TRUE;
    }

    if (n > temp_gen)
    {
        local_condemn_reasons->set_condition (gen_eph_high_frag_p);
    }

    if (!check_only_p)
    {
        if (settings.pause_mode == pause_low_latency)
        {
            if (!is_induced (settings.reason))
            {
                // In low latency mode a non-induced GC is capped at max_generation - 1
                // and all the memory load/elevation logic below is skipped.
                n = min (n, max_generation - 1);
                dprintf (GTC_LOG, ("low latency mode is enabled, condemning %d", n));
                evaluate_elevation = FALSE;
                goto exit;
            }
        }
    }

    // It's hard to catch when we get to the point that the memory load is so high
    // we get an induced GC from the finalizer thread so we are checking the memory load
    // for every gen0 GC.
    check_memory = (check_only_p ?
                    (n >= 0) :
                    ((n >= 1) || low_memory_detected));

    if (check_memory)
    {
        //find out if we are short on memory
        get_memory_info (&memory_load, &available_physical, &available_page_file);
        if (heap_number == 0)
        {
            dprintf (GTC_LOG, ("ml: %d", memory_load));
        }

#ifdef USE_REGIONS
        // For regions we want to take the VA range into consideration as well.
        uint32_t va_memory_load = global_region_allocator.get_va_memory_load();
        if (heap_number == 0)
        {
            dprintf (GTC_LOG, ("h%d ML %d, va ML %d", heap_number, memory_load, va_memory_load));
        }
        memory_load = max (memory_load, va_memory_load);
#endif //USE_REGIONS

        // Need to get it early enough for all heaps to use.
        local_settings->entry_available_physical_mem = available_physical;
        local_settings->entry_memory_load = memory_load;

        // @TODO: Force compaction more often under GCSTRESS
        if (memory_load >= high_memory_load_th || low_memory_detected)
        {
#ifdef SIMPLE_DPRINTF
            // stress log can't handle any parameter that's bigger than a void*.
            if (heap_number == 0)
            {
                dprintf (GTC_LOG, ("tp: %zd, ap: %zd", total_physical_mem, available_physical));
            }
#endif //SIMPLE_DPRINTF

            high_memory_load = TRUE;

            if (memory_load >= v_high_memory_load_th || low_memory_detected)
            {
                // TODO: Perhaps in 64-bit we should be estimating gen1's fragmentation as well since
                // gen1/gen0 may take a lot more memory than gen2.
                if (!high_fragmentation)
                {
                    high_fragmentation = dt_estimate_reclaim_space_p (tuning_deciding_condemned_gen, max_generation);
                }
                v_high_memory_load = TRUE;
            }
            else
            {
                if (!high_fragmentation)
                {
                    high_fragmentation = dt_estimate_high_frag_p (tuning_deciding_condemned_gen, max_generation, available_physical);
                }
            }

            if (high_fragmentation)
            {
                dprintf (6666, ("h%d high frag true!! mem load %d", heap_number, memory_load));

                // high_memory_load is always TRUE in this scope, so the more specific
                // very-high condition has to be tested first; otherwise
                // gen_max_high_frag_vm_p could never be recorded.
                if (v_high_memory_load)
                {
                    local_condemn_reasons->set_condition (gen_max_high_frag_vm_p);
                }
                else if (high_memory_load)
                {
                    local_condemn_reasons->set_condition (gen_max_high_frag_m_p);
                }
            }
        }
    }

    dprintf (GTC_LOG, ("h%d: le: %d, hm: %d, vm: %d, f: %d",
                 heap_number, low_ephemeral_space, high_memory_load, v_high_memory_load,
                 high_fragmentation));

#ifndef USE_REGIONS
    if (should_expand_in_full_gc)
    {
        dprintf (GTC_LOG, ("h%d: expand_in_full - BLOCK", heap_number));
        *blocking_collection_p = TRUE;
        evaluate_elevation = FALSE;
        n = max_generation;
        local_condemn_reasons->set_condition (gen_expand_fullgc_p);
    }
#endif //!USE_REGIONS

    if (last_gc_before_oom)
    {
        dprintf (GTC_LOG, ("h%d: alloc full - BLOCK", heap_number));
        n = max_generation;
        *blocking_collection_p = TRUE;

        if ((local_settings->reason == reason_oos_loh) ||
            (local_settings->reason == reason_alloc_loh))
        {
            evaluate_elevation = FALSE;
        }

        local_condemn_reasons->set_condition (gen_before_oom);
    }

    if (!check_only_p)
    {
        if (is_induced_blocking (settings.reason) &&
            n_initial == max_generation
            IN_STRESS_HEAP( && !settings.stress_induced ))
        {
            if (heap_number == 0)
            {
                dprintf (GTC_LOG, ("induced - BLOCK"));
            }

            *blocking_collection_p = TRUE;
            local_condemn_reasons->set_condition (gen_induced_fullgc_p);
            evaluate_elevation = FALSE;
        }

        if (settings.reason == reason_induced_noforce)
        {
            local_condemn_reasons->set_condition (gen_induced_noforce_p);
            evaluate_elevation = FALSE;
        }
    }

    if (!provisional_mode_triggered && evaluate_elevation && (low_ephemeral_space || high_memory_load || v_high_memory_load))
    {
        *elevation_requested_p = TRUE;
#ifdef HOST_64BIT
        // if we are in high memory load and have consumed 10% of the gen2 budget, do a gen2 now.
        if (high_memory_load || v_high_memory_load)
        {
            dynamic_data* dd_max = dynamic_data_of (max_generation);
            if (((float)dd_new_allocation (dd_max) / (float)dd_desired_allocation (dd_max)) < 0.9)
            {
                dprintf (GTC_LOG, ("%zd left in gen2 alloc (%zd)",
                    dd_new_allocation (dd_max), dd_desired_allocation (dd_max)));
                n = max_generation;
                local_condemn_reasons->set_condition (gen_almost_max_alloc);
            }
        }

        if (n <= max_generation)
#endif // HOST_64BIT
        {
            if (high_fragmentation)
            {
                //elevate to max_generation
                n = max_generation;
                dprintf (GTC_LOG, ("h%d: f full", heap_number));

#ifdef BACKGROUND_GC
                if (high_memory_load || v_high_memory_load)
                {
                    // For background GC we want to do blocking collections more eagerly because we don't
                    // want to get into the situation where the memory load becomes high while we are in
                    // a background GC and we'd have to wait for the background GC to finish to start
                    // a blocking collection (right now the implementation doesn't handle converting
                    // a background GC to a blocking collection midway.
                    dprintf (GTC_LOG, ("h%d: bgc - BLOCK", heap_number));
                    *blocking_collection_p = TRUE;
                }
#else
                if (v_high_memory_load)
                {
                    dprintf (GTC_LOG, ("h%d: - BLOCK", heap_number));
                    *blocking_collection_p = TRUE;
                }
#endif //BACKGROUND_GC
            }
            else
            {
                n = max (n, max_generation - 1);
                dprintf (GTC_LOG, ("h%d: nf c %d", heap_number, n));
            }
        }
    }

    // If we were elevated to max_generation - 1 for a reason other than budget,
    // re-check whether the max_generation budget is exhausted as well.
    if (!provisional_mode_triggered && (n == (max_generation - 1)) && (n_alloc < (max_generation -1)))
    {
#ifdef BGC_SERVO_TUNING
        if (!bgc_tuning::enable_fl_tuning)
#endif //BGC_SERVO_TUNING
        {
            dprintf (GTC_LOG, ("h%d: budget %d, check 2",
                        heap_number, n_alloc));
            if (get_new_allocation (max_generation) <= 0)
            {
                dprintf (GTC_LOG, ("h%d: budget alloc", heap_number));
                n = max_generation;
                local_condemn_reasons->set_condition (gen_max_gen1);
            }
        }
    }

    //figure out if max_generation is too fragmented -> blocking collection
    if (!provisional_mode_triggered
#ifdef BGC_SERVO_TUNING
        && !bgc_tuning::enable_fl_tuning
#endif //BGC_SERVO_TUNING
        && (n == max_generation))
    {
        if (dt_high_frag_p (tuning_deciding_condemned_gen, n))
        {
            dprintf (6666, ("h%d: g%d too frag", heap_number, n));
            local_condemn_reasons->set_condition (gen_max_high_frag_p);
            if (local_settings->pause_mode != pause_sustained_low_latency)
            {
                *blocking_collection_p = TRUE;
            }
        }
    }

#ifdef BACKGROUND_GC
    if ((n == max_generation) && !(*blocking_collection_p))
    {
        // Only heap 0 makes this decision; it looks at every heap's gen2/LOH/POH size.
        if (heap_number == 0)
        {
            BOOL bgc_heap_too_small = TRUE;
#ifdef MULTIPLE_HEAPS
            for (int i = 0; i < n_heaps; i++)
            {
                if (((g_heaps[i]->current_generation_size (max_generation)) > bgc_min_per_heap) ||
                    ((g_heaps[i]->current_generation_size (loh_generation)) > bgc_min_per_heap) ||
                    ((g_heaps[i]->current_generation_size (poh_generation)) > bgc_min_per_heap))
                {
                    bgc_heap_too_small = FALSE;
                    break;
                }
            }
#else //MULTIPLE_HEAPS
            if ((current_generation_size (max_generation) > bgc_min_per_heap) ||
                (current_generation_size (loh_generation) > bgc_min_per_heap) ||
                (current_generation_size (poh_generation) > bgc_min_per_heap))
            {
                bgc_heap_too_small = FALSE;
            }
#endif //MULTIPLE_HEAPS

            if (bgc_heap_too_small)
            {
                dprintf (GTC_LOG, ("gen2 and gen3 too small"));

#ifdef STRESS_HEAP
                // do not turn stress-induced collections into blocking GCs
                if (!settings.stress_induced)
#endif //STRESS_HEAP
                {
                    *blocking_collection_p = TRUE;
                }

                local_condemn_reasons->set_condition (gen_gen2_too_small);
            }
        }
    }
#endif //BACKGROUND_GC

exit:
    if (!check_only_p)
    {
#ifdef STRESS_HEAP
#ifdef BACKGROUND_GC
        // We can only do Concurrent GC Stress if the caller did not explicitly ask for all
        // generations to be collected,

        if (orig_gen != max_generation &&
            g_pConfig->GetGCStressLevel() && gc_can_use_concurrent)
        {
            *elevation_requested_p = FALSE;
        }
#endif //BACKGROUND_GC
#endif //STRESS_HEAP

        if (check_memory)
        {
            fgm_result.available_pagefile_mb = (size_t)(available_page_file / (1024 * 1024));
        }

        // Publish the final per-heap decision and the reasons that led to it.
        local_condemn_reasons->set_gen (gen_final_per_heap, n);
        get_gc_data_per_heap()->gen_to_condemn_reasons.init (local_condemn_reasons);

#ifdef DT_LOG
        local_condemn_reasons->print (heap_number);
#endif //DT_LOG

        if ((local_settings->reason == reason_oos_soh) ||
            (local_settings->reason == reason_oos_loh))
        {
            assert (n >= 1);
        }
    }

    return n;
}

// Returns the smallest of three per-heap candidates: a limit derived from the
// entry memory load, 10% of the max_generation size, and 3% of memory
// (mem_one_percent * 3) divided across num_heaps.
inline
size_t gc_heap::min_reclaim_fragmentation_threshold (uint32_t num_heaps)
{
    // The higher the memory load is above high_memory_load_th, the lower this limit gets.
    size_t load_based_limit =
        (500 - (settings.entry_memory_load - high_memory_load_th) * 40) * 1024 * 1024 / num_heaps;

    size_t gen2_ten_percent = (size_t)((float)generation_size (max_generation) * 0.10);
    uint64_t mem_three_percent = mem_one_percent * 3 / num_heaps;

#ifdef SIMPLE_DPRINTF
    dprintf (GTC_LOG, ("min av: %zd, 10%% gen2: %zd, 3%% mem: %zd",
        load_based_limit, gen2_ten_percent, mem_three_percent));
#endif //SIMPLE_DPRINTF

    uint64_t smallest = min ((uint64_t)gen2_ten_percent, mem_three_percent);
    smallest = min ((uint64_t)load_based_limit, smallest);
    return (size_t)smallest;
}

// Returns available_mem capped at 256MB, divided by num_heaps.
inline
uint64_t gc_heap::min_high_fragmentation_threshold(uint64_t available_mem, uint32_t num_heaps)
{
    uint64_t capped_mem = min (available_mem, (uint64_t)(256*1024*1024));
    return capped_mem / num_heaps;
}

// Anonymous enum holding the CORINFO_EXCEPTION_GC code. The low 16 bits (0x4743)
// are the ASCII codes for 'G' and 'C'.
// NOTE(review): presumably the exception code used for GC-internal failures - confirm at the use sites.
enum {
CORINFO_EXCEPTION_GC = 0xE0004743 // 'GC'
};


#ifdef BACKGROUND_GC
// Resets the allocation context of max_generation at the start of a background GC
// so that a foreground GC can allocate into it.
void gc_heap::init_background_gc ()
{
    generation* gen = generation_of (max_generation);

    // Clear the allocation pointer/limit and restart from the generation's first
    // segment as returned by heap_segment_rw.
    generation_allocation_pointer (gen) = 0;
    generation_allocation_limit (gen) = 0;
    generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen));

    _ASSERTE(generation_allocation_segment(gen) != NULL);

#ifdef DOUBLY_LINKED_FL
    generation_set_bgc_mark_bit_p (gen) = FALSE;
#endif //DOUBLY_LINKED_FL

#ifndef USE_REGIONS
    // For every segment up to (but not including) the ephemeral segment,
    // make the plan allocated mark match what is currently allocated.
    heap_segment* seg = generation_allocation_segment (gen);
    while (seg != ephemeral_heap_segment)
    {
        heap_segment_plan_allocated (seg) = heap_segment_allocated (seg);
        seg = heap_segment_next_rw (seg);
    }
#endif //!USE_REGIONS

    if (heap_number == 0)
    {
        dprintf (2, ("heap%d: bgc lowest: %p, highest: %p",
            heap_number,
            background_saved_lowest_address,
            background_saved_highest_address));
    }
}
#endif //BACKGROUND_GC

// Fires the BGCDrainMark event through FIRE_EVENT, passing mark_list_objects as its payload.
inline
void fire_drain_mark_list_event (size_t mark_list_objects)
{
    FIRE_EVENT(BGCDrainMark, mark_list_objects);
}

// Fires the BGCRevisit event through FIRE_EVENT with the given dirtied page count,
// marked object count and large-objects flag.
inline
void fire_revisit_event (size_t dirtied_pages,
                         size_t marked_objects,
                         BOOL large_objects_p)
{
    FIRE_EVENT(BGCRevisit, dirtied_pages, marked_objects, large_objects_p);
}

// Fires the BGCOverflow_V1 event through FIRE_EVENT. The overflow range bounds are
// passed as 64-bit integers, followed by the marked object count, a flag telling
// whether gen_number is loh_generation, and gen_number itself.
inline
void fire_overflow_event (uint8_t* overflow_min,
                          uint8_t* overflow_max,
                          size_t marked_objects,
                          int gen_number)
{
    FIRE_EVENT(BGCOverflow_V1, (uint64_t)overflow_min, (uint64_t)overflow_max, marked_objects, gen_number == loh_generation, gen_number);
}

// When TRACE_GC is defined, logs msg together with the time elapsed since the
// previous call (time_bgc_last) and then updates time_bgc_last. Otherwise a no-op.
void gc_heap::concurrent_print_time_delta (const char* msg)
{
#ifndef TRACE_GC
    UNREFERENCED_PARAMETER(msg);
#else
    uint64_t now = GetHighPrecisionTimeStamp();
    // The timestamp delta is divided by 1000 and logged as milliseconds.
    size_t delta_ms = (size_t)((now - time_bgc_last) / 1000);
    time_bgc_last = now;

    dprintf (2, ("h%d: %s T %zd ms", heap_number, msg, delta_ms));
#endif //TRACE_GC
}

// When both BACKGROUND_GC and TRACE_GC are defined, logs msg followed by the
// allocation size, free list space and free object space of every generation
// that has at least one of them non-zero. Otherwise a no-op.
void gc_heap::free_list_info (int gen_num, const char* msg)
{
#if defined (BACKGROUND_GC) && defined (TRACE_GC)
    dprintf (3, ("h%d: %s", heap_number, msg));
    for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
    {
        generation* gen = generation_of (gen_idx);

        bool anything_to_report_p = (generation_allocation_size (gen) != 0) ||
                                    (generation_free_list_space (gen) != 0) ||
                                    (generation_free_obj_space (gen) != 0);
        if (!anything_to_report_p)
        {
            // don't print if everything is 0.
            continue;
        }

        dprintf (3, ("h%d: g%d: a-%zd, fl-%zd, fo-%zd",
            heap_number, gen_idx,
            generation_allocation_size (gen),
            generation_free_list_space (gen),
            generation_free_obj_space (gen)));
    }
#else
    UNREFERENCED_PARAMETER(gen_num);
    UNREFERENCED_PARAMETER(msg);
#endif // BACKGROUND_GC && TRACE_GC
}

// Used in pause_no_gc mode: marks max_generation as the condemned generation,
// updates the collection counts on every heap and bumps the blocking full GC counter.
void gc_heap::update_collection_counts_for_no_gc()
{
    assert (settings.pause_mode == pause_no_gc);

    settings.condemned_generation = max_generation;

#ifndef MULTIPLE_HEAPS
    update_collection_counts();
#else //!MULTIPLE_HEAPS
    for (int hn = 0; hn < n_heaps; hn++)
    {
        g_heaps[hn]->update_collection_counts();
    }
#endif //!MULTIPLE_HEAPS

    full_gc_counts[gc_type_blocking]++;
}

// Decides whether a triggered GC should actually run.
//
// Outside of pause_no_gc mode this always returns TRUE. In pause_no_gc mode:
//  - if the no-GC region has not started yet, the decision is delegated to
//    should_proceed_for_no_gc;
//  - if it has started and there is withheld SOH budget, the withheld budgets are
//    handed back to every heap, the callback is scheduled and FALSE is returned;
//  - otherwise the no-GC region is exited and TRUE is returned.
BOOL gc_heap::should_proceed_with_gc()
{
    if (gc_heap::settings.pause_mode != pause_no_gc)
    {
        return TRUE;
    }

    if (!current_no_gc_region_info.started)
    {
        return should_proceed_for_no_gc();
    }

    if (current_no_gc_region_info.soh_withheld_budget != 0)
    {
        dprintf(1, ("[no_gc_callback] allocation budget exhausted with withheld, time to trigger callback\n"));
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < gc_heap::n_heaps; i++)
        {
            gc_heap* hp = gc_heap::g_heaps [i];
#else
        {
            gc_heap* hp = pGenGCHeap;
#endif
            // Give the withheld gen0 and LOH budgets back to this heap.
            dd_new_allocation (hp->dynamic_data_of (soh_gen0)) += current_no_gc_region_info.soh_withheld_budget;
            dd_new_allocation (hp->dynamic_data_of (loh_generation)) += current_no_gc_region_info.loh_withheld_budget;
        }
        current_no_gc_region_info.soh_withheld_budget = 0;
        current_no_gc_region_info.loh_withheld_budget = 0;

        // Trigger the callback
        schedule_no_gc_callback (false);
        current_no_gc_region_info.callback = nullptr;
        return FALSE;
    }

    dprintf(1, ("[no_gc_callback] GC triggered while in no_gc mode. Exiting no_gc mode.\n"));
    // The no_gc mode was already in progress yet we triggered another GC,
    // this effectively exits the no_gc mode.
    restore_data_for_no_gc();
    if (current_no_gc_region_info.callback != nullptr)
    {
        dprintf (1, ("[no_gc_callback] detaching callback on exit"));
        schedule_no_gc_callback (true);
    }
    memset (&current_no_gc_region_info, 0, sizeof (current_no_gc_region_info));

    return TRUE;
}

void gc_heap::update_end_gc_time_per_heap()
{
    for (int gen_number = 0; gen_number <= settings.condemned_generation; gen_number++)
    {
        dynamic_data* dd = dynamic_data_of (gen_number);

        if (heap_number == 0)
        {
            dprintf (3, ("prev gen%d GC end time: prev start %I64d + prev gc elapsed %Id = %I64d",
                gen_number, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd))));
        }

        dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd));

        if (heap_number == 0)
        {
            dprintf (3, ("updated NGC%d %Id elapsed time to %I64d - %I64d = %I64d", gen_number, dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd)));
        }
    }
}

// Stamps the current high precision time as end_gc_time and copies it into
// last_alloc_reset_suspended_end_time (and, with HEAP_BALANCE_INSTRUMENTATION,
// into last_gc_end_time_us, which is then logged).
void gc_heap::update_end_ngc_time()
{
    last_alloc_reset_suspended_end_time = end_gc_time = GetHighPrecisionTimeStamp();

#ifdef HEAP_BALANCE_INSTRUMENTATION
    last_gc_end_time_us = end_gc_time;
    dprintf (HEAP_BALANCE_LOG, ("[GC#%zd-%zd-%zd]", settings.gc_index,
        (last_gc_end_time_us - dd_time_clock (dynamic_data_of (0))),
        dd_time_clock (dynamic_data_of (0))));
#endif //HEAP_BALANCE_INSTRUMENTATION
}

// Smooths the desired allocation for generation gen across GCs.
//
// gen              - generation whose smoothed total (smoothed_desired_total[gen]) is updated.
// collection_count - number of collections so far; the smoothing factor is
//                    min (3, collection_count), clamped to at least 1.
// desired_per_heap - the newly computed, unsmoothed desired allocation per heap.
//
// Returns the smoothed per-heap value, raised to at least dd_min_size of the
// generation and aligned.
size_t gc_heap::exponential_smoothing (int gen, size_t collection_count, size_t desired_per_heap)
{
    // to avoid spikes in mem usage due to short terms fluctuations in survivorship,
    // apply some smoothing.
    size_t smoothing = min((size_t)3, collection_count);

    // A collection_count of 0 would make the divisions below divide by zero.
    // With a factor of 1 the result is simply the new desired total (no history is mixed in).
    if (smoothing == 0)
    {
        smoothing = 1;
    }

    size_t desired_total = desired_per_heap * n_heaps;
    size_t new_smoothed_desired_total = desired_total / smoothing + ((smoothed_desired_total[gen] / smoothing) * (smoothing - 1));
    smoothed_desired_total[gen] = new_smoothed_desired_total;
    size_t new_smoothed_desired_per_heap = new_smoothed_desired_total / n_heaps;

    // make sure we have at least dd_min_size
#ifdef MULTIPLE_HEAPS
    gc_heap* hp = g_heaps[0];
#else //MULTIPLE_HEAPS
    gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
    dynamic_data* dd = hp->dynamic_data_of (gen);
    new_smoothed_desired_per_heap = max (new_smoothed_desired_per_heap, dd_min_size (dd));

    // align properly
    new_smoothed_desired_per_heap = Align (new_smoothed_desired_per_heap, get_alignment_constant (gen <= soh_gen2));
    dprintf (2, ("new smoothed_desired_per_heap for gen %d = %zd, desired_per_heap = %zd", gen, new_smoothed_desired_per_heap, desired_per_heap));

    return new_smoothed_desired_per_heap;
}

// Internal part of a GC, used by both the serial (blocking) and the concurrent
// (background) version. The generation to condemn and the other decisions have
// already been made and are read from `settings`.
//
// In outline, for this heap:
//  - if settings.concurrent: background_mark_phase + background_sweep;
//    otherwise mark_phase + plan_phase.
//  - folds the pinned allocation sizes into generation_allocation_size.
//  - records elapsed time and per generation data (BGC), or recomputes the
//    dynamic data for the condemned generations (blocking GC).
//  - for blocking GCs: adjusts the ephemeral limits and picks the next
//    allocation quantum.
//  - optionally verifies the heap / walks it for ETW.
//  - with MULTIPLE_HEAPS and a blocking GC: joins all heaps and, on the joined
//    thread, equalizes the budgets across heaps before restarting them.
void gc_heap::gc1()
{
#ifdef BACKGROUND_GC
    assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread()));
#endif //BACKGROUND_GC

    verify_soh_segment_list();

    int n = settings.condemned_generation;

    // For a GC with reason_pm_full_gc the per heap records and the condemn
    // reasons are (re)initialized here, with both the initial and the final
    // per heap generation set to n.
    if (settings.reason == reason_pm_full_gc)
    {
        assert (n == max_generation);
        init_records();

        gen_to_condemn_tuning* local_condemn_reasons = &(get_gc_data_per_heap()->gen_to_condemn_reasons);
        local_condemn_reasons->init();
        local_condemn_reasons->set_gen (gen_initial, n);
        local_condemn_reasons->set_gen (gen_final_per_heap, n);
    }

    update_collection_counts ();

#ifdef BACKGROUND_GC
    bgc_alloc_lock->check();
#endif //BACKGROUND_GC

    free_list_info (max_generation, "beginning");

    vm_heap->GcCondemnedGeneration = settings.condemned_generation;

    assert (g_gc_card_table == card_table);

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    assert (g_gc_card_bundle_table == card_bundle_table);
#endif

    {
#ifndef USE_REGIONS
        // Without regions, [gc_low, gc_high) is the whole heap range for a
        // max_generation GC, otherwise it starts at the allocation start of
        // the condemned generation and ends at the reserved end of the
        // ephemeral segment.
        if (n == max_generation)
        {
            gc_low = lowest_address;
            gc_high = highest_address;
        }
        else
        {
            gc_low = generation_allocation_start (generation_of (n));
            gc_high = heap_segment_reserved (ephemeral_heap_segment);
        }
#endif //USE_REGIONS

#ifdef BACKGROUND_GC
        if (settings.concurrent)
        {
#ifdef TRACE_GC
            time_bgc_last = GetHighPrecisionTimeStamp();
#endif //TRACE_GC

            FIRE_EVENT(BGCBegin);

            concurrent_print_time_delta ("BGC");

            concurrent_print_time_delta ("RW");
            background_mark_phase();
            free_list_info (max_generation, "after mark phase");

            background_sweep();
            free_list_info (max_generation, "after sweep phase");
        }
        else
#endif //BACKGROUND_GC
        {
            mark_phase (n);

            check_gen0_bricks();

            // The runtime structures are flagged as not valid for the duration
            // of the plan phase.
            GCScan::GcRuntimeStructuresValid (FALSE);
            plan_phase (n);
            GCScan::GcRuntimeStructuresValid (TRUE);

            check_gen0_bricks();
        }
    }

    //adjust the allocation size from the pinned quantities.
    for (int gen_number = 0; gen_number <= min ((int)max_generation,n+1); gen_number++)
    {
        generation* gn = generation_of (gen_number);
        if (settings.compaction)
        {
            generation_allocation_size (generation_of (gen_number)) += generation_pinned_allocation_compact_size (gn);
        }
        else
        {
            generation_allocation_size (generation_of (gen_number)) += generation_pinned_allocation_sweep_size (gn);
        }
        generation_pinned_allocation_sweep_size (gn) = 0;
        generation_pinned_allocation_compact_size (gn) = 0;
    }

#ifdef BACKGROUND_GC
    if (settings.concurrent)
    {
        // End of a BGC: record the elapsed time and the per generation sizes.
        dynamic_data* dd = dynamic_data_of (n);
        end_gc_time = GetHighPrecisionTimeStamp();
        size_t time_since_last_gen2 = 0;

#ifdef DYNAMIC_HEAP_COUNT
        if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes))
        {
            // Must be computed before dd_gc_elapsed_time is overwritten below, as it
            // uses the elapsed time that is still stored in dd at this point.
            time_since_last_gen2 = (size_t)(end_gc_time - (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)));
            dprintf (6666, ("BGC %Id end %I64d - (prev gen2 start %I64d + elapsed %Id = %I64d) = time inbewteen gen2 %Id",
                dd_gc_clock (dd), end_gc_time, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)), time_since_last_gen2));
        }
#endif //DYNAMIC_HEAP_COUNT

        dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd));
#ifdef DYNAMIC_HEAP_COUNT
        if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes))
        {
            dprintf (6666, ("updating BGC %Id elapsed time to %I64d - %I64d = %I64d", dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd)));

            // NOTE(review): time_since_last_gen2 is used as a divisor without a zero
            // check; a value of 0 would produce inf/NaN here - confirm it cannot be 0.
            float bgc_percent = (float)dd_gc_elapsed_time (dd) * 100.0f / (float)time_since_last_gen2;
            // Store this BGC as the next gen2 sample; the sample index wraps around
            // at dynamic_heap_count_data_t::sample_size.
            dynamic_heap_count_data_t::gen2_sample& g2_sample = dynamic_heap_count_data.gen2_samples[dynamic_heap_count_data.gen2_sample_index];
            g2_sample.gc_index = VolatileLoadWithoutBarrier (&(settings.gc_index));
            g2_sample.gc_duration = dd_gc_elapsed_time (dd);
            g2_sample.gc_percent = bgc_percent;
            dprintf (6666, ("gen2 sample %d elapsed %Id * 100 / time inbetween gen2 %Id = %.3f",
                dynamic_heap_count_data.gen2_sample_index, dd_gc_elapsed_time (dd), time_since_last_gen2, bgc_percent));
            dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size;
            (dynamic_heap_count_data.current_gen2_samples_count)++;
            gc_index_full_gc_end = dd_gc_clock (dynamic_data_of (0));

            calculate_new_heap_count ();
        }
#endif //DYNAMIC_HEAP_COUNT

#ifdef HEAP_BALANCE_INSTRUMENTATION
        if (heap_number == 0)
        {
            last_gc_end_time_us = end_gc_time;
            dprintf (HEAP_BALANCE_LOG, ("[GC#%zd-%zd-BGC]", settings.gc_index, dd_gc_elapsed_time (dd)));
        }
#endif //HEAP_BALANCE_INSTRUMENTATION

        free_list_info (max_generation, "after computing new dynamic data");

        gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();

        // Only the generations below max_generation are recorded here.
        for (int gen_number = 0; gen_number < max_generation; gen_number++)
        {
            dprintf (2, ("end of BGC: gen%d new_alloc: %zd",
                         gen_number, dd_desired_allocation (dynamic_data_of (gen_number))));
            current_gc_data_per_heap->gen_data[gen_number].size_after = generation_size (gen_number);
            current_gc_data_per_heap->gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number));
            current_gc_data_per_heap->gen_data[gen_number].free_obj_space_after = generation_free_obj_space (generation_of (gen_number));
        }
    }
    else
#endif //BACKGROUND_GC
    {
        // End of a blocking GC: recompute the dynamic data of every condemned
        // generation, and for the generations that were not condemned just
        // record their sizes after this GC.
        free_list_info (max_generation, "end");
        for (int gen_number = 0; gen_number <= n; gen_number++)
        {
            compute_new_dynamic_data (gen_number);
        }

        if (n != max_generation)
        {
            for (int gen_number = (n + 1); gen_number < total_generation_count; gen_number++)
            {
                get_gc_data_per_heap()->gen_data[gen_number].size_after = generation_size (gen_number);
                get_gc_data_per_heap()->gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number));
                get_gc_data_per_heap()->gen_data[gen_number].free_obj_space_after = generation_free_obj_space (generation_of (gen_number));
            }
        }

        get_gc_data_per_heap()->maxgen_size_info.running_free_list_efficiency = (uint32_t)(generation_allocator_efficiency_percent (generation_of (max_generation)));

        free_list_info (max_generation, "after computing new dynamic data");
    }

    // For a GC that did not condemn max_generation, call compute_in for the older
    // generation(s) and refresh their fragmentation. With regions this covers every
    // generation from n + 1 up to max_generation, otherwise only n + 1.
    if (n < max_generation)
    {
        int highest_gen_number =
#ifdef USE_REGIONS
            max_generation;
#else //USE_REGIONS
            1 + n;
#endif //USE_REGIONS

        for (int older_gen_idx = (1 + n); older_gen_idx <= highest_gen_number; older_gen_idx++)
        {
            compute_in (older_gen_idx);

            dynamic_data* dd = dynamic_data_of (older_gen_idx);
            size_t new_fragmentation = generation_free_list_space (generation_of (older_gen_idx)) +
                                       generation_free_obj_space (generation_of (older_gen_idx));

#ifdef BACKGROUND_GC
            // max_generation's fragmentation is left alone while a BGC is in its planning state.
            if ((older_gen_idx != max_generation) || (current_c_gc_state != c_gc_state_planning))
#endif //BACKGROUND_GC
            {
                if (settings.promotion)
                {
                    dd_fragmentation (dd) = new_fragmentation;
                }
                else
                {
                    //assert (dd_fragmentation (dd) == new_fragmentation);
                }
            }
        }
    }

#ifdef BACKGROUND_GC
    if (!settings.concurrent)
#endif //BACKGROUND_GC
    {
#ifndef FEATURE_NATIVEAOT
        // GCToEEInterface::IsGCThread() always returns false on NativeAOT, but this assert is useful in CoreCLR.
        assert(GCToEEInterface::IsGCThread());
#endif // FEATURE_NATIVEAOT
        adjust_ephemeral_limits();
    }

#if defined(BACKGROUND_GC) && !defined(USE_REGIONS)
    assert (ephemeral_low == generation_allocation_start (generation_of ( max_generation -1)));
    assert (ephemeral_high == heap_segment_reserved (ephemeral_heap_segment));
#endif //BACKGROUND_GC && !USE_REGIONS

    // Full GC notification (FGN) handling, only active when fgn_maxgen_percent is set.
    if (fgn_maxgen_percent)
    {
        if (settings.condemned_generation == (max_generation - 1))
        {
            check_for_full_gc (max_generation - 1, 0);
        }
        else if (settings.condemned_generation == max_generation)
        {
            // With MULTIPLE_HEAPS only heap 0 signals the end of the full GC.
            if (full_gc_approach_event_set
#ifdef MULTIPLE_HEAPS
                && (heap_number == 0)
#endif //MULTIPLE_HEAPS
                )
            {
                dprintf (2, ("FGN-GC: setting gen2 end event"));

                full_gc_approach_event.Reset();
#ifdef BACKGROUND_GC
                // By definition WaitForFullGCComplete only succeeds if it's full, *blocking* GC, otherwise need to return N/A
                fgn_last_gc_was_concurrent = settings.concurrent ? TRUE : FALSE;
#endif //BACKGROUND_GC
                full_gc_end_event.Set();
                full_gc_approach_event_set = false;
            }
        }
    }

#ifdef BACKGROUND_GC
    if (!settings.concurrent)
#endif //BACKGROUND_GC
    {
        //decide on the next allocation quantum
        // The quantum is the gen0 budget split over twice the number of allocation
        // contexts used, clamped to [1024, CLR_SIZE] and then aligned.
        if (alloc_contexts_used >= 1)
        {
            allocation_quantum = Align (min ((size_t)CLR_SIZE,
                                            (size_t)max ((size_t)1024, get_new_allocation (0) / (2 * alloc_contexts_used))),
                                            get_alignment_constant(FALSE));
            dprintf (3, ("New allocation quantum: %zd(0x%zx)", allocation_quantum, allocation_quantum));
        }
    }
#ifdef USE_REGIONS
    if (end_gen0_region_space == uninitialized_end_gen0_region_space)
    {
        end_gen0_region_space = get_gen0_end_space (memory_type_reserved);
    }
#endif //USE_REGIONS

    descr_generations ("END");

    verify_soh_segment_list();

#ifdef BACKGROUND_GC
    if (gc_can_use_concurrent)
    {
        check_bgc_mark_stack_length();
    }
    assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread()));
#endif //BACKGROUND_GC

    // Heap verification and/or the ETW heap walk for a BGC. For a concurrent GC the
    // EE is suspended around this section and restarted afterwards.
#if defined(VERIFY_HEAP) || (defined (FEATURE_EVENT_TRACE) && defined(BACKGROUND_GC))
    if (FALSE
#ifdef VERIFY_HEAP
        // Note that right now g_pConfig->GetHeapVerifyLevel always returns the same
        // value. If we ever allow randomly adjusting this as the process runs,
        // we cannot call it this way as joins need to match - we must have the same
        // value for all heaps like we do with bgc_heap_walk_for_etw_p.
        || (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)
#endif
#if defined(FEATURE_EVENT_TRACE) && defined(BACKGROUND_GC)
        || (bgc_heap_walk_for_etw_p && settings.concurrent)
#endif
        )
    {
#ifdef BACKGROUND_GC
        bool cooperative_mode = true;

        if (settings.concurrent)
        {
            cooperative_mode = enable_preemptive ();

#ifdef MULTIPLE_HEAPS
            // All BGC threads join; heap 0 then takes the lock and suspends the EE
            // while the other heaps wait on bgc_threads_sync_event.
            bgc_t_join.join(this, gc_join_suspend_ee_verify);
            if (bgc_t_join.joined())
            {
                bgc_threads_sync_event.Reset();

                dprintf(2, ("Joining BGC threads to suspend EE for verify heap"));
                bgc_t_join.restart();
            }
            if (heap_number == 0)
            {
                // need to take the gc_lock in preparation for verify_heap below
                // *before* we suspend the EE, otherwise we get a deadlock
                enter_gc_lock_for_verify_heap();

                suspend_EE();
                bgc_threads_sync_event.Set();
            }
            else
            {
                bgc_threads_sync_event.Wait(INFINITE, FALSE);
                dprintf (2, ("bgc_threads_sync_event is signalled"));
            }
#else //MULTIPLE_HEAPS
            // need to take the gc_lock in preparation for verify_heap below
            // *before* we suspend the EE, otherwise we get a deadlock
            enter_gc_lock_for_verify_heap();

            suspend_EE();
#endif //MULTIPLE_HEAPS

            //fix the allocation area so verify_heap can proceed.
            fix_allocation_contexts (FALSE);
        }
#endif //BACKGROUND_GC

#ifdef BACKGROUND_GC
        assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread()));
#ifdef FEATURE_EVENT_TRACE
        if (bgc_heap_walk_for_etw_p && settings.concurrent)
        {
            GCToEEInterface::DiagWalkBGCSurvivors(__this);

#ifdef MULTIPLE_HEAPS
            bgc_t_join.join(this, gc_join_after_profiler_heap_walk);
            if (bgc_t_join.joined())
            {
                bgc_t_join.restart();
            }
#endif // MULTIPLE_HEAPS
        }
#endif // FEATURE_EVENT_TRACE
#endif //BACKGROUND_GC

#ifdef VERIFY_HEAP
        if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)
            verify_heap (FALSE);
#endif // VERIFY_HEAP

#ifdef BACKGROUND_GC
        if (settings.concurrent)
        {
            repair_allocation_contexts (TRUE);

#ifdef MULTIPLE_HEAPS
            // Mirror of the suspend sequence above: join, then heap 0 restarts the
            // EE and releases the lock while the other heaps wait for the event.
            bgc_t_join.join(this, gc_join_restart_ee_verify);
            if (bgc_t_join.joined())
            {
                bgc_threads_sync_event.Reset();

                dprintf(2, ("Joining BGC threads to restart EE after verify heap"));
                bgc_t_join.restart();
            }
            if (heap_number == 0)
            {
                restart_EE();
                leave_gc_lock_for_verify_heap();
                bgc_threads_sync_event.Set();
            }
            else
            {
                bgc_threads_sync_event.Wait(INFINITE, FALSE);
                dprintf (2, ("bgc_threads_sync_event is signalled"));
            }
#else //MULTIPLE_HEAPS

            restart_EE();
            leave_gc_lock_for_verify_heap();
#endif //MULTIPLE_HEAPS

            disable_preemptive (cooperative_mode);
        }
#endif //BACKGROUND_GC
    }
#endif //VERIFY_HEAP || (FEATURE_EVENT_TRACE && BACKGROUND_GC)

#ifdef MULTIPLE_HEAPS
    if (!settings.concurrent)
    {
        // Blocking GC with multiple heaps: all heaps join here. The work inside the
        // joined() block is done by the one thread for which joined() returns true,
        // before the other heaps are restarted.
        gc_t_join.join(this, gc_join_done);
        if (gc_t_join.joined ())
        {
            gc_heap::internal_gc_done = false;

            //equalize the new desired size of the generations
            int limit = settings.condemned_generation;
            if (limit == max_generation)
            {
                limit = total_generation_count-1;
            }

            for (int gen = 0; gen <= limit; gen++)
            {
                size_t total_desired = 0;
                size_t total_already_consumed = 0;

                // Sum up the budgets (and what was already consumed from them) over all heaps.
                for (int i = 0; i < gc_heap::n_heaps; i++)
                {
                    gc_heap* hp = gc_heap::g_heaps[i];
                    dynamic_data* dd = hp->dynamic_data_of (gen);
                    size_t temp_total_desired = total_desired + dd_desired_allocation (dd);
                    if (temp_total_desired < total_desired)
                    {
                        // we overflowed.
                        total_desired = (size_t)MAX_PTR;
                        break;
                    }
                    total_desired = temp_total_desired;
                    // for gen 1 and gen 2, there may have been some incoming size
                    // already accounted for
                    assert ((ptrdiff_t)dd_desired_allocation (dd) >= dd_new_allocation (dd));
                    size_t already_consumed = dd_desired_allocation (dd) - dd_new_allocation (dd);
                    size_t temp_total_already_consumed = total_already_consumed + already_consumed;

                    // we should never have an overflow here as the consumed size should always fit in a size_t
                    assert (temp_total_already_consumed >= total_already_consumed);
                    total_already_consumed = temp_total_already_consumed;
                }

                size_t desired_per_heap = Align (total_desired/gc_heap::n_heaps, get_alignment_constant (gen <= max_generation));

                size_t already_consumed_per_heap = total_already_consumed / gc_heap::n_heaps;

                if (gen == 0)
                {
                    // to avoid spikes in mem usage due to short terms fluctuations in survivorship,
                    // apply some smoothing.
                    size_t desired_per_heap_before_smoothing = desired_per_heap;
                    desired_per_heap = exponential_smoothing (gen, dd_collection_count (dynamic_data_of(gen)), desired_per_heap);
                    size_t desired_per_heap_after_smoothing = desired_per_heap;

                    if (!heap_hard_limit
#ifdef DYNAMIC_HEAP_COUNT
                        && (dynamic_adaptation_mode != dynamic_adaptation_to_application_sizes)
#endif //DYNAMIC_HEAP_COUNT
                        )
                    {
                        // if desired_per_heap is close to min_gc_size, trim it
                        // down to min_gc_size to stay in the cache
                        gc_heap* hp = gc_heap::g_heaps[0];
                        dynamic_data* dd = hp->dynamic_data_of (gen);
                        size_t min_gc_size = dd_min_size(dd);
                        // if min GC size larger than true on die cache, then don't bother
                        // limiting the desired size
                        if ((min_gc_size <= GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)) &&
                            desired_per_heap <= 2*min_gc_size)
                        {
                            desired_per_heap = min_gc_size;
                        }
                    }
#ifdef HOST_64BIT
                    // NOTE(review): desired_per_heap_before_trim is assigned but not read
                    // anywhere in this function.
                    size_t desired_per_heap_before_trim = desired_per_heap;
                    desired_per_heap = joined_youngest_desired (desired_per_heap);

                    dprintf (6666, ("final gen0 bcs: total desired: %Id (%.3fmb/heap), before smooth %zd -> after smooth %zd -> after joined %zd",
                        total_desired, ((double)(total_desired / n_heaps)/ 1000.0 / 1000.0),
                        desired_per_heap_before_smoothing, desired_per_heap_after_smoothing, desired_per_heap));
#endif // HOST_64BIT
                    gc_data_global.final_youngest_desired = desired_per_heap;
                }
                // NOTE(review): this section is enabled (#if 1) although the trailing
                // comment on its #endif says 0.
#if 1 //subsumed by the linear allocation model
                if (gen >= uoh_start_generation)
                {
                    // to avoid spikes in mem usage due to short terms fluctuations in survivorship,
                    // apply some smoothing.
                    desired_per_heap = exponential_smoothing (gen, dd_collection_count (dynamic_data_of (max_generation)), desired_per_heap);
                }
#endif //0
                // Hand the same (equalized) budget to every heap.
                for (int i = 0; i < gc_heap::n_heaps; i++)
                {
                    gc_heap* hp = gc_heap::g_heaps[i];
                    dynamic_data* dd = hp->dynamic_data_of (gen);
                    dd_desired_allocation (dd) = desired_per_heap;
                    dd_gc_new_allocation (dd) = desired_per_heap;
#ifdef USE_REGIONS
                    // we may have had some incoming objects during this GC -
                    // adjust the consumed budget for these
                    dd_new_allocation (dd) = desired_per_heap - already_consumed_per_heap;
#else //USE_REGIONS
                    // for segments, we want to keep the .NET 6.0 behavior where we did not adjust
                    dd_new_allocation (dd) = desired_per_heap;
#endif //USE_REGIONS

                    if (gen == 0)
                    {
                        hp->fgn_last_alloc = desired_per_heap;
                    }
                }
            }

#ifdef FEATURE_LOH_COMPACTION
            BOOL all_heaps_compacted_p = TRUE;
#endif //FEATURE_LOH_COMPACTION
            int max_gen0_must_clear_bricks = 0;
            for (int i = 0; i < gc_heap::n_heaps; i++)
            {
                gc_heap* hp = gc_heap::g_heaps[i];
                hp->rearrange_uoh_segments();
#ifdef FEATURE_LOH_COMPACTION
                all_heaps_compacted_p &= hp->loh_compacted_p;
#endif //FEATURE_LOH_COMPACTION
                // compute max of gen0_must_clear_bricks over all heaps
                max_gen0_must_clear_bricks = max(max_gen0_must_clear_bricks, hp->gen0_must_clear_bricks);
            }
            verify_committed_bytes_per_heap ();

#ifdef USE_REGIONS
            initGCShadow();
            verify_region_to_generation_map ();
            compute_gc_and_ephemeral_range (settings.condemned_generation, true);
            stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high,
                                           map_region_to_generation_skewed, (uint8_t)min_segment_size_shr);
#endif //USE_REGIONS

#ifdef FEATURE_LOH_COMPACTION
            check_loh_compact_mode (all_heaps_compacted_p);
#endif //FEATURE_LOH_COMPACTION

            // if max_gen0_must_clear_bricks > 0, distribute to all heaps -
            // if one heap encountered an interior pointer during this GC,
            // the next GC might see one on another heap
            if (max_gen0_must_clear_bricks > 0)
            {
                for (int i = 0; i < gc_heap::n_heaps; i++)
                {
                    gc_heap* hp = gc_heap::g_heaps[i];
                    hp->gen0_must_clear_bricks = max_gen0_must_clear_bricks;
                }
            }

#ifdef DYNAMIC_HEAP_COUNT
            if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
            {
                update_total_soh_stable_size();

                // A max_generation GC clears a pending request to trigger a BGC for rethreading.
                if ((settings.condemned_generation == max_generation) && trigger_bgc_for_rethreading_p)
                {
                    trigger_bgc_for_rethreading_p = false;
                }

                process_datas_sample();
            }
#endif //DYNAMIC_HEAP_COUNT

            for (int i = 0; i < gc_heap::n_heaps; i++)
            {
                gc_heap* hp = gc_heap::g_heaps[i];
                hp->decommit_ephemeral_segment_pages();
                hp->descr_generations ("END");
            }

            fire_pevents();

#ifdef USE_REGIONS
            distribute_free_regions();
            age_free_regions ("END");
#endif //USE_REGIONS

            update_end_ngc_time();
            pm_full_gc_init_or_clear();

            gc_t_join.restart();
        }

        // Back on every heap's own thread: per heap bookkeeping.
        update_end_gc_time_per_heap();
        add_to_history_per_heap();
        alloc_context_count = 0;
        heap_select::mark_heap (heap_number);
    }
#else //MULTIPLE_HEAPS
    // Single heap: there is nothing to equalize, the gen0 budget is used as is.
    gc_data_global.final_youngest_desired =
        dd_desired_allocation (dynamic_data_of (0));

#ifdef FEATURE_LOH_COMPACTION
    check_loh_compact_mode (loh_compacted_p);
#endif //FEATURE_LOH_COMPACTION

#ifndef USE_REGIONS
    decommit_ephemeral_segment_pages();
#endif

    fire_pevents();

    if (!(settings.concurrent))
    {
        rearrange_uoh_segments();
        verify_committed_bytes_per_heap ();
#ifdef USE_REGIONS
        initGCShadow();
        verify_region_to_generation_map ();
        compute_gc_and_ephemeral_range (settings.condemned_generation, true);
        stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high,
                                        map_region_to_generation_skewed, (uint8_t)min_segment_size_shr);
        distribute_free_regions();
        age_free_regions ("END");
#endif //USE_REGIONS

        update_end_ngc_time();
        update_end_gc_time_per_heap();
        add_to_history_per_heap();
        do_post_gc();
    }

    pm_full_gc_init_or_clear();

#ifdef BACKGROUND_GC
    recover_bgc_settings();
#endif //BACKGROUND_GC
#endif //MULTIPLE_HEAPS
#ifdef USE_REGIONS
    // A blocking max_generation GC resets last_gc_before_oom.
    if (!(settings.concurrent) && (settings.condemned_generation == max_generation))
    {
        last_gc_before_oom = FALSE;
    }
#endif //USE_REGIONS
}

#ifdef DYNAMIC_HEAP_COUNT
// Returns current_total_soh_stable_size when it is non-zero. Otherwise returns
// an estimate: the sum over all heaps of half of the (max_generation - 1)
// generation size, or, if that sum is zero, the dd_min_size of
// (max_generation - 1) on heap 0.
size_t gc_heap::get_total_soh_stable_size()
{
    if (current_total_soh_stable_size != 0)
    {
        return current_total_soh_stable_size;
    }

    size_t estimated_stable_size = 0;
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        estimated_stable_size += g_heaps[heap_idx]->generation_size (max_generation - 1) / 2;
    }

    if (estimated_stable_size == 0)
    {
        // Setting a temp value before a GC naturally happens (ie, due to allocation).
        estimated_stable_size = dd_min_size (g_heaps[0]->dynamic_data_of (max_generation - 1));
    }

    return estimated_stable_size;
}

void gc_heap::update_total_soh_stable_size()
{
    if ((dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && (settings.condemned_generation == max_generation))
    {
        current_total_soh_stable_size = 0;
        for (int i = 0; i < gc_heap::n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];

            dynamic_data* dd = hp->dynamic_data_of (max_generation);
            current_total_soh_stable_size += dd_current_size (dd) + dd_desired_allocation (dd);
            dprintf (2, ("current size is %.3fmb, budget %.3fmb, total -> %.3fmb", mb (dd_current_size (dd)), mb (dd_desired_allocation (dd)), mb (current_total_soh_stable_size)));
        }
    }
}

void gc_heap::assign_new_budget (int gen_number, size_t desired_per_heap)
{
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
        dynamic_data* dd = hp->dynamic_data_of (gen_number);
        dd_desired_allocation (dd) = desired_per_heap;
        dd_gc_new_allocation (dd) = desired_per_heap;
        dd_new_allocation (dd) = desired_per_heap;
        if (gen_number == 0)
        {
            hp->fgn_last_alloc = desired_per_heap;
        }
    }

    gc_data_global.final_youngest_desired = desired_per_heap;
}

// Makes sure the two buffers used for rethreading free list items exist:
// min_fl_list (MAX_BUCKET_COUNT * n_max_heaps entries) and
// free_list_space_per_heap (n_max_heaps entries). Buffers that are already
// allocated are kept. Returns false if an allocation fails, true otherwise.
bool gc_heap::prepare_rethread_fl_items()
{
    if (min_fl_list == nullptr)
    {
        min_fl_list = new (nothrow) min_fl_list_info [MAX_BUCKET_COUNT * n_max_heaps];
    }
    if (min_fl_list == nullptr)
    {
        return false;
    }

    if (free_list_space_per_heap == nullptr)
    {
        free_list_space_per_heap = new (nothrow) size_t[n_max_heaps];
    }

    return (free_list_space_per_heap != nullptr);
}

void gc_heap::rethread_fl_items(int gen_idx)
{
    uint32_t min_fl_list_size = sizeof (min_fl_list_info) * (MAX_BUCKET_COUNT * n_max_heaps);
    memset (min_fl_list, 0, min_fl_list_size);
    memset (free_list_space_per_heap, 0, sizeof(free_list_space_per_heap[0])*n_max_heaps);

    size_t num_fl_items = 0;
    size_t num_fl_items_rethreaded = 0;

    allocator* gen_allocator = generation_allocator (generation_of (gen_idx));
    gen_allocator->rethread_items (&num_fl_items, &num_fl_items_rethreaded, this, min_fl_list, free_list_space_per_heap, n_heaps);

    num_fl_items_rethreaded_stage2 = num_fl_items_rethreaded;
}

void gc_heap::merge_fl_from_other_heaps (int gen_idx, int to_n_heaps, int from_n_heaps)
{
#ifdef _DEBUG
    uint64_t start_us = GetHighPrecisionTimeStamp ();

    size_t total_num_fl_items_rethreaded_stage2 = 0;

    for (int hn = 0; hn < to_n_heaps; hn++)
    {
        gc_heap* hp = g_heaps[hn];

        total_num_fl_items_rethreaded_stage2 += hp->num_fl_items_rethreaded_stage2;

        min_fl_list_info* current_heap_min_fl_list = hp->min_fl_list;
        allocator* gen_allocator = generation_allocator (hp->generation_of (gen_idx));
        int num_buckets = gen_allocator->number_of_buckets();

        for (int i = 0; i < num_buckets; i++)
        {
            // Get to the bucket for this fl
            min_fl_list_info* current_bucket_min_fl_list = current_heap_min_fl_list + (i * to_n_heaps);
            for (int other_hn = 0; other_hn < from_n_heaps; other_hn++)
            {
                min_fl_list_info* min_fl_other_heap = &current_bucket_min_fl_list[other_hn];
                if (min_fl_other_heap->head)
                {
                    if (other_hn == hn)
                    {
                        dprintf (8888, ("h%d has fl items for itself on the temp list?!", hn));
                        GCToOSInterface::DebugBreak ();
                    }
                }
            }
        }
    }

    uint64_t elapsed = GetHighPrecisionTimeStamp () - start_us;

    dprintf (8888, ("rethreaded %Id items, merging took %I64dus (%I64dms)",
        total_num_fl_items_rethreaded_stage2, elapsed, (elapsed / 1000)));
#endif //_DEBUG

    for (int hn = 0; hn < to_n_heaps; hn++)
    {
        gc_heap* hp = g_heaps[hn];
        generation* gen = hp->generation_of (gen_idx);
        dynamic_data* dd = hp->dynamic_data_of (gen_idx);
        allocator* gen_allocator = generation_allocator (gen);
        gen_allocator->merge_items (hp, to_n_heaps, from_n_heaps);

        size_t free_list_space_decrease = 0;
        if (hn < from_n_heaps)
        {
            // we don't keep track of the size of the items staying on the same heap
            assert (hp->free_list_space_per_heap[hn] == 0);

            for (int to_hn = 0; to_hn < to_n_heaps; to_hn++)
            {
                free_list_space_decrease += hp->free_list_space_per_heap[to_hn];
            }
        }
        dprintf (8888, ("heap %d gen %d %zd total free list space, %zd moved to other heaps",
            hn,
            gen_idx,
            generation_free_list_space (gen),
            free_list_space_decrease));

        assert (free_list_space_decrease <= generation_free_list_space (gen));
        generation_free_list_space (gen) -= free_list_space_decrease;

        // TODO - I'm seeing for gen2 this is free_list_space_decrease can be a bit larger than frag.
        // Need to fix this later.
        if (gen_idx != max_generation)
        {
            assert (free_list_space_decrease <= dd_fragmentation (dd));
        }

        size_t free_list_space_increase = 0;
        for (int from_hn = 0; from_hn < from_n_heaps; from_hn++)
        {
            gc_heap* from_hp = g_heaps[from_hn];

            free_list_space_increase += from_hp->free_list_space_per_heap[hn];
        }
        dprintf (8888, ("heap %d gen %d %zd free list space moved from other heaps", hn, gen_idx, free_list_space_increase));
        generation_free_list_space (gen) += free_list_space_increase;
    }

#ifdef _DEBUG
    // verification to make sure we have the same # of fl items total
    size_t total_fl_items_count = 0;
    size_t total_fl_items_for_oh_count = 0;

    for (int hn = 0; hn < to_n_heaps; hn++)
    {
        gc_heap* hp = g_heaps[hn];
        allocator* gen_allocator = generation_allocator (hp->generation_of (gen_idx));
        size_t fl_items_count = 0;
        size_t fl_items_for_oh_count = 0;
        gen_allocator->count_items (hp, &fl_items_count, &fl_items_for_oh_count);
        total_fl_items_count += fl_items_count;
        total_fl_items_for_oh_count += fl_items_for_oh_count;
    }

    dprintf (8888, ("total %Id fl items, %Id are for other heaps",
        total_fl_items_count, total_fl_items_for_oh_count));

    if (total_fl_items_for_oh_count)
    {
        GCToOSInterface::DebugBreak ();
    }
#endif //_DEBUG
}
#endif //DYNAMIC_HEAP_COUNT

// Records the settings that get overridden while a no-GC region is being set up so that
// restore_data_for_no_gc can put them back: the current pause mode and, with
// MULTIPLE_HEAPS, the gen0 and LOH dd_min_size values.
void gc_heap::save_data_for_no_gc()
{
    current_no_gc_region_info.saved_pause_mode = settings.pause_mode;
#ifdef MULTIPLE_HEAPS
    // This is to affect heap balancing.
    // There is only one saved slot per generation, so the values left in
    // current_no_gc_region_info after the loop are the ones read from the last heap.
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* heap = g_heaps[heap_idx];

        dynamic_data* gen0_dd = heap->dynamic_data_of (0);
        current_no_gc_region_info.saved_gen0_min_size = dd_min_size (gen0_dd);
        dd_min_size (gen0_dd) = min_balance_threshold;

        dynamic_data* loh_dd = heap->dynamic_data_of (loh_generation);
        current_no_gc_region_info.saved_gen3_min_size = dd_min_size (loh_dd);
        dd_min_size (loh_dd) = 0;
    }
#endif //MULTIPLE_HEAPS
}

// Undoes save_data_for_no_gc: reinstates the saved pause mode and, with MULTIPLE_HEAPS,
// writes the saved gen0 and LOH dd_min_size values back to every heap.
void gc_heap::restore_data_for_no_gc()
{
    gc_heap::settings.pause_mode = current_no_gc_region_info.saved_pause_mode;
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* heap = g_heaps[heap_idx];

        dynamic_data* gen0_dd = heap->dynamic_data_of (0);
        dd_min_size (gen0_dd) = current_no_gc_region_info.saved_gen0_min_size;

        dynamic_data* loh_dd = heap->dynamic_data_of (loh_generation);
        dd_min_size (loh_dd) = current_no_gc_region_info.saved_gen3_min_size;
    }
#endif //MULTIPLE_HEAPS
}

// Validates a no-GC region request and records the per-heap SOH/LOH budgets for it.
//
//   total_size             - total number of bytes the caller wants to allocate.
//   loh_size_known         - when TRUE, loh_size bytes go to LOH and the remainder to SOH;
//                            when FALSE, total_size is used as the budget for both.
//   loh_size               - LOH portion of total_size (only read if loh_size_known).
//   disallow_full_blocking - when TRUE, sets current_no_gc_region_info.minimal_gc_p.
//
// Returns start_no_gc_in_progress if a region is already started, start_no_gc_too_large
// if either request exceeds its scaled limit (the saved settings are restored in that
// case), and start_no_gc_success otherwise.
start_no_gc_region_status gc_heap::prepare_for_no_gc_region (uint64_t total_size,
                                                             BOOL loh_size_known,
                                                             uint64_t loh_size,
                                                             BOOL disallow_full_blocking)
{
    if (current_no_gc_region_info.started)
    {
        return start_no_gc_in_progress;
    }

    save_data_for_no_gc();
    settings.pause_mode = pause_no_gc;
    current_no_gc_region_info.start_status = start_no_gc_success;

    assert(total_size != 0);

    // Without a known LOH size both budgets default to the full request.
    uint64_t soh_request = total_size;
    uint64_t loh_request = total_size;
    if (loh_size_known)
    {
        assert(loh_size != 0);
        assert(loh_size <= total_size);
        loh_request = loh_size;
        soh_request = total_size - loh_size;
    }

    int soh_align_const = get_alignment_constant (TRUE);
#ifdef USE_REGIONS
    size_t max_soh_allocated = SIZE_T_MAX;
#else
    size_t max_soh_allocated = soh_segment_size - segment_info_size - eph_gen_starts_size;
#endif
    // Requests are padded by this factor; the limits are shrunk by it.
    const double scale_factor = 1.05;

    int num_heaps = get_num_heaps();

    uint64_t total_allowed_soh_allocation = (uint64_t)max_soh_allocated * num_heaps;
    // [LOCALGC TODO]
    // In theory, the upper limit here is the physical memory of the machine, not
    // SIZE_T_MAX. This is not true today because total_physical_mem can be
    // larger than SIZE_T_MAX if running in wow64 on a machine with more than
    // 4GB of RAM. Once Local GC code divergence is resolved and code is flowing
    // more freely between branches, it would be good to clean this up to use
    // total_physical_mem instead of SIZE_T_MAX.
    assert(total_allowed_soh_allocation <= SIZE_T_MAX);
    uint64_t total_allowed_loh_allocation = SIZE_T_MAX;

    uint64_t soh_limit_scaled = 0;
    if (soh_request > 0)
    {
        soh_limit_scaled = static_cast<uint64_t>(total_allowed_soh_allocation / scale_factor);
    }

    uint64_t loh_limit_scaled = 0;
    if (loh_request > 0)
    {
        loh_limit_scaled = static_cast<uint64_t>(total_allowed_loh_allocation / scale_factor);
    }

    if ((soh_request > soh_limit_scaled) || (loh_request > loh_limit_scaled))
    {
        // The request cannot be honored; undo what save_data_for_no_gc changed.
        restore_data_for_no_gc();
        return start_no_gc_too_large;
    }

    if (soh_request > 0)
    {
        soh_request = static_cast<uint64_t>(soh_request * scale_factor);
        soh_request = min (soh_request, soh_limit_scaled);
    }

    if (loh_request > 0)
    {
        loh_request = static_cast<uint64_t>(loh_request * scale_factor);
        loh_request = min (loh_request, loh_limit_scaled);
    }

    if (disallow_full_blocking)
        current_no_gc_region_info.minimal_gc_p = TRUE;

    if (soh_request != 0)
    {
        current_no_gc_region_info.soh_allocation_size = (size_t)soh_request;
        size_t soh_per_heap = current_no_gc_region_info.soh_allocation_size;
#ifdef MULTIPLE_HEAPS
        soh_per_heap /= n_heaps;
        for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
        {
            // due to heap balancing we need to allow some room before we even look to balance to another heap.
            g_heaps[heap_idx]->soh_allocation_no_gc = min (Align ((soh_per_heap + min_balance_threshold), soh_align_const), max_soh_allocated);
        }
#else //MULTIPLE_HEAPS
        soh_allocation_no_gc = min (Align (soh_per_heap, soh_align_const), max_soh_allocated);
#endif //MULTIPLE_HEAPS
    }

    if (loh_request != 0)
    {
        current_no_gc_region_info.loh_allocation_size = (size_t)loh_request;
        size_t loh_per_heap = current_no_gc_region_info.loh_allocation_size;
#ifdef MULTIPLE_HEAPS
        loh_per_heap /= n_heaps;
        for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
            g_heaps[heap_idx]->loh_allocation_no_gc = Align (loh_per_heap, get_alignment_constant (FALSE));
#else //MULTIPLE_HEAPS
        loh_allocation_no_gc = Align (loh_per_heap, get_alignment_constant (FALSE));
#endif //MULTIPLE_HEAPS
    }

    return start_no_gc_success;
}

void gc_heap::handle_failure_for_no_gc()
{
    gc_heap::restore_data_for_no_gc();
    // sets current_no_gc_region_info.started to FALSE here.
    memset (&current_no_gc_region_info, 0, sizeof (current_no_gc_region_info));
}

// Returns the start status currently recorded in current_no_gc_region_info.
start_no_gc_region_status gc_heap::get_start_no_gc_region_status()
{
    start_no_gc_region_status recorded_status = current_no_gc_region_info.start_status;
    return recorded_status;
}

// While a no-GC region is started, counts this GC against the region and separately
// counts it as induced when settings.reason is an induced reason.
void gc_heap::record_gcs_during_no_gc()
{
    if (!current_no_gc_region_info.started)
    {
        return;
    }

    current_no_gc_region_info.num_gcs++;

    if (is_induced (settings.reason))
    {
        current_no_gc_region_info.num_gcs_induced++;
    }
}

// Walks the LOH free lists, starting at the first bucket suitable for loh_allocation_no_gc,
// looking for a free item strictly larger than that size.
// Returns TRUE as soon as one is found, FALSE if none of the buckets has one.
BOOL gc_heap::find_loh_free_for_no_gc()
{
    allocator* loh_free_lists = generation_allocator (generation_of (loh_generation));
    size_t needed = loh_allocation_no_gc;

    unsigned int bucket = loh_free_lists->first_suitable_bucket (needed);
    while (bucket < loh_free_lists->number_of_buckets())
    {
        for (uint8_t* item = loh_free_lists->alloc_list_head_of (bucket);
             item;
             item = free_list_slot (item))
        {
            size_t item_size = unused_array_size (item);
            if (item_size > needed)
            {
                dprintf (3, ("free item %zx(%zd) for no gc", (size_t)item, item_size));
                return TRUE;
            }
        }

        bucket++;
    }

    return FALSE;
}

// Looks for room for loh_allocation_no_gc bytes on this heap's LOH.
// A large enough free list item satisfies the request without recording a segment.
// Otherwise the first segment with enough reserved-but-unallocated space is remembered in
// saved_loh_segment_no_gc; if none exists and only a minimal GC is allowed, a new segment
// is requested instead. Returns TRUE if space was found.
BOOL gc_heap::find_loh_space_for_no_gc()
{
    saved_loh_segment_no_gc = 0;

    if (find_loh_free_for_no_gc())
        return TRUE;

    for (heap_segment* candidate = generation_allocation_segment (generation_of (loh_generation));
         candidate;
         candidate = heap_segment_next (candidate))
    {
        size_t unallocated = heap_segment_reserved (candidate) - heap_segment_allocated (candidate);
        if (unallocated >= loh_allocation_no_gc)
        {
            saved_loh_segment_no_gc = candidate;
            break;
        }
    }

    if (current_no_gc_region_info.minimal_gc_p)
    {
        if (!saved_loh_segment_no_gc)
        {
            // If no full GC is allowed, we try to get a new seg right away.
            saved_loh_segment_no_gc = get_segment_for_uoh (loh_generation, get_uoh_seg_size (loh_allocation_no_gc)
#ifdef MULTIPLE_HEAPS
                                                          , this
#endif //MULTIPLE_HEAPS
                                                          );
        }
    }

    return (saved_loh_segment_no_gc != 0);
}

// Returns TRUE when saved_loh_segment_no_gc is set and is not found on the LOH segment
// list reachable from the generation's allocation segment; FALSE when there is no saved
// segment or it is already on that list.
BOOL gc_heap::loh_allocated_for_no_gc()
{
    if (saved_loh_segment_no_gc == 0)
    {
        return FALSE;
    }

    heap_segment* walker = generation_allocation_segment (generation_of (loh_generation));
    do
    {
        if (walker == saved_loh_segment_no_gc)
            return FALSE;

        walker = heap_segment_next (walker);
    }
    while (walker);

    return TRUE;
}

// Grows seg so that loh_allocation_no_gc bytes past its current allocated end are covered.
// Returns the result of grow_heap_segment.
BOOL gc_heap::commit_loh_for_no_gc (heap_segment* seg)
{
    uint8_t* commit_target = heap_segment_allocated (seg) + loh_allocation_no_gc;
    assert (commit_target <= heap_segment_reserved (seg));

    BOOL grown = grow_heap_segment (seg, commit_target);
    return grown;
}

// For every heap whose saved no-GC LOH segment is not yet on its LOH segment list
// (per loh_allocated_for_no_gc), threads that segment onto the list and clears the
// saved pointer.
void gc_heap::thread_no_gc_loh_segments()
{
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* heap = g_heaps[heap_idx];
        if (!heap->loh_allocated_for_no_gc())
        {
            continue;
        }

        heap->thread_uoh_segment (loh_generation, heap->saved_loh_segment_no_gc);
        heap->saved_loh_segment_no_gc = 0;
    }
#else //MULTIPLE_HEAPS
    if (!loh_allocated_for_no_gc())
    {
        return;
    }

    thread_uoh_segment (loh_generation, saved_loh_segment_no_gc);
    saved_loh_segment_no_gc = 0;
#endif //MULTIPLE_HEAPS
}

void gc_heap::set_loh_allocations_for_no_gc()
{
    if (current_no_gc_region_info.loh_allocation_size != 0)
    {
        dynamic_data* dd = dynamic_data_of (loh_generation);
        dd_new_allocation (dd) = loh_allocation_no_gc;
        dd_gc_new_allocation (dd) = dd_new_allocation (dd);
    }
}

void gc_heap::set_soh_allocations_for_no_gc()
{
    if (current_no_gc_region_info.soh_allocation_size != 0)
    {
        dynamic_data* dd = dynamic_data_of (0);
        dd_new_allocation (dd) = soh_allocation_no_gc;
        dd_gc_new_allocation (dd) = dd_new_allocation (dd);
#ifdef MULTIPLE_HEAPS
        alloc_context_count = 0;
#endif //MULTIPLE_HEAPS
    }
}

// Applies the no-GC region LOH and SOH allocation budgets, on every heap when
// MULTIPLE_HEAPS is defined and on this heap otherwise. LOH is set before SOH.
void gc_heap::set_allocations_for_no_gc()
{
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* heap = g_heaps[heap_idx];

        heap->set_loh_allocations_for_no_gc();
        heap->set_soh_allocations_for_no_gc();
    }
#else //MULTIPLE_HEAPS
    set_loh_allocations_for_no_gc();

    set_soh_allocations_for_no_gc();
#endif //MULTIPLE_HEAPS
}

// Checks whether the SOH and LOH budgets recorded for the no-GC region can be satisfied
// with the space the heaps have right now.
//
// Returns TRUE when the start status is still start_no_gc_success and at least one of the
// "GC requested" flags below was raised. Otherwise marks the region as started
// (current_no_gc_region_info.started = TRUE) and returns FALSE; in that case, if nothing
// was requested and the status is success, the allocation budgets have already been
// applied via set_allocations_for_no_gc.
//
// When only a minimal GC is allowed (minimal_gc_p) and a full GC would be needed, the
// start status is set to start_no_gc_no_memory.
BOOL gc_heap::should_proceed_for_no_gc()
{
    BOOL gc_requested = FALSE;
    BOOL loh_full_gc_requested = FALSE;
    BOOL soh_full_gc_requested = FALSE;
    BOOL no_gc_requested = FALSE;
    // NOTE(review): get_new_loh_segments is never read or written again in this function.
    BOOL get_new_loh_segments = FALSE;

#ifdef MULTIPLE_HEAPS
    // need to turn off this flag here because of the call to grow_heap_segment below
    gradual_decommit_in_progress_p = FALSE;
#endif //MULTIPLE_HEAPS

    gc_heap* hp = nullptr;
    if (current_no_gc_region_info.soh_allocation_size)
    {
#ifdef USE_REGIONS
        // With regions, each heap tries to extend its SOH directly; a failure on any heap
        // requests a full GC.
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            hp = g_heaps[i];
#else
        {
            hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
            if (!hp->extend_soh_for_no_gc())
            {
                soh_full_gc_requested = TRUE;
#ifdef MULTIPLE_HEAPS
                break;
#endif //MULTIPLE_HEAPS
            }
        }
#else //USE_REGIONS
        // First pass: does every heap have enough reserved space left in its ephemeral
        // segment past alloc_allocated? If any heap does not, a GC is requested.
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            hp = g_heaps[i];
#else //MULTIPLE_HEAPS
        {
            hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
            size_t reserved_space = heap_segment_reserved (hp->ephemeral_heap_segment) - hp->alloc_allocated;
            if (reserved_space < hp->soh_allocation_no_gc)
            {
                gc_requested = TRUE;
#ifdef MULTIPLE_HEAPS
                break;
#endif //MULTIPLE_HEAPS
            }
        }
        if (!gc_requested)
        {
            // Second pass: the space is reserved on every heap, so try to grow each
            // ephemeral segment to cover it. A failure requests a full GC.
#ifdef MULTIPLE_HEAPS
            for (int i = 0; i < n_heaps; i++)
            {
                hp = g_heaps[i];
#else //MULTIPLE_HEAPS
            {
                hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
                if (!(hp->grow_heap_segment (hp->ephemeral_heap_segment, (hp->alloc_allocated + hp->soh_allocation_no_gc))))
                {
                    soh_full_gc_requested = TRUE;
#ifdef MULTIPLE_HEAPS
                    break;
#endif //MULTIPLE_HEAPS
                }
            }
        }
#endif //USE_REGIONS
    }

    // When a full blocking GC is allowed, any requested GC is treated as a full GC request.
    if (!current_no_gc_region_info.minimal_gc_p && gc_requested)
    {
        soh_full_gc_requested = TRUE;
    }

    no_gc_requested = !(soh_full_gc_requested || gc_requested);

    // A full GC is needed for SOH but only a minimal GC is allowed: fail with no memory.
    if (soh_full_gc_requested && current_no_gc_region_info.minimal_gc_p)
    {
        current_no_gc_region_info.start_status = start_no_gc_no_memory;
        goto done;
    }

    if (!soh_full_gc_requested && current_no_gc_region_info.loh_allocation_size)
    {
        // Check to see if we have enough reserved space.
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            // Note: this declaration shadows the outer hp.
            gc_heap* hp = g_heaps[i];
            if (!hp->find_loh_space_for_no_gc())
            {
                loh_full_gc_requested = TRUE;
                break;
            }
        }
#else //MULTIPLE_HEAPS
        if (!find_loh_space_for_no_gc())
            loh_full_gc_requested = TRUE;
#endif //MULTIPLE_HEAPS

        // Check to see if we have committed space.
        if (!loh_full_gc_requested)
        {
#ifdef MULTIPLE_HEAPS
            for (int i = 0; i < n_heaps; i++)
            {
                gc_heap* hp = g_heaps[i];
                // saved_loh_segment_no_gc is only set when the space came from a segment
                // rather than from a free list item, so only then is a commit attempted.
                if (hp->saved_loh_segment_no_gc &&!hp->commit_loh_for_no_gc (hp->saved_loh_segment_no_gc))
                {
                    loh_full_gc_requested = TRUE;
                    break;
                }
            }
#else //MULTIPLE_HEAPS
            if (saved_loh_segment_no_gc && !commit_loh_for_no_gc (saved_loh_segment_no_gc))
                loh_full_gc_requested = TRUE;
#endif //MULTIPLE_HEAPS
        }
    }

    // A full GC is needed but only a minimal GC is allowed: fail with no memory.
    if (loh_full_gc_requested || soh_full_gc_requested)
    {
        if (current_no_gc_region_info.minimal_gc_p)
            current_no_gc_region_info.start_status = start_no_gc_no_memory;
    }

    no_gc_requested = !(loh_full_gc_requested || soh_full_gc_requested || gc_requested);

    if (current_no_gc_region_info.start_status == start_no_gc_success)
    {
        // Everything fits without a GC, so apply the allocation budgets now.
        if (no_gc_requested)
            set_allocations_for_no_gc();
    }

done:

    if ((current_no_gc_region_info.start_status == start_no_gc_success) && !no_gc_requested)
        return TRUE;
    else
    {
        // We are done with starting the no_gc_region.
        current_no_gc_region_info.started = TRUE;
        return FALSE;
    }
}

// Ends the current no-GC region and reports how it went.
//
// The status is chosen in this priority order:
//   end_no_gc_induced         - at least one induced GC was recorded during the region;
//   end_no_gc_alloc_exceeded  - at least one (non-induced) GC was recorded;
//   end_no_gc_success         - the region was started and no GC was recorded;
//   end_no_gc_not_in_progress - no region was started.
//
// If the pause mode is still pause_no_gc, the saved settings are restored and any
// registered callback is scheduled as abandoned. The region bookkeeping is always cleared.
end_no_gc_region_status gc_heap::end_no_gc_region()
{
    dprintf (1, ("end no gc called"));

    end_no_gc_region_status status;
    if (current_no_gc_region_info.num_gcs_induced)
    {
        status = end_no_gc_induced;
    }
    else if (current_no_gc_region_info.num_gcs)
    {
        status = end_no_gc_alloc_exceeded;
    }
    else if (current_no_gc_region_info.started)
    {
        status = end_no_gc_success;
    }
    else
    {
        status = end_no_gc_not_in_progress;
    }

    if (settings.pause_mode == pause_no_gc)
    {
        restore_data_for_no_gc();

        if (current_no_gc_region_info.callback != nullptr)
        {
            dprintf (1, ("[no_gc_callback] detaching callback on exit"));
            schedule_no_gc_callback (true);
        }
    }

    // Zeroing the whole struct also sets current_no_gc_region_info.started to FALSE.
    memset (&current_no_gc_region_info, 0, sizeof (current_no_gc_region_info));

    return status;
}

// Records whether the no-GC region callback is abandoned and queues it as finalizer work
// if it has not been queued already.
// Dereferences current_no_gc_region_info.callback without checking it for null.
void gc_heap::schedule_no_gc_callback (bool abandoned)
{
    // We still want to schedule the work even when the no-gc callback is abandoned
    // so that we can free the memory associated with it.
    current_no_gc_region_info.callback->abandoned = abandoned;

    if (current_no_gc_region_info.callback->scheduled)
    {
        return;
    }

    current_no_gc_region_info.callback->scheduled = true;
    schedule_finalizer_work(current_no_gc_region_info.callback);
}

// Pushes callback onto the front of the finalizer_work list with a compare-exchange
// retry loop. If the list was empty before the push, asks the EE to enable finalization.
void gc_heap::schedule_finalizer_work (FinalizerWorkItem* callback)
{
    FinalizerWorkItem* observed_head;
    for (;;)
    {
        observed_head = finalizer_work;
        callback->next = observed_head;

        // The push succeeded if the head was still what we linked against.
        if (Interlocked::CompareExchangePointer (&finalizer_work, callback, observed_head) == observed_head)
        {
            break;
        }
    }

    if (observed_head == nullptr)
    {
        GCToEEInterface::EnableFinalization(true);
    }
}

//update counters
void gc_heap::update_collection_counts ()
{
    dynamic_data* dd0 = dynamic_data_of (0);
    dd_gc_clock (dd0) += 1;

    uint64_t now = GetHighPrecisionTimeStamp();

    for (int i = 0; i <= settings.condemned_generation;i++)
    {
        dynamic_data* dd = dynamic_data_of (i);
        dd_collection_count (dd)++;
        //this is needed by the linear allocation model
        if (i == max_generation)
        {
            dd_collection_count (dynamic_data_of (loh_generation))++;
            dd_collection_count(dynamic_data_of(poh_generation))++;
        }

        dd_gc_clock (dd) = dd_gc_clock (dd0);
        dd_previous_time_clock (dd) = dd_time_clock (dd);
        dd_time_clock (dd) = now;
    }
}

#ifdef USE_REGIONS
bool gc_heap::extend_soh_for_no_gc()
{
    size_t required = soh_allocation_no_gc;
    heap_segment* region = ephemeral_heap_segment;

    while (true)
    {
        uint8_t* allocated = (region == ephemeral_heap_segment) ?
                             alloc_allocated :
                             heap_segment_allocated (region);
        size_t available = heap_segment_reserved (region) - allocated;
        size_t commit = min (available, required);

        if (grow_heap_segment (region, allocated + commit))
        {
            required -= commit;
            if (required == 0)
            {
                break;
            }

            region = heap_segment_next (region);
            if (region == nullptr)
            {
                region = get_new_region (0);
                if (region == nullptr)
                {
                    break;
                }
                else
                {
                    GCToEEInterface::DiagAddNewRegion(
                            0,
                            heap_segment_mem (region),
                            heap_segment_allocated (region),
                            heap_segment_reserved (region)
                        );
                }
            }
        }
        else
        {
            break;
        }
    }

    return (required == 0);
}
#else
// Makes sure the ephemeral segment has soh_allocation_no_gc bytes of reserved space past
// its allocated end. If it already does, returns TRUE without changing anything.
// Otherwise obtains a segment from soh_get_segment_to_expand, links it after the current
// ephemeral segment, makes it the new ephemeral segment and recreates generations
// (max_generation - 1) down to 0 at its start. Returns FALSE if no segment was obtained.
BOOL gc_heap::expand_soh_with_minimal_gc()
{
    size_t reserved_left = (size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment));
    if (reserved_left >= soh_allocation_no_gc)
        return TRUE;

    heap_segment* fresh_seg = soh_get_segment_to_expand();
    if (!fresh_seg)
    {
        return FALSE;
    }

    if (g_gc_card_table != card_table)
        copy_brick_card_table();

    settings.promotion = TRUE;
    settings.demotion = FALSE;
    ephemeral_promotion = TRUE;
    int condemned_gen_number = max_generation - 1;

    int align_const = get_alignment_constant (TRUE);

    // Remember where each ephemeral generation started and how big its start object was.
    for (int gen_number = 0; gen_number <= condemned_gen_number; gen_number++)
    {
        uint8_t* gen_start = generation_allocation_start (generation_of (gen_number));
        saved_ephemeral_plan_start[gen_number] = gen_start;
        saved_ephemeral_plan_start_size[gen_number] = Align (size (gen_start), align_const);
    }

    // We do need to clear the bricks here as we are converting a bunch of ephemeral objects to gen2
    // and need to make sure that there are no left over bricks from the previous GCs for the space
    // we just used for gen0 allocation. We will need to go through the bricks for these objects for
    // ephemeral GCs later.
    size_t first_brick = brick_of (generation_allocation_start (generation_of (0)));
    size_t end_brick = brick_of (align_on_brick (heap_segment_allocated (ephemeral_heap_segment)));
    for (size_t brick = first_brick; brick < end_brick; brick++)
    {
        set_brick (brick, -1);
    }

    size_t ephemeral_size = (heap_segment_allocated (ephemeral_heap_segment) -
                            generation_allocation_start (generation_of (max_generation - 1)));

    // Link the new segment in and switch the ephemeral segment over to it.
    heap_segment_next (ephemeral_heap_segment) = fresh_seg;
    ephemeral_heap_segment = fresh_seg;
    uint8_t* next_gen_start = heap_segment_mem (ephemeral_heap_segment);

    // Lay out the generation starts back to back, oldest ephemeral generation first.
    for (int gen_number = condemned_gen_number; gen_number >= 0; gen_number--)
    {
        size_t gen_start_size = Align (min_obj_size);
        make_generation (gen_number, ephemeral_heap_segment, next_gen_start);

        generation* gen = generation_of (gen_number);
        generation_plan_allocation_start (gen) = next_gen_start;
        generation_plan_allocation_start_size (gen) = gen_start_size;
        next_gen_start += gen_start_size;
    }
    heap_segment_used (ephemeral_heap_segment) = next_gen_start - plug_skew;
    heap_segment_plan_allocated (ephemeral_heap_segment) = next_gen_start;

    fix_generation_bounds (condemned_gen_number, generation_of (0));

    // Charge the space left behind on the old segment against max_generation's budget.
    dynamic_data* max_gen_dd = dynamic_data_of (max_generation);
    dd_gc_new_allocation (max_gen_dd) -= ephemeral_size;
    dd_new_allocation (max_gen_dd) = dd_gc_new_allocation (max_gen_dd);

    adjust_ephemeral_limits();
    return TRUE;
}
#endif //USE_REGIONS

// Folds the per-heap no_gc_oom_p flags into the no-GC region start status: if any heap
// flagged an OOM, the start status becomes start_no_gc_no_memory. Each flag that was set
// is cleared again.
// Only to be done on the thread that calls restart in a join for server GC.
void gc_heap::check_and_set_no_gc_oom()
{
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* heap = g_heaps[heap_idx];
        if (!heap->no_gc_oom_p)
        {
            continue;
        }

        current_no_gc_region_info.start_status = start_no_gc_no_memory;
        heap->no_gc_oom_p = false;
    }
#else
    if (!no_gc_oom_p)
    {
        return;
    }

    current_no_gc_region_info.start_status = start_no_gc_no_memory;
    no_gc_oom_p = false;
#endif //MULTIPLE_HEAPS
}

// Runs on each heap to secure the SOH and LOH space recorded for the no-GC region, and
// marks the region as started if every step succeeded.
//
// Failures on this heap are recorded in no_gc_oom_p; check_and_set_no_gc_oom then turns
// any heap's failure into start_no_gc_no_memory. With MULTIPLE_HEAPS the steps are
// separated by joins, and the joined thread performs the cross-heap work before calling
// restart.
void gc_heap::allocate_for_no_gc_after_gc()
{
    if (current_no_gc_region_info.minimal_gc_p)
        repair_allocation_contexts (TRUE);

    no_gc_oom_p = false;

    if (current_no_gc_region_info.start_status != start_no_gc_no_memory)
    {
        if (current_no_gc_region_info.soh_allocation_size != 0)
        {
            // Step 1: make sure the SOH budget is covered on this heap.
#ifdef USE_REGIONS
            no_gc_oom_p = !extend_soh_for_no_gc();
#else
            // Fails if the ephemeral segment does not have enough reserved space left or
            // if growing it to cover the budget fails.
            if (((size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment)) < soh_allocation_no_gc) ||
                (!grow_heap_segment (ephemeral_heap_segment, (heap_segment_allocated (ephemeral_heap_segment) + soh_allocation_no_gc))))
            {
                no_gc_oom_p = true;
            }
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
            gc_t_join.join(this, gc_join_after_commit_soh_no_gc);
            if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
            {
                check_and_set_no_gc_oom();

#ifdef MULTIPLE_HEAPS
                gc_t_join.restart();
#endif //MULTIPLE_HEAPS
            }
        }

        // Step 2: LOH. Only done when SOH succeeded, a full blocking GC was allowed
        // (minimal_gc_p not set) and an LOH budget was requested.
        if ((current_no_gc_region_info.start_status == start_no_gc_success) &&
            !(current_no_gc_region_info.minimal_gc_p) &&
            (current_no_gc_region_info.loh_allocation_size != 0))
        {
            // gc_policy is used here to flag whether this heap needs a new LOH segment
            // (policy_expand) or not (policy_compact).
            gc_policy = policy_compact;
            saved_loh_segment_no_gc = 0;

            if (!find_loh_free_for_no_gc())
            {
                heap_segment* seg = generation_allocation_segment (generation_of (loh_generation));
                BOOL found_seg_p = FALSE;
                // Note: the loop does not stop at the first segment with enough reserved
                // space; it attempts a commit on every such segment and only breaks
                // when a commit fails.
                while (seg)
                {
                    if ((size_t)(heap_segment_reserved (seg) - heap_segment_allocated (seg)) >= loh_allocation_no_gc)
                    {
                        found_seg_p = TRUE;
                        if (!commit_loh_for_no_gc (seg))
                        {
                            no_gc_oom_p = true;
                            break;
                        }
                    }
                    seg = heap_segment_next (seg);
                }

                // No existing segment has room, so a new one is needed.
                if (!found_seg_p)
                    gc_policy = policy_expand;
            }

#ifdef MULTIPLE_HEAPS
            gc_t_join.join(this, gc_join_expand_loh_no_gc);
            if (gc_t_join.joined())
            {
                check_and_set_no_gc_oom();

                if (current_no_gc_region_info.start_status == start_no_gc_success)
                {
                    // Segments for all heaps that need one are acquired here, by the
                    // joined thread, while the other heaps wait for restart.
                    for (int i = 0; i < n_heaps; i++)
                    {
                        gc_heap* hp = g_heaps[i];
                        if (hp->gc_policy == policy_expand)
                        {
                            hp->saved_loh_segment_no_gc = get_segment_for_uoh (loh_generation, get_uoh_seg_size (loh_allocation_no_gc), hp);
                            if (!(hp->saved_loh_segment_no_gc))
                            {
                                current_no_gc_region_info.start_status = start_no_gc_no_memory;
                                break;
                            }
                        }
                    }
                }

                gc_t_join.restart();
            }
#else //MULTIPLE_HEAPS
            check_and_set_no_gc_oom();

            if ((current_no_gc_region_info.start_status == start_no_gc_success) && (gc_policy == policy_expand))
            {
                saved_loh_segment_no_gc = get_segment_for_uoh (loh_generation, get_uoh_seg_size (loh_allocation_no_gc));
                if (!saved_loh_segment_no_gc)
                    current_no_gc_region_info.start_status = start_no_gc_no_memory;
            }
#endif //MULTIPLE_HEAPS

            // Commit the LOH budget in the newly acquired segment, if there is one.
            if ((current_no_gc_region_info.start_status == start_no_gc_success) && saved_loh_segment_no_gc)
            {
                if (!commit_loh_for_no_gc (saved_loh_segment_no_gc))
                {
                    no_gc_oom_p = true;
                }
            }
        }
    }

    // Step 3: final check. If nothing failed, apply the allocation budgets and mark the
    // no-GC region as started.
#ifdef MULTIPLE_HEAPS
    gc_t_join.join(this, gc_join_final_no_gc);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
        check_and_set_no_gc_oom();

        if (current_no_gc_region_info.start_status == start_no_gc_success)
        {
            set_allocations_for_no_gc();
            current_no_gc_region_info.started = TRUE;
        }

#ifdef MULTIPLE_HEAPS
        gc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }
}

// Resets the per-GC records for this heap and captures the "before" size and free space
// of every generation. Heap 0 additionally clears the global GC data.
void gc_heap::init_records()
{
    // An option is to move this to be after we figure out which gen to condemn so we don't
    // need to clear some generations' data 'cause we know they don't change, but that also means
    // we can't simply call memset here.
    memset (&gc_data_per_heap, 0, sizeof (gc_data_per_heap));
    gc_data_per_heap.heap_index = heap_number;
    if (heap_number == 0)
    {
        memset (&gc_data_global, 0, sizeof (gc_data_global));
    }

#ifdef GC_CONFIG_DRIVEN
    memset (interesting_data_per_gc, 0, sizeof (interesting_data_per_gc));
#endif //GC_CONFIG_DRIVEN
    memset (&fgm_result, 0, sizeof (fgm_result));

    for (int gen_number = 0; gen_number < total_generation_count; gen_number++)
    {
        gc_data_per_heap.gen_data[gen_number].size_before = generation_size (gen_number);

        generation* gen = generation_of (gen_number);
        gc_data_per_heap.gen_data[gen_number].free_list_space_before = generation_free_list_space (gen);
        gc_data_per_heap.gen_data[gen_number].free_obj_space_before = generation_free_obj_space (gen);
    }

#ifdef USE_REGIONS
    // Per-GC region bookkeeping starts from a clean slate.
    end_gen0_region_space = uninitialized_end_gen0_region_space;
    end_gen0_region_committed_space = 0;
    gen0_pinned_free_space = 0;
    gen0_large_chunk_found = false;
    num_regions_freed_in_sweep = 0;
#endif //USE_REGIONS

    sufficient_gen0_space_p = FALSE;

#ifdef MULTIPLE_HEAPS
    gen0_allocated_after_gc_p = false;
#endif //MULTIPLE_HEAPS

#if defined (_DEBUG) && defined (VERIFY_HEAP)
    verify_pinned_queue_p = FALSE;
#endif // _DEBUG && VERIFY_HEAP
}

// Handles the two ends of a PM-triggered full GC:
//  - after a (max_generation - 1) GC with pm_trigger_full_gc set, re-initializes
//    settings so that the next GC is a max_generation GC with reason reason_pm_full_gc;
//  - when the GC that just ran had reason reason_pm_full_gc, clears pm_trigger_full_gc.
// Does nothing in any other case.
void gc_heap::pm_full_gc_init_or_clear()
{
    // This means the next GC will be a full blocking GC and we need to init.
    if (settings.condemned_generation == (max_generation - 1))
    {
        if (!pm_trigger_full_gc)
        {
            return;
        }

#ifdef MULTIPLE_HEAPS
        do_post_gc();
#endif //MULTIPLE_HEAPS
        dprintf (GTC_LOG, ("init for PM triggered full GC"));

        // init_mechanisms is called below, so carry the entry memory load across it.
        uint32_t saved_entry_memory_load = settings.entry_memory_load;
        settings.init_mechanisms();
        settings.reason = reason_pm_full_gc;
        settings.condemned_generation = max_generation;
        settings.entry_memory_load = saved_entry_memory_load;
        // Can't assert this since we only check at the end of gen2 GCs,
        // during gen1 the memory load could have already dropped.
        // Although arguably we should just turn off PM then...
        //assert (settings.entry_memory_load >= high_memory_load_th);
        assert (settings.entry_memory_load > 0);
        settings.gc_index += 1;
        do_pre_gc();
        return;
    }

    if (settings.reason != reason_pm_full_gc)
    {
        return;
    }

    // This means we are in the progress of a full blocking GC triggered by
    // this PM mode.
    assert (settings.condemned_generation == max_generation);
    assert (pm_trigger_full_gc);
    pm_trigger_full_gc = false;

    dprintf (GTC_LOG, ("PM triggered full GC done"));
}

// Runs the PM-triggered full GC by calling gc1.
// Expects settings to already describe a non-concurrent max_generation GC whose reason is
// reason_pm_full_gc; the asserts below check exactly that.
void gc_heap::garbage_collect_pm_full_gc()
{
    assert (settings.condemned_generation == max_generation);
    assert (settings.reason == reason_pm_full_gc);
    assert (!settings.concurrent);

    gc1();
}

void gc_heap::garbage_collect (int n)
{
    gc_pause_mode saved_settings_pause_mode = settings.pause_mode;

    //reset the number of alloc contexts
    alloc_contexts_used = 0;

    fix_allocation_contexts (TRUE);
#ifdef MULTIPLE_HEAPS
#ifdef JOIN_STATS
    gc_t_join.start_ts(this);
#endif //JOIN_STATS
    check_gen0_bricks();
    clear_gen0_bricks();
#endif //MULTIPLE_HEAPS

    if ((settings.pause_mode == pause_no_gc) && current_no_gc_region_info.minimal_gc_p)
    {
#ifdef MULTIPLE_HEAPS
        gc_t_join.join(this, gc_join_minimal_gc);
        if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
        {
#ifndef USE_REGIONS
#ifdef MULTIPLE_HEAPS
            // this is serialized because we need to get a segment
            for (int i = 0; i < n_heaps; i++)
            {
                if (!(g_heaps[i]->expand_soh_with_minimal_gc()))
                    current_no_gc_region_info.start_status = start_no_gc_no_memory;
            }
#else
            if (!expand_soh_with_minimal_gc())
                current_no_gc_region_info.start_status = start_no_gc_no_memory;
#endif //MULTIPLE_HEAPS
#endif //!USE_REGIONS

            update_collection_counts_for_no_gc();

#ifdef MULTIPLE_HEAPS
            gc_start_event.Reset();
            gc_t_join.restart();
#endif //MULTIPLE_HEAPS
        }

        goto done;
    }

    init_records();

    settings.reason = gc_trigger_reason;
    num_pinned_objects = 0;

#ifdef STRESS_HEAP
    if (settings.reason == reason_gcstress)
    {
        settings.reason = reason_induced;
        settings.stress_induced = TRUE;
    }
#endif // STRESS_HEAP

#ifdef MULTIPLE_HEAPS
#ifdef STRESS_DYNAMIC_HEAP_COUNT
    Interlocked::Increment (&heaps_in_this_gc);
#endif //STRESS_DYNAMIC_HEAP_COUNT
    //align all heaps on the max generation to condemn
    dprintf (3, ("Joining for max generation to condemn"));
    condemned_generation_num = generation_to_condemn (n,
                                                      &blocking_collection,
                                                      &elevation_requested,
                                                      FALSE);
    gc_t_join.join(this, gc_join_generation_determined);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef FEATURE_BASICFREEZE
        seg_table->delete_old_slots();
#endif //FEATURE_BASICFREEZE

#ifndef USE_REGIONS
        copy_brick_card_table_on_growth ();
#endif //!USE_REGIONS

#ifdef MULTIPLE_HEAPS
#ifdef STRESS_DYNAMIC_HEAP_COUNT
        dprintf (9999, ("%d heaps, join sees %d, actually joined %d, %d idle threads (%d)",
            n_heaps, gc_t_join.get_num_threads (), heaps_in_this_gc,
            VolatileLoadWithoutBarrier(&dynamic_heap_count_data.idle_thread_count), (n_max_heaps - n_heaps)));
        if (heaps_in_this_gc != n_heaps)
        {
            dprintf (9999, ("should have %d heaps but actually have %d!!", n_heaps, heaps_in_this_gc));
            GCToOSInterface::DebugBreak ();
        }

        heaps_in_this_gc = 0;
#endif //STRESS_DYNAMIC_HEAP_COUNT

        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];
            hp->delay_free_segments();
        }
#else //MULTIPLE_HEAPS
        delay_free_segments();
#endif //MULTIPLE_HEAPS

        BOOL should_evaluate_elevation = TRUE;
        BOOL should_do_blocking_collection = FALSE;

#ifdef MULTIPLE_HEAPS
        int gen_max = condemned_generation_num;
        for (int i = 0; i < n_heaps; i++)
        {
            if (gen_max < g_heaps[i]->condemned_generation_num)
                gen_max = g_heaps[i]->condemned_generation_num;
            if (should_evaluate_elevation && !(g_heaps[i]->elevation_requested))
                should_evaluate_elevation = FALSE;
            if ((!should_do_blocking_collection) && (g_heaps[i]->blocking_collection))
                should_do_blocking_collection = TRUE;
        }

        settings.condemned_generation = gen_max;
#else //MULTIPLE_HEAPS
        settings.condemned_generation = generation_to_condemn (n,
                                                            &blocking_collection,
                                                            &elevation_requested,
                                                            FALSE);
        should_evaluate_elevation = elevation_requested;
        should_do_blocking_collection = blocking_collection;
#endif //MULTIPLE_HEAPS

        settings.condemned_generation = joined_generation_to_condemn (
                                            should_evaluate_elevation,
                                            n,
                                            settings.condemned_generation,
                                            &should_do_blocking_collection
                                            STRESS_HEAP_ARG(n)
                                            );

        STRESS_LOG1(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10,
                "condemned generation num: %d\n", settings.condemned_generation);

        record_gcs_during_no_gc();

        if (settings.condemned_generation > 1)
            settings.promotion = TRUE;

#ifdef HEAP_ANALYZE
        // At this point we've decided what generation is condemned
        // See if we've been requested to analyze survivors after the mark phase
        if (GCToEEInterface::AnalyzeSurvivorsRequested(settings.condemned_generation))
        {
            heap_analyze_enabled = TRUE;
        }
#endif // HEAP_ANALYZE

        GCToEEInterface::DiagGCStart(settings.condemned_generation, is_induced (settings.reason));

#ifdef BACKGROUND_GC
        if ((settings.condemned_generation == max_generation) &&
            (should_do_blocking_collection == FALSE) &&
            gc_can_use_concurrent &&
            !temp_disable_concurrent_p &&
            ((settings.pause_mode == pause_interactive) || (settings.pause_mode == pause_sustained_low_latency)))
        {
            keep_bgc_threads_p = TRUE;
            c_write (settings.concurrent, TRUE);
            memset (&bgc_data_global, 0, sizeof(bgc_data_global));
            memcpy (&bgc_data_global, &gc_data_global, sizeof(gc_data_global));
        }
#endif //BACKGROUND_GC

        settings.gc_index = (uint32_t)dd_collection_count (dynamic_data_of (0)) + 1;

#ifdef MULTIPLE_HEAPS
        hb_log_balance_activities();
        hb_log_new_allocation();
#endif //MULTIPLE_HEAPS

        // Call the EE for start of GC work
        GCToEEInterface::GcStartWork (settings.condemned_generation,
                                max_generation);

        // TODO: we could fire an ETW event to say this GC as a concurrent GC but later on due to not being able to
        // create threads or whatever, this could be a non concurrent GC. Maybe for concurrent GC we should fire
        // it in do_background_gc and if it failed to be a CGC we fire it in gc1... in other words, this should be
        // fired in gc1.
        do_pre_gc();

#ifdef MULTIPLE_HEAPS
        dprintf (9999, ("in GC, resetting gc_start"));
        gc_start_event.Reset();
        dprintf(3, ("Starting all gc threads for gc"));
        gc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    descr_generations ("BEGIN");
#if defined(TRACE_GC) && defined(USE_REGIONS)
    if (heap_number == 0)
    {
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap *hp = g_heaps[i];
#else //MULTIPLE_HEAPS
        {
            gc_heap* hp = pGenGCHeap;
            const int i = 0;
#endif //MULTIPLE_HEAPS
            if (settings.condemned_generation == max_generation)
            {
                // print all kinds of free regions
                region_free_list::print(hp->free_regions, i, "BEGIN");
            }
            else
            {
                // print only basic free regions
                hp->free_regions[basic_free_region].print (i, "BEGIN");
            }
        }
    }
#endif // TRACE_GC && USE_REGIONS

#ifdef VERIFY_HEAP
    if ((GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) &&
       !(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_POST_GC_ONLY))
    {
        verify_heap (TRUE);
    }
    if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_BARRIERCHECK)
        checkGCWriteBarrier();
#endif // VERIFY_HEAP

#ifdef BACKGROUND_GC
    if (settings.concurrent)
    {
        // We need to save the settings because we'll need to restore it after each FGC.
        assert (settings.condemned_generation == max_generation);
        settings.compaction = FALSE;
        saved_bgc_settings = settings;

#ifdef MULTIPLE_HEAPS
        if (heap_number == 0)
        {
#ifdef DYNAMIC_HEAP_COUNT
            size_t current_gc_index = VolatileLoadWithoutBarrier (&settings.gc_index);
            if (!bgc_init_gc_index)
            {
                assert (!bgc_init_n_heaps);
                bgc_init_gc_index = current_gc_index;
                bgc_init_n_heaps = (short)n_heaps;
            }
            size_t saved_bgc_th_count_created = bgc_th_count_created;
            size_t saved_bgc_th_count_created_th_existed = bgc_th_count_created_th_existed;
            size_t saved_bgc_th_count_creation_failed = bgc_th_count_creation_failed;
#endif //DYNAMIC_HEAP_COUNT

            // This is the count of threads that GCToEEInterface::CreateThread reported successful for.
            int total_bgc_threads_running = 0;
            for (int i = 0; i < n_heaps; i++)
            {
                gc_heap* hp = g_heaps[i];
                if (prepare_bgc_thread (hp))
                {
                    assert (hp->bgc_thread_running);
                    if (!hp->bgc_thread_running)
                    {
                        dprintf (6666, ("h%d prepare succeeded but running is still false!", i));
                        GCToOSInterface::DebugBreak();
                    }
                    total_bgc_threads_running++;
                }
                else
                {
                    break;
                }
            }

#ifdef DYNAMIC_HEAP_COUNT
            // Even if we don't do a BGC, we need to record how many threads were successfully created because those will
            // be running.
            total_bgc_threads = max (total_bgc_threads, total_bgc_threads_running);

            if (total_bgc_threads_running != n_heaps)
            {
                dprintf (6666, ("wanted to have %d BGC threads but only have %d", n_heaps, total_bgc_threads_running));
            }

            add_to_bgc_th_creation_history (current_gc_index,
                (bgc_th_count_created - saved_bgc_th_count_created),
                (bgc_th_count_created_th_existed - saved_bgc_th_count_created_th_existed),
                (bgc_th_count_creation_failed - saved_bgc_th_count_creation_failed));
#endif //DYNAMIC_HEAP_COUNT

            dprintf (2, ("setting bgc_threads_sync_event"));
            bgc_threads_sync_event.Set();
        }
        else
        {
            bgc_threads_sync_event.Wait(INFINITE, FALSE);
            dprintf (2, ("bgc_threads_sync_event is signalled"));
        }
#else
        prepare_bgc_thread(0);
#endif //MULTIPLE_HEAPS

#ifdef MULTIPLE_HEAPS
        gc_t_join.join(this, gc_join_start_bgc);
        if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
        {
            do_concurrent_p = TRUE;
            do_ephemeral_gc_p = FALSE;
#ifdef MULTIPLE_HEAPS
            dprintf(2, ("Joined to perform a background GC"));

            for (int i = 0; i < n_heaps; i++)
            {
                gc_heap* hp = g_heaps[i];

                if (!(hp->bgc_thread_running))
                {
                    assert (!(hp->bgc_thread));
                }

                // In theory we could be in a situation where bgc_thread_running is false but bgc_thread is non NULL. We don't
                // support this scenario so don't do a BGC.
                if (!(hp->bgc_thread_running && hp->bgc_thread && hp->commit_mark_array_bgc_init()))
                {
                    do_concurrent_p = FALSE;
                    break;
                }
                else
                {
                    hp->background_saved_lowest_address = hp->lowest_address;
                    hp->background_saved_highest_address = hp->highest_address;
                }
            }
#else
            do_concurrent_p = (bgc_thread_running && commit_mark_array_bgc_init());
            if (do_concurrent_p)
            {
                background_saved_lowest_address = lowest_address;
                background_saved_highest_address = highest_address;
            }
#endif //MULTIPLE_HEAPS

#ifdef DYNAMIC_HEAP_COUNT
            dprintf (6666, ("last BGC saw %d heaps and %d total threads, currently %d heaps and %d total threads, %s BGC",
                last_bgc_n_heaps, last_total_bgc_threads, n_heaps, total_bgc_threads, (do_concurrent_p ? "doing" : "not doing")));
#endif //DYNAMIC_HEAP_COUNT

            if (do_concurrent_p)
            {
#ifdef DYNAMIC_HEAP_COUNT
                int diff = n_heaps - last_bgc_n_heaps;
                if (diff > 0)
                {
                    int saved_idle_bgc_thread_count = dynamic_heap_count_data.idle_bgc_thread_count;
                    int max_idle_event_count = min (n_heaps, last_total_bgc_threads);
                    int idle_events_to_set = max_idle_event_count - last_bgc_n_heaps;
                    if (idle_events_to_set > 0)
                    {
                        Interlocked::ExchangeAdd (&dynamic_heap_count_data.idle_bgc_thread_count, -idle_events_to_set);
                        dprintf (6666, ("%d BGC threads exist, setting %d idle events for h%d-h%d, total idle %d -> %d",
                            total_bgc_threads, idle_events_to_set, last_bgc_n_heaps, (last_bgc_n_heaps + idle_events_to_set - 1),
                            saved_idle_bgc_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count)));
                        for (int heap_idx = last_bgc_n_heaps; heap_idx < max_idle_event_count; heap_idx++)
                        {
                            g_heaps[heap_idx]->bgc_idle_thread_event.Set();
                        }
                    }
                }

                last_bgc_n_heaps = n_heaps;
                last_total_bgc_threads = total_bgc_threads;
#endif //DYNAMIC_HEAP_COUNT

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
                SoftwareWriteWatch::EnableForGCHeap();
#endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

#ifdef MULTIPLE_HEAPS
                for (int i = 0; i < n_heaps; i++)
                    g_heaps[i]->current_bgc_state = bgc_initialized;
#else
                current_bgc_state = bgc_initialized;
#endif //MULTIPLE_HEAPS

                int gen = check_for_ephemeral_alloc();
                // always do a gen1 GC before we start BGC.
                dont_restart_ee_p = TRUE;
                if (gen == -1)
                {
                    // If we decide to not do a GC before the BGC we need to
                    // restore the gen0 alloc context.
#ifdef MULTIPLE_HEAPS
                    for (int i = 0; i < n_heaps; i++)
                    {
                        generation_allocation_pointer (g_heaps[i]->generation_of (0)) =  0;
                        generation_allocation_limit (g_heaps[i]->generation_of (0)) = 0;
                    }
#else
                    generation_allocation_pointer (youngest_generation) =  0;
                    generation_allocation_limit (youngest_generation) = 0;
#endif //MULTIPLE_HEAPS
                }
                else
                {
                    do_ephemeral_gc_p = TRUE;

                    settings.init_mechanisms();
                    settings.condemned_generation = gen;

#ifdef DYNAMIC_HEAP_COUNT
                    if (trigger_bgc_for_rethreading_p)
                    {
                        settings.condemned_generation = 0;
                    }
#endif //DYNAMIC_HEAP_COUNT

                    settings.gc_index = (size_t)dd_collection_count (dynamic_data_of (0)) + 2;
                    do_pre_gc();

                    // TODO BACKGROUND_GC need to add the profiling stuff here.
                    dprintf (GTC_LOG, ("doing gen%d before doing a bgc", gen));
                }

                //clear the cards so they don't bleed in gen 1 during collection
                // shouldn't this always be done at the beginning of any GC?
                //clear_card_for_addresses (
                //    generation_allocation_start (generation_of (0)),
                //    heap_segment_allocated (ephemeral_heap_segment));

                if (!do_ephemeral_gc_p)
                {
                    do_background_gc();
                }
            }
            else
            {
                settings.compaction = TRUE;
                c_write (settings.concurrent, FALSE);
            }

#ifdef MULTIPLE_HEAPS
            gc_t_join.restart();
#endif //MULTIPLE_HEAPS
        }

        if (do_concurrent_p)
        {
            // At this point we are sure we'll be starting a BGC, so save its per heap data here.
            // global data is only calculated at the end of the GC so we don't need to worry about
            // FGCs overwriting it.
            memset (&bgc_data_per_heap, 0, sizeof (bgc_data_per_heap));
            memcpy (&bgc_data_per_heap, &gc_data_per_heap, sizeof(gc_data_per_heap));

            if (do_ephemeral_gc_p)
            {
                dprintf (2, ("GC threads running, doing gen%d GC", settings.condemned_generation));

                gen_to_condemn_reasons.init();
                gen_to_condemn_reasons.set_condition (gen_before_bgc);
                gc_data_per_heap.gen_to_condemn_reasons.init (&gen_to_condemn_reasons);
                gc1();
#ifdef MULTIPLE_HEAPS
                gc_t_join.join(this, gc_join_bgc_after_ephemeral);
                if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
                {
#ifdef MULTIPLE_HEAPS
                    do_post_gc();
#endif //MULTIPLE_HEAPS
                    settings = saved_bgc_settings;
                    assert (settings.concurrent);

                    do_background_gc();

#ifdef MULTIPLE_HEAPS
                    gc_t_join.restart();
#endif //MULTIPLE_HEAPS
                }
            }
        }
        else
        {
            dprintf (2, ("couldn't create BGC threads, reverting to doing a blocking GC"));
            gc1();
        }
    }
    else
#endif //BACKGROUND_GC
    {
        gc1();
    }
#ifndef MULTIPLE_HEAPS
    allocation_running_time = GCToOSInterface::GetLowPrecisionTimeStamp();
    allocation_running_amount = dd_new_allocation (dynamic_data_of (0));
    fgn_last_alloc = dd_new_allocation (dynamic_data_of (0));
#endif //MULTIPLE_HEAPS

done:
    if (saved_settings_pause_mode == pause_no_gc)
        allocate_for_no_gc_after_gc();
}

// Evaluates to true when mark_stack_tos is equal to mark_stack_base.
#define mark_stack_empty_p() (mark_stack_base == mark_stack_tos)

// Reports the number of promoted bytes recorded for this heap.
//
// With USE_REGIONS the result is the sum of the non-zero entries of survived_per_region,
// or 0 when that array is not available. Without regions the value is read from g_promoted.
inline
size_t gc_heap::get_promoted_bytes()
{
#ifndef USE_REGIONS

#ifdef MULTIPLE_HEAPS
    return g_promoted [heap_number*16];
#else //MULTIPLE_HEAPS
    return g_promoted;
#endif //MULTIPLE_HEAPS

#else //!USE_REGIONS

    // Nothing was recorded per region, so there is nothing to add up.
    if (survived_per_region == nullptr)
    {
        dprintf (REGIONS_LOG, ("no space to store promoted bytes"));
        return 0;
    }

    dprintf (3, ("h%d getting surv", heap_number));

    size_t total_promoted = 0;
    for (size_t region_idx = 0; region_idx < region_count; region_idx++)
    {
        // Entries without survivors contribute nothing and are not logged.
        if (!(survived_per_region[region_idx] > 0))
        {
            continue;
        }

        // The region is looked up for the trace output below.
        heap_segment* region = get_region_at_index (region_idx);
        dprintf (REGIONS_LOG, ("h%d region[%zd] %p(g%d)(%s) surv: %zd(%p)",
            heap_number, region_idx,
            heap_segment_mem (region),
            heap_segment_gen_num (region),
            (heap_segment_loh_p (region) ? "LOH" : (heap_segment_poh_p (region) ? "POH" :"SOH")),
            survived_per_region[region_idx],
            &survived_per_region[region_idx]));

        total_promoted += survived_per_region[region_idx];
    }

#ifdef _DEBUG
    // Cross-check the per-region sum against promoted_bytes for this heap.
    dprintf (REGIONS_LOG, ("h%d global recorded %zd, regions recorded %zd",
        heap_number, promoted_bytes (heap_number), total_promoted));
    assert (promoted_bytes (heap_number) == total_promoted);
#endif //_DEBUG

    return total_promoted;

#endif //!USE_REGIONS
}

#ifdef USE_REGIONS
void gc_heap::sync_promoted_bytes()
{
    int condemned_gen_number = settings.condemned_generation;
    int highest_gen_number = ((condemned_gen_number == max_generation) ?
                              (total_generation_count - 1) : settings.condemned_generation);
    int stop_gen_idx = get_stop_generation_index (condemned_gen_number);

#ifdef MULTIPLE_HEAPS
// We gather all the promoted bytes for a region recorded by all threads into that region's survived
// for plan phase. sore_mark_list will be called shortly and will start using the same storage that
// the GC threads used to record promoted bytes.
    for (int i = 0; i < n_heaps; i++)
    {
        gc_heap* hp = g_heaps[i];

#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        for (int gen_idx = highest_gen_number; gen_idx >= stop_gen_idx; gen_idx--)
        {
            generation* condemned_gen = hp->generation_of (gen_idx);
            heap_segment* current_region = heap_segment_rw (generation_start_segment (condemned_gen));

            while (current_region)
            {
                size_t region_index = get_basic_region_index_for_address (heap_segment_mem (current_region));

#ifdef MULTIPLE_HEAPS
                size_t total_surv = 0;
                size_t total_old_card_surv = 0;

                for (int hp_idx = 0; hp_idx < n_heaps; hp_idx++)
                {
                    total_surv += g_heaps[hp_idx]->survived_per_region[region_index];
                    total_old_card_surv += g_heaps[hp_idx]->old_card_survived_per_region[region_index];
                }

                heap_segment_survived (current_region) = total_surv;
                heap_segment_old_card_survived (current_region) = (int)total_old_card_surv;
#else
                heap_segment_survived (current_region) = survived_per_region[region_index];
                heap_segment_old_card_survived (current_region) =
                    (int)(old_card_survived_per_region[region_index]);
#endif //MULTIPLE_HEAPS

                dprintf (REGIONS_LOG, ("region #%zd %p surv %zd, old card surv %d",
                    region_index,
                    heap_segment_mem (current_region),
                    heap_segment_survived (current_region),
                    heap_segment_old_card_survived (current_region)));

                current_region = heap_segment_next (current_region);
            }
        }
    }
}

#ifdef MULTIPLE_HEAPS
// Sets heap_segment_heap to hp for every basic region contained in `region`.
//
// The range from get_region_start (region) to heap_segment_reserved (region) is walked in
// steps of (1 << min_segment_size_shr) bytes; the region info found at each step is updated.
// hp may be nullptr.
void gc_heap::set_heap_for_contained_basic_regions (heap_segment* region, gc_heap* hp)
{
    uint8_t* region_start = get_region_start (region);
    uint8_t* region_end = heap_segment_reserved (region);

    const int num_basic_regions = (int)((region_end - region_start) >> min_segment_size_shr);
    const size_t basic_region_stride = ((size_t)1 << min_segment_size_shr);

    uint8_t* basic_region_start = region_start;
    for (int remaining = num_basic_regions; remaining > 0; remaining--)
    {
        heap_segment* basic_region = get_region_info (basic_region_start);
        heap_segment_heap (basic_region) = hp;
        basic_region_start += basic_region_stride;
    }
}

// Detaches the first non-read-only region from generation gen_idx of this heap and returns it.
//
// The candidate is the region following generation_tail_ro_region, or the generation's start
// segment when there is no tail read-only region. If the candidate has no next region it is
// left in place and nullptr is returned. On success the region's contained basic regions have
// their heap cleared (set to nullptr); heap_segment_next of the returned region is not modified.
heap_segment* gc_heap::unlink_first_rw_region (int gen_idx)
{
    generation* gen = generation_of (gen_idx);
    heap_segment* tail_ro = generation_tail_ro_region (gen);
    assert ((tail_ro == nullptr) || heap_segment_read_only_p (tail_ro));

    // Pick the candidate: right after the read-only regions, or the very first region.
    heap_segment* region = nullptr;
    if (tail_ro != nullptr)
    {
        region = heap_segment_next (tail_ro);
    }
    else
    {
        region = generation_start_segment (gen);
    }
    assert (region != nullptr);

    // don't remove the last region in the generation
    heap_segment* following = heap_segment_next (region);
    if (following == nullptr)
    {
        assert (region == generation_tail_region (gen));
        return nullptr;
    }

    // Splice the candidate out of the list.
    if (tail_ro != nullptr)
    {
        heap_segment_next (tail_ro) = following;
    }
    else
    {
        generation_start_segment (gen) = following;
    }

    assert (region != generation_tail_region (gen));
    assert (!heap_segment_read_only_p (region));
    dprintf (REGIONS_LOG, ("unlink_first_rw_region on heap: %d gen: %d region: %p", heap_number, gen_idx, heap_segment_mem (region)));

    int oh = heap_segment_oh (region);
    dprintf(3, ("commit-accounting:  from %d to temp [%p, %p) for heap %d", oh, get_region_start (region), heap_segment_committed (region), heap_number));
#ifdef _DEBUG
    // Debug-only bookkeeping: the region's committed bytes no longer count for this heap.
    size_t committed = heap_segment_committed (region) - get_region_start (region);
    if (committed > 0)
    {
        assert (committed_by_oh_per_heap[oh] >= committed);
        committed_by_oh_per_heap[oh] -= committed;
    }
#endif //_DEBUG

    set_heap_for_contained_basic_regions (region, nullptr);

    return region;
}

void gc_heap::thread_rw_region_front (int gen_idx, heap_segment* region)
{
    generation* gen = generation_of (gen_idx);
    assert (!heap_segment_read_only_p (region));
    heap_segment* prev_region = generation_tail_ro_region (gen);
    if (prev_region)
    {
        heap_segment_next (region) = heap_segment_next (prev_region);
        heap_segment_next (prev_region) = region;
    }
    else
    {
        heap_segment_next (region) = generation_start_segment (gen);
        generation_start_segment (gen) = region;
    }
    if (heap_segment_next (region) == nullptr)
    {
        generation_tail_region (gen) = region;
    }
    dprintf (REGIONS_LOG, ("thread_rw_region_front on heap: %d gen: %d region: %p", heap_number, gen_idx, heap_segment_mem (region)));

    int oh = heap_segment_oh (region);
    dprintf(3, ("commit-accounting:  from temp to %d [%p, %p) for heap %d", oh, get_region_start (region), heap_segment_committed (region), this->heap_number));
#ifdef _DEBUG
    size_t committed = heap_segment_committed (region) - get_region_start (region);
    assert (heap_segment_heap (region) == nullptr);
    this->committed_by_oh_per_heap[oh] += committed;
#endif //_DEBUG

    set_heap_for_contained_basic_regions (region, this);
}
#endif // MULTIPLE_HEAPS

void gc_heap::equalize_promoted_bytes(int condemned_gen_number)
{
#ifdef MULTIPLE_HEAPS
    // algorithm to roughly balance promoted bytes across heaps by moving regions between heaps
    // goal is just to balance roughly, while keeping computational complexity low
    // hope is to achieve better work balancing in relocate and compact phases
    // this is also used when the heap count changes to balance regions between heaps
    int highest_gen_number = ((condemned_gen_number == max_generation) ?
        (total_generation_count - 1) : condemned_gen_number);
    int stop_gen_idx = get_stop_generation_index (condemned_gen_number);

    for (int gen_idx = highest_gen_number; gen_idx >= stop_gen_idx; gen_idx--)
    {
        // step 1:
        //  compute total promoted bytes per gen
        size_t total_surv = 0;
        size_t max_surv_per_heap = 0;
        size_t surv_per_heap[MAX_SUPPORTED_CPUS];
        for (int i = 0; i < n_heaps; i++)
        {
            surv_per_heap[i] = 0;

            gc_heap* hp = g_heaps[i];

            generation* condemned_gen = hp->generation_of (gen_idx);
            heap_segment* current_region = heap_segment_rw (generation_start_segment (condemned_gen));

            while (current_region)
            {
                total_surv += heap_segment_survived (current_region);
                surv_per_heap[i] += heap_segment_survived (current_region);
                current_region = heap_segment_next (current_region);
            }

            max_surv_per_heap = max (max_surv_per_heap, surv_per_heap[i]);

            dprintf (REGIONS_LOG, ("gen: %d heap %d surv: %zd", gen_idx, i, surv_per_heap[i]));
        }
        // compute average promoted bytes per heap and per gen
        // be careful to round up
        size_t avg_surv_per_heap = (total_surv + n_heaps - 1) / n_heaps;

        if (avg_surv_per_heap != 0)
        {
            dprintf (REGIONS_LOG, ("before equalize: gen: %d avg surv: %zd max_surv: %zd imbalance: %zd", gen_idx, avg_surv_per_heap, max_surv_per_heap, max_surv_per_heap*100/avg_surv_per_heap));
        }
        //
        // step 2:
        //   remove regions from surplus heaps until all heaps are <= average
        //   put removed regions into surplus regions
        //
        // step 3:
        //   put regions into size classes by survivorship
        //   put deficit heaps into size classes by deficit
        //
        // step 4:
        //   while (surplus regions is non-empty)
        //     get surplus region from biggest size class
        //     put it into heap from biggest deficit size class
        //     re-insert heap by resulting deficit size class

        heap_segment* surplus_regions = nullptr;
        size_t max_deficit = 0;
        size_t max_survived = 0;

        //  go through all the heaps
        for (int i = 0; i < n_heaps; i++)
        {
            // remove regions from this heap until it has average or less survivorship
            while (surv_per_heap[i] > avg_surv_per_heap)
            {
                heap_segment* region = g_heaps[i]->unlink_first_rw_region (gen_idx);
                if (region == nullptr)
                {
                    break;
                }
                assert (surv_per_heap[i] >= heap_segment_survived (region));
                dprintf (REGIONS_LOG, ("heap: %d surv: %zd - %zd = %zd",
                    i,
                    surv_per_heap[i],
                    heap_segment_survived (region),
                    surv_per_heap[i] - heap_segment_survived (region)));

                surv_per_heap[i] -= heap_segment_survived (region);

                heap_segment_next (region) = surplus_regions;
                surplus_regions = region;

                max_survived = max (max_survived, heap_segment_survived (region));
            }
            if (surv_per_heap[i] < avg_surv_per_heap)
            {
                size_t deficit = avg_surv_per_heap - surv_per_heap[i];
                max_deficit = max (max_deficit, deficit);
            }
        }

        // give heaps without regions a region from the surplus_regions,
        // if none are available, steal a region from another heap
        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];
            generation* gen = hp->generation_of (gen_idx);
            if (heap_segment_rw (generation_start_segment (gen)) == nullptr)
            {
                heap_segment* start_region = surplus_regions;
                if (start_region != nullptr)
                {
                    surplus_regions = heap_segment_next (start_region);
                }
                else
                {
                    for (int j = 0; j < n_heaps; j++)
                    {
                        start_region = g_heaps[j]->unlink_first_rw_region (gen_idx);
                        if (start_region != nullptr)
                        {
                            surv_per_heap[j] -= heap_segment_survived (start_region);
                            size_t deficit = avg_surv_per_heap - surv_per_heap[j];
                            max_deficit = max (max_deficit, deficit);
                            break;
                        }
                    }
                }
                assert (start_region);
                dprintf (3, ("making sure heap %d gen %d has at least one region by adding region %zx", start_region));
                heap_segment_next (start_region) = nullptr;

                assert (heap_segment_heap (start_region) == nullptr && hp != nullptr);
                int oh = heap_segment_oh (start_region);
                size_t committed = heap_segment_committed (start_region) - get_region_start (start_region);
                dprintf(3, ("commit-accounting:  from temp to %d [%p, %p) for heap %d", oh, get_region_start (start_region), heap_segment_committed (start_region), hp->heap_number));
#ifdef _DEBUG
                g_heaps[hp->heap_number]->committed_by_oh_per_heap[oh] += committed;
#endif //_DEBUG
                set_heap_for_contained_basic_regions (start_region, hp);
                max_survived = max (max_survived, heap_segment_survived (start_region));
                hp->thread_start_region (gen, start_region);
                surv_per_heap[i] += heap_segment_survived (start_region);
            }
        }

        // we arrange both surplus regions and deficit heaps by size classes
        const int NUM_SIZE_CLASSES = 16;
        heap_segment* surplus_regions_by_size_class[NUM_SIZE_CLASSES];
        memset (surplus_regions_by_size_class, 0, sizeof(surplus_regions_by_size_class));
        double survived_scale_factor = ((double)NUM_SIZE_CLASSES) / (max_survived + 1);

        heap_segment* next_region;
        for (heap_segment* region = surplus_regions; region != nullptr; region = next_region)
        {
            size_t size_class = (size_t)(heap_segment_survived (region)*survived_scale_factor);
            assert ((0 <= size_class) && (size_class < NUM_SIZE_CLASSES));
            next_region = heap_segment_next (region);
            heap_segment_next (region) = surplus_regions_by_size_class[size_class];
            surplus_regions_by_size_class[size_class] = region;
        }

        int next_heap_in_size_class[MAX_SUPPORTED_CPUS];
        int heaps_by_deficit_size_class[NUM_SIZE_CLASSES];
        for (int i = 0; i < NUM_SIZE_CLASSES; i++)
        {
            heaps_by_deficit_size_class[i] = -1;
        }
        double deficit_scale_factor = ((double)NUM_SIZE_CLASSES) / (max_deficit + 1);

        for (int i = 0; i < n_heaps; i++)
        {
            if (avg_surv_per_heap > surv_per_heap[i])
            {
                size_t deficit = avg_surv_per_heap - surv_per_heap[i];
                int size_class = (int)(deficit*deficit_scale_factor);
                assert ((0 <= size_class) && (size_class < NUM_SIZE_CLASSES));
                next_heap_in_size_class[i] = heaps_by_deficit_size_class[size_class];
                heaps_by_deficit_size_class[size_class] = i;
            }
        }

        int region_size_class = NUM_SIZE_CLASSES - 1;
        int heap_size_class = NUM_SIZE_CLASSES - 1;
        while (region_size_class >= 0)
        {
            // obtain a region from the biggest size class
            heap_segment* region = surplus_regions_by_size_class[region_size_class];
            if (region == nullptr)
            {
                region_size_class--;
                continue;
            }
            // and a heap from the biggest deficit size class
            int heap_num;
            while (true)
            {
                if (heap_size_class < 0)
                {
                    // put any remaining regions on heap 0
                    // rare case, but there may be some 0 surv size regions
                    heap_num = 0;
                    break;
                }
                heap_num = heaps_by_deficit_size_class[heap_size_class];
                if (heap_num >= 0)
                {
                    break;
                }
                heap_size_class--;
            }

            // now move the region to the heap
            surplus_regions_by_size_class[region_size_class] = heap_segment_next (region);
            g_heaps[heap_num]->thread_rw_region_front (gen_idx, region);

            // adjust survival for this heap
            dprintf (REGIONS_LOG, ("heap: %d surv: %zd + %zd = %zd",
                heap_num,
                surv_per_heap[heap_num],
                heap_segment_survived (region),
                surv_per_heap[heap_num] + heap_segment_survived (region)));

            surv_per_heap[heap_num] += heap_segment_survived (region);

            if (heap_size_class < 0)
            {
                // no need to update size classes for heaps -
                // just work down the remaining regions, if any
                continue;
            }

            // is this heap now average or above?
            if (surv_per_heap[heap_num] >= avg_surv_per_heap)
            {
                // if so, unlink from the current size class
                heaps_by_deficit_size_class[heap_size_class] = next_heap_in_size_class[heap_num];
                continue;
            }

            // otherwise compute the updated deficit
            size_t new_deficit = avg_surv_per_heap - surv_per_heap[heap_num];

            // check if this heap moves to a different deficit size class
            int new_heap_size_class = (int)(new_deficit*deficit_scale_factor);
            if (new_heap_size_class != heap_size_class)
            {
                // the new deficit size class should be smaller and in range
                assert (new_heap_size_class < heap_size_class);
                assert ((0 <= new_heap_size_class) && (new_heap_size_class < NUM_SIZE_CLASSES));

                // if so, unlink from the current size class
                heaps_by_deficit_size_class[heap_size_class] = next_heap_in_size_class[heap_num];

                // and link to the new size class
                next_heap_in_size_class[heap_num] = heaps_by_deficit_size_class[new_heap_size_class];
                heaps_by_deficit_size_class[new_heap_size_class] = heap_num;
            }
        }
        // we will generally be left with some heaps with deficits here, but that's ok

        // check we didn't screw up the data structures
        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];
            hp->verify_regions (gen_idx, true, true);
        }
#ifdef TRACE_GC
        max_surv_per_heap = 0;
        for (int i = 0; i < n_heaps; i++)
        {
            max_surv_per_heap = max (max_surv_per_heap, surv_per_heap[i]);
        }
        if (avg_surv_per_heap != 0)
        {
            dprintf (REGIONS_LOG, ("after equalize: gen: %d avg surv: %zd max_surv: %zd imbalance: %zd", gen_idx, avg_surv_per_heap, max_surv_per_heap, max_surv_per_heap*100/avg_surv_per_heap));
        }
#endif // TRACE_GC
    }
#endif //MULTIPLE_HEAPS
}

#ifdef DYNAMIC_HEAP_COUNT

// check that the fields of a decommissioned heap have their expected values,
// i.e. were not inadvertently modified
//
// DECOMMISSIONED_VALUE is the poison pattern; each DECOMMISSIONED_* constant
// below is that pattern converted to the type named in its suffix, so that it
// can be stored into / compared against fields of that type.
#define DECOMMISSIONED_VALUE 0xdec0dec0dec0dec0
static const size_t DECOMMISSIONED_SIZE_T = DECOMMISSIONED_VALUE;
static const ptrdiff_t DECOMMISSIONED_PTRDIFF_T = (ptrdiff_t)DECOMMISSIONED_VALUE;
// declared as uint64_t (not ptrdiff_t) so the full 64-bit pattern is kept
// even where ptrdiff_t is narrower than 64 bits
static const uint64_t DECOMMISSIONED_UINT64_T = (uint64_t)DECOMMISSIONED_VALUE;
static uint8_t* const DECOMMISSIONED_UINT8_T_P = (uint8_t*)DECOMMISSIONED_VALUE;
static uint8_t** const DECOMMISSIONED_UINT8_T_PP = (uint8_t**)DECOMMISSIONED_VALUE;
static PTR_heap_segment const DECOMMISSIONED_REGION_P = (PTR_heap_segment)DECOMMISSIONED_VALUE;
static mark* const DECOMMISSIONED_MARK_P = (mark*)DECOMMISSIONED_VALUE;
static const BOOL DECOMMISSIONED_BOOL = 0xdec0dec0;
// poison for plain int fields - declared with the type it stands for
static const int DECOMMISSIONED_INT = (int)0xdec0dec0;
static const float DECOMMISSIONED_FLOAT = (float)DECOMMISSIONED_VALUE;

// marker stored by recommission_heap into fields that are supposed to be
// set later, to help in debugging
static const ptrdiff_t UNINITIALIZED_VALUE  = 0xbaadbaadbaadbaad;

void gc_heap::check_decommissioned_heap()
{
//  keep the mark stack for the time being
//  assert (mark_stack_array_length             == DECOMMISSIONED_SIZE_T);
//  assert (mark_stack_array                    == DECOMMISSIONED_MARK_P);

    assert (generation_skip_ratio               == DECOMMISSIONED_INT);
    assert (gen0_must_clear_bricks              == DECOMMISSIONED_INT);

    assert (freeable_uoh_segment                == DECOMMISSIONED_REGION_P);

    // TODO: check gen2_alloc_list

#ifdef BACKGROUND_GC
    // keep these fields
    // bgc_thread_id;
    // bgc_thread_running; // gc thread is its main loop
    // bgc_thread;

    // we don't want to hold on to this storage for unused heaps, so zap these fields
    //assert (background_mark_stack_tos           == DECOMMISSIONED_UINT8_T_PP);
    //assert (background_mark_stack_array         == DECOMMISSIONED_UINT8_T_PP);
    //assert (background_mark_stack_array_length  == DECOMMISSIONED_SIZE_T);

    //assert (c_mark_list                         == DECOMMISSIONED_UINT8_T_PP);
    //assert (c_mark_list_length                  == DECOMMISSIONED_SIZE_T);

    assert (freeable_soh_segment                == DECOMMISSIONED_REGION_P);
#endif //BACKGROUND_GC

#ifdef FEATURE_LOH_COMPACTION
    assert (loh_pinned_queue_length             == DECOMMISSIONED_SIZE_T);
    assert (loh_pinned_queue_decay              == DECOMMISSIONED_INT);
    assert (loh_pinned_queue                    == DECOMMISSIONED_MARK_P);
#endif //FEATURE_LOH_COMPACTION

    assert (gen0_bricks_cleared                 == DECOMMISSIONED_BOOL);

    // TODO: check loh_alloc_list
    // TODO: check poh_alloc_list

    assert (alloc_allocated                     == DECOMMISSIONED_UINT8_T_P);
    assert (ephemeral_heap_segment              == DECOMMISSIONED_REGION_P);

    // Keep this field
    // finalize_queue;

#ifdef USE_REGIONS
    // TODO: check free_regions[count_free_region_kinds];
#endif //USE_REGIONS

    assert (more_space_lock_soh.lock            == lock_decommissioned);
    assert (more_space_lock_uoh.lock            == lock_decommissioned);

    assert (soh_allocation_no_gc                == DECOMMISSIONED_SIZE_T);
    assert (loh_allocation_no_gc                == DECOMMISSIONED_SIZE_T);

    for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
    {
        generation* gen = generation_of (gen_idx);

        assert (generation_start_segment                   (gen) == DECOMMISSIONED_REGION_P);
        assert (generation_allocation_segment              (gen) == DECOMMISSIONED_REGION_P);
        assert (generation_tail_region                     (gen) == DECOMMISSIONED_REGION_P);
        assert (generation_tail_ro_region                  (gen) == DECOMMISSIONED_REGION_P);
        assert (generation_allocation_context_start_region (gen) == DECOMMISSIONED_UINT8_T_P);
        assert (generation_free_list_allocated             (gen) == DECOMMISSIONED_SIZE_T);
        assert (generation_end_seg_allocated               (gen) == DECOMMISSIONED_SIZE_T);
        assert (generation_allocate_end_seg_p              (gen) == DECOMMISSIONED_BOOL);
        assert (generation_condemned_allocated             (gen) == DECOMMISSIONED_SIZE_T);
        assert (generation_sweep_allocated                 (gen) == DECOMMISSIONED_SIZE_T);
        assert (generation_free_list_space                 (gen) == DECOMMISSIONED_SIZE_T);
        assert (generation_free_obj_space                  (gen) == DECOMMISSIONED_SIZE_T);
        assert (generation_allocation_size                 (gen) == DECOMMISSIONED_SIZE_T);
        assert (generation_pinned_allocation_compact_size  (gen) == DECOMMISSIONED_SIZE_T);
        assert (generation_pinned_allocation_sweep_size    (gen) == DECOMMISSIONED_SIZE_T);
        assert (gen->gen_num                                     == DECOMMISSIONED_INT);

#ifdef DOUBLY_LINKED_FL
        assert (generation_set_bgc_mark_bit_p              (gen) == DECOMMISSIONED_BOOL);
        assert (generation_last_free_list_allocated        (gen) == DECOMMISSIONED_UINT8_T_P);
#endif //DOUBLY_LINKED_FL

        dynamic_data* dd = dynamic_data_of (gen_idx);

        // check if any of the fields have been modified
        assert (dd_new_allocation                  (dd) == DECOMMISSIONED_PTRDIFF_T);
        assert (dd_gc_new_allocation               (dd) == DECOMMISSIONED_PTRDIFF_T);
        assert (dd_surv                     (dd) == (float)DECOMMISSIONED_VALUE);
        assert (dd_desired_allocation              (dd) == DECOMMISSIONED_SIZE_T);

        assert (dd_begin_data_size                 (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_survived_size                   (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_pinned_survived_size            (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_artificial_pinned_survived_size (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_added_pinned_size               (dd) == DECOMMISSIONED_SIZE_T);

#ifdef SHORT_PLUGS
        assert (dd_padding_size                    (dd) == DECOMMISSIONED_SIZE_T);
#endif //SHORT_PLUGS
#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN)
        assert (dd_num_npinned_plugs               (dd) == DECOMMISSIONED_SIZE_T);
#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN
        assert (dd_current_size                    (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_collection_count                (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_promoted_size                   (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_freach_previous_promotion       (dd) == DECOMMISSIONED_SIZE_T);

        assert (dd_fragmentation                   (dd) == DECOMMISSIONED_SIZE_T);

        assert (dd_gc_clock                        (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_time_clock                      (dd) == DECOMMISSIONED_SIZE_T);
        assert (dd_previous_time_clock             (dd) == DECOMMISSIONED_SIZE_T);

        assert (dd_gc_elapsed_time                 (dd) == DECOMMISSIONED_SIZE_T);
    }
}

// take a heap out of service, setting its fields to nonsensical values
// to detect inadvertent usage
//
// The poison values stored here are the ones check_decommissioned_heap
// asserts on; recommission_heap re-initializes the fields again.
void gc_heap::decommission_heap()
{
    // avoid race condition where a thread decides to wait on the gc done event just as
    // another thread decides to decommission the heap
    set_gc_done();

//  keep the mark stack for the time being
//  mark_stack_array_length             = DECOMMISSIONED_SIZE_T;
//  mark_stack_array                    = DECOMMISSIONED_MARK_P;

    generation_skip_ratio               = DECOMMISSIONED_INT;
    gen0_must_clear_bricks              = DECOMMISSIONED_INT;

    freeable_uoh_segment                = DECOMMISSIONED_REGION_P;

    // note for this and the other memset calls below: memset converts its value
    // argument to unsigned char, so the storage is filled with the low byte
    // of DECOMMISSIONED_INT (0xc0), not with the full 32-bit pattern
    memset ((void *)gen2_alloc_list, DECOMMISSIONED_INT, sizeof(gen2_alloc_list[0])*(NUM_GEN2_ALIST - 1));

#ifdef BACKGROUND_GC
    // keep these fields
    // bgc_thread_id;
    // bgc_thread_running; // gc thread is its main loop
    // bgc_thread;

    // We can set these to the decommission value (or wait till they are not used for N GCs before we do that) but if we do we'll
    // need to allocate them in recommission_heap. For now I'm leaving them as they are.
    //background_mark_stack_tos           = DECOMMISSIONED_UINT8_T_PP;
    //background_mark_stack_array         = DECOMMISSIONED_UINT8_T_PP;
    //background_mark_stack_array_length  = DECOMMISSIONED_SIZE_T;

    //c_mark_list                         = DECOMMISSIONED_UINT8_T_PP;
    //c_mark_list_length                  = DECOMMISSIONED_SIZE_T;

    freeable_soh_segment                = DECOMMISSIONED_REGION_P;
#endif //BACKGROUND_GC

#ifdef FEATURE_LOH_COMPACTION
    loh_pinned_queue_length             = DECOMMISSIONED_SIZE_T;
    loh_pinned_queue_decay              = DECOMMISSIONED_INT;
    loh_pinned_queue                    = DECOMMISSIONED_MARK_P;
#endif //FEATURE_LOH_COMPACTION

    gen0_bricks_cleared                 = DECOMMISSIONED_BOOL;

    memset ((void *)loh_alloc_list, DECOMMISSIONED_INT, sizeof(loh_alloc_list));
    memset ((void *)poh_alloc_list, DECOMMISSIONED_INT, sizeof(poh_alloc_list));

    alloc_allocated                     = DECOMMISSIONED_UINT8_T_P;
    ephemeral_heap_segment              = DECOMMISSIONED_REGION_P;

    // Keep this field
    // finalize_queue;

#ifdef USE_REGIONS
    memset ((void *)free_regions, DECOMMISSIONED_INT, sizeof(free_regions));
#endif //USE_REGIONS

    // put the more space locks in the decommissioned state
    // (both are expected to be free at this point)
    assert (more_space_lock_soh.lock    == lock_free);
    more_space_lock_soh.lock            = lock_decommissioned;

    assert (more_space_lock_uoh.lock    == lock_free);
    more_space_lock_uoh.lock            = lock_decommissioned;

    soh_allocation_no_gc                = DECOMMISSIONED_SIZE_T;
    loh_allocation_no_gc                = DECOMMISSIONED_SIZE_T;

    // clear per generation data
    for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
    {
        generation* gen = generation_of (gen_idx);

        // clear the free lists
        generation_allocator (gen)->clear();

        // set the generation fields to nonsensical values
        // to catch cases where we inadvertently use or modify them
        memset (generation_alloc_context           (gen),  DECOMMISSIONED_INT, sizeof(alloc_context));

        generation_start_segment                   (gen) = DECOMMISSIONED_REGION_P;
        generation_allocation_segment              (gen) = DECOMMISSIONED_REGION_P;
        generation_allocation_context_start_region (gen) = DECOMMISSIONED_UINT8_T_P;
        generation_tail_region                     (gen) = DECOMMISSIONED_REGION_P;
        generation_tail_ro_region                  (gen) = DECOMMISSIONED_REGION_P;

        generation_free_list_allocated             (gen) = DECOMMISSIONED_SIZE_T;
        generation_end_seg_allocated               (gen) = DECOMMISSIONED_SIZE_T;
        generation_allocate_end_seg_p              (gen) = DECOMMISSIONED_BOOL;
        generation_condemned_allocated             (gen) = DECOMMISSIONED_SIZE_T;
        generation_sweep_allocated                 (gen) = DECOMMISSIONED_SIZE_T;
        generation_free_list_space                 (gen) = DECOMMISSIONED_SIZE_T;
        generation_free_obj_space                  (gen) = DECOMMISSIONED_SIZE_T;
        generation_allocation_size                 (gen) = DECOMMISSIONED_SIZE_T;

        generation_pinned_allocation_compact_size  (gen) = DECOMMISSIONED_SIZE_T;
        generation_pinned_allocation_sweep_size    (gen) = DECOMMISSIONED_SIZE_T;
        gen->gen_num                                     = DECOMMISSIONED_INT;

#ifdef DOUBLY_LINKED_FL
        generation_set_bgc_mark_bit_p              (gen) = DECOMMISSIONED_BOOL;
        generation_last_free_list_allocated        (gen) = DECOMMISSIONED_UINT8_T_P;
#endif //DOUBLY_LINKED_FL

        dynamic_data* dd = dynamic_data_of (gen_idx);

        // set some fields in the dynamic data to nonsensical values
        // to catch cases where we inadvertently use or modify them
        //
        // dd_new_allocation uses the ptrdiff_t flavor of the poison, matching
        // dd_gc_new_allocation and the comparison in check_decommissioned_heap
        dd_new_allocation                  (dd) = DECOMMISSIONED_PTRDIFF_T;
        dd_gc_new_allocation               (dd) = DECOMMISSIONED_PTRDIFF_T;
        dd_surv                     (dd) = (float)DECOMMISSIONED_VALUE;
        dd_desired_allocation              (dd) = DECOMMISSIONED_SIZE_T;

        dd_begin_data_size                 (dd) = DECOMMISSIONED_SIZE_T;
        dd_survived_size                   (dd) = DECOMMISSIONED_SIZE_T;
        dd_pinned_survived_size            (dd) = DECOMMISSIONED_SIZE_T;
        dd_artificial_pinned_survived_size (dd) = DECOMMISSIONED_SIZE_T;
        dd_added_pinned_size               (dd) = DECOMMISSIONED_SIZE_T;

#ifdef SHORT_PLUGS
        dd_padding_size                    (dd) = DECOMMISSIONED_SIZE_T;
#endif //SHORT_PLUGS
#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN)
        dd_num_npinned_plugs               (dd) = DECOMMISSIONED_SIZE_T;
#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN
        dd_current_size                    (dd) = DECOMMISSIONED_SIZE_T;
        dd_collection_count                (dd) = DECOMMISSIONED_SIZE_T;
        dd_promoted_size                   (dd) = DECOMMISSIONED_SIZE_T;
        dd_freach_previous_promotion       (dd) = DECOMMISSIONED_SIZE_T;

        dd_fragmentation                   (dd) = DECOMMISSIONED_SIZE_T;

        dd_gc_clock                        (dd) = DECOMMISSIONED_SIZE_T;
        dd_time_clock                      (dd) = DECOMMISSIONED_SIZE_T;
        dd_previous_time_clock             (dd) = DECOMMISSIONED_SIZE_T;

        dd_gc_elapsed_time                 (dd) = DECOMMISSIONED_SIZE_T;
    }
}

// re-initialize a heap in preparation to putting it back into service
void gc_heap::recommission_heap()
{
    // reinitialize the fields - consider setting the ones initialized
    // by the next GC to UNINITIALIZED_VALUE instead

//  keep the mark stack for the time being
//  mark_stack_array_length             = 0;
//  mark_stack_array                    = nullptr;

    generation_skip_ratio               = 100;
    gen0_must_clear_bricks              = 0;

    freeable_uoh_segment                = nullptr;

    memset ((void *)gen2_alloc_list, 0, sizeof(gen2_alloc_list));

#ifdef BACKGROUND_GC
    // keep these fields
    // bgc_thread_id;
    // bgc_thread_running; // gc thread is its main loop
    // bgc_thread;

    //background_mark_stack_tos           = nullptr;
    //background_mark_stack_array         = nullptr;
    //background_mark_stack_array_length  = 0;

    //c_mark_list                         = nullptr;
    //c_mark_list_length                  = 0;

    freeable_soh_segment                = nullptr;
#endif //BACKGROUND_GC

#ifdef FEATURE_LOH_COMPACTION
    loh_pinned_queue_length             = 0;
    loh_pinned_queue_decay              = 0;
    loh_pinned_queue                    = 0;
#endif //FEATURE_LOH_COMPACTION

    gen0_bricks_cleared                 = FALSE;

    memset ((void *)loh_alloc_list, 0, sizeof(loh_alloc_list));
    memset ((void *)poh_alloc_list, 0, sizeof(poh_alloc_list));

    alloc_allocated                     = 0;
    ephemeral_heap_segment              = nullptr;

    // Keep this field
    // finalize_queue;

    for (int kind = 0; kind < count_free_region_kinds; kind++)
    {
        free_regions[kind].reset();
    }

    // put the more space locks in the free state
    more_space_lock_soh.lock            = lock_free;
    more_space_lock_uoh.lock            = lock_free;

    soh_allocation_no_gc                = 0;
    loh_allocation_no_gc                = 0;

#ifdef BACKGROUND_GC
    // initialize the background GC sync mechanism
    bgc_alloc_lock->init();
#endif //BACKGROUND_GC

    gc_heap* heap0 = g_heaps[0];

    for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
    {
        // clear the free lists for the new heaps
        generation* gen = generation_of (gen_idx);
        generation_allocator (gen)->clear();

        // reinitialize the fields - consider setting the ones initialized
        // by the next GC to UNINITIALIZED_VALUE instead
        memset (generation_alloc_context           (gen), 0, sizeof(alloc_context));

        generation_start_segment                   (gen) = nullptr;
        generation_tail_ro_region                  (gen) = nullptr;
        generation_tail_region                     (gen) = nullptr;
        generation_allocation_segment              (gen) = nullptr;
        generation_allocation_context_start_region (gen) = nullptr;

        generation_free_list_allocated             (gen) = 0;
        generation_end_seg_allocated               (gen) = 0;
        generation_allocate_end_seg_p              (gen) = 0;
        generation_condemned_allocated             (gen) = 0;
        generation_sweep_allocated                 (gen) = 0;
        generation_free_list_space                 (gen) = 0;
        generation_free_obj_space                  (gen) = 0;
        generation_allocation_size                 (gen) = 0;

        generation_pinned_allocation_compact_size  (gen) = 0;
        generation_pinned_allocation_sweep_size    (gen) = 0;
        gen->gen_num                                     = gen_idx;

#ifdef DOUBLY_LINKED_FL
        generation_set_bgc_mark_bit_p              (gen) = FALSE;
        generation_last_free_list_allocated        (gen) = nullptr;
#endif //DOUBLY_LINKED_FL

        dynamic_data* dd = dynamic_data_of (gen_idx);

        dynamic_data* heap0_dd = heap0->dynamic_data_of (gen_idx);

        // copy some fields from heap0

        // this is copied to dd_previous_time_clock at the start of GC
        dd_time_clock     (dd) = dd_time_clock (heap0_dd);

        // this is used at the start of the next gc to update setting.gc_index
        dd_collection_count (dd) = dd_collection_count (heap0_dd);

        // this field is used to estimate the heap size - set it to 0
        // as the data on this heap are accounted for by other heaps
        // until the next gc, where the fields will be re-initialized
        dd_promoted_size                   (dd) = 0;

        // this field is used at the beginning of a GC to decide
        // which generation to condemn - it will be
        // adjusted as free list items are rethreaded onto this heap
        dd_fragmentation                   (dd) = 0;

        // this value will just be incremented, not re-initialized
        dd_gc_clock                        (dd) = dd_gc_clock (heap0_dd);

        // these are used by the allocator, but will be set later
        dd_new_allocation                  (dd) = UNINITIALIZED_VALUE;
        dd_desired_allocation              (dd) = UNINITIALIZED_VALUE;

        // set the fields that are supposed to be set by the next GC to
        // a special value to help in debugging
        dd_gc_new_allocation               (dd) = UNINITIALIZED_VALUE;
        dd_surv                     (dd) = (float)UNINITIALIZED_VALUE;

        dd_begin_data_size                 (dd) = UNINITIALIZED_VALUE;
        dd_survived_size                   (dd) = UNINITIALIZED_VALUE;
        dd_pinned_survived_size            (dd) = UNINITIALIZED_VALUE;
        dd_artificial_pinned_survived_size (dd) = UNINITIALIZED_VALUE;
        dd_added_pinned_size               (dd) = UNINITIALIZED_VALUE;

#ifdef SHORT_PLUGS
        dd_padding_size                    (dd) = UNINITIALIZED_VALUE;
#endif //SHORT_PLUGS
#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN)
        dd_num_npinned_plugs               (dd) = UNINITIALIZED_VALUE;
#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN
        dd_current_size                    (dd) = UNINITIALIZED_VALUE;
        dd_freach_previous_promotion       (dd) = UNINITIALIZED_VALUE;

        dd_previous_time_clock             (dd) = UNINITIALIZED_VALUE;

        dd_gc_elapsed_time                 (dd) = UNINITIALIZED_VALUE;
    }

#ifdef SPINLOCK_HISTORY
    spinlock_info_index = 0;
    current_uoh_alloc_state = (allocation_state)-1;
#endif //SPINLOCK_HISTORY

#ifdef RECORD_LOH_STATE
    loh_state_index = 0;
#endif //RECORD_LOH_STATE
}

// Returns the middle value of a, b and c.
float median_of_3 (float a, float b, float c)
{
    // Three conditional exchanges leave the values ordered a <= b <= c,
    // so the median ends up in b.
    float held;

    // order a and b
    if (b < a)
    {
        held = b;
        b = a;
        a = held;
    }

    // order a and c - a now holds the smallest of the three
    if (c < a)
    {
        held = c;
        c = a;
        a = held;
    }

    // order the remaining pair
    if (c < b)
    {
        held = c;
        c = b;
        b = held;
    }

    return b;
}

// Returns the logarithm of x in the given base, computed as log(x) / log(base).
// Callers are expected to pass x > base (asserted).
float log_with_base (float x, float base)
{
    assert (x > base);

    // change of base: divide the natural logs, then narrow to float
    const double log_of_x = log(x);
    const double log_of_base = log(base);
    return (float)(log_of_x / log_of_base);
}

// Returns the arithmetic mean of the first size elements of arr.
// The sum is accumulated in float, front to back.
float mean (float* arr, int size)
{
    float total = 0.0;
    float* cursor = arr;

    for (int remaining = size; remaining > 0; remaining--)
    {
        total += *cursor;
        cursor++;
    }

    return (total / size);
}

// Controls the per-sample printing in dynamic_heap_count_data_t::slope: the samples are
// only printed while this is >= 0, and slope decrements it each time it computes a slope.
// Change it to a desired number if you want to print.
int max_times_to_print_tcp = 0;

// Return the slope, and the average values in the avg arg.
float gc_heap::dynamic_heap_count_data_t::slope (float* y, int n, float* avg)
{
    assert (n > 0);

    if (n == 1)
    {
        dprintf (6666, ("only 1 tcp: %.3f, no slope", y[0]));
        *avg = y[0];
        return 0.0;
    }

    int sum_x = 0;

    for (int i = 0; i < n; i++)
    {
        sum_x += i;

        if (max_times_to_print_tcp >= 0)
        {
            dprintf (6666, ("%.3f, ", y[i]));
        }
    }

    float avg_x = (float)sum_x / n;
    float avg_y = mean (y, n);
    *avg = avg_y;

    float numerator = 0.0;
    float denominator = 0.0;

    for (int i = 0; i < n; ++i)
    {
        numerator += ((float)i - avg_x) * (y[i] - avg_y);
        denominator += ((float)i - avg_x) * (i - avg_x);
    }

    max_times_to_print_tcp--;

    return (numerator / denominator);
}

void gc_heap::calculate_new_heap_count ()
{
    assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes);

    dprintf (6666, ("current num of samples %Id (g2: %Id) prev processed %Id (g2: %Id), last full GC happened at index %Id",
        dynamic_heap_count_data.current_samples_count, dynamic_heap_count_data.current_gen2_samples_count,
        dynamic_heap_count_data.processed_samples_count, dynamic_heap_count_data.processed_gen2_samples_count, gc_index_full_gc_end));

    if ((dynamic_heap_count_data.current_samples_count < (dynamic_heap_count_data.processed_samples_count + dynamic_heap_count_data_t::sample_size)) &&
        (dynamic_heap_count_data.current_gen2_samples_count < (dynamic_heap_count_data.processed_gen2_samples_count + dynamic_heap_count_data_t::sample_size)))
    {
        dprintf (6666, ("not enough GCs, skipping"));
        return;
    }

    bool process_eph_samples_p = (dynamic_heap_count_data.current_samples_count >= (dynamic_heap_count_data.processed_samples_count + dynamic_heap_count_data_t::sample_size));
    bool process_gen2_samples_p = (dynamic_heap_count_data.current_gen2_samples_count >= (dynamic_heap_count_data.processed_gen2_samples_count + dynamic_heap_count_data_t::sample_size));

    size_t current_gc_index = VolatileLoadWithoutBarrier (&settings.gc_index);
    float median_gen2_tcp = 0.0f;
    if (dynamic_heap_count_data.current_gen2_samples_count >= (dynamic_heap_count_data.processed_gen2_samples_count + dynamic_heap_count_data_t::sample_size))
    {
        median_gen2_tcp = dynamic_heap_count_data.get_median_gen2_gc_percent ();
    }

    // If there was a blocking gen2 GC, the overhead would be very large and most likely we would not pick it. So we
    // rely on the gen2 sample's overhead calculated above.
    float throughput_cost_percents[dynamic_heap_count_data_t::sample_size];

    if (process_eph_samples_p)
    {
        for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++)
        {
            dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i];
            assert (sample.elapsed_between_gcs > 0);
            throughput_cost_percents[i] = (sample.elapsed_between_gcs ? (((float)sample.msl_wait_time / n_heaps + sample.gc_pause_time) * 100.0f / (float)sample.elapsed_between_gcs) : 0.0f);
            assert (throughput_cost_percents[i] >= 0.0);
            if (throughput_cost_percents[i] > 100.0)
                throughput_cost_percents[i] = 100.0;
            dprintf (6666, ("sample %d in GC#%Id msl %I64d / %d + pause %I64d / elapsed %I64d = tcp: %.3f, surv %zd, gc speed %zd/ms", i,
                sample.gc_index, sample.msl_wait_time, n_heaps, sample.gc_pause_time, sample.elapsed_between_gcs, throughput_cost_percents[i],
                sample.gc_survived_size, (sample.gc_pause_time ? (sample.gc_survived_size * 1000 / sample.gc_pause_time) : 0)));
        }
    }

    float median_throughput_cost_percent = median_of_3 (throughput_cost_percents[0], throughput_cost_percents[1], throughput_cost_percents[2]);
    float avg_throughput_cost_percent = (float)((throughput_cost_percents[0] + throughput_cost_percents[1] + throughput_cost_percents[2]) / 3.0);

    // One of the reasons for outliers is something temporarily affected GC work. We pick the min tcp if the survival is very stable to avoid counting these outliers.
    float min_tcp = throughput_cost_percents[0];
    size_t min_survived = dynamic_heap_count_data.samples[0].gc_survived_size;
    uint64_t min_pause = dynamic_heap_count_data.samples[0].gc_pause_time;
    for (int i = 1; i < dynamic_heap_count_data_t::sample_size; i++)
    {
        min_tcp = min (throughput_cost_percents[i], min_tcp);
        min_survived = min (dynamic_heap_count_data.samples[i].gc_survived_size, min_survived);
        min_pause = min (dynamic_heap_count_data.samples[i].gc_pause_time, min_pause);
    }

    dprintf (6666, ("checking if samples are stable %Id %Id %Id, min tcp %.3f, min pause %I64d",
        dynamic_heap_count_data.samples[0].gc_survived_size, dynamic_heap_count_data.samples[1].gc_survived_size, dynamic_heap_count_data.samples[2].gc_survived_size,
        min_tcp, min_pause));

    bool survived_stable_p = true;
    if (min_survived > 0)
    {
        for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++)
        {
            dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i];
            float diff = (float)(sample.gc_survived_size - min_survived) / (float)min_survived;
            dprintf (6666, ("sample %d diff from min is %Id -> %.3f", i, (sample.gc_survived_size - min_survived), diff));
            if (diff >= 0.15)
            {
                survived_stable_p = false;
            }
        }
    }

    if (survived_stable_p)
    {
        dprintf (6666, ("survived is stable, so we pick min tcp %.3f", min_tcp));
        median_throughput_cost_percent = min_tcp;
    }

    dprintf (6666, ("median tcp: %.3f, avg tcp: %.3f, gen2 tcp %.3f(%.3f, %.3f, %.3f)",
        median_throughput_cost_percent, avg_throughput_cost_percent, median_gen2_tcp,
        dynamic_heap_count_data.gen2_samples[0].gc_percent, dynamic_heap_count_data.gen2_samples[1].gc_percent, dynamic_heap_count_data.gen2_samples[2].gc_percent));

    int extra_heaps = (n_max_heaps >= 16) + (n_max_heaps >= 64);
    int actual_n_max_heaps = n_max_heaps - extra_heaps;

#ifdef STRESS_DYNAMIC_HEAP_COUNT
    // quick hack for initial testing
    int new_n_heaps = (int)gc_rand::get_rand (n_max_heaps - 1) + 1;

    // if we are adjusting down, make sure we adjust lower than the lowest uoh msl heap
    if ((new_n_heaps < n_heaps) && (dynamic_heap_count_data.lowest_heap_with_msl_uoh != -1))
    {
        new_n_heaps = min (dynamic_heap_count_data.lowest_heap_with_msl_uoh, new_n_heaps);
        new_n_heaps = max (new_n_heaps, 1);
    }
    dprintf (6666, ("stress %d -> %d", n_heaps, new_n_heaps));
#else //STRESS_DYNAMIC_HEAP_COUNT
    int new_n_heaps = n_heaps;

    float target_tcp = dynamic_heap_count_data.target_tcp;
    float target_gen2_tcp = dynamic_heap_count_data.target_gen2_tcp;

    if (process_eph_samples_p)
    {
        dynamic_heap_count_data.add_to_recorded_tcp (median_throughput_cost_percent);

        float tcp_to_consider = 0.0f;
        int agg_factor = 0;
        size_t total_soh_stable_size = 0;
        int max_heap_count_datas = 0;
        int min_heap_count_datas = 0;
        dynamic_heap_count_data_t::adjust_metric adj_metric = dynamic_heap_count_data_t::adjust_metric::not_adjusted;

        // For diagnostic purpose. need to init these
        dynamic_heap_count_data_t::decide_change_condition change_decision = (dynamic_heap_count_data_t::decide_change_condition)0;
        int recorded_tcp_count = 0;
        float recorded_tcp_slope = 0.0f;
        size_t num_gcs_since_last_change = 0;
        float current_around_target_accumulation = 0.0f;
        dynamic_heap_count_data_t::decide_adjustment_reason adj_reason = (dynamic_heap_count_data_t::decide_adjustment_reason)0;
        int hc_change_freq_factor = 0;
        dynamic_heap_count_data_t::hc_change_freq_reason hc_freq_reason = (dynamic_heap_count_data_t::hc_change_freq_reason)0;

        if (dynamic_heap_count_data.should_change (median_throughput_cost_percent, &tcp_to_consider, current_gc_index,
                                                   &change_decision, &recorded_tcp_count, &recorded_tcp_slope, &num_gcs_since_last_change, &current_around_target_accumulation))
        {
            total_soh_stable_size = get_total_soh_stable_size();
            size_t total_bcd = dynamic_heap_count_data.compute_total_gen0_budget (total_soh_stable_size);
            max_heap_count_datas = (int)(total_bcd / dynamic_heap_count_data.min_gen0_new_allocation);
            min_heap_count_datas = (int)(total_bcd / dynamic_heap_count_data.max_gen0_new_allocation);
            int max_heap_count_growth_step = dynamic_heap_count_data.get_max_growth (n_heaps);
            int max_heap_count_growth_datas = max_heap_count_datas - n_heaps;
            if (max_heap_count_growth_datas < 0)
            {
                max_heap_count_growth_datas = 0;
            }
            int max_heap_count_growth_core = actual_n_max_heaps - n_heaps;
            int max_heap_count_growth = min (max_heap_count_growth_step, min (max_heap_count_growth_datas, max_heap_count_growth_core));

            float distance = tcp_to_consider - target_tcp;

            dprintf (6666, ("median tcp %.3f, recent tcp %.3f - target %.1f = %.3f", median_throughput_cost_percent, tcp_to_consider, target_tcp, distance));

            float diff_pct = distance / target_tcp;
            // Different for above and below target to avoid oscillation.
            float hc_change_factor = (float)((diff_pct > 0.0) ? 1.5 : 3.0);
            float change_float = diff_pct / hc_change_factor * (float)n_heaps;
            float change_float_rounded = (float)round(change_float);
            int change_int = (int)change_float_rounded;
            dprintf (6666, ("diff pct %.3f / %.1f * %d = %d (%.3f), max hc allowed by datas %d | by core %d, max growth per step %d, max growth by datas %d | by core %d",
                diff_pct, hc_change_factor, n_heaps, change_int, ((float)change_int / n_heaps), max_heap_count_datas, actual_n_max_heaps,
                max_heap_count_growth_step, max_heap_count_growth_datas, max_heap_count_growth_core));

            if (change_int > 0)
            {
                // If we do want to grow but the max HC allowed by DATAS is 0, and we haven't done any gen2 GCs yet, we do want to
                // trigger a gen2 right away.
                if (!max_heap_count_growth_datas && !(dynamic_heap_count_data.current_gen2_samples_count))
                {
                    trigger_initial_gen2_p = true;

                    dprintf (6666, ("we want to grow but DATAS is limiting, trigger a gen2 right away"));
#ifdef BACKGROUND_GC
                    if (is_bgc_in_progress())
                    {
                        trigger_initial_gen2_p = false;
                    }
#endif //BACKGROUND_GC
                }

                agg_factor = dynamic_heap_count_data.get_aggressiveness (change_int);
                if (agg_factor > 1)
                {
                    change_int *= agg_factor;
                    dprintf (6666, ("agg factor is %d, change by %d heaps", agg_factor, change_int));
                }
            }

            if (change_int)
            {
                adj_metric = dynamic_heap_count_data.should_change_hc (max_heap_count_datas, min_heap_count_datas,
                                                                       max_heap_count_growth, change_int, current_gc_index,
                                                                       &adj_reason, &hc_change_freq_factor, &hc_freq_reason);

                // If we decide to change budget, we let the next GC calculate the right budget, ie, we delay changing by one GC which is acceptable.
                if (adj_metric != dynamic_heap_count_data_t::adjust_metric::adjust_hc)
                {
                    change_int = 0;
                }

                if (adj_metric != dynamic_heap_count_data_t::adjust_metric::not_adjusted)
                {
                    if (adj_metric == dynamic_heap_count_data_t::adjust_metric::adjust_hc)
                    {
                        new_n_heaps = n_heaps + change_int;
                    }

                    dynamic_heap_count_data.record_adjustment (adj_metric, distance, change_int, current_gc_index);
                }
            }

            // We always need to reset these since we already made decisions based on them.
            dynamic_heap_count_data.reset_accumulation();
            dprintf (6666, ("changing HC or budget %d -> %d at GC#%Id", n_heaps, new_n_heaps, current_gc_index));

            dprintf (6666, ("total max gen %.3fmb, total bcd %.3fmb, diff %% %.3f-> +%d hc (%%%.3f)",
                mb (total_soh_stable_size), mb (total_bcd), diff_pct, change_int, (change_int * 100.0 / n_heaps)));
        }

#ifdef FEATURE_EVENT_TRACE
        GCEventFireSizeAdaptationTuning_V1 (
            (uint16_t)new_n_heaps,
            (uint16_t)max_heap_count_datas,
            (uint16_t)min_heap_count_datas,
            (uint64_t)current_gc_index,
            (uint64_t)total_soh_stable_size,
            (float)median_throughput_cost_percent,
            (float)tcp_to_consider,
            (float)current_around_target_accumulation,
            (uint16_t)recorded_tcp_count,
            (float)recorded_tcp_slope,
            (uint32_t)num_gcs_since_last_change,
            (uint8_t)agg_factor,
            (uint16_t)change_decision,
            (uint16_t)adj_reason,
            (uint16_t)hc_change_freq_factor,
            (uint16_t)hc_freq_reason,
            (uint8_t)adj_metric);
#endif //FEATURE_EVENT_TRACE
    }

    size_t num_gen2s_since_last_change = 0;

    if ((new_n_heaps == n_heaps) && !process_eph_samples_p && process_gen2_samples_p)
    {
        num_gen2s_since_last_change = dynamic_heap_count_data.current_gen2_samples_count - dynamic_heap_count_data.gen2_last_changed_sample_count;
        // If we have already been processing eph samples, we don't need to process gen2.
        if ((dynamic_heap_count_data.current_samples_count / dynamic_heap_count_data.current_gen2_samples_count) < 10)
        {
            int step_up = (n_heaps + 1) / 2;
            int max_growth = max ((n_max_heaps / 4), (1 + (actual_n_max_heaps > 3)));
            step_up = min (step_up, (actual_n_max_heaps - n_heaps));

            int step_down = (n_heaps + 1) / 3;

            // The gen2 samples only serve as a backstop so this is quite crude.
            if (median_gen2_tcp > target_gen2_tcp)
            {
                new_n_heaps += step_up;
                new_n_heaps = min (new_n_heaps, actual_n_max_heaps);
                dprintf (6666, ("[CHP2-0] gen2 tcp: %.3f, inc by %d + %d = %d", median_gen2_tcp, step_up, n_heaps, new_n_heaps));

                if ((new_n_heaps < actual_n_max_heaps) && dynamic_heap_count_data.is_close_to_max (new_n_heaps, actual_n_max_heaps))
                {
                    dprintf (6666, ("[CHP2-1] %d is close to max heaps %d, grow to max", new_n_heaps, actual_n_max_heaps));
                    new_n_heaps = actual_n_max_heaps;
                }
            }
            else if ((median_gen2_tcp < (target_gen2_tcp / 2)) && (num_gen2s_since_last_change > 30))
            {
                new_n_heaps -= step_down;
                dprintf (6666, ("[CHP3-0] last gen2 sample count when changed: %Id, gen2 tcp: %.3f, dec by %d, %d -> %d",
                    dynamic_heap_count_data.gen2_last_changed_sample_count, median_gen2_tcp, step_down, n_heaps, new_n_heaps));
            }

            if (new_n_heaps != n_heaps)
            {
                dynamic_heap_count_data.gen2_last_changed_sample_count = dynamic_heap_count_data.current_gen2_samples_count;
            }
        }
    }

    assert (new_n_heaps >= 1);
    assert (new_n_heaps <= actual_n_max_heaps);

    if (process_eph_samples_p)
    {
        dprintf (6666, ("processed eph samples, updating processed %Id -> %Id", dynamic_heap_count_data.processed_samples_count, dynamic_heap_count_data.current_samples_count));
        dynamic_heap_count_data.processed_samples_count = dynamic_heap_count_data.current_samples_count;
    }

    if (process_gen2_samples_p)
    {
        dynamic_heap_count_data_t::gen2_sample* gen2_samples = dynamic_heap_count_data.gen2_samples;
#ifdef FEATURE_EVENT_TRACE
        GCEventFireSizeAdaptationFullGCTuning_V1 (
            (uint16_t)dynamic_heap_count_data.new_n_heaps,
            (uint64_t)current_gc_index,
            (float)median_gen2_tcp,
            (uint32_t)num_gen2s_since_last_change,
            (uint32_t)(current_gc_index - gen2_samples[0].gc_index),
            (float)gen2_samples[0].gc_percent,
            (uint32_t)(current_gc_index - gen2_samples[1].gc_index),
            (float)gen2_samples[1].gc_percent,
            (uint32_t)(current_gc_index - gen2_samples[2].gc_index),
            (float)gen2_samples[2].gc_percent);
#endif //FEATURE_EVENT_TRACEs

        dprintf (6666, ("processed gen2 samples, updating processed %Id -> %Id", dynamic_heap_count_data.processed_gen2_samples_count, dynamic_heap_count_data.current_gen2_samples_count));
        dynamic_heap_count_data.processed_gen2_samples_count = dynamic_heap_count_data.current_gen2_samples_count;
    }
#endif //STRESS_DYNAMIC_HEAP_COUNT

    if (new_n_heaps != n_heaps)
    {
        dprintf (6666, ("GC#%Id should change! %d->%d (%s)",
            VolatileLoadWithoutBarrier (&settings.gc_index), n_heaps, new_n_heaps, ((n_heaps < new_n_heaps) ? "INC" : "DEC")));
        dynamic_heap_count_data.heap_count_to_change_to = new_n_heaps;
        dynamic_heap_count_data.should_change_heap_count = true;
    }
}

// Carries out a pending heap count change requested via
// dynamic_heap_count_data.heap_count_to_change_to.
//
// The EE is suspended for the duration of the change. The change is cancelled (n_heaps is
// left alone and the sample counts are marked as processed) if a background GC is running
// or if prepare_to_change_heap_count fails. Otherwise the GC join is updated to cover
// max (n_heaps, new count) threads, gc_idle_thread_event is set for each heap in
// [n_heaps, new count), gc_start_event is set and change_heap_count is called.
// should_change_heap_count is cleared on every path before returning.
void gc_heap::check_heap_count ()
{
    dynamic_heap_count_data.new_n_heaps = dynamic_heap_count_data.heap_count_to_change_to;

    assert (dynamic_heap_count_data.new_n_heaps != n_heaps);

    // Remembers whether we suspended the EE below, so RestartEE is only ever called to balance
    // our own SuspendEE - this matters if the assert above is compiled out and the requested
    // count already equals n_heaps.
    bool ee_suspended_p = false;

    if (dynamic_heap_count_data.new_n_heaps != n_heaps)
    {
        dprintf (9999, ("h0 suspending EE in check"));
        // can't have threads allocating while we change the number of heaps
        GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP);
        ee_suspended_p = true;
        dprintf (9999, ("h0 suspended EE in check"));

#ifdef BACKGROUND_GC
        if (gc_heap::background_running_p())
        {
            // background GC is running - reset the new heap count
            add_to_hc_history (hc_record_check_cancelled_bgc);
            hc_change_cancelled_count_bgc++;
            dynamic_heap_count_data.new_n_heaps = n_heaps;
            dprintf (6666, ("can't change heap count! BGC in progress"));
        }
#endif //BACKGROUND_GC
    }

    if (dynamic_heap_count_data.new_n_heaps != n_heaps)
    {
        dprintf (6666, ("prep to change from %d to %d at GC#%Id", n_heaps, dynamic_heap_count_data.new_n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index)));
        if (!prepare_to_change_heap_count (dynamic_heap_count_data.new_n_heaps))
        {
            // we don't have sufficient resources - reset the new heap count
            add_to_hc_history (hc_record_check_cancelled_prep);
            hc_change_cancelled_count_prep++;
            dynamic_heap_count_data.new_n_heaps = n_heaps;
        }
    }

    if (dynamic_heap_count_data.new_n_heaps == n_heaps)
    {
        // The change was cancelled (or there was nothing to change): mark all samples as
        // processed so the same samples don't trigger this decision again.
        dynamic_heap_count_data.processed_samples_count = dynamic_heap_count_data.current_samples_count;
        dynamic_heap_count_data.processed_gen2_samples_count = dynamic_heap_count_data.current_gen2_samples_count;
        dynamic_heap_count_data.should_change_heap_count = false;

        dprintf (6666, ("heap count stays the same %d, no work to do, set processed sample count to %Id",
            dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.current_samples_count));

        if (ee_suspended_p)
        {
            GCToEEInterface::RestartEE(TRUE);
        }

        return;
    }

    int new_n_heaps = dynamic_heap_count_data.new_n_heaps;

    assert (!(dynamic_heap_count_data.init_only_p));

    {
        // At this point we are guaranteed to be able to change the heap count to the new one.
        // Change the heap count for joins here because we will need to join new_n_heaps threads together.
        dprintf (9999, ("changing join hp %d->%d", n_heaps, new_n_heaps));
        int max_threads_to_wake = max (n_heaps, new_n_heaps);
        gc_t_join.update_n_threads (max_threads_to_wake);

        // make sure the other gc threads cannot see this as a request to GC
        assert (dynamic_heap_count_data.new_n_heaps != n_heaps);

        if (n_heaps < new_n_heaps)
        {
            // (n_heaps - new_n_heaps) is negative here, so this lowers idle_thread_count by the
            // number of heaps coming into service.
            int saved_idle_thread_count = dynamic_heap_count_data.idle_thread_count;
            Interlocked::ExchangeAdd (&dynamic_heap_count_data.idle_thread_count, (n_heaps - new_n_heaps));
            dprintf (9999, ("GC thread %d setting idle events for h%d-h%d, total idle %d -> %d", heap_number, n_heaps, (new_n_heaps - 1),
                saved_idle_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count)));

            for (int heap_idx = n_heaps; heap_idx < new_n_heaps; heap_idx++)
            {
                g_heaps[heap_idx]->gc_idle_thread_event.Set();
            }
        }

        gc_start_event.Set();
    }

    change_heap_count (dynamic_heap_count_data.new_n_heaps);

    // We can only get here if the requested count differed from n_heaps on entry,
    // which means the EE was suspended above.
    GCToEEInterface::RestartEE(TRUE);
    dprintf (9999, ("h0 restarted EE"));

    dprintf (6666, ("h0 finished changing, set should change to false!\n"));
    dynamic_heap_count_data.should_change_heap_count = false;
}

bool gc_heap::prepare_to_change_heap_count (int new_n_heaps)
{
    dprintf (9999, ("trying to change heap count %d -> %d", n_heaps, new_n_heaps));

    // use this variable for clarity - n_heaps will change during the transition
    int old_n_heaps = n_heaps;

    // first do some steps that may fail and cause us to give up

    // we'll need temporary memory for the rethreading of the free lists -
    // if we can't allocate what we need, we must give up
    for (int i = 0; i < old_n_heaps; i++)
    {
        gc_heap* hp = g_heaps[i];

        if (!hp->prepare_rethread_fl_items())
        {
            return false;
        }
    }

    // move finalizer list items from heaps going out of service to remaining heaps
    // if this step fails, we have to give up
    if (new_n_heaps < old_n_heaps)
    {
        int to_heap_number = 0;
        for (int i = new_n_heaps; i < old_n_heaps; i++)
        {
            gc_heap* from_hp = g_heaps[i];
            gc_heap* to_hp = g_heaps[to_heap_number];

            // we always add the finalizer list items from a heap going out of service
            // to one of the remaining heaps, which we select in round robin fashion
            if (!to_hp->finalize_queue->MergeFinalizationData (from_hp->finalize_queue))
            {
                // failing to merge finalization data from one of the heaps about to go idle
                // means we cannot in fact reduce the number of heaps.
                dprintf (3, ("failed to merge finalization from heap %d into heap %d", i, to_heap_number));
                return false;
            }

            to_heap_number = (to_heap_number + 1) % new_n_heaps;
        }
    }

    // Before we look at whether we have sufficient regions we should return regions that should be deleted to free
    // so we don't lose them when we decommission heaps. We could do this for only heaps that we are about
    // to decomission. But it's better to do this for all heaps because we don't need to worry about adding them to the
    // heaps remain (freeable uoh/soh regions) and we get rid of regions with the heap_segment_flags_uoh_delete flag
    // because background_delay_delete_uoh_segments makes the assumption it can't be the start region.
    for (int i = 0; i < old_n_heaps; i++)
    {
        gc_heap* hp = g_heaps[i];
        hp->delay_free_segments ();
    }

    // if we want to increase the number of heaps, we have to make sure we can give
    // each heap a region for each generation. If we cannot do that, we have to give up
    ptrdiff_t region_count_in_gen[total_generation_count];
    for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
    {
        region_count_in_gen[gen_idx] = 0;
    }
    if (old_n_heaps < new_n_heaps)
    {
        // count the number of regions in each generation
        for (int i = 0; i < old_n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];

            for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
            {
                generation* gen = hp->generation_of (gen_idx);
                for (heap_segment* region = heap_segment_rw (generation_start_segment (gen));
                     region != nullptr;
                     region = heap_segment_next (region))
                {
                    region_count_in_gen[gen_idx]++;
                }
            }
        }

        // check if we either have enough regions for each generation,
        // or can get enough from the free regions lists, or can allocate enough
        bool success = true;
        for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
        {
            const size_t size = gen_idx > soh_gen2 ? global_region_allocator.get_large_region_alignment() : 0;

            // if we don't have enough regions in this generation to cover all the new heaps,
            // try to find enough free regions
            while (region_count_in_gen[gen_idx] < new_n_heaps)
            {
                int kind = gen_idx > soh_gen2 ? large_free_region : basic_free_region;
                bool found_free_regions = false;
                for (int i = 0; i < old_n_heaps; i++)
                {
                    gc_heap* hp = g_heaps[i];
                    if (hp->free_regions[kind].get_num_free_regions() > 0)
                    {
                        // this heap has free regions - move one back into the generation
                        heap_segment* region = hp->get_new_region (gen_idx, size);
                        assert (region != nullptr);
                        region_count_in_gen[gen_idx]++;
                        found_free_regions = true;
                        if (region_count_in_gen[gen_idx] == new_n_heaps)
                            break;
                    }
                }
                if (!found_free_regions)
                {
                    break;
                }
            }
            while (region_count_in_gen[gen_idx] < new_n_heaps)
            {
                if (g_heaps[0]->get_new_region (gen_idx, size) == nullptr)
                {
                    success = false;
                    break;
                }
                region_count_in_gen[gen_idx]++;
            }
            if (!success)
            {
                // we failed to get enough regions - give up and rely on the next GC
                // to return the extra regions we got from the free list or allocated
                return false;
            }
        }
    }
    return true;
}

bool gc_heap::change_heap_count (int new_n_heaps)
{
    uint64_t start_time = 0;

    dprintf (9999, ("BEG heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps));

    // use this variable for clarity - n_heaps will change during the transition
    int old_n_heaps = n_heaps;
    bool init_only_p = dynamic_heap_count_data.init_only_p;

    {
        gc_t_join.join (this, gc_join_merge_temp_fl);
        if (gc_t_join.joined ())
        {
            // BGC is not running, we can safely change its join's heap count.
#ifdef BACKGROUND_GC
            bgc_t_join.update_n_threads (new_n_heaps);
#endif //BACKGROUND_GC

            dynamic_heap_count_data.init_only_p = false;
            dprintf (9999, ("in change h%d resetting gc_start, update bgc join to %d heaps", heap_number, new_n_heaps));
            gc_start_event.Reset();
            gc_t_join.restart ();
        }
    }

    assert (dynamic_heap_count_data.new_n_heaps != old_n_heaps);

    if (heap_number == 0)
    {
        start_time = GetHighPrecisionTimeStamp ();

        // spread finalization data out to heaps coming into service
        // if this step fails, we can still continue
        int from_heap_number = 0;
        for (int i = old_n_heaps; i < new_n_heaps; i++)
        {
            gc_heap* to_hp = g_heaps[i];
            gc_heap* from_hp = g_heaps[from_heap_number];

            if (!from_hp->finalize_queue->SplitFinalizationData (to_hp->finalize_queue))
            {
                // we can live with this failure - it just means finalization data
                // are still on the old heap, which is correct, but suboptimal
                dprintf (3, ("failed to split finalization data between heaps %d and %d", from_heap_number, i));
            }

            from_heap_number = (from_heap_number + 1) % old_n_heaps;
        }

        // prepare for the switch by fixing the allocation contexts on the old heaps, unify the gen0_bricks_cleared flag,
        // and setting the survived size for the existing regions to their allocated size
        BOOL unified_gen0_bricks_cleared = TRUE;
        for (int i = 0; i < old_n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];

            if (!init_only_p)
            {
                hp->fix_allocation_contexts (TRUE);
            }

            if (unified_gen0_bricks_cleared && (hp->gen0_bricks_cleared == FALSE))
            {
                unified_gen0_bricks_cleared = FALSE;
            }

            for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
            {
                generation* gen = hp->generation_of (gen_idx);
                for (heap_segment* region = heap_segment_rw (generation_start_segment (gen));
                     region != nullptr;
                     region = heap_segment_next (region))
                {
                    // prepare the regions by pretending all their allocated space survives
                    heap_segment_survived (region) = heap_segment_allocated (region) - heap_segment_mem (region);
                }
            }
        }

        // initialize the new heaps
        if (old_n_heaps < new_n_heaps)
        {
            // initialize the region lists of the new heaps
            for (int i = old_n_heaps; i < new_n_heaps; i++)
            {
                gc_heap* hp = g_heaps[i];

                hp->check_decommissioned_heap();

                hp->recommission_heap();
            }
        }

        if (new_n_heaps < old_n_heaps)
        {
            // move all regions from the heaps about to be retired to another heap < new_n_heaps
            assert (new_n_heaps > 0);

            for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
            {
                for (int i = new_n_heaps; i < old_n_heaps; i++)
                {
                    gc_heap* hp = g_heaps[i];

                    int dest_heap_number = i % new_n_heaps;
                    gc_heap* hpd = g_heaps[dest_heap_number];
                    generation* hpd_gen = hpd->generation_of (gen_idx);

                    generation* gen = hp->generation_of (gen_idx);

                    heap_segment* start_region = generation_start_segment (gen);
                    heap_segment* tail_ro_region = generation_tail_ro_region (gen);
                    heap_segment* tail_region = generation_tail_region (gen);

                    for (heap_segment* region = start_region; region != nullptr; region = heap_segment_next(region))
                    {
                        assert ((hp != nullptr) && (hpd != nullptr) && (hp != hpd));

                        int oh = heap_segment_oh (region);
                        size_t committed = heap_segment_committed (region) - get_region_start (region);
                        if (committed > 0)
                        {
                            dprintf(3, ("commit-accounting:  from %d to %d [%p, %p) for heap %d to heap %d", oh, oh, get_region_start (region), heap_segment_committed (region), i, dest_heap_number));
#ifdef _DEBUG
                            assert (hp->committed_by_oh_per_heap[oh] >= committed);
                            hp->committed_by_oh_per_heap[oh] -= committed;
                            hpd->committed_by_oh_per_heap[oh] += committed;
#endif // _DEBUG
                        }

                        set_heap_for_contained_basic_regions (region, hpd);
                    }
                    if (tail_ro_region != nullptr)
                    {
                        // the first r/w region is the one after tail_ro_region
                        heap_segment* start_rw_region = heap_segment_next (tail_ro_region);

                        heap_segment* hpd_tail_ro_region = generation_tail_ro_region (hpd_gen);
                        if (hpd_tail_ro_region != nullptr)
                        {
                            // insert the list of r/o regions between the r/o and the r/w regions already present
                            heap_segment_next (tail_ro_region) = heap_segment_next (hpd_tail_ro_region);
                            heap_segment_next (hpd_tail_ro_region) = start_region;
                        }
                        else
                        {
                            // put the list of r/o regions before the r/w regions present
                            heap_segment_next (tail_ro_region) = generation_start_segment (hpd_gen);
                            generation_start_segment (hpd_gen) = start_region;
                        }
                        generation_tail_ro_region (hpd_gen) = tail_ro_region;

                        // we took care of our r/o regions, we still have to do the r/w regions
                        start_region = start_rw_region;
                    }
                    // put the r/w regions at the tail of hpd_gen
                    heap_segment* hpd_tail_region = generation_tail_region (hpd_gen);
                    heap_segment_next (hpd_tail_region) = start_region;
                    generation_tail_region (hpd_gen) = tail_region;

                    generation_start_segment (gen) = nullptr;
                    generation_tail_ro_region (gen) = nullptr;
                    generation_tail_region (gen) = nullptr;
                }
            }
        }

        // transfer the free regions from the heaps going idle
        for (int i = new_n_heaps; i < old_n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];
            int dest_heap_number = i % new_n_heaps;
            gc_heap* hpd = g_heaps[dest_heap_number];

            for (int kind = 0; kind < count_free_region_kinds; kind++)
            {
                hpd->free_regions[kind].transfer_regions(&hp->free_regions[kind]);
            }
        }
        dprintf (9999, ("h%d changing %d->%d", heap_number, n_heaps, new_n_heaps));
        n_heaps = new_n_heaps;

        // even out the regions over the current number of heaps
        equalize_promoted_bytes (max_generation);

        // establish invariants for the heaps now in operation
        for (int i = 0; i < new_n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];

            hp->gen0_bricks_cleared = unified_gen0_bricks_cleared;

            // establish invariants regarding the ephemeral segment
            generation* gen0 = hp->generation_of (0);
            if ((hp->ephemeral_heap_segment == nullptr) ||
                (heap_segment_heap (hp->ephemeral_heap_segment) != hp))
            {
                hp->ephemeral_heap_segment = heap_segment_rw (generation_start_segment (gen0));
                hp->alloc_allocated = heap_segment_allocated (hp->ephemeral_heap_segment);
            }

            for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
            {
                // establish invariants regarding the allocation segment
                generation* gen = hp->generation_of (gen_idx);
                heap_segment *allocation_region = generation_allocation_segment (gen);
                if ((allocation_region == nullptr) ||
                    (heap_segment_heap (allocation_region) != hp))
                {
                    generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen));
                }

                // we shifted regions around, but we have no way to properly account for the small free spaces
                // it's safest to set this to 0, otherwise size computations in compute_new_dynamic_data
                // may overflow
                generation_free_obj_space (gen) = 0;
            }
        }
    }

    dprintf (3, ("individual heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps));

    if (!init_only_p)
    {
        // join for rethreading the free lists
        gc_t_join.join (this, gc_join_merge_temp_fl);
        if (gc_t_join.joined ())
        {
#ifdef BACKGROUND_GC
            // For now I'm always setting it to true. This should be set based on heuristics like the number of
            // FL items. I'm currently rethreading all generations' FL except gen2's. When the next GC happens,
            // it will be a BGC (unless it's a blocking gen2 which also works). And when BGC sweep starts we will
            // build the gen2 FL from scratch.
            trigger_bgc_for_rethreading_p = true;
#endif //BACKGROUND_GC
            gc_t_join.restart ();
        }

        // rethread the free lists
        for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
        {
            bool do_rethreading = true;

#ifdef BACKGROUND_GC
            if (trigger_bgc_for_rethreading_p && (gen_idx == max_generation))
            {
                do_rethreading = false;
            }
#endif //BACKGROUND_GC

            if (do_rethreading)
            {
                if (heap_number < old_n_heaps)
                {
                    dprintf (3, ("h%d calling per heap work!", heap_number));
                    rethread_fl_items (gen_idx);
                }

                // join for merging the free lists
                gc_t_join.join (this, gc_join_merge_temp_fl);
                if (gc_t_join.joined ())
                {
                    merge_fl_from_other_heaps (gen_idx, new_n_heaps, old_n_heaps);

                    gc_t_join.restart ();
                }
            }
        }

#ifdef BACKGROUND_GC
        // there should be no items in the bgc_alloc_lock
        bgc_alloc_lock->check();
#endif //BACKGROUND_GC
    }

    if (heap_number == 0)
    {
        // compute the total budget per generation over the old heaps
        // and figure out what the new budget per heap is
        ptrdiff_t new_alloc_per_heap[total_generation_count];
        size_t desired_alloc_per_heap[total_generation_count];
        for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
        {
            ptrdiff_t total_new_alloc = 0;
            size_t total_desired_alloc = 0;
            for (int i = 0; i < old_n_heaps; i++)
            {
                gc_heap* hp = g_heaps[i];

                dynamic_data* dd = hp->dynamic_data_of (gen_idx);
                total_new_alloc += dd_new_allocation (dd);
                total_desired_alloc += dd_desired_allocation (dd);
            }
            // distribute the total budget for this generation over all new heaps if we are increasing heap count,
            // but keep the budget per heap if we are decreasing heap count
            int max_n_heaps = max (old_n_heaps, new_n_heaps);
            new_alloc_per_heap[gen_idx] = Align (total_new_alloc / max_n_heaps, get_alignment_constant (gen_idx <= max_generation));
            desired_alloc_per_heap[gen_idx] = Align (total_desired_alloc / max_n_heaps, get_alignment_constant (gen_idx <= max_generation));
            size_t allocated_in_budget = total_desired_alloc - total_new_alloc;
            dprintf (6666, ("g%d: total budget %zd (%zd / heap), left in budget: %zd (%zd / heap), (allocated %Id, %.3f%%), min %zd",
                gen_idx, total_desired_alloc, desired_alloc_per_heap[gen_idx],
                total_new_alloc, new_alloc_per_heap[gen_idx],
                allocated_in_budget, ((double)allocated_in_budget * 100.0 / (double)total_desired_alloc),
                dd_min_size (g_heaps[0]->dynamic_data_of (gen_idx))));
        }

        // distribute the new budget per heap over the new heaps
        // and recompute the current size of the generation
        for (int i = 0; i < new_n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];

            for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
            {
                // distribute the total leftover budget over all heaps.
                dynamic_data* dd = hp->dynamic_data_of (gen_idx);
                dd_new_allocation (dd) = new_alloc_per_heap[gen_idx];
                dd_desired_allocation (dd) = max (desired_alloc_per_heap[gen_idx], dd_min_size (dd));

                // recompute dd_fragmentation and dd_current_size
                generation* gen = hp->generation_of (gen_idx);
                size_t gen_size = hp->generation_size (gen_idx);
                dd_fragmentation (dd) = generation_free_list_space (gen);
                if (gen_idx == max_generation)
                {
                    // Just set it to 0 so it doesn't cause any problems. The next GC which will be a gen2 will update it to the correct value.
                    dd_current_size (dd) = 0;
                }
                else
                {
                    // We cannot assert this for gen2 because we didn't actually rethread gen2 FL.
                    assert (gen_size >= dd_fragmentation (dd));
                    dd_current_size (dd) = gen_size - dd_fragmentation (dd);
                }

                dprintf (3, ("h%d g%d: budget: %zd, left in budget: %zd, generation_size: %zd fragmentation: %zd current_size: %zd",
                    i,
                    gen_idx,
                    desired_alloc_per_heap[gen_idx],
                    new_alloc_per_heap[gen_idx],
                    gen_size,
                    dd_fragmentation (dd),
                    dd_current_size (dd)));
            }
        }

        // put heaps that are going idle now into the decommissioned state
        for (int i = n_heaps; i < old_n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];

            hp->decommission_heap();
        }

        if (!init_only_p)
        {
            // make sure no allocation contexts point to idle heaps
            fix_allocation_contexts_heaps();
        }

        dynamic_heap_count_data.last_n_heaps = old_n_heaps;
    }

    // join the last time to change the heap count again if needed.
    if (new_n_heaps < old_n_heaps)
    {
        gc_t_join.join (this, gc_join_merge_temp_fl);
        if (gc_t_join.joined ())
        {
            dprintf (9999, ("now changing the join heap count to the smaller one %d", new_n_heaps));
            gc_t_join.update_n_threads (new_n_heaps);

            gc_t_join.restart ();
        }
    }

    if (heap_number == 0)
    {
        add_to_hc_history (hc_record_change_done);
        change_heap_count_time = GetHighPrecisionTimeStamp() - start_time;
        total_change_heap_count_time += change_heap_count_time;
        total_change_heap_count++;
        dprintf (6666, ("changing HC took %I64dus", change_heap_count_time));
    }

    return true;
}

// Sums the more-space-lock (msl) wait time accumulated on each of the n_heaps heaps, separately for
// the SOH lock and the UOH lock, and resets every heap's counters to 0 so that the next call only
// observes waits recorded after this one.
//
// soh_msl_wait_time - out: receives the total of more_space_lock_soh.msl_wait_time over all heaps
// uoh_msl_wait_time - out: receives the total of more_space_lock_uoh.msl_wait_time over all heaps
void gc_heap::get_msl_wait_time (size_t* soh_msl_wait_time, size_t* uoh_msl_wait_time)
{
    assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes);

    size_t total_soh_wait = 0;
    size_t total_uoh_wait = 0;

    for (int i = 0; i < n_heaps; i++)
    {
        gc_heap* hp = g_heaps[i];

        // Accumulate into the totals. Adding to the pointer parameters themselves would only
        // advance the local pointers and leave the caller's values at 0.
        total_soh_wait += (size_t)hp->more_space_lock_soh.msl_wait_time;
        hp->more_space_lock_soh.msl_wait_time = 0;

        total_uoh_wait += (size_t)hp->more_space_lock_uoh.msl_wait_time;
        hp->more_space_lock_uoh.msl_wait_time = 0;
    }

    *soh_msl_wait_time = total_soh_wait;
    *uoh_msl_wait_time = total_uoh_wait;
}

// Records the DATAS sample for the GC that is finishing and, if needed, adjusts the gen0 budget.
//
// When settings.gc_index > 1 this fills in the next slot of dynamic_heap_count_data.samples
// (elapsed time since the last suspension end, GC pause time, msl wait time, survived size),
// asks DATAS for a gen0 budget and assigns it to all heaps if it differs from the current one,
// records an additional gen2 sample when the condemned generation is max_generation, and then
// calls calculate_new_heap_count. Otherwise it only switches the gen0 budget to the min budget.
// In both cases last_suspended_end_time is updated to the timestamp taken at the top.
void gc_heap::process_datas_sample()
{
    // We get the time here instead of waiting till we assign end_gc_time because end_gc_time includes distribute_free_regions
    // but we need to get the budget from DATAS before we call distribute_free_regions. distribute_free_regions takes < 1% of
    // the GC pause so it's ok to not count it. The GC elapsed time DATAS records uses this timestamp instead of end_gc_time.
    before_distribute_free_regions_time = GetHighPrecisionTimeStamp();
    dynamic_data* dd0 = g_heaps[0]->dynamic_data_of (0);
    uint64_t gc_pause_time = before_distribute_free_regions_time - dd_time_clock (dd0);

    // The current gen0 budget is read from heap 0 only.
    size_t desired_per_heap = dd_desired_allocation (dd0);
    if (settings.gc_index > 1)
    {
        size_t gc_index = VolatileLoadWithoutBarrier (&settings.gc_index);
        dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
        sample.elapsed_between_gcs = before_distribute_free_regions_time - last_suspended_end_time;
        sample.gc_pause_time = gc_pause_time;
        size_t soh_msl_wait_time, uoh_msl_wait_time;
        // Note that get_msl_wait_time also resets the per-heap wait counters.
        get_msl_wait_time (&soh_msl_wait_time, &uoh_msl_wait_time);
        sample.msl_wait_time = soh_msl_wait_time + uoh_msl_wait_time;
        sample.gc_index = gc_index;
        // could cache this - we will get it again soon in do_post_gc
        sample.gc_survived_size = get_total_promoted();

        // We check to see if we want to adjust the budget here for DATAS.
        size_t desired_per_heap_datas = desired_per_heap;
        // tcp is a percentage: (msl wait time averaged over the heaps + GC pause time) relative to the
        // time elapsed since the last suspension end; it is 0 when no time elapsed.
        float tcp = (sample.elapsed_between_gcs ?
            (((float)sample.msl_wait_time / n_heaps + sample.gc_pause_time) * 100.0f / (float)sample.elapsed_between_gcs) : 0.0f);
        size_t total_soh_stable_size = get_total_soh_stable_size();
        desired_per_heap_datas = dynamic_heap_count_data.compute_gen0_budget_per_heap (total_soh_stable_size, tcp, desired_per_heap);
        dprintf (6666, ("gen0 new_alloc %Id (%.3fmb), from datas: %Id (%.3fmb)",
            desired_per_heap, mb (desired_per_heap), desired_per_heap_datas, mb (desired_per_heap_datas)));
        dprintf (6666, ("budget DATAS %Id, previous %Id", desired_per_heap_datas, desired_per_heap));

        // NOTE(review): the budget is a size_t that is narrowed to int here - confirm it cannot exceed INT_MAX.
        sample.gen0_budget_per_heap = (int)desired_per_heap_datas;
        if (desired_per_heap_datas != desired_per_heap)
        {
            dprintf (6666, ("adjusted budget for DATAS, assigning to all heaps"));
            assign_new_budget (0, desired_per_heap_datas);
        }

        dprintf (6666, ("sample#%d: %d heaps, this GC end %I64d - last sus end %I64d = %I64d, this GC pause %.3fms, msl wait %I64dus, tcp %.3f, surv %zd, gc speed %.3fmb/ms (%.3fkb/ms/heap)",
            dynamic_heap_count_data.sample_index, n_heaps, before_distribute_free_regions_time, last_suspended_end_time, sample.elapsed_between_gcs,
            (sample.gc_pause_time / 1000.0), sample.msl_wait_time, tcp, sample.gc_survived_size,
            (sample.gc_pause_time ? (sample.gc_survived_size / 1000.0 / sample.gc_pause_time) : 0),
            (sample.gc_pause_time ? ((float)sample.gc_survived_size / sample.gc_pause_time / n_heaps) : 0)));

#ifdef FEATURE_EVENT_TRACE
        GCEventFireSizeAdaptationSample_V1 (
            (uint64_t)gc_index,
            (uint32_t)sample.elapsed_between_gcs,
            (uint32_t)sample.gc_pause_time,
            (uint32_t)soh_msl_wait_time, (uint32_t)uoh_msl_wait_time,
            (uint64_t)total_soh_stable_size, (uint32_t)sample.gen0_budget_per_heap);
#endif //FEATURE_EVENT_TRACE

        // Move on to the next sample slot, wrapping around at sample_size.
        dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size;
        (dynamic_heap_count_data.current_samples_count)++;

        if (settings.condemned_generation == max_generation)
        {
            gc_index_full_gc_end = dd_gc_clock (dd0);
            // The end time of the previous gen2 GC is reconstructed as its start (previous time clock of
            // max_generation on heap 0) plus the duration recorded in the last gen2 sample.
            dynamic_heap_count_data_t::gen2_sample& last_g2_sample = dynamic_heap_count_data.get_last_gen2_sample();
            uint64_t prev_gen2_end_time = dd_previous_time_clock (g_heaps[0]->dynamic_data_of (max_generation)) + last_g2_sample.gc_duration;
            size_t elapsed_between_gen2_gcs = before_distribute_free_regions_time - prev_gen2_end_time;
            size_t gen2_elapsed_time = sample.gc_pause_time;
            dynamic_heap_count_data_t::gen2_sample& g2_sample = dynamic_heap_count_data.get_current_gen2_sample();
            g2_sample.gc_index = VolatileLoadWithoutBarrier (&(settings.gc_index));
            g2_sample.gc_duration = gen2_elapsed_time;
            // NOTE(review): there is no guard against elapsed_between_gen2_gcs being 0 here - confirm that cannot happen.
            g2_sample.gc_percent = (float)gen2_elapsed_time * 100.0f / elapsed_between_gen2_gcs;
            (dynamic_heap_count_data.current_gen2_samples_count)++;

            dprintf (6666, ("gen2 sample#%d: this GC end %I64d - last gen2 end %I64d = %I64d, GC elapsed %I64d, percent %.3f",
                dynamic_heap_count_data.gen2_sample_index, before_distribute_free_regions_time, prev_gen2_end_time, elapsed_between_gen2_gcs, gen2_elapsed_time, g2_sample.gc_percent));
            // Move on to the next gen2 sample slot, wrapping around at sample_size.
            dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size;
        }

        calculate_new_heap_count ();
    }
    else
    {
        // For DATAS we can't just take the BCS because it's likely very large and that could totally make the max heap size larger. We just take the
        // min budget.
        size_t min_desired = dd_min_size (dd0);
        if (min_desired != desired_per_heap)
        {
            dprintf (6666, ("use the min budget for DATAS, assigning to all heaps"));
            assign_new_budget (0, min_desired);
        }
    }

    // The next sample measures its elapsed time from this point.
    last_suspended_end_time = before_distribute_free_regions_time;
}

// Writes a snapshot of the current heap count and join state into hist[*current_index] and then
// advances *current_index to the next slot, wrapping around at max_hc_history_count.
//
// hist          - history buffer that receives the entry
// current_index - in/out: slot to fill; set to the following slot on return
// stage         - stage being recorded (stored as a short)
// msg           - tag that only appears in the trace output
void gc_heap::add_to_hc_history_worker (hc_history* hist, int* current_index, hc_record_stage stage, const char* msg)
{
    const int slot = *current_index;

    dprintf (6666, ("h%d ADDING %s HC hist to entry #%d, stage %d, gc index %Id, last %d, n %d, new %d",
        heap_number, msg, slot, (int)stage, VolatileLoadWithoutBarrier (&settings.gc_index),
        dynamic_heap_count_data.last_n_heaps, n_heaps, dynamic_heap_count_data.new_n_heaps));

    hc_history& entry = hist[slot];

    // GC index and the stage we are at.
    entry.gc_index = VolatileLoadWithoutBarrier (&settings.gc_index);
    entry.stage = (short)stage;

    // Heap counts and idle thread count.
    entry.last_n_heaps = (short)dynamic_heap_count_data.last_n_heaps;
    entry.n_heaps = (short)n_heaps;
    entry.new_n_heaps = (short)dynamic_heap_count_data.new_n_heaps;
    entry.idle_thread_count = (short)dynamic_heap_count_data.idle_thread_count;

    // State of the blocking GC join.
    entry.gc_t_join_n_threads = (short)gc_t_join.get_num_threads();
    entry.gc_t_join_join_lock = (short)gc_t_join.get_join_lock();
    entry.gc_t_join_joined_p = (bool)gc_t_join.joined();

#ifdef BACKGROUND_GC
    // State of the BGC join and of the BGC thread; the OS thread id is 0 when there is no BGC thread.
    entry.bgc_t_join_n_threads = (short)bgc_t_join.get_num_threads();
    entry.bgc_t_join_join_lock = (short)bgc_t_join.get_join_lock();
    entry.bgc_t_join_joined_p = (bool)bgc_t_join.joined();
    entry.concurrent_p = (bool)settings.concurrent;
    entry.bgc_thread_running = (bool)bgc_thread_running;
    entry.bgc_thread_os_id = (bgc_thread ? (int) GCToEEInterface::GetThreadOSThreadId(bgc_thread) : 0);
#endif //BACKGROUND_GC

    *current_index = (slot + 1) % max_hc_history_count;
}

// Records a heap count history entry for the given stage in hchist_per_heap, tagged "GC" in the trace.
void gc_heap::add_to_hc_history (hc_record_stage stage)
{
    const char* const tag = "GC";
    add_to_hc_history_worker (hchist_per_heap, &hchist_index_per_heap, stage, tag);
}

// Records a heap count history entry for the given stage in bgc_hchist_per_heap, tagged "BGC" in the trace.
void gc_heap::add_to_bgc_hc_history (hc_record_stage stage)
{
    const char* const tag = "BGC";
    add_to_hc_history_worker (bgc_hchist_per_heap, &bgc_hchist_index_per_heap, stage, tag);
}
#endif //DYNAMIC_HEAP_COUNT
#endif //USE_REGIONS


#if !defined(USE_REGIONS) || defined(_DEBUG)
// Resets this heap's promoted byte counter to 0.
// NOTE(review): the *16 stride presumably spaces the per-heap slots apart (e.g. to avoid sharing
// a cache line) - confirm against the declaration of g_promoted.
inline
void gc_heap::init_promoted_bytes()
{
#ifndef MULTIPLE_HEAPS
    g_promoted = 0;
#else //!MULTIPLE_HEAPS
    g_promoted [heap_number*16] = 0;
#endif //!MULTIPLE_HEAPS
}

// Returns a reference to the promoted byte counter: the slot of heap `thread` when there are
// multiple heaps, the single global counter otherwise (`thread` is then ignored).
size_t& gc_heap::promoted_bytes (int thread)
{
#ifndef MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(thread);
    return g_promoted;
#else //!MULTIPLE_HEAPS
    return g_promoted [thread*16];
#endif //!MULTIPLE_HEAPS
}
#endif //!USE_REGIONS || _DEBUG

inline
void gc_heap::add_to_promoted_bytes (uint8_t* object, int thread)
{
    size_t obj_size = size (object);
    add_to_promoted_bytes (object, obj_size, thread);
}

// Accounts obj_size bytes as promoted for `object`.
//
// With regions, the bytes are added to the survived_per_region slot of the basic region that
// contains `object` (when survived_per_region is set). Without regions, or in debug builds,
// they are also added to this heap's g_promoted counter. `thread` must equal heap_number.
inline
void gc_heap::add_to_promoted_bytes (uint8_t* object, size_t obj_size, int thread)
{
    assert (thread == heap_number);

#ifdef USE_REGIONS
    if (survived_per_region)
    {
        const auto region_index = get_basic_region_index_for_address (object);
        survived_per_region[region_index] += obj_size;
    }
#endif //USE_REGIONS

#if !defined(USE_REGIONS) || defined(_DEBUG)
#ifndef MULTIPLE_HEAPS
    g_promoted += obj_size;
#else //!MULTIPLE_HEAPS
    g_promoted [heap_number*16] += obj_size;
#endif //!MULTIPLE_HEAPS
#endif //!USE_REGIONS || _DEBUG

#ifdef _DEBUG
    // Verify we keep the 2 recordings in sync.
    //get_promoted_bytes();
#endif //_DEBUG
}

// Looks up the segment that the seg mapping table associates with `interior`.
// Returns 0 when there is no such segment, or when small_segment_only_p is set and the
// segment found is a UOH segment.
heap_segment* gc_heap::find_segment (uint8_t* interior, BOOL small_segment_only_p)
{
    heap_segment* found = seg_mapping_table_segment_of (interior);

    if (found && small_segment_only_p && heap_segment_uoh_p (found))
    {
        // The caller asked for small object segments only.
        return 0;
    }

    return found;
}

#if !defined(_DEBUG) && !defined(__GNUC__)
inline // This causes link errors if global optimization is off
#endif //!_DEBUG && !__GNUC__
// Returns the heap that seg_mapping_table_heap_of reports for `o`. Falls back to g_heaps[0]
// when `o` is null or the lookup yields no heap. With a single heap this is always __this.
gc_heap* gc_heap::heap_of (uint8_t* o)
{
#ifndef MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(o);
    return __this;
#else //!MULTIPLE_HEAPS
    if (o != 0)
    {
        gc_heap* owner = seg_mapping_table_heap_of (o);
        if (owner)
        {
            return owner;
        }
    }
    return g_heaps [0];
#endif //!MULTIPLE_HEAPS
}

// Same as heap_of but the lookup goes through seg_mapping_table_heap_of_gc. Falls back to
// g_heaps[0] when `o` is null or the lookup yields no heap. With a single heap this is always __this.
inline
gc_heap* gc_heap::heap_of_gc (uint8_t* o)
{
#ifndef MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(o);
    return __this;
#else //!MULTIPLE_HEAPS
    if (o != 0)
    {
        gc_heap* owner = seg_mapping_table_heap_of_gc (o);
        if (owner)
        {
            return owner;
        }
    }
    return g_heaps [0];
#endif //!MULTIPLE_HEAPS
}

// Finds the object that contains the interior pointer `interior`; will find all heap objects
// (large and small). Returns the start of the containing object, or 0 if none is found.
//
// Callers of this method need to guarantee the interior pointer is within the heap range.
//
// If you need it to be stricter, eg if you only want to find an object in ephemeral range,
// you should make sure interior is within that range before calling this method.
//
// Side effects: clears the gen0 bricks if they have not been cleared yet and sets
// gen0_must_clear_bricks to FFIND_DECAY.
uint8_t* gc_heap::find_object (uint8_t* interior)
{
    assert (interior != 0);

    if (!gen0_bricks_cleared)
    {
#ifdef MULTIPLE_HEAPS
        assert (!"Should have already been done in server GC");
#endif //MULTIPLE_HEAPS
        clear_gen0_bricks();
    }
    //indicate that in the future this needs to be done during allocation
    gen0_must_clear_bricks = FFIND_DECAY;

    int brick_entry = get_brick_entry(brick_of (interior));
    if (brick_entry == 0)
    {
        // this is a pointer to a UOH object
        heap_segment* seg = find_segment (interior, FALSE);
        if (seg)
        {
#ifdef FEATURE_CONSERVATIVE_GC
            // With conservative GC, a pointer at or past the allocated end is rejected rather than asserted on.
            if (interior >= heap_segment_allocated(seg))
                return 0;
#endif
            // If interior falls within the first free object at the beginning of a generation,
            // we don't have brick entry for it, and we may incorrectly treat it as on large object heap.
            int align_const = get_alignment_constant (heap_segment_read_only_p (seg)
#ifdef FEATURE_CONSERVATIVE_GC
                                                       || (GCConfig::GetConservativeGC() && !heap_segment_uoh_p (seg))
#endif
                                                      );
            assert (interior < heap_segment_allocated (seg));

            // No brick info to start from: walk the segment object by object from its beginning
            // until we reach the object whose [start, next start) range contains interior.
            uint8_t* o = heap_segment_mem (seg);
            while (o < heap_segment_allocated (seg))
            {
                uint8_t* next_o = o + Align (size (o), align_const);
                assert (next_o > o);
                if ((o <= interior) && (interior < next_o))
                    return o;
                o = next_o;
            }
            return 0;
        }
        else
        {
            return 0;
        }
    }
    else
    {
        // There is a brick entry: only small object segments are considered (find_segment returns 0
        // for a UOH segment here) and find_first_object does the search.
        heap_segment* seg = find_segment (interior, TRUE);
        if (seg)
        {
#ifdef FEATURE_CONSERVATIVE_GC
            if (interior >= heap_segment_allocated (seg))
                return 0;
#else
            assert (interior < heap_segment_allocated (seg));
#endif
            uint8_t* o = find_first_object (interior, heap_segment_mem (seg));
            return o;
        }
        else
            return 0;
    }
}

// m_boundary(o) appends o to the mark list while mark_list_index has not gone past mark_list_end.
// With GC_CONFIG_DRIVEN, mark_list_index keeps advancing even when nothing is stored, so it can
// end up beyond mark_list_end (presumably so the overflow can be detected/measured - confirm at
// the use sites).
// m_boundary_fullgc(o) never touches the mark list.
// Without MULTIPLE_HEAPS both macros additionally widen [slow, shigh] so that it includes o;
// with MULTIPLE_HEAPS m_boundary_fullgc does nothing.
// These macros expand to code that uses mark_list_index, mark_list_end, slow and shigh from the
// scope they are expanded in.
#ifdef MULTIPLE_HEAPS

#ifdef GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}}
#else //GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}}
#endif //GC_CONFIG_DRIVEN

#define m_boundary_fullgc(o) {}

#else //MULTIPLE_HEAPS

#ifdef GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;}
#else
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}if (slow > o) slow = o; if (shigh < o) shigh = o;}
#endif //GC_CONFIG_DRIVEN

#define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;}

#endif //MULTIPLE_HEAPS

// Marks `o` unconditionally and reports whether this call is the one that marked it:
// returns nonzero if `o` was not marked before, 0 if it already was.
// In debug builds with regions, breaks into the debugger if `o` lies beyond the allocated
// end of its region.
inline
BOOL gc_heap::gc_mark1 (uint8_t* o)
{
    BOOL newly_marked = !marked (o);
    set_marked (o);
    dprintf (3, ("*%zx*, newly marked: %d", (size_t)o, newly_marked));

#if defined(USE_REGIONS) && defined(_DEBUG)
    heap_segment* region = seg_mapping_table_segment_of (o);
    if (o > heap_segment_allocated (region))
    {
        dprintf (REGIONS_LOG, ("%p is in seg %zx(%p) but beyond alloc %p!!",
            o, (size_t)region, heap_segment_mem (region), heap_segment_allocated (region)));
        GCToOSInterface::DebugBreak();
    }
#endif //USE_REGIONS && _DEBUG

    return newly_marked;
}

#ifdef USE_REGIONS
// Tells whether `o` points into the GC heap.
// With FEATURE_BASICFREEZE this is the range check [g_gc_lowest_address, g_gc_highest_address);
// pointers outside of it are expected to be null or to belong to a read only segment.
// Without it, every non-null pointer is expected to be in that range, so only null is rejected.
inline bool is_in_heap_range (uint8_t* o)
{
#ifndef FEATURE_BASICFREEZE
    // without frozen objects, every non-null pointer must be
    // within the heap
    assert ((o == nullptr) || ((g_gc_lowest_address <= o) && (o < g_gc_highest_address)));
    return (o != nullptr);
#else //!FEATURE_BASICFREEZE
    // we may have frozen objects in read only segments
    // outside of the reserved address range of the gc heap
    const bool in_reserved_range = ((g_gc_lowest_address <= o) && (o < g_gc_highest_address));
    assert (in_reserved_range || (o == nullptr) || (ro_segment_lookup (o) != nullptr));
    return in_reserved_range;
#endif //!FEATURE_BASICFREEZE
}

// Tells whether `o` lies in [gc_low, gc_high).
// The asserts only check that `o` is a plausible heap pointer: in the reserved heap range, null,
// or (with FEATURE_BASICFREEZE) inside a read only segment.
inline bool gc_heap::is_in_gc_range (uint8_t* o)
{
#ifndef FEATURE_BASICFREEZE
    // without frozen objects, every non-null pointer must be
    // within the heap
    assert ((o == nullptr) || ((g_gc_lowest_address <= o) && (o < g_gc_highest_address)));
#else //!FEATURE_BASICFREEZE
    // we may have frozen objects in read only segments
    // outside of the reserved address range of the gc heap
    assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) ||
        (o == nullptr) || (ro_segment_lookup (o) != nullptr));
#endif //!FEATURE_BASICFREEZE

    if (o < gc_low)
    {
        return false;
    }
    return (o < gc_high);
}
#endif //USE_REGIONS

// Marks `o` if it is subject to this GC and returns whether this call newly marked it.
//
// With regions: `o` must be in [low, high); unless condemned_gen is max_generation, objects in a
// region of a generation older than condemned_gen are skipped. Returns TRUE only if `o` was
// unmarked and got marked here.
//
// Without regions: condemned_gen must be -1. `o` is marked via gc_mark1 if it is in [low, high)
// or, with multiple heaps, if it is non-null and within [gc_low, gc_high) of the heap returned
// by heap_of_gc.
inline
BOOL gc_heap::gc_mark (uint8_t* o, uint8_t* low, uint8_t* high, int condemned_gen)
{
#ifdef USE_REGIONS
    if ((o < low) || (o >= high))
    {
        return FALSE;
    }

    if ((condemned_gen != max_generation) && (get_region_gen_num (o) > condemned_gen))
    {
        // Lives in a generation that is not being collected.
        return FALSE;
    }

    if (marked (o))
    {
        return FALSE;
    }

    set_marked (o);
    return TRUE;
#else //USE_REGIONS
    assert (condemned_gen == -1);

    BOOL newly_marked = FALSE;
    if ((o >= low) && (o < high))
    {
        newly_marked = gc_mark1 (o);
    }
#ifdef MULTIPLE_HEAPS
    else if (o)
    {
        gc_heap* owner = heap_of_gc (o);
        assert (owner);
        if ((o >= owner->gc_low) && (o < owner->gc_high))
        {
            newly_marked = gc_mark1 (o);
        }
    }
#ifdef SNOOP_STATS
    snoop_stat.objects_checked_count++;

    if (newly_marked)
    {
        snoop_stat.objects_marked_count++;
    }
    if (!o)
    {
        snoop_stat.zero_ref_count++;
    }

#endif //SNOOP_STATS
#endif //MULTIPLE_HEAPS
    return newly_marked;
#endif //USE_REGIONS
}

#ifdef BACKGROUND_GC

// Reports whether `o` is marked in the mark array.
inline
BOOL gc_heap::background_marked (uint8_t* o)
{
    BOOL is_marked = mark_array_marked (o);
    return is_marked;
}
// Marks `o` in the mark array if it is not marked there yet.
// Returns TRUE if this call set the mark, FALSE if `o` was already marked.
inline
BOOL gc_heap::background_mark1 (uint8_t* o)
{
    BOOL needs_mark = !mark_array_marked (o);

    dprintf (3, ("b*%zx*b(%d)", (size_t)o, (needs_mark ? 1 : 0)));
    if (!needs_mark)
    {
        return FALSE;
    }

    mark_array_set_marked (o);
    dprintf (4, ("n*%zx*n", (size_t)o));
    return TRUE;
}

// Background-marks `o` if it is in [low, high) or, with multiple heaps, if it is non-null and
// within the saved background address range of the heap returned by heap_of.
// Returns what background_mark1 returns when it is called, FALSE otherwise.
//
// TODO: we could consider filtering out NULL's here instead of going to
// look for it on other heaps
inline
BOOL gc_heap::background_mark (uint8_t* o, uint8_t* low, uint8_t* high)
{
    if ((o >= low) && (o < high))
    {
        return background_mark1 (o);
    }

#ifdef MULTIPLE_HEAPS
    if (o)
    {
        gc_heap* owner = heap_of (o);
        assert (owner);
        if ((o >= owner->background_saved_lowest_address) && (o < owner->background_saved_highest_address))
        {
            return background_mark1 (o);
        }
    }
#endif //MULTIPLE_HEAPS

    return FALSE;
}

#endif //BACKGROUND_GC

// NOTE(review): new_start() is not used anywhere in this part of the file and its expansion is
// not a valid statement as written (there is no ';' after `parm = start`) - confirm whether it
// is dead and can be removed.
#define new_start() {if (ppstop <= start) {break;} else {parm = start}}
// Values for the start_useful argument of go_through_object: ignore_start walks the whole object,
// use_start skips the pointer slots that lie before `start`.
#define ignore_start 0
#define use_start 1

// Walks the pointer slots of object `o` as described by the GC descriptor (CGCDesc) of its
// method table and evaluates `exp` once per slot.
//
//  mt           - method table of the object
//  o            - start of the object
//  size         - size of the object
//  parm         - name of the uint8_t** variable this macro declares; it points at the current
//                 slot while `exp` runs
//  start        - when start_useful is nonzero, slots before this address are skipped
//  start_useful - ignore_start or use_start
//  limit        - not referenced by the expansion
//  exp          - code to run for each slot
//
// When the number of series is >= 0, the series are visited from the highest to the lowest one,
// each covering the slots from o + series offset up to that address + series size + `size`.
// A negative number of series is the repeating case (array of valuetypes): each element is
// described by -cnt (nptrs, skip) pairs that are applied repeatedly until the end of the object.
//
// Keep any comment added inside the macro in /* */ form; the lines are joined by the trailing
// backslashes.
#define go_through_object(mt,o,size,parm,start,start_useful,limit,exp)      \
{                                                                           \
    CGCDesc* map = CGCDesc::GetCGCDescFromMT((MethodTable*)(mt));           \
    CGCDescSeries* cur = map->GetHighestSeries();                           \
    ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries();                        \
                                                                            \
    if (cnt >= 0)                                                           \
    {                                                                       \
        CGCDescSeries* last = map->GetLowestSeries();                       \
        uint8_t** parm = 0;                                                 \
        do                                                                  \
        {                                                                   \
            assert (parm <= (uint8_t**)((o) + cur->GetSeriesOffset()));     \
            parm = (uint8_t**)((o) + cur->GetSeriesOffset());               \
            uint8_t** ppstop =                                              \
                (uint8_t**)((uint8_t*)parm + cur->GetSeriesSize() + (size));\
            if (!start_useful || (uint8_t*)ppstop > (start))                \
            {                                                               \
                if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);\
                while (parm < ppstop)                                       \
                {                                                           \
                   {exp}                                                    \
                   parm++;                                                  \
                }                                                           \
            }                                                               \
            cur--;                                                          \
                                                                            \
        } while (cur >= last);                                              \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* Handle the repeating case - array of valuetypes */               \
        uint8_t** parm = (uint8_t**)((o) + cur->startoffset);               \
        if (start_useful && start > (uint8_t*)parm)                         \
        {                                                                   \
            ptrdiff_t cs = mt->RawGetComponentSize();                         \
            parm = (uint8_t**)((uint8_t*)parm + (((start) - (uint8_t*)parm)/cs)*cs); \
        }                                                                   \
        while ((uint8_t*)parm < ((o)+(size)-plug_skew))                     \
        {                                                                   \
            for (ptrdiff_t __i = 0; __i > cnt; __i--)                         \
            {                                                               \
                HALF_SIZE_T skip =  (cur->val_serie + __i)->skip;           \
                HALF_SIZE_T nptrs = (cur->val_serie + __i)->nptrs;          \
                uint8_t** ppstop = parm + nptrs;                            \
                if (!start_useful || (uint8_t*)ppstop > (start))            \
                {                                                           \
                    if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);      \
                    do                                                      \
                    {                                                       \
                       {exp}                                                \
                       parm++;                                              \
                    } while (parm < ppstop);                                \
                }                                                           \
                parm = (uint8_t**)((uint8_t*)ppstop + skip);                \
            }                                                               \
        }                                                                   \
    }                                                                       \
}

// go_through_object over the whole object: start is `o` itself and start_useful is ignore_start,
// so no slot is skipped.
#define go_through_object_nostart(mt,o,size,parm,exp) {go_through_object(mt,o,size,parm,o,ignore_start,(o + size),exp); }

// go_through_object_cl runs `exp` for every pointer slot of `o` (only if the object contains GC
// pointers) and, when COLLECTIBLE_CLASS is defined and the object is collectible, first runs
// `exp` once with parm pointing at a local copy of the object's class object.
//
// One thing to note about this macro:
// you can use *parm safely but in general you don't want to use parm itself,
// because for the collectible types it's not an address on the managed heap.
#ifndef COLLECTIBLE_CLASS
#define go_through_object_cl(mt,o,size,parm,exp)                            \
{                                                                           \
    if (header(o)->ContainsGCPointers())                                      \
    {                                                                       \
        go_through_object_nostart(mt,o,size,parm,exp);                      \
    }                                                                       \
}
#else //COLLECTIBLE_CLASS
#define go_through_object_cl(mt,o,size,parm,exp)                            \
{                                                                           \
    if (header(o)->Collectible())                                           \
    {                                                                       \
        uint8_t* class_obj = get_class_object (o);                             \
        uint8_t** parm = &class_obj;                                           \
        do {exp} while (false);                                             \
    }                                                                       \
    if (header(o)->ContainsGCPointers())                                      \
    {                                                                       \
        go_through_object_nostart(mt,o,size,parm,exp);                      \
    }                                                                       \
}
#endif //COLLECTIBLE_CLASS

// This starts a plug. But mark_stack_tos isn't increased until set_pinned_info is called.
//
// Fills in the mark stack entry at mark_stack_tos for the pinned plug starting at `plug`,
// growing the mark stack first if it is full (failure to grow is treated as a fatal error).
//
//  plug                     - start of the pinned plug
//  save_pre_plug_info_p     - whether the gap_reloc_pair sized area right before `plug` has to be saved
//  last_object_in_last_plug - the object that ends right before `plug`; only used when
//                             save_pre_plug_info_p is set
void gc_heap::enque_pinned_plug (uint8_t* plug,
                                 BOOL save_pre_plug_info_p,
                                 uint8_t* last_object_in_last_plug)
{
    if (mark_stack_array_length <= mark_stack_tos)
    {
        if (!grow_mark_stack (mark_stack_array, mark_stack_array_length, MARK_STACK_INITIAL_LENGTH))
        {
            // we don't want to continue here due to security
            // risks. This happens very rarely and fixing it in the
            // way so that we can continue is a bit involved and will
            // not be done in Dev10.
            GCToEEInterface::HandleFatalError((unsigned int)CORINFO_EXCEPTION_GC);
        }
    }

    dprintf (3, ("enqueuing P #%zd(%p): %p. oldest: %zd, LO: %p, pre: %d",
        mark_stack_tos, &mark_stack_array[mark_stack_tos], plug, mark_stack_bos, last_object_in_last_plug, (save_pre_plug_info_p ? 1 : 0)));
    mark& m = mark_stack_array[mark_stack_tos];
    m.first = plug;
    // Must be set now because if we have a short object we'll need the value of saved_pre_p.
    m.saved_pre_p = save_pre_plug_info_p;

    if (save_pre_plug_info_p)
    {
        // In the case of short plugs or doubly linked free lists, there may be extra bits
        // set in the method table pointer.
        // Clear these bits for the copy saved in saved_pre_plug, but not for the copy
        // saved in saved_pre_plug_reloc.
        // This is because we need these bits for compaction, but not for mark & sweep.
        size_t special_bits = clear_special_bits (last_object_in_last_plug);
        // now copy the bits over
        memcpy (&(m.saved_pre_plug), &(((plug_and_gap*)plug)[-1]), sizeof (gap_reloc_pair));
        // restore the bits in the original
        set_special_bits (last_object_in_last_plug, special_bits);

        // This second copy is taken after the special bits were restored, so it includes them.
        memcpy (&(m.saved_pre_plug_reloc), &(((plug_and_gap*)plug)[-1]), sizeof (gap_reloc_pair));

        // If the last object in the last plug is too short, it requires special handling.
        size_t last_obj_size = plug - last_object_in_last_plug;
        if (last_obj_size < min_pre_pin_obj_size)
        {
            record_interesting_data_point (idp_pre_short);
#ifdef SHORT_PLUGS
            if (is_plug_padded (last_object_in_last_plug))
                record_interesting_data_point (idp_pre_short_padded);
#endif //SHORT_PLUGS
            dprintf (3, ("encountered a short object %p right before pinned plug %p!",
                         last_object_in_last_plug, plug));
            // Need to set the short bit regardless of having refs or not because we need to
            // indicate that this object is not walkable.
            m.set_pre_short();

#ifdef COLLECTIBLE_CLASS
            if (is_collectible (last_object_in_last_plug))
            {
                m.set_pre_short_collectible();
            }
#endif //COLLECTIBLE_CLASS

            if (contain_pointers (last_object_in_last_plug))
            {
                dprintf (3, ("short object: %p(%zx)", last_object_in_last_plug, last_obj_size));

                // For each pointer slot of the short object, record its position, counted in
                // pointer sized units from (plug - sizeof (gap_reloc_pair) - plug_skew).
                go_through_object_nostart (method_table(last_object_in_last_plug), last_object_in_last_plug, last_obj_size, pval,
                    {
                        size_t gap_offset = (((size_t)pval - (size_t)(plug - sizeof (gap_reloc_pair) - plug_skew))) / sizeof (uint8_t*);
                        dprintf (3, ("member: %p->%p, %zd ptrs from beginning of gap", (uint8_t*)pval, *pval, gap_offset));
                        m.set_pre_short_bit (gap_offset);
                    }
                );
            }
        }
    }

    // No post plug info has been saved for this entry at this point.
    m.saved_post_p = FALSE;
}

// Saves, on the mark stack entry at mark_stack_tos - 1, a copy of the
// gap_reloc_pair-sized area that starts one plug_and_gap before post_plug.
// Two copies are kept: saved_post_plug (taken while the special bits of
// last_object_in_last_plug are cleared) and saved_post_plug_reloc (taken with
// those bits in place).
//
// last_pinned_plug         - expected to equal the `first` field of that mark
//                            stack entry (asserted); otherwise only used for logging.
// last_object_in_last_plug - the object whose special bits are temporarily cleared
//                            and whose size decides the "short" handling below.
// post_plug                - address of the plug that follows; the saved area
//                            ends right where it begins.
void gc_heap::save_post_plug_info (uint8_t* last_pinned_plug, uint8_t* last_object_in_last_plug, uint8_t* post_plug)
{
#ifndef _DEBUG
    UNREFERENCED_PARAMETER(last_pinned_plug);
#endif //_DEBUG

    // The entry being updated is the most recently pushed one.
    mark& m = mark_stack_array[mark_stack_tos - 1];
    assert (last_pinned_plug == m.first);
    // Remember where the saved bytes came from.
    m.saved_post_plug_info_start = (uint8_t*)&(((plug_and_gap*)post_plug)[-1]);

    // In the case of short plugs or doubly linked free lists, there may be extra bits
    // set in the method table pointer.
    // Clear these bits for the copy saved in saved_post_plug, but not for the copy
    // saved in saved_post_plug_reloc.
    // This is because we need these bits for compaction, but not for mark & sweep.
    // Note that currently none of these bits will ever be set in the object saved *after*
    // a pinned plug - this object is currently pinned along with the pinned object before it
    size_t special_bits = clear_special_bits (last_object_in_last_plug);
    memcpy (&(m.saved_post_plug), m.saved_post_plug_info_start, sizeof (gap_reloc_pair));
    // restore the bits in the original
    set_special_bits (last_object_in_last_plug, special_bits);

    // Second copy, taken after the special bits have been put back.
    memcpy (&(m.saved_post_plug_reloc), m.saved_post_plug_info_start, sizeof (gap_reloc_pair));

    // This is important - we need to clear all bits here except the last one.
    m.saved_post_p = TRUE;

#ifdef _DEBUG
    m.saved_post_plug_debug.gap = 1;
#endif //_DEBUG

    dprintf (3, ("PP %p has NP %p right after", last_pinned_plug, post_plug));

    // Distance from the last object to post_plug is treated as that object's size.
    size_t last_obj_size = post_plug - last_object_in_last_plug;
    if (last_obj_size < min_pre_pin_obj_size)
    {
        // The last object is shorter than min_pre_pin_obj_size: flag the entry as
        // "post short" and record which of its pointer slots fall inside the saved area.
        dprintf (3, ("PP %p last obj %p is too short", last_pinned_plug, last_object_in_last_plug));
        record_interesting_data_point (idp_post_short);
#ifdef SHORT_PLUGS
        if (is_plug_padded (last_object_in_last_plug))
            record_interesting_data_point (idp_post_short_padded);
#endif //SHORT_PLUGS
        m.set_post_short();
#if defined (_DEBUG) && defined (VERIFY_HEAP)
        verify_pinned_queue_p = TRUE;
#endif // _DEBUG && VERIFY_HEAP

#ifdef COLLECTIBLE_CLASS
        if (is_collectible (last_object_in_last_plug))
        {
            m.set_post_short_collectible();
        }
#endif //COLLECTIBLE_CLASS

        if (contain_pointers (last_object_in_last_plug))
        {
            dprintf (3, ("short object: %p(%zx)", last_object_in_last_plug, last_obj_size));

            // TODO: since we won't be able to walk this object in relocation, we still need to
            // take care of collectible assemblies here.
            go_through_object_nostart (method_table(last_object_in_last_plug), last_object_in_last_plug, last_obj_size, pval,
                {
                    // Index of this pointer slot, in pointer-sized units, relative to
                    // (post_plug - sizeof (gap_reloc_pair) - plug_skew).
                    size_t gap_offset = (((size_t)pval - (size_t)(post_plug - sizeof (gap_reloc_pair) - plug_skew))) / sizeof (uint8_t*);
                    dprintf (3, ("member: %p->%p, %zd ptrs from beginning of gap", (uint8_t*)pval, *pval, gap_offset));
                    m.set_post_short_bit (gap_offset);
                }
            );
        }
    }
}

// enable on processors known to have a useful prefetch instruction
#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64) || defined(TARGET_RISCV64)
#define PREFETCH
#endif

#ifdef PREFETCH
// Issues a prefetch hint for addr using whichever intrinsic the target
// OS/architecture combination below selects. The function returns nothing.
inline void Prefetch(void* addr)
{
#ifdef TARGET_WINDOWS

#if defined(TARGET_AMD64) || defined(TARGET_X86)

// Provide a fallback value for the hint constant if no header has defined it yet.
#ifndef _MM_HINT_T0
#define _MM_HINT_T0 1
#endif
    _mm_prefetch((const char*)addr, _MM_HINT_T0);
#elif defined(TARGET_ARM64)
    __prefetch((const char*)addr);
#endif //defined(TARGET_AMD64) || defined(TARGET_X86)

#elif defined(TARGET_UNIX)
    __builtin_prefetch(addr);
#else //!(TARGET_WINDOWS || TARGET_UNIX)
    // Neither OS macro is defined: nothing to emit, just silence the unused parameter.
    UNREFERENCED_PARAMETER(addr);
#endif //TARGET_WINDOWS
}
#else //PREFETCH
// PREFETCH is not defined for this target: Prefetch is a no-op so callers
// do not need their own conditional compilation.
inline void Prefetch (void* addr)
{
    UNREFERENCED_PARAMETER(addr);
}
#endif //PREFETCH
#ifdef MH_SC_MARK
// Returns a reference to slot `index` of hp's mark stack, with the stack
// viewed as an array of VOLATILE(uint8_t*) entries.
inline
VOLATILE(uint8_t*)& gc_heap::ref_mark_stack (gc_heap* hp, int index)
{
    VOLATILE(uint8_t*)* volatile_slots = (VOLATILE(uint8_t*)*)(hp->mark_stack_array);
    return volatile_slots[index];
}

#endif //MH_SC_MARK

// Tag values kept in the two low bits of a mark stack slot.
#define stolen 2
#define partial 1
#define partial_object 3
// Returns the slot value r with both low tag bits (stolen | partial) cleared.
inline
uint8_t* ref_from_slot (uint8_t* r)
{
    const size_t tag_mask = (size_t)(stolen | partial);
    const size_t untagged = (size_t)r & ~tag_mask;
    return (uint8_t*)untagged;
}
// True when, of the two low bits of r, bit 1 is set and bit 0 is clear
// (i.e. the low two bits equal 2).
inline
BOOL stolen_p (uint8_t* r)
{
    const size_t low_two_bits = (size_t)r & 3;
    return (low_two_bits == 2);
}
// True for every slot value except the exact value 1.
inline
BOOL ready_p (uint8_t* r)
{
    const size_t raw_slot = (size_t)r;
    return (raw_slot != 1);
}
// True when, of the two low bits of r, bit 0 is set and bit 1 is clear
// (i.e. the low two bits equal 1).
inline
BOOL partial_p (uint8_t* r)
{
    const size_t low_two_bits = (size_t)r & 3;
    return (low_two_bits == 1);
}
// True when r carries neither the stolen tag nor the partial tag on its own.
inline
BOOL straight_ref_p (uint8_t* r)
{
    return !(stolen_p (r) || partial_p (r));
}
// True when both low tag bits of r are set (r's tag equals partial_object).
inline
BOOL partial_object_p (uint8_t* r)
{
    const size_t tag_bits = (size_t)r & partial_object;
    return (tag_bits == partial_object);
}
// True when r is either a straight reference or tagged as a partial object.
inline
BOOL ref_p (uint8_t* r)
{
    if (straight_ref_p (r))
    {
        return true;
    }
    return partial_object_p (r);
}

// Constructs an empty mark queue. With MARK_PHASE_PREFETCH the current slot
// index starts at 0 and every slot is set to nullptr; without it there is
// nothing to initialize.
mark_queue_t::mark_queue_t()
#ifdef MARK_PHASE_PREFETCH
    : curr_slot_index(0)
#endif //MARK_PHASE_PREFETCH
{
#ifdef MARK_PHASE_PREFETCH
    size_t remaining_slots = slot_count;
    while (remaining_slots != 0)
    {
        remaining_slots--;
        slot_table[remaining_slots] = nullptr;
    }
#endif //MARK_PHASE_PREFETCH
}

// Puts object o into the mark queue.
// With MARK_PHASE_PREFETCH, o is prefetched and parked in the current slot, and
// the object that previously occupied that slot is the one examined; without it,
// o itself is examined.
// Returns the examined object if this call marked it, or nullptr if the slot
// was empty or the object was already marked.
// Object o *must* be in a condemned generation.
FORCEINLINE
uint8_t *mark_queue_t::queue_mark(uint8_t *o)
{
#ifdef MARK_PHASE_PREFETCH
    Prefetch (o);

    // Swap o into the current slot and take out whatever was parked there;
    // the hope is that the older entry's memory has reached the cache by now.
    const size_t parked_at = curr_slot_index;
    uint8_t* candidate = slot_table[parked_at];
    slot_table[parked_at] = o;
    curr_slot_index = (parked_at + 1) % slot_count;

    if (candidate == nullptr)
    {
        return nullptr;
    }
#else //MARK_PHASE_PREFETCH
    uint8_t* candidate = o;
#endif //MARK_PHASE_PREFETCH

    // Checking the mark touches the candidate's method table pointer.
    if (marked (candidate))
    {
        return nullptr;
    }

    set_marked (candidate);
    return candidate;
}

// Puts object o into the mark queue after first checking that o is something
// this GC should mark; returns nullptr without queuing when it is not.
// Otherwise behaves like queue_mark(o): the result is a newly marked object
// (possibly a different one than o) or nullptr.
FORCEINLINE
uint8_t *mark_queue_t::queue_mark(uint8_t *o, int condemned_gen)
{
#ifdef USE_REGIONS
    if (is_in_heap_range (o))
    {
        // For a max_generation GC every region qualifies; otherwise the region's
        // generation must not be above the condemned one.
        const bool full_gc_p = (condemned_gen == max_generation);
        if (full_gc_p || (gc_heap::get_region_gen_num (o) <= condemned_gen))
        {
            return queue_mark(o);
        }
    }
    return nullptr;
#else //USE_REGIONS
    assert (condemned_gen == -1);

#ifdef MULTIPLE_HEAPS
    if (o == nullptr)
    {
        return nullptr;
    }

    // Use the [gc_low, gc_high) range of the heap returned by heap_of_gc (o).
    gc_heap* owner_heap = gc_heap::heap_of_gc (o);
    assert (owner_heap);
    if ((o >= owner_heap->gc_low) && (o < owner_heap->gc_high))
    {
        return queue_mark (o);
    }
#else //MULTIPLE_HEAPS
    if ((o >= gc_heap::gc_low) && (o < gc_heap::gc_high))
    {
        return queue_mark (o);
    }
#endif //MULTIPLE_HEAPS
    return nullptr;
#endif //USE_REGIONS
}

// Takes the next object out of the queue that has not been marked yet, marks
// it and returns it. Every slot visited on the way is emptied.
// Returns nullptr once slot_count slots have been visited without finding one
// (and always when MARK_PHASE_PREFETCH is off).
uint8_t* mark_queue_t::get_next_marked()
{
#ifdef MARK_PHASE_PREFETCH
    size_t probe_index = curr_slot_index;
    for (size_t visited = 0; visited < slot_count; visited++)
    {
        uint8_t* candidate = slot_table[probe_index];
        slot_table[probe_index] = nullptr;
        probe_index = (probe_index + 1) % slot_count;

        // Empty slots and already marked objects yield nothing; keep scanning.
        if ((candidate == nullptr) || marked (candidate))
        {
            continue;
        }

        set_marked (candidate);
        curr_slot_index = probe_index;
        return candidate;
    }
#endif //MARK_PHASE_PREFETCH
    return nullptr;
}

// Asserts that every slot of the queue is nullptr. Does nothing when
// MARK_PHASE_PREFETCH is off.
void mark_queue_t::verify_empty()
{
#ifdef MARK_PHASE_PREFETCH
    size_t slot_index = 0;
    while (slot_index < slot_count)
    {
        assert(slot_table[slot_index] == nullptr);
        slot_index++;
    }
#endif //MARK_PHASE_PREFETCH
}

// Marks what is reachable from object oo using mark_stack_array as an explicit
// stack instead of recursion.
//
// oo    - the object whose references are to be traced.
// start - address inside oo at which scanning begins; asserted to lie in
//         [oo, oo + size (oo)).
//
// Objects smaller than partial_size_th pointers have all their newly marked
// children pushed in one go. Other objects are scanned in batches of
// num_partial_refs children; between batches two stack slots describe the
// unfinished object: the object itself and the continuation address tagged
// with `partial`.
// If the stack cannot hold the entries an object might need, the object is not
// scanned and min_overflow_address/max_overflow_address are widened to include it.
void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL)
{
    SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_tos = (SERVER_SC_MARK_VOLATILE(uint8_t*)*)mark_stack_array;
    SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_limit = (SERVER_SC_MARK_VOLATILE(uint8_t*)*)&mark_stack_array[mark_stack_array_length];
    // NOTE(review): mark_stack_base is not referenced by name below; presumably
    // mark_stack_empty_p() compares it against mark_stack_tos - confirm.
    SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_base = mark_stack_tos;

    // If we are doing a full GC we don't use mark list anyway so use m_boundary_fullgc that doesn't
    // update mark list.
    BOOL  full_p = (settings.condemned_generation == max_generation);
    int condemned_gen =
#ifdef USE_REGIONS
        settings.condemned_generation;
#else
        -1;
#endif //USE_REGIONS

    assert ((start >= oo) && (start < oo+size(oo)));

#ifndef MH_SC_MARK
    *mark_stack_tos = oo;
#endif //!MH_SC_MARK

    while (1)
    {
#ifdef MULTIPLE_HEAPS
#else  //MULTIPLE_HEAPS
        const int thread = 0;
#endif //MULTIPLE_HEAPS

        // Null slots and slots holding the value 4 carry no work. (mark_steal
        // writes (uint8_t*)4 into a slot when it takes a plain object from it.)
        if (oo && ((size_t)oo != 4))
        {
            size_t s = 0;
            if (stolen_p (oo))
            {
                // The popped slot holds the stolen tag; drop the slot below it as well.
                --mark_stack_tos;
                goto next_level;
            }
            else if (!partial_p (oo) && ((s = size (oo)) < (partial_size_th*sizeof (uint8_t*))))
            {
                // Small object: push all of its newly marked children at once.
                BOOL overflow_p = FALSE;

                // Cheap upper bound first (object size in pointer-sized units); only if
                // that does not fit, compute the real pointer count.
                if (mark_stack_tos + (s) /sizeof (uint8_t*) >= (mark_stack_limit  - 1))
                {
                    size_t num_components = ((method_table(oo))->HasComponentSize() ? ((CObjectHeader*)oo)->GetNumComponents() : 0);
                    if (mark_stack_tos + CGCDesc::GetNumPointers(method_table(oo), s, num_components) >= (mark_stack_limit - 1))
                    {
                        overflow_p = TRUE;
                    }
                }

                if (overflow_p == FALSE)
                {
                    dprintf(3,("pushing mark for %zx ", (size_t)oo));

                    go_through_object_cl (method_table(oo), oo, s, ppslot,
                                          {
                                              // queue_mark returns an object only if this call marked it.
                                              uint8_t* o = mark_queue.queue_mark(*ppslot, condemned_gen);
                                              if (o != nullptr)
                                              {
                                                  if (full_p)
                                                  {
                                                      m_boundary_fullgc (o);
                                                  }
                                                  else
                                                  {
                                                      m_boundary (o);
                                                  }
                                                  add_to_promoted_bytes (o, thread);
                                                  // Only objects that may lead to further objects need a stack slot.
                                                  if (contain_pointers_or_collectible (o))
                                                  {
                                                      *(mark_stack_tos++) = o;
                                                  }
                                              }
                                          }
                        );
                }
                else
                {
                    // Not enough room: remember the address range that was left unscanned.
                    dprintf(3,("mark stack overflow for object %zx ", (size_t)oo));
                    min_overflow_address = min (min_overflow_address, oo);
                    max_overflow_address = max (max_overflow_address, oo);
                }
            }
            else
            {
                // Either a continuation of a partially scanned object, or an object
                // at least partial_size_th pointers in size.
                if (partial_p (oo))
                {
                    // The popped value is the tagged continuation address; the object
                    // it belongs to sits in the slot below.
                    start = ref_from_slot (oo);
                    oo = ref_from_slot (*(--mark_stack_tos));
                    dprintf (4, ("oo: %zx, start: %zx\n", (size_t)oo, (size_t)start));
                    assert ((oo < start) && (start < (oo + size (oo))));
                }
#ifdef COLLECTIBLE_CLASS
                else
                {
                    // If there's a class object, push it now. We are guaranteed to have the slot since
                    // we just popped one object off.
                    if (is_collectible (oo))
                    {
                        uint8_t* class_obj = get_class_object (oo);
                        if (gc_mark (class_obj, gc_low, gc_high, condemned_gen))
                        {
                            if (full_p)
                            {
                                m_boundary_fullgc (class_obj);
                            }
                            else
                            {
                                m_boundary (class_obj);
                            }

                            add_to_promoted_bytes (class_obj, thread);
                            *(mark_stack_tos++) = class_obj;
                            // The code below expects that the oo is still stored in the stack slot that was
                            // just popped and it "pushes" it back just by incrementing the mark_stack_tos.
                            // But the class_obj has just overwritten that stack slot and so the oo needs to
                            // be stored to the new slot that's pointed to by the mark_stack_tos.
                            *mark_stack_tos = oo;
                        }
                    }

                    if (!contain_pointers (oo))
                    {
                        goto next_level;
                    }
                }
#endif //COLLECTIBLE_CLASS

                s = size (oo);

                BOOL overflow_p = FALSE;

                // Room is needed for the two bookkeeping slots plus one batch of children.
                if (mark_stack_tos + (num_partial_refs + 2)  >= mark_stack_limit)
                {
                    overflow_p = TRUE;
                }
                if (overflow_p == FALSE)
                {
                    dprintf(3,("pushing mark for %zx ", (size_t)oo));

                    //push the object and its current
                    // After these two increments, (place-1) is the slot for the object and
                    // place is the slot for the continuation address; children go above them.
                    SERVER_SC_MARK_VOLATILE(uint8_t*)* place = ++mark_stack_tos;
                    mark_stack_tos++;
#ifdef MH_SC_MARK
                    *(place-1) = 0;
                    *(place) = (uint8_t*)partial;
#endif //MH_SC_MARK
                    // Number of children that may still be pushed in this batch.
                    int i = num_partial_refs;
                    uint8_t* ref_to_continue = 0;

                    go_through_object (method_table(oo), oo, s, ppslot,
                                       start, use_start, (oo + s),
                                       {
                                           uint8_t* o = mark_queue.queue_mark(*ppslot, condemned_gen);
                                           if (o != nullptr)
                                           {
                                                if (full_p)
                                                {
                                                    m_boundary_fullgc (o);
                                                }
                                                else
                                                {
                                                    m_boundary (o);
                                                }
                                                add_to_promoted_bytes (o, thread);
                                                if (contain_pointers_or_collectible (o))
                                                {
                                                    *(mark_stack_tos++) = o;
                                                    if (--i == 0)
                                                    {
                                                        // Batch is full: remember the next slot to scan, tagged
                                                        // with partial, and leave the scan.
                                                        ref_to_continue = (uint8_t*)((size_t)(ppslot+1) | partial);
                                                        goto more_to_do;
                                                    }

                                                }
                                           }

                                       }
                        );
                    //we are finished with this object
                    assert (ref_to_continue == 0);
#ifdef MH_SC_MARK
                    assert ((*(place-1)) == (uint8_t*)0);
#else //MH_SC_MARK
                    *(place-1) = 0;
#endif //MH_SC_MARK
                    // Both bookkeeping slots are now zero; zero slots are skipped by the
                    // `if (oo && ...)` check at the top of the loop when they are popped.
                    *place = 0;
                    // shouldn't we decrease tos by 2 here??

more_to_do:
                    if (ref_to_continue)
                    {
                        //update the start
#ifdef MH_SC_MARK
                        assert ((*(place-1)) == (uint8_t*)0);
                        *(place-1) = (uint8_t*)((size_t)oo | partial_object);
                        assert (((*place) == (uint8_t*)1) || ((*place) == (uint8_t*)2));
#endif //MH_SC_MARK
                        // Record where scanning of oo resumes once the children above are done.
                        *place = ref_to_continue;
                    }
                }
                else
                {
                    // Not enough room: remember the address range that was left unscanned.
                    dprintf(3,("mark stack overflow for object %zx ", (size_t)oo));
                    min_overflow_address = min (min_overflow_address, oo);
                    max_overflow_address = max (max_overflow_address, oo);
                }
            }
        }
    next_level:
        // Pop the next entry, or finish when the stack is empty.
        if (!(mark_stack_empty_p()))
        {
            oo = *(--mark_stack_tos);
            start = oo;
        }
        else
            break;
    }
}

#ifdef MH_SC_MARK
// True when heap_select maps heap numbers hn1 and hn2 to the same NUMA node.
BOOL same_numa_node_p (int hn1, int hn2)
{
    const auto numa_node_of_hn1 = heap_select::find_numa_node_from_heap_no (hn1);
    const auto numa_node_of_hn2 = heap_select::find_numa_node_from_heap_no (hn2);
    return (numa_node_of_hn1 == numa_node_of_hn2);
}

// Walks the heap numbers circularly, starting right after current_buddy, and
// returns the first one that differs from this_heap_number and is on the same
// NUMA node as this_heap_number. Returns current_buddy if the walk comes back
// around without finding one.
int find_next_buddy_heap (int this_heap_number, int current_buddy, int n_heaps)
{
    for (int candidate = (current_buddy + 1) % n_heaps;
         candidate != current_buddy;
         candidate = (candidate + 1) % n_heaps)
    {
        if (candidate == this_heap_number)
        {
            continue;
        }
        if (same_numa_node_p (this_heap_number, candidate))
        {
            return candidate;
        }
    }
    return current_buddy;
}

// Work-stealing loop for MH_SC_MARK builds. This heap inspects the lowest
// max_snoop_level slots of another ("buddy") heap's mark stack, claims entries
// from it with CompareExchangePointer and marks from them via
// mark_object_simple1. The loop ends once all n_heaps heaps (this one included)
// are seen with mark_stack_busy() == 0.
void
gc_heap::mark_steal()
{
    mark_stack_busy() = 0;
    //clear the mark stack in the snooping range
    for (int i = 0; i < max_snoop_level; i++)
    {
        ((VOLATILE(uint8_t*)*)(mark_stack_array))[i] = 0;
    }

    //pick the next heap as our buddy
    // (find_next_buddy_heap returns heap_number itself if no other heap shares our NUMA node)
    int thpn = find_next_buddy_heap (heap_number, heap_number, n_heaps);

#ifdef SNOOP_STATS
        dprintf (SNOOP_LOG, ("(GC%d)heap%d: start snooping %d", settings.gc_index, heap_number, (heap_number+1)%n_heaps));
        uint64_t begin_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
#endif //SNOOP_STATS

    // Consecutive passes on which the buddy was found not busy.
    int idle_loop_count = 0;
    // Lowest level at which a partial-mark tuple was seen but not yet usable; the next
    // pass over the buddy's stack restarts from here. 0 means "none".
    int first_not_ready_level = 0;

    while (1)
    {
        gc_heap* hp = g_heaps [thpn];
        int level = first_not_ready_level;
        first_not_ready_level = 0;

        // Scan the buddy's stack for as long as it reports itself busy and we are
        // still inside the snooping range.
        while (check_next_mark_stack (hp) && (level < (max_snoop_level-1)))
        {
            idle_loop_count = 0;
#ifdef SNOOP_STATS
            snoop_stat.busy_count++;
            dprintf (SNOOP_LOG, ("heap%d: looking at next heap level %d stack contents: %zx",
                                 heap_number, level, (int)((uint8_t**)(hp->mark_stack_array))[level]));
#endif //SNOOP_STATS

            uint8_t* o = ref_mark_stack (hp, level);

            uint8_t* start = o;
            if (ref_p (o))
            {
                mark_stack_busy() = 1;

                BOOL success = TRUE;
                // What sits in the slot above decides how the slot at `level` is interpreted.
                uint8_t* next = (ref_mark_stack (hp, level+1));
                if (ref_p (next))
                {
                    if (((size_t)o > 4) && !partial_object_p (o))
                    {
                        //this is a normal object, not a partial mark tuple
                        //success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level), 0, o)==o);
                        // Claim it by swapping the value 4 into the slot; we own the object
                        // only if the slot still held o.
                        success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level), (uint8_t*)4, o)==o);
#ifdef SNOOP_STATS
                        snoop_stat.interlocked_count++;
                        if (success)
                            snoop_stat.normal_count++;
#endif //SNOOP_STATS
                    }
                    else
                    {
                        //it is a stolen entry, or beginning/ending of a partial mark
                        level++;
#ifdef SNOOP_STATS
                        snoop_stat.stolen_or_pm_count++;
#endif //SNOOP_STATS
                        success = FALSE;
                    }
                }
                else if (stolen_p (next))
                {
                    //ignore the stolen guy and go to the next level
                    success = FALSE;
                    level+=2;
#ifdef SNOOP_STATS
                    snoop_stat.stolen_entry_count++;
#endif //SNOOP_STATS
                }
                else
                {
                    // (object, continuation) tuple of a partially scanned object.
                    assert (partial_p (next));
                    start = ref_from_slot (next);
                    //re-read the object
                    o = ref_from_slot (ref_mark_stack (hp, level));
                    if (o && start)
                    {
                        //steal the object
                        // The continuation slot is replaced by the stolen tag; we own the
                        // rest of the object only if that slot still held `next`.
                        success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level+1),
                                                                        (uint8_t*)stolen, next) == next);
#ifdef SNOOP_STATS
                        snoop_stat.interlocked_count++;
                        if (success)
                        {
                            snoop_stat.partial_mark_parent_count++;
                        }
#endif //SNOOP_STATS
                    }
                    else
                    {
                        // stack is not ready, or o is completely different from the last time we read from this stack level.
                        // go up 2 levels to steal children or totally unrelated objects.
                        success = FALSE;
                        if (first_not_ready_level == 0)
                        {
                            first_not_ready_level = level;
                        }
                        level+=2;
#ifdef SNOOP_STATS
                        snoop_stat.pm_not_ready_count++;
#endif //SNOOP_STATS
                    }
                }
                if (success)
                {

#ifdef SNOOP_STATS
                    dprintf (SNOOP_LOG, ("heap%d: marking %zx from %d [%d] tl:%dms",
                            heap_number, (size_t)o, (heap_number+1)%n_heaps, level,
                            (GCToOSInterface::GetLowPrecisionTimeStamp()-begin_tick)));
                    uint64_t start_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
#endif //SNOOP_STATS

                    // Trace the claimed work on this heap's own mark stack.
                    mark_object_simple1 (o, start, heap_number);

#ifdef SNOOP_STATS
                    dprintf (SNOOP_LOG, ("heap%d: done marking %zx from %d [%d] %dms tl:%dms",
                            heap_number, (size_t)o, (heap_number+1)%n_heaps, level,
                            (GCToOSInterface::GetLowPrecisionTimeStamp()-start_tick),(GCToOSInterface::GetLowPrecisionTimeStamp()-begin_tick)));
#endif //SNOOP_STATS

                    mark_stack_busy() = 0;

                    //clear the mark stack in snooping range
                    for (int i = 0; i < max_snoop_level; i++)
                    {
                        if (((uint8_t**)mark_stack_array)[i] != 0)
                        {
                            ((VOLATILE(uint8_t*)*)(mark_stack_array))[i] = 0;
#ifdef SNOOP_STATS
                            snoop_stat.stack_bottom_clear_count++;
#endif //SNOOP_STATS
                        }
                    }

                    // Start over from the bottom of the buddy's stack.
                    level = 0;
                }
                mark_stack_busy() = 0;
            }
            else
            {
                //slot is either partial or stolen
                level++;
            }
        }
        // A tuple was not ready yet and the buddy is still working: look at it again.
        if ((first_not_ready_level != 0) && hp->mark_stack_busy())
        {
            continue;
        }
        if (!hp->mark_stack_busy())
        {
            first_not_ready_level = 0;
            idle_loop_count++;

            // Sleep on the 1st, 7th, 13th, ... consecutive idle pass.
            if ((idle_loop_count % (6) )==1)
            {
#ifdef SNOOP_STATS
                snoop_stat.switch_to_thread_count++;
#endif //SNOOP_STATS
                GCToOSInterface::Sleep(1);
            }
            // Count heaps that are not busy, starting with this one.
            int free_count = 1;
#ifdef SNOOP_STATS
            snoop_stat.stack_idle_count++;
            //dprintf (SNOOP_LOG, ("heap%d: counting idle threads", heap_number));
#endif //SNOOP_STATS
            for (int hpn = (heap_number+1)%n_heaps; hpn != heap_number;)
            {
                if (!((g_heaps [hpn])->mark_stack_busy()))
                {
                    free_count++;
#ifdef SNOOP_STATS
                dprintf (SNOOP_LOG, ("heap%d: %d idle", heap_number, free_count));
#endif //SNOOP_STATS
                }
                // A busy heap becomes the new buddy if it shares our NUMA node, or
                // regardless of node when idle_loop_count % 1000 == 999.
                else if (same_numa_node_p (hpn, heap_number) || ((idle_loop_count%1000))==999)
                {
                    thpn = hpn;
                    break;
                }
                hpn = (hpn+1)%n_heaps;
                YieldProcessor();
            }
            // Every heap was seen idle: nothing left to steal.
            if (free_count == n_heaps)
            {
                break;
            }
        }
    }
}

// Reports whether next_heap's mark_stack_busy() value is at least 1.
// With SNOOP_STATS, each call also bumps snoop_stat.check_level_count.
inline
BOOL gc_heap::check_next_mark_stack (gc_heap* next_heap)
{
#ifdef SNOOP_STATS
    snoop_stat.check_level_count++;
#endif //SNOOP_STATS
    const BOOL busy_p = (next_heap->mark_stack_busy() >= 1);
    return busy_p;
}
#endif //MH_SC_MARK

#ifdef SNOOP_STATS
// Prints the counters collected in snoop_stat as small tables: first through
// dprintf (level 1234), then through printf. Each table is a header row of
// column names followed by a row of values.
//
// Every header row must pass exactly one string per conversion in its format
// string; the second dprintf header previously omitted the "normal" column
// name, leaving the last %8s without an argument.
void gc_heap::print_snoop_stat()
{
    dprintf (1234, ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s",
        "heap", "check", "zero", "mark", "stole", "pstack", "nstack", "nonsk"));
    dprintf (1234, ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d",
        snoop_stat.heap_index,
        snoop_stat.objects_checked_count,
        snoop_stat.zero_ref_count,
        snoop_stat.objects_marked_count,
        snoop_stat.stolen_stack_count,
        snoop_stat.partial_stack_count,
        snoop_stat.normal_stack_count,
        snoop_stat.non_stack_count));
    // Ten columns, matching the ten values printed by the row below.
    dprintf (1234, ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s",
        "heap", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "normal", "clear"));
    dprintf (1234, ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d\n",
        snoop_stat.heap_index,
        snoop_stat.check_level_count,
        snoop_stat.busy_count,
        snoop_stat.interlocked_count,
        snoop_stat.partial_mark_parent_count,
        snoop_stat.stolen_or_pm_count,
        snoop_stat.stolen_entry_count,
        snoop_stat.pm_not_ready_count,
        snoop_stat.normal_count,
        snoop_stat.stack_bottom_clear_count));

    printf ("\n%4s | %8s | %8s | %8s | %8s | %8s\n",
        "heap", "check", "zero", "mark", "idle", "switch");
    printf ("%4d | %8d | %8d | %8d | %8d | %8d\n",
        snoop_stat.heap_index,
        snoop_stat.objects_checked_count,
        snoop_stat.zero_ref_count,
        snoop_stat.objects_marked_count,
        snoop_stat.stack_idle_count,
        snoop_stat.switch_to_thread_count);
    printf ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s\n",
        "heap", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "normal", "clear");
    printf ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d\n",
        snoop_stat.heap_index,
        snoop_stat.check_level_count,
        snoop_stat.busy_count,
        snoop_stat.interlocked_count,
        snoop_stat.partial_mark_parent_count,
        snoop_stat.stolen_or_pm_count,
        snoop_stat.stolen_entry_count,
        snoop_stat.pm_not_ready_count,
        snoop_stat.normal_count,
        snoop_stat.stack_bottom_clear_count);
}
#endif //SNOOP_STATS

#ifdef HEAP_ANALYZE
void
gc_heap::ha_mark_object_simple (uint8_t** po THREAD_NUMBER_DCL)
{
    if (!internal_root_array)
    {
        internal_root_array = new (nothrow) uint8_t* [internal_root_array_length];
        if (!internal_root_array)
        {
            heap_analyze_success = FALSE;
        }
    }

    if (heap_analyze_success && (internal_root_array_length <= internal_root_array_index))
    {
        size_t new_size = 2*internal_root_array_length;

        uint64_t available_physical = 0;
        get_memory_info (NULL, &available_physical);
        if (new_size > (size_t)(available_physical / 10))
        {
            heap_analyze_success = FALSE;
        }
        else
        {
            uint8_t** tmp = new (nothrow) uint8_t* [new_size];
            if (tmp)
            {
                memcpy (tmp, internal_root_array,
                        internal_root_array_length*sizeof (uint8_t*));
                delete[] internal_root_array;
                internal_root_array = tmp;
                internal_root_array_length = new_size;
            }
            else
            {
                heap_analyze_success = FALSE;
            }
        }
    }

    if (heap_analyze_success)
    {
        _ASSERTE(internal_root_array_index < internal_root_array_length);

        uint8_t* ref = (uint8_t*)po;
        if (!current_obj ||
            !((ref >= current_obj) && (ref < (current_obj + current_obj_size))))
        {
            gc_heap* hp = gc_heap::heap_of (ref);
            current_obj = hp->find_object (ref);
            current_obj_size = size (current_obj);

            internal_root_array[internal_root_array_index] = current_obj;
            internal_root_array_index++;
        }
    }

    mark_object_simple (po THREAD_NUMBER_ARG);
}
#endif //HEAP_ANALYZE

//this method assumes that *po is in the [low. high[ range
void
gc_heap::mark_object_simple (uint8_t** po THREAD_NUMBER_DCL)
{
    int condemned_gen =
#ifdef USE_REGIONS
        settings.condemned_generation;
#else
        -1;
#endif //USE_REGIONS

    uint8_t* o = *po;
#ifndef MULTIPLE_HEAPS
    const int thread = 0;
#endif //MULTIPLE_HEAPS
    {
#ifdef SNOOP_STATS
        snoop_stat.objects_checked_count++;
#endif //SNOOP_STATS

        o = mark_queue.queue_mark (o);
        if (o != nullptr)
        {
            m_boundary (o);
            size_t s = size (o);
            add_to_promoted_bytes (o, s, thread);
            {
                go_through_object_cl (method_table(o), o, s, poo,
                                        {
                                            uint8_t* oo = mark_queue.queue_mark(*poo, condemned_gen);
                                            if (oo != nullptr)
                                            {
                                                m_boundary (oo);
                                                add_to_promoted_bytes (oo, thread);
                                                if (contain_pointers_or_collectible (oo))
                                                    mark_object_simple1 (oo, oo THREAD_NUMBER_ARG);
                                            }
                                        }
                    );
            }
        }
    }
}

// Marks o via mark_object_simple if o passes the range checks for this GC:
// with USE_REGIONS, is_in_gc_range and is_in_condemned_gc; otherwise this
// heap's [gc_low, gc_high), and with MULTIPLE_HEAPS additionally the
// [gc_low, gc_high) of the heap returned by heap_of (o) for non-null o.
inline
void gc_heap::mark_object (uint8_t* o THREAD_NUMBER_DCL)
{
#ifdef USE_REGIONS
    if (!is_in_gc_range (o) || !is_in_condemned_gc (o))
    {
        return;
    }
    mark_object_simple (&o THREAD_NUMBER_ARG);
#else //USE_REGIONS
    if ((o >= gc_low) && (o < gc_high))
    {
        mark_object_simple (&o THREAD_NUMBER_ARG);
        return;
    }
#ifdef MULTIPLE_HEAPS
    if (o == nullptr)
    {
        return;
    }

    gc_heap* hp = heap_of (o);
    assert (hp);
    if ((o >= hp->gc_low) && (o < hp->gc_high))
    {
        mark_object_simple (&o THREAD_NUMBER_ARG);
    }
#endif //MULTIPLE_HEAPS
#endif //USE_REGIONS
}

void gc_heap::drain_mark_queue ()
{
    int condemned_gen =
#ifdef USE_REGIONS
        settings.condemned_generation;
#else
        -1;
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
    THREAD_FROM_HEAP;
#else
    const int thread = 0;
#endif //MULTIPLE_HEAPS

    uint8_t* o;
    while ((o = mark_queue.get_next_marked()) != nullptr)
    {
        m_boundary (o);
        size_t s = size (o);
        add_to_promoted_bytes (o, s, thread);
        if (contain_pointers_or_collectible (o))
        {
            go_through_object_cl (method_table(o), o, s, poo,
                                    {
                                        uint8_t* oo = mark_queue.queue_mark(*poo, condemned_gen);
                                        if (oo != nullptr)
                                        {
                                            m_boundary (oo);
                                            add_to_promoted_bytes (oo, thread);
                                            if (contain_pointers_or_collectible (oo))
                                                mark_object_simple1 (oo, oo THREAD_NUMBER_ARG);
                                        }
                                    }
                );
        }
    }
}

#ifdef BACKGROUND_GC

#ifdef USE_REGIONS
// Records a background mark stack overflow for the region containing oo:
// sets the region's overflow flag and raises background_overflow_p.
void gc_heap::set_background_overflow_p (uint8_t* oo)
{
    heap_segment* region = get_region_info_for_address (oo);
    region->flags = region->flags | heap_segment_flags_overflow;
    dprintf (3,("setting overflow flag for region %p", heap_segment_mem (region)));
    background_overflow_p = TRUE;
}
#endif //USE_REGIONS

// Marks through oo and everything reachable from it for background GC, using the
// explicit background mark stack (background_mark_stack_array) rather than recursion.
// The stack is reset to empty on entry and is asserted to be empty on exit.
//
// Entries on the stack are either
//   - a plain object pointer (low bit clear), or
//   - a two-slot "partial mark" record: a resume position, followed by the parent
//     object pointer with its low bit set.
//
// Objects smaller than partial_size_th pointer-sized words are scanned in one pass.
// Larger objects are scanned incrementally through a partial mark record. Whenever
// the stack might not have room, the object is not scanned here; instead an overflow
// is recorded (per-region flag with USE_REGIONS, otherwise the min/max overflow
// addresses) and bgc_overflow_count is incremented.
void gc_heap::background_mark_simple1 (uint8_t* oo THREAD_NUMBER_DCL)
{
    uint8_t** mark_stack_limit = &background_mark_stack_array[background_mark_stack_array_length];

    background_mark_stack_tos = background_mark_stack_array;

    while (1)
    {
#ifdef MULTIPLE_HEAPS
#else  //MULTIPLE_HEAPS
        const int thread = 0;
#endif //MULTIPLE_HEAPS
        // A null entry (e.g. a cleared partial mark record, see below) is simply skipped.
        if (oo)
        {
            size_t s = 0;
            // Untagged pointer to a small object: scan all of its references at once.
            if ((((size_t)oo & 1) == 0) && ((s = size (oo)) < (partial_size_th*sizeof (uint8_t*))))
            {
                BOOL overflow_p = FALSE;

                // Cheap conservative check first: treat every pointer-sized word of the
                // object as a potential push. Only if that would not fit is the exact
                // number of pointers computed.
                if (background_mark_stack_tos + (s) /sizeof (uint8_t*) >= (mark_stack_limit - 1))
                {
                    size_t num_components = ((method_table(oo))->HasComponentSize() ? ((CObjectHeader*)oo)->GetNumComponents() : 0);
                    size_t num_pointers = CGCDesc::GetNumPointers(method_table(oo), s, num_components);
                    if (background_mark_stack_tos + num_pointers >= (mark_stack_limit - 1))
                    {
                        dprintf (2, ("h%d: %zd left, obj (mt: %p) %zd ptrs",
                            heap_number,
                            (size_t)(mark_stack_limit - 1 - background_mark_stack_tos),
                            method_table(oo),
                            num_pointers));

                        bgc_overflow_count++;
                        overflow_p = TRUE;
                    }
                }

                if (overflow_p == FALSE)
                {
                    dprintf(3,("pushing mark for %zx ", (size_t)oo));

                    // Mark each referenced object; newly marked ones are counted as
                    // promoted and pushed if they may hold references themselves.
                    go_through_object_cl (method_table(oo), oo, s, ppslot,
                    {
                        uint8_t* o = *ppslot;
                        Prefetch(o);
                        if (background_mark (o,
                                             background_saved_lowest_address,
                                             background_saved_highest_address))
                        {
                            //m_boundary (o);
                            size_t obj_size = size (o);
                            bpromoted_bytes (thread) += obj_size;
                            if (contain_pointers_or_collectible (o))
                            {
                                *(background_mark_stack_tos++) = o;

                            }
                        }
                    }
                        );
                }
                else
                {
                    dprintf (3,("background mark stack overflow for object %zx ", (size_t)oo));
#ifdef USE_REGIONS
                    set_background_overflow_p (oo);
#else //USE_REGIONS
                    background_min_overflow_address = min (background_min_overflow_address, oo);
                    background_max_overflow_address = max (background_max_overflow_address, oo);
#endif //USE_REGIONS
                }
            }
            else
            {
                // Either a large object seen for the first time (start == oo), or the
                // tagged parent of a partial mark record that is being resumed.
                uint8_t* start = oo;
                if ((size_t)oo & 1)
                {
                    // Strip the tag and pop the saved resume position, which sits
                    // directly below the tagged parent on the stack.
                    oo = (uint8_t*)((size_t)oo & ~1);
                    start = *(--background_mark_stack_tos);
                    dprintf (4, ("oo: %zx, start: %zx\n", (size_t)oo, (size_t)start));
                }
#ifdef COLLECTIBLE_CLASS
                else
                {
                    // If there's a class object, push it now. We are guaranteed to have the slot since
                    // we just popped one object off.
                    if (is_collectible (oo))
                    {
                        uint8_t* class_obj = get_class_object (oo);
                        if (background_mark (class_obj,
                                            background_saved_lowest_address,
                                            background_saved_highest_address))
                        {
                            size_t obj_size = size (class_obj);
                            bpromoted_bytes (thread) += obj_size;

                            *(background_mark_stack_tos++) = class_obj;
                        }
                    }

                    // Nothing else to scan in this object; move on to the next stack entry.
                    if (!contain_pointers (oo))
                    {
                        goto next_level;
                    }
                }
#endif //COLLECTIBLE_CLASS

                s = size (oo);

                BOOL overflow_p = FALSE;

                // Need room for the two-slot partial mark record plus up to
                // num_partial_refs pushed references.
                if (background_mark_stack_tos + (num_partial_refs + 2)  >= mark_stack_limit)
                {
                    size_t num_components = ((method_table(oo))->HasComponentSize() ? ((CObjectHeader*)oo)->GetNumComponents() : 0);
                    size_t num_pointers = CGCDesc::GetNumPointers(method_table(oo), s, num_components);

                    dprintf (2, ("h%d: PM: %zd left, obj %p (mt: %p) start: %p, total: %zd",
                        heap_number,
                        (size_t)(mark_stack_limit - background_mark_stack_tos),
                        oo,
                        method_table(oo),
                        start,
                        num_pointers));

                    bgc_overflow_count++;
                    overflow_p = TRUE;
                }
                if (overflow_p == FALSE)
                {
                    dprintf(3,("pushing mark for %zx ", (size_t)oo));

                    //push the object and its current
                    // Record layout: *place holds the resume position, the next slot
                    // holds the parent pointer tagged with the low bit.
                    uint8_t** place = background_mark_stack_tos++;
                    *(place) = start;
                    *(background_mark_stack_tos++) = (uint8_t*)((size_t)oo | 1);

                    // Stop this pass after pushing num_partial_refs references, or after
                    // visiting 16 times that many slots, whichever happens first.
                    int num_pushed_refs = num_partial_refs;
                    int num_processed_refs = num_pushed_refs * 16;

                    go_through_object (method_table(oo), oo, s, ppslot,
                                       start, use_start, (oo + s),
                    {
                        uint8_t* o = *ppslot;
                        Prefetch(o);

                        if (background_mark (o,
                                            background_saved_lowest_address,
                                            background_saved_highest_address))
                        {
                            //m_boundary (o);
                            size_t obj_size = size (o);
                            bpromoted_bytes (thread) += obj_size;
                            if (contain_pointers_or_collectible (o))
                            {
                                *(background_mark_stack_tos++) = o;
                                if (--num_pushed_refs == 0)
                                {
                                    //update the start
                                    *place = (uint8_t*)(ppslot+1);
                                    goto more_to_do;
                                }

                            }
                        }
                        if (--num_processed_refs == 0)
                        {
                            // give foreground GC a chance to run
                            *place = (uint8_t*)(ppslot + 1);
                            goto more_to_do;
                        }

                        }
                        );
                    //we are finished with this object
                    // Clear both slots of the record; when they are popped later they
                    // are null and skipped by the `if (oo)` check at the top of the loop.
                    *place = 0;
                    *(place+1) = 0;

                more_to_do:;
                }
                else
                {
                    dprintf (3,("background mark stack overflow for object %zx ", (size_t)oo));
#ifdef USE_REGIONS
                    set_background_overflow_p (oo);
#else //USE_REGIONS
                    background_min_overflow_address = min (background_min_overflow_address, oo);
                    background_max_overflow_address = max (background_max_overflow_address, oo);
#endif //USE_REGIONS
                }
            }
        }

#ifdef COLLECTIBLE_CLASS
next_level:
#endif // COLLECTIBLE_CLASS
        // Called once per processed stack entry.
        allow_fgc();

        // Pop the next entry, or finish when the stack is empty.
        if (!(background_mark_stack_tos == background_mark_stack_array))
        {
            oo = *(--background_mark_stack_tos);
        }
        else
            break;
    }

    assert (background_mark_stack_tos == background_mark_stack_array);


}

// Background GC counterpart of the foreground marking routine. Unlike the
// foreground version it must not keep pointers into the interior of an object
// while calling background_mark_simple1, because an intervening foreground GC
// could move the object.
// This method assumes that o is in the [low, high[ range.
void
gc_heap::background_mark_simple (uint8_t* o THREAD_NUMBER_DCL)
{
#ifndef MULTIPLE_HEAPS
    const int thread = 0;
#endif //!MULTIPLE_HEAPS

    dprintf (3, ("bmarking %p", o));

    if (background_mark1 (o))
    {
        // Newly marked: account for its size, then mark through it if it can
        // hold references (or is collectible).
        //m_boundary (o);
        size_t marked_size = size (o);
        bpromoted_bytes (thread) += marked_size;

        if (contain_pointers_or_collectible (o))
        {
            background_mark_simple1 (o THREAD_NUMBER_ARG);
        }
    }

    allow_fgc();
}

// Background-marks o if it lies in
// [background_saved_lowest_address, background_saved_highest_address[;
// otherwise only logs non-null out-of-range values. Always returns o unchanged.
inline
uint8_t* gc_heap::background_mark_object (uint8_t* o THREAD_NUMBER_DCL)
{
    const bool in_saved_range =
        (o >= background_saved_lowest_address) && (o < background_saved_highest_address);

    if (in_saved_range)
    {
        background_mark_simple (o THREAD_NUMBER_ARG);
    }
    else if (o != nullptr)
    {
        dprintf (3, ("or-%p", o));
    }

    return o;
}

// Promote callback used while a concurrent (background) GC is marking
// (asserts settings.concurrent): background-marks the object referenced by *ppObject.
//
// ppObject: slot holding the reference to promote.
// sc:       scan context; not referenced directly by this function's own code.
//           NOTE(review): THREAD_NUMBER_FROM_CONTEXT and HEAP_FROM_THREAD are macros
//           defined elsewhere; presumably they introduce the `thread` and `hpt`
//           locals used below (possibly from sc) - confirm.
// flags:    if GC_CALL_INTERIOR is set, the value may point into the middle of an
//           object and is resolved to the containing object with find_object.
//
// The reference is ignored when it is outside the find-object range, outside the
// owning heap's saved background range, cannot be resolved to an object, or (with
// conservative GC enabled) refers to a free object.
void gc_heap::background_promote (Object** ppObject, ScanContext* sc, uint32_t flags)
{
    UNREFERENCED_PARAMETER(sc);
    //in order to save space on the array, mark the object,
    //knowing that it will be visited later
    assert (settings.concurrent);

    THREAD_NUMBER_FROM_CONTEXT;
#ifndef MULTIPLE_HEAPS
    const int thread = 0;
#endif //!MULTIPLE_HEAPS

    uint8_t* o = (uint8_t*)*ppObject;

    if (!is_in_find_object_range (o))
    {
        return;
    }

#ifdef DEBUG_DestroyedHandleValue
    // we can race with destroy handle during concurrent scan
    if (o == (uint8_t*)DEBUG_DestroyedHandleValue)
        return;
#endif //DEBUG_DestroyedHandleValue

    HEAP_FROM_THREAD;

    // hp is the heap that owns o; its saved background range decides whether o
    // takes part in this background GC at all.
    gc_heap* hp = gc_heap::heap_of (o);

    if ((o < hp->background_saved_lowest_address) || (o >= hp->background_saved_highest_address))
    {
        return;
    }

    // Interior pointer: map it back to the start of the containing object.
    if (flags & GC_CALL_INTERIOR)
    {
        o = hp->find_object (o);
        if (o == 0)
            return;
    }

#ifdef FEATURE_CONSERVATIVE_GC
    // For conservative GC, a value on stack may point to middle of a free object.
    // In this case, we don't need to promote the pointer.
    if (GCConfig::GetConservativeGC() && ((CObjectHeader*)o)->IsFree())
    {
        return;
    }
#endif //FEATURE_CONSERVATIVE_GC

#ifdef _DEBUG
    ((CObjectHeader*)o)->Validate();
#endif //_DEBUG

    //needs to be called before the marking because it is possible for a foreground
    //gc to take place during the mark and move the object
    STRESS_LOG3(LF_GC|LF_GCROOTS, LL_INFO1000000, "    GCHeap::Promote: Promote GC Root *%p = %p MT = %pT", ppObject, o, o ? ((Object*) o)->GetGCSafeMethodTable() : NULL);

    // The marking is done through hpt, not through the owning heap hp.
    hpt->background_mark_simple (o THREAD_NUMBER_ARG);
}

// Used by the ephemeral collection to scan the local background structures
// containing references: the c_mark_list and the background mark stack.
//
// fn:  callback invoked on every entry (as an Object** slot).
// hn:  stored into the scan context as thread_number (thread_count is set to n_heaps).
// pSC: scan context to pass to fn; a local one is used when it is null.
//
// When fn is GCHeap::Relocate, partial mark records on the background mark stack are
// fixed up so that the saved position keeps the same offset into the relocated parent.
void
gc_heap::scan_background_roots (promote_func* fn, int hn, ScanContext *pSC)
{
    ScanContext sc;
    if (pSC == 0)
        pSC = &sc;

    pSC->thread_number = hn;
    pSC->thread_count = n_heaps;

    BOOL relocate_p = (fn == &GCHeap::Relocate);

    dprintf (3, ("Scanning background mark list"));

    //scan mark_list
    size_t mark_list_finger = 0;
    while (mark_list_finger < c_mark_list_index)
    {
        uint8_t** o = &c_mark_list [mark_list_finger];
        if (!relocate_p)
        {
            // We may not be able to calculate the size during relocate as POPO
            // may have written over the object.
            size_t s = size (*o);
            assert (Align (s) >= Align (min_obj_size));
            dprintf(3,("background root %zx", (size_t)*o));
        }
        (*fn) ((Object**)o, pSC, 0);
        mark_list_finger++;
    }

    //scan the mark stack
    dprintf (3, ("Scanning background mark stack"));

    uint8_t** finger = background_mark_stack_array;
    while (finger < background_mark_stack_tos)
    {
        if ((finger + 1) < background_mark_stack_tos)
        {
            // We need to check for the partial mark case here.
            // A partial mark record occupies two slots: *finger is a position inside
            // the parent object and *(finger + 1) is the parent pointer with its low
            // bit set.
            uint8_t* parent_obj = *(finger + 1);
            if ((size_t)parent_obj & 1)
            {
                uint8_t* place = *finger;
                size_t place_offset = 0;
                uint8_t* real_parent_obj = (uint8_t*)((size_t)parent_obj & ~1);

                if (relocate_p)
                {
                    // Temporarily store the untagged parent so fn sees a real object
                    // pointer, let fn update it in place, then rebuild both slots:
                    // same offset into the (possibly moved) parent, and the tag restored.
                    *(finger + 1) = real_parent_obj;
                    place_offset = place - real_parent_obj;
                    dprintf(3,("relocating background root %zx", (size_t)real_parent_obj));
                    (*fn) ((Object**)(finger + 1), pSC, 0);
                    real_parent_obj = *(finger + 1);
                    *finger = real_parent_obj + place_offset;
                    *(finger + 1) = (uint8_t*)((size_t)real_parent_obj | 1);
                    dprintf(3,("roots changed to %p, %p", *finger, *(finger + 1)));
                }
                else
                {
                    // Report the untagged parent through a temporary so the tagged
                    // stack slot itself is left untouched.
                    uint8_t** temp = &real_parent_obj;
                    dprintf(3,("marking background root %zx", (size_t)real_parent_obj));
                    (*fn) ((Object**)temp, pSC, 0);
                }

                // Both slots of the record have been handled.
                finger += 2;
                continue;
            }
        }
        // Plain entry: report the stack slot itself.
        dprintf(3,("background root %zx", (size_t)*finger));
        (*fn) ((Object**)finger, pSC, 0);
        finger++;
    }
}

// Replaces the background mark stack with a new array of new_size entries, but only
// when new_size exceeds the current length by more than half of the current length.
// The old contents are not copied; the top-of-stack is reset to the start of the
// new array. If the allocation fails the existing stack is kept as is.
void gc_heap::grow_bgc_mark_stack (size_t new_size)
{
    const size_t current_length = background_mark_stack_array_length;

    if (!(current_length < new_size))
    {
        return;
    }

    if (!((new_size - current_length) > (current_length / 2)))
    {
        // Not enough of an increase to be worth reallocating.
        return;
    }

    dprintf (2, ("h%d: ov grow to %zd", heap_number, new_size));

    uint8_t** grown_array = new (nothrow) uint8_t* [new_size];
    if (grown_array == nullptr)
    {
        return;
    }

    delete [] background_mark_stack_array;
    background_mark_stack_array = grown_array;
    background_mark_stack_array_length = new_size;
    background_mark_stack_tos = background_mark_stack_array;
}

void gc_heap::check_bgc_mark_stack_length()
{
    if ((settings.condemned_generation < (max_generation - 1)) || gc_heap::background_running_p())
        return;

    size_t total_heap_size = get_total_heap_size();

    if (total_heap_size < ((size_t)4*1024*1024*1024))
        return;

#ifdef MULTIPLE_HEAPS
    int total_heaps = n_heaps;
#else
    int total_heaps = 1;
#endif //MULTIPLE_HEAPS
    size_t size_based_on_heap = total_heap_size / (size_t)(100 * 100 * total_heaps * sizeof (uint8_t*));

    size_t new_size = max (background_mark_stack_array_length, size_based_on_heap);

    grow_bgc_mark_stack (new_size);
}

// Returns the address at which background overflow processing of seg must stop:
// heap_segment_allocated (seg), except that without regions, during concurrent
// processing of the saved overflow ephemeral segment, it is
// background_min_soh_overflow_address.
uint8_t* gc_heap::background_seg_end (heap_segment* seg, BOOL concurrent_p)
{
#ifndef USE_REGIONS
    const bool on_saved_ephemeral_seg = (concurrent_p && (seg == saved_overflow_ephemeral_seg));
    if (on_saved_ephemeral_seg)
    {
        // for now we stop at where gen1 started when we started processing
        return background_min_soh_overflow_address;
    }
#endif //!USE_REGIONS

    return heap_segment_allocated (seg);
}

// Returns the address of the first object to examine in seg when processing
// background mark overflow starting from min_add.
// With regions this is always heap_segment_mem (seg). Otherwise, for small object
// segments containing min_add, the start is chosen by the checks below; in all
// other cases it is the larger of heap_segment_mem (seg) and min_add.
uint8_t* gc_heap::background_first_overflow (uint8_t* min_add,
                                          heap_segment* seg,
                                          BOOL concurrent_p,
                                          BOOL small_object_p)
{
#ifdef USE_REGIONS
    return heap_segment_mem (seg);
#else
    if (small_object_p && in_range_for_segment (min_add, seg))
    {
        // min_add was the beginning of gen1 when we did the concurrent
        // overflow. Now we could be in a situation where min_add is
        // actually the same as allocated for that segment (because
        // we expanded heap), in which case we can not call
        // find first on this address or we will AV.
        if (min_add >= heap_segment_allocated (seg))
        {
            return min_add;
        }

        if (concurrent_p &&
            ((seg == saved_overflow_ephemeral_seg) && (min_add >= background_min_soh_overflow_address)))
        {
            return background_min_soh_overflow_address;
        }

        return find_first_object (min_add, heap_segment_mem (seg));
    }

    return max (heap_segment_mem (seg), min_add);
#endif //USE_REGIONS
}

// Walks heap segments object by object and, for every object that is already
// background-marked and can hold references (or is collectible), marks through its
// references with background_mark_object.
//
// min_add / max_add: address range to process. With USE_REGIONS these are replaced
//   per region: [heap_segment_mem, heap_segment_allocated] for regions whose overflow
//   flag is set (the flag is cleared here), and 0/0 for all other regions.
// concurrent_p: when TRUE, only this heap is processed, allow_fgc() is called after
//   each object, objects in non-small-object segments are inspected between
//   bgc_mark_set / bgc_mark_done, and (with USE_REGIONS) processing starts at
//   max_generation.
//
// The number of objects marked through is reported per generation via
// fire_overflow_event and then reset.
void gc_heap::background_process_mark_overflow_internal (uint8_t* min_add, uint8_t* max_add,
                                                         BOOL concurrent_p)
{
    if (concurrent_p)
    {
        current_bgc_state = bgc_overflow_soh;
    }

    size_t total_marked_objects = 0;

#ifdef MULTIPLE_HEAPS
    int thread = heap_number;
#endif //MULTIPLE_HEAPS

    int start_gen_idx = get_start_generation_index();
#ifdef USE_REGIONS
    if (concurrent_p)
        start_gen_idx = max_generation;
#endif //USE_REGIONS

    exclusive_sync* loh_alloc_lock = 0;

#ifndef USE_REGIONS
    dprintf (2,("Processing Mark overflow [%zx %zx]", (size_t)min_add, (size_t)max_add));
#endif
#ifdef MULTIPLE_HEAPS
    // We don't have each heap scan all heaps concurrently because we are worried about
    // multiple threads calling things like find_first_object.
    int h_start = (concurrent_p ? heap_number : 0);
    int h_end = (concurrent_p ? (heap_number + 1) : n_heaps);
    for (int hi = h_start; hi < h_end; hi++)
    {
        // Non-concurrent: visit all heaps, starting with this one (hi == 0) and
        // wrapping around.
        gc_heap*  hp = (concurrent_p ? this : g_heaps [(heap_number + hi) % n_heaps]);

#else
    {
        // NOTE(review): hp is null in the single-heap build; the hp-> accesses below
        // presumably resolve to static members in that configuration - confirm.
        gc_heap*  hp = 0;

#endif //MULTIPLE_HEAPS
        BOOL small_object_segments = TRUE;
        loh_alloc_lock = hp->bgc_alloc_lock;

        for (int i = start_gen_idx; i < total_generation_count; i++)
        {
            int align_const = get_alignment_constant (small_object_segments);
            generation* gen = hp->generation_of (i);
            heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));
            _ASSERTE(seg != NULL);

            uint8_t* current_min_add = min_add;
            uint8_t* current_max_add = max_add;

            while (seg)
            {
#ifdef USE_REGIONS
                if (heap_segment_overflow_p (seg))
                {
                    // This region recorded an overflow: clear the flag and process
                    // the whole region.
                    seg->flags &= ~heap_segment_flags_overflow;
                    current_min_add = heap_segment_mem (seg);
                    current_max_add = heap_segment_allocated (seg);
                    dprintf (2,("Processing Mark overflow [%zx %zx]", (size_t)current_min_add, (size_t)current_max_add));
                }
                else
                {
                    // No overflow in this region: a zero max address makes the
                    // `o <= current_max_add` test below fail for any non-null o.
                    current_min_add = current_max_add = 0;
                }
#endif //USE_REGIONS
                uint8_t* o = hp->background_first_overflow (current_min_add, seg, concurrent_p, small_object_segments);

                while ((o < hp->background_seg_end (seg, concurrent_p)) && (o <= current_max_add))
                {
                    dprintf (3, ("considering %zx", (size_t)o));

                    size_t s;

                    if (concurrent_p && !small_object_segments)
                    {
                        // Bracket the inspection of this object with bgc_mark_set /
                        // bgc_mark_done (see below). A free object's size is taken
                        // from unused_array_size rather than size.
                        loh_alloc_lock->bgc_mark_set (o);

                        if (((CObjectHeader*)o)->IsFree())
                        {
                            s = unused_array_size (o);
                        }
                        else
                        {
                            s = size (o);
                        }
                    }
                    else
                    {
                        s = size (o);
                    }

                    // Only objects that are already marked need their references
                    // marked through.
                    if (background_object_marked (o, FALSE) && contain_pointers_or_collectible (o))
                    {
                        total_marked_objects++;
                        go_through_object_cl (method_table(o), o, s, poo,
                                              uint8_t* oo = *poo;
                                              background_mark_object (oo THREAD_NUMBER_ARG);
                                             );
                    }

                    if (concurrent_p && !small_object_segments)
                    {
                        loh_alloc_lock->bgc_mark_done ();
                    }

                    // Advance to the next object in the segment.
                    o = o + Align (s, align_const);

                    if (concurrent_p)
                    {
                        allow_fgc();
                    }
                }

#ifdef USE_REGIONS
                if (current_max_add != 0)
#endif //USE_REGIONS
                {
                    dprintf (2, ("went through overflow objects in segment %p (%d) (so far %zd marked)",
                        heap_segment_mem (seg), (small_object_segments ? 0 : 1), total_marked_objects));
                }
#ifndef USE_REGIONS
                // During concurrent processing, stop at the saved overflow ephemeral
                // segment instead of continuing to the segments after it.
                if (concurrent_p && (seg == hp->saved_overflow_ephemeral_seg))
                {
                    break;
                }
#endif //!USE_REGIONS
                seg = heap_segment_next_in_range (seg);
            }

            if (concurrent_p)
            {
                current_bgc_state = bgc_overflow_uoh;
            }

            dprintf (2, ("h%d: SOH: ov-mo: %zd", heap_number, total_marked_objects));
            fire_overflow_event (min_add, max_add, total_marked_objects, i);
            // From generation soh_gen2 onwards, the remaining generations are treated
            // as non-small-object segments.
            if (i >= soh_gen2)
            {
                concurrent_print_time_delta (concurrent_p ? "Cov SOH" : "Nov SOH");
                small_object_segments = FALSE;
            }

            total_marked_objects = 0;
        }
    }
}

// Processes any pending background mark stack overflow.
//
// concurrent_p: TRUE  - a single processing pass is made. Without regions, the
//                       current ephemeral segment and gen1 start are saved first so
//                       the remaining part can be processed later.
//               FALSE - on the first such call, the range saved for the ephemeral
//                       segment is merged back into the overflow state, and
//                       processing repeats (goto recheck) until no overflow is left.
//
// Returns TRUE if overflow was found (and processed), FALSE otherwise.
BOOL gc_heap::background_process_mark_overflow (BOOL concurrent_p)
{
    BOOL grow_mark_array_p = TRUE;

    if (concurrent_p)
    {
        assert (!processed_eph_overflow_p);
#ifndef USE_REGIONS
        if ((background_max_overflow_address != 0) &&
            (background_min_overflow_address != MAX_PTR))
        {
            // We have overflow to process but we know we can't process the ephemeral generations
            // now (we actually could process till the current gen1 start but since we are going to
            // make overflow per segment, for now I'll just stop at the saved gen1 start.
            saved_overflow_ephemeral_seg = ephemeral_heap_segment;
            background_max_soh_overflow_address = heap_segment_reserved (saved_overflow_ephemeral_seg);
            background_min_soh_overflow_address = generation_allocation_start (generation_of (max_generation - 1));
        }
#endif //!USE_REGIONS
    }
    else
    {
#ifndef USE_REGIONS
        assert ((saved_overflow_ephemeral_seg == 0) ||
                ((background_max_soh_overflow_address != 0) &&
                 (background_min_soh_overflow_address != MAX_PTR)));
#endif //!USE_REGIONS

        if (!processed_eph_overflow_p)
        {
            // if there was no more overflow we just need to process what we didn't process
            // on the saved ephemeral segment.
#ifdef USE_REGIONS
            if (!background_overflow_p)
#else
            if ((background_max_overflow_address == 0) && (background_min_overflow_address == MAX_PTR))
#endif //USE_REGIONS
            {
                dprintf (2, ("final processing mark overflow - no more overflow since last time"));
                // No new overflow was recorded, so don't grow the mark stack for the
                // first pass below.
                grow_mark_array_p = FALSE;
            }
            // Force one processing pass: with regions by raising the flag, otherwise
            // by widening the overflow range with the saved ephemeral range.
#ifdef USE_REGIONS
            background_overflow_p = TRUE;
#else
            background_min_overflow_address = min (background_min_overflow_address,
                                                background_min_soh_overflow_address);
            background_max_overflow_address = max (background_max_overflow_address,
                                                background_max_soh_overflow_address);
#endif //!USE_REGIONS
            processed_eph_overflow_p = TRUE;
        }
    }

    BOOL  overflow_p = FALSE;
recheck:
#ifdef USE_REGIONS
    if (background_overflow_p)
#else
    if ((! ((background_max_overflow_address == 0)) ||
         ! ((background_min_overflow_address == MAX_PTR))))
#endif
    {
        overflow_p = TRUE;

        if (grow_mark_array_p)
        {
            // Try to grow the array.
            // Aim for double the current length (at least MARK_STACK_INITIAL_LENGTH);
            // once that exceeds 100KB worth of `mark` entries, cap it at a tenth of
            // the total heap size.
            size_t new_size = max ((size_t)MARK_STACK_INITIAL_LENGTH, 2*background_mark_stack_array_length);

            if ((new_size * sizeof(mark)) > 100*1024)
            {
                size_t new_max_size = (get_total_heap_size() / 10) / sizeof(mark);

                new_size = min(new_max_size, new_size);
            }

            grow_bgc_mark_stack (new_size);
        }
        else
        {
            // Growth was skipped for this pass only; allow it again on later rechecks.
            grow_mark_array_p = TRUE;
        }

        // Snapshot and reset the recorded overflow state before processing; overflow
        // recorded while processing is picked up by the recheck (non-concurrent case).
#ifdef USE_REGIONS
        uint8_t*  min_add = 0;
        uint8_t*  max_add = 0;
        background_overflow_p = FALSE;
#else
        uint8_t*  min_add = background_min_overflow_address;
        uint8_t*  max_add = background_max_overflow_address;

        background_max_overflow_address = 0;
        background_min_overflow_address = MAX_PTR;
#endif

        background_process_mark_overflow_internal (min_add, max_add, concurrent_p);
        if (!concurrent_p)
        {
            goto recheck;
        }
    }

    return overflow_p;
}
#endif //BACKGROUND_GC

// Marks every object referenced by oo. When COLLECTIBLE_CLASS is defined and
// mark_class_object_p is set, the class object of a collectible oo is marked too.
// Does nothing if oo has no references and no class object needs marking.
inline
void gc_heap::mark_through_object (uint8_t* oo, BOOL mark_class_object_p THREAD_NUMBER_DCL)
{
#ifdef COLLECTIBLE_CLASS
    BOOL to_mark_class_object = (mark_class_object_p && (is_collectible(oo)));
#else //COLLECTIBLE_CLASS
    UNREFERENCED_PARAMETER(mark_class_object_p);
    BOOL to_mark_class_object = FALSE;
#endif //COLLECTIBLE_CLASS

    if (!((contain_pointers (oo)) || to_mark_class_object))
    {
        return;
    }

    dprintf(3,( "Marking through %zx", (size_t)oo));
    size_t oo_size = size (oo);

#ifdef COLLECTIBLE_CLASS
    if (to_mark_class_object)
    {
        uint8_t* class_obj = get_class_object (oo);
        mark_object (class_obj THREAD_NUMBER_ARG);
    }
#endif //COLLECTIBLE_CLASS

    if (contain_pointers (oo))
    {
        go_through_object_nostart (method_table(oo), oo, oo_size, po,
                            uint8_t* child_ref = *po;
                            mark_object (child_ref THREAD_NUMBER_ARG);
                            );
    }
}

// Returns the sum of generation_sizes over generations max_generation and above,
// across all heaps.
size_t gc_heap::get_total_heap_size()
{
    size_t heap_bytes = 0;

    // It's correct to start from max_generation for this method because
    // generation_sizes will return all SOH sizes when passed max_generation.
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        gc_heap* cur_heap = gc_heap::g_heaps [heap_idx];

        int gen_idx = max_generation;
        while (gen_idx < total_generation_count)
        {
            heap_bytes += cur_heap->generation_sizes (cur_heap->generation_of (gen_idx));
            gen_idx++;
        }
    }
#else
    int gen_idx = max_generation;
    while (gen_idx < total_generation_count)
    {
        heap_bytes += generation_sizes (generation_of (gen_idx));
        gen_idx++;
    }
#endif //MULTIPLE_HEAPS

    return heap_bytes;
}

// Returns the free list space plus free object space, summed over every
// generation of every heap.
size_t gc_heap::get_total_fragmentation()
{
    size_t fragmentation_sum = 0;

#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        gc_heap* cur_heap = gc_heap::g_heaps[heap_idx];
#else //MULTIPLE_HEAPS
    {
        gc_heap* cur_heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        int gen_idx = 0;
        while (gen_idx < total_generation_count)
        {
            generation* cur_gen = cur_heap->generation_of (gen_idx);
            fragmentation_sum += (generation_free_list_space (cur_gen) + generation_free_obj_space (cur_gen));
            gen_idx++;
        }
    }

    return fragmentation_sum;
}

// Returns the free list space plus free object space of generation gen_number,
// summed over all heaps.
size_t gc_heap::get_total_gen_fragmentation (int gen_number)
{
    size_t fragmentation_sum = 0;

#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        gc_heap* cur_heap = gc_heap::g_heaps[heap_idx];
#else //MULTIPLE_HEAPS
    {
        gc_heap* cur_heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        generation* cur_gen = cur_heap->generation_of (gen_number);
        fragmentation_sum += (generation_free_list_space (cur_gen) + generation_free_obj_space (cur_gen));
    }

    return fragmentation_sum;
}

#ifdef USE_REGIONS
int gc_heap::get_total_new_gen0_regions_in_plns ()
{
    int total_new_gen0_regions_in_plns = 0;

#ifdef MULTIPLE_HEAPS
    for (int hn = 0; hn < gc_heap::n_heaps; hn++)
    {
        gc_heap* hp = gc_heap::g_heaps[hn];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        total_new_gen0_regions_in_plns += hp->new_gen0_regions_in_plns;
    }

    return total_new_gen0_regions_in_plns;
}

int gc_heap::get_total_new_regions_in_prr ()
{
    int total_new_regions_in_prr = 0;

#ifdef MULTIPLE_HEAPS
    for (int hn = 0; hn < gc_heap::n_heaps; hn++)
    {
        gc_heap* hp = gc_heap::g_heaps[hn];
#else //MULTIPLE_HEAPS
        {
            gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
            total_new_regions_in_prr += hp->new_regions_in_prr;
        }

        return total_new_regions_in_prr;
}

int gc_heap::get_total_new_regions_in_threading ()
{
    int total_new_regions_in_threading = 0;

#ifdef MULTIPLE_HEAPS
    for (int hn = 0; hn < gc_heap::n_heaps; hn++)
    {
        gc_heap* hp = gc_heap::g_heaps[hn];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        total_new_regions_in_threading += hp->new_regions_in_threading;
    }

    return total_new_regions_in_threading;
}
#endif //USE_REGIONS

// Returns estimated_reclaim (gen_number) summed over all heaps.
size_t gc_heap::get_total_gen_estimated_reclaim (int gen_number)
{
    size_t reclaim_sum = 0;

#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        gc_heap* cur_heap = gc_heap::g_heaps[heap_idx];
#else //MULTIPLE_HEAPS
    {
        gc_heap* cur_heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        reclaim_sum += cur_heap->estimated_reclaim (gen_number);
    }

    return reclaim_sum;
}

// Returns generation_size() for the given generation, summed over every heap
// when MULTIPLE_HEAPS is defined.
size_t gc_heap::get_total_gen_size (int gen_number)
{
#ifdef MULTIPLE_HEAPS
    size_t total_size = 0;

    for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
    {
        total_size += gc_heap::g_heaps[heap_index]->generation_size (gen_number);
    }

    return total_size;
#else
    return generation_size (gen_number);
#endif //MULTIPLE_HEAPS
}

// Returns the number of bytes committed on this heap: for every generation from
// get_start_generation_index() up to total_generation_count, the distance from
// each segment's start to its committed mark is accumulated. With USE_REGIONS
// the committed size held in the free region lists is added on top.
size_t gc_heap::committed_size()
{
    const size_t kB = 1024;
    size_t total_committed = 0;

    for (int gen_idx = get_start_generation_index(); gen_idx < total_generation_count; gen_idx++)
    {
        size_t gen_committed = 0;
        size_t gen_allocated = 0;

        for (heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (gen_idx)));
             seg != nullptr;
             seg = heap_segment_next (seg))
        {
            // The base address sizes are measured from depends on the build flavour.
#ifdef USE_REGIONS
            uint8_t* start = get_region_start (seg);
#else
            uint8_t* start = (uint8_t*)seg;
#endif //USE_REGIONS

            gen_committed += heap_segment_committed (seg) - start;
            gen_allocated += heap_segment_allocated (seg) - start;
        }

        // gen_allocated is tracked only for this diagnostic output.
        dprintf (3, ("h%d committed in gen%d %zdkB, allocated %zdkB, committed-allocated %zdkB", heap_number, gen_idx, gen_committed/kB, gen_allocated/kB, (gen_committed - gen_allocated)/kB));

        total_committed += gen_committed;
    }

#ifdef USE_REGIONS
    size_t committed_in_free = 0;

    for (int kind = basic_free_region; kind < count_free_region_kinds; kind++)
    {
        committed_in_free += free_regions[kind].get_size_committed_in_free();
    }

    dprintf (3, ("h%d committed in free %zdkB", heap_number, committed_in_free/kB));

    total_committed += committed_in_free;
#endif //USE_REGIONS

    return total_committed;
}

// Returns committed_size(), summed over every heap when MULTIPLE_HEAPS is
// defined.
size_t gc_heap::get_total_committed_size()
{
#ifdef MULTIPLE_HEAPS
    size_t committed_total = 0;

    for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
    {
        committed_total += gc_heap::g_heaps [heap_index]->committed_size();
    }

    return committed_total;
#else
    return committed_size();
#endif //MULTIPLE_HEAPS
}

// Walks the segments of generation gen_number on this heap and measures them
// from each segment's start address.
//   gen_number - generation whose segments are walked.
//   allocated  - out: receives the summed (allocated - start) bytes.
// Returns the summed (committed - start) bytes.
size_t gc_heap::uoh_committed_size (int gen_number, size_t* allocated)
{
    size_t committed_bytes = 0;
    size_t allocated_bytes = 0;

    for (heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (gen_number)));
         seg != nullptr;
         seg = heap_segment_next (seg))
    {
#ifdef USE_REGIONS
        uint8_t* start = get_region_start (seg);
#else
        uint8_t* start = (uint8_t*)seg;
#endif //USE_REGIONS

        committed_bytes += heap_segment_committed (seg) - start;
        allocated_bytes += heap_segment_allocated (seg) - start;
    }

    *allocated = allocated_bytes;
    return committed_bytes;
}

// Forwards to GCToOSInterface::GetMemoryStatus, filling the three out
// parameters. When is_restricted_physical_mem is set, total_physical_mem is
// passed as the first argument; otherwise 0 is passed.
void gc_heap::get_memory_info (uint32_t* memory_load,
                               uint64_t* available_physical,
                               uint64_t* available_page_file)
{
    uint64_t restricted_limit = 0;
    if (is_restricted_physical_mem)
    {
        restricted_limit = total_physical_mem;
    }

    GCToOSInterface::GetMemoryStatus(restricted_limit, memory_load, available_physical, available_page_file);
}

// Repeatedly drains the mark queue and processes the recorded mark overflow
// range until no overflow range remains, trying to grow the mark stack on each
// round. Fires a GC_ROOT_OVERFLOW mark event if the promoted byte count changed.
// Returns TRUE if an overflow happened.
BOOL gc_heap::process_mark_overflow(int condemned_gen_number)
{
    size_t last_promoted_bytes = get_promoted_bytes();
    BOOL overflow_p = FALSE;

    while (true)
    {
        drain_mark_queue();

        // The overflow range is empty when it still holds the values it is
        // reset to below (max == 0 and min == MAX_PTR); in that case we are done.
        if ((max_overflow_address == 0) && (min_overflow_address == MAX_PTR))
        {
            break;
        }

        overflow_p = TRUE;

        // Try to grow the array.
        size_t new_size = max ((size_t)MARK_STACK_INITIAL_LENGTH, 2*mark_stack_array_length);

        // Once the array would exceed 100KB, cap it at a tenth of the total heap size.
        if ((new_size * sizeof(mark)) > 100*1024)
        {
            size_t new_max_size = (get_total_heap_size() / 10) / sizeof(mark);

            new_size = min(new_max_size, new_size);
        }

        // Only reallocate when the array would grow by more than half its current length.
        if (mark_stack_array_length < new_size)
        {
            size_t growth = new_size - mark_stack_array_length;
            if (growth > (mark_stack_array_length / 2))
            {
                mark* tmp = new (nothrow) mark [new_size];
                if (tmp)
                {
                    delete[] mark_stack_array;
                    mark_stack_array = tmp;
                    mark_stack_array_length = new_size;
                }
            }
        }

        // Snapshot the range and reset it before processing, so that any overflow
        // recorded during processing is picked up by the next round.
        uint8_t* min_add = min_overflow_address;
        uint8_t* max_add = max_overflow_address;
        max_overflow_address = 0;
        min_overflow_address = MAX_PTR;
        process_mark_overflow_internal (condemned_gen_number, min_add, max_add);
    }

    size_t current_promoted_bytes = get_promoted_bytes();
    if (current_promoted_bytes != last_promoted_bytes)
    {
        fire_mark_event (ETW::GC_ROOT_OVERFLOW, current_promoted_bytes, last_promoted_bytes);
    }

    return overflow_p;
}

void gc_heap::process_mark_overflow_internal (int condemned_gen_number,
                                              uint8_t* min_add, uint8_t* max_add)
{
#ifdef MULTIPLE_HEAPS
    int thread = heap_number;
#endif //MULTIPLE_HEAPS
    BOOL  full_p = (condemned_gen_number == max_generation);

    dprintf(3,("Processing Mark overflow [%zx %zx]", (size_t)min_add, (size_t)max_add));

    size_t obj_count = 0;

#ifdef MULTIPLE_HEAPS
    for (int hi = 0; hi < n_heaps; hi++)
    {
        gc_heap*  hp = g_heaps [(heap_number + hi) % n_heaps];

#else
    {
        gc_heap*  hp = 0;
#endif //MULTIPLE_HEAPS
        int gen_limit = full_p ? total_generation_count : condemned_gen_number + 1;

        for (int i = get_stop_generation_index (condemned_gen_number); i < gen_limit; i++)
        {
            generation* gen = hp->generation_of (i);
            heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));
            int align_const = get_alignment_constant (i < uoh_start_generation);

            _ASSERTE(seg != NULL);

            while (seg)
            {
                uint8_t*  o = max (heap_segment_mem (seg), min_add);
                uint8_t*  end = heap_segment_allocated (seg);

                while ((o < end) && (o <= max_add))
                {
                    assert ((min_add <= o) && (max_add >= o));
                    dprintf (3, ("considering %zx", (size_t)o));
                    if (marked (o))
                    {
                        mark_through_object (o, TRUE THREAD_NUMBER_ARG);
                        obj_count++;
                    }

                    o = o + Align (size (o), align_const);
                }

                seg = heap_segment_next_in_range (seg);
            }
        }
#ifndef MULTIPLE_HEAPS
        // we should have found at least one object
        assert (obj_count > 0);
#endif //MULTIPLE_HEAPS
    }
}

// Scanning for promotion for dependent handles needs special handling. Because the primary holds a strong
// reference to the secondary (when the primary itself is reachable) and this can cause a cascading series of
// promotions (the secondary of one handle is or promotes the primary of another) we might need to perform the
// promotion scan multiple times.
// This helper encapsulates the logic to complete all dependent handle promotions when running a server GC. It
// also has the effect of processing any mark stack overflow.

#ifdef MULTIPLE_HEAPS
// When multiple heaps are enabled we must utilize a more complex algorithm in order to keep all the GC
// worker threads synchronized. The algorithms are sufficiently divergent that we have different
// implementations based on whether MULTIPLE_HEAPS is defined or not.
//
// Define some static variables used for synchronization in the method below. These should really be defined
// locally but MSVC complains when the VOLATILE macro is expanded into an instantiation of the Volatile class.
//
// A note about the synchronization used within this method. Communication between the worker threads is
// achieved via two shared booleans (defined below). These both act as latches that are transitioned only from
// false -> true by unsynchronized code. They are only read or reset to false by a single thread under the
// protection of a join.
// Set by any worker whose GcDhUnpromotedHandlesExist() call reports unpromoted handles.
static VOLATILE(BOOL) s_fUnpromotedHandles = FALSE;
// Set on entry, and by any worker that saw a mark overflow or whose rescan reported a promotion.
static VOLATILE(BOOL) s_fUnscannedPromotions = FALSE;
// Written by the single thread inside the join; tells every worker whether to scan again or exit.
static VOLATILE(BOOL) s_fScanRequired;

// Multiple-heap version of scan_dependent_handles. Every GC worker thread runs this loop and joins the same
// number of times.
//   condemned_gen_number - passed through to process_mark_overflow.
//   sc                   - scan context handed to the GCScan::GcDh* helpers.
//   initial_scan_p       - when FALSE, the thread that terminates the loop also reconciles the mark overflow
//                          ranges across all heaps before the workers are restarted.
void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, BOOL initial_scan_p)
{
    // Whenever we call this method there may have been preceding object promotions. So set
    // s_fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set
    // based on how the scanning proceeded).
    s_fUnscannedPromotions = TRUE;

    // We don't know how many times we need to loop yet. In particular we can't base the loop condition on
    // the state of this thread's portion of the dependent handle table. That's because promotions on other
    // threads could cause handle promotions to become necessary here. Even if there are definitely no more
    // promotions possible in this thread's handles, we still have to stay in lock-step with those worker
    // threads that haven't finished yet (each GC worker thread has to join exactly the same number of times
    // as all the others or they'll get out of step).
    while (true)
    {
        // The various worker threads are all currently racing in this code. We need to work out if at least
        // one of them thinks it has work to do this cycle. Each thread needs to rescan its portion of the
        // dependent handle table when both of the following conditions apply:
        //  1) At least one (arbitrary) object might have been promoted since the last scan (because if this
        //     object happens to correspond to a primary in one of our handles we might potentially have to
        //     promote the associated secondary).
        //  2) The table for this thread has at least one handle with a secondary that isn't promoted yet.
        //
        // The first condition is represented by s_fUnscannedPromotions. This is always non-zero for the first
        // iteration of this loop (see comment above) and in subsequent cycles each thread updates this
        // whenever a mark stack overflow occurs or scanning their dependent handles results in a secondary
        // being promoted. This value is cleared back to zero in a synchronized fashion in the join that
        // follows below. Note that we can't read this outside of the join since on any iteration apart from
        // the first threads will be racing between reading this value and completing their previous
        // iteration's table scan.
        //
        // The second condition is tracked by the dependent handle code itself on a per worker thread basis
        // (and updated by the GcDhReScan() method). We call GcDhUnpromotedHandlesExist() on each thread to
        // determine the local value and collect the results into the s_fUnpromotedHandles variable in what is
        // effectively an OR operation. As per s_fUnscannedPromotions we can't read the final result until
        // we're safely joined.
        if (GCScan::GcDhUnpromotedHandlesExist(sc))
            s_fUnpromotedHandles = TRUE;

        drain_mark_queue();

        // Synchronize all the threads so we can read our state variables safely. The shared variable
        // s_fScanRequired, indicating whether we should scan the tables or terminate the loop, will be set by
        // a single thread inside the join.
        gc_t_join.join(this, gc_join_scan_dependent_handles);
        if (gc_t_join.joined())
        {
            // We're synchronized so it's safe to read our shared state variables. We update another shared
            // variable to indicate to all threads whether we'll be scanning for another cycle or terminating
            // the loop. We scan if there has been at least one object promotion since last time and at least
            // one thread has a dependent handle table with a potential handle promotion possible.
            s_fScanRequired = s_fUnscannedPromotions && s_fUnpromotedHandles;

            // Reset our shared state variables (ready to be set again on this scan or with a good initial
            // value for the next call if we're terminating the loop).
            s_fUnscannedPromotions = FALSE;
            s_fUnpromotedHandles = FALSE;

            if (!s_fScanRequired)
            {
                // We're terminating the loop. Perform any last operations that require single threaded access.
                if (!initial_scan_p)
                {
                    // On the second invocation we reconcile all mark overflow ranges across the heaps. This can help
                    // load balance if some of the heaps have an abnormally large workload.
                    uint8_t* all_heaps_max = 0;
                    uint8_t* all_heaps_min = MAX_PTR;
                    int i;
                    // First pass: compute the union of every heap's overflow range.
                    for (i = 0; i < n_heaps; i++)
                    {
                        if (all_heaps_max < g_heaps[i]->max_overflow_address)
                            all_heaps_max = g_heaps[i]->max_overflow_address;
                        if (all_heaps_min > g_heaps[i]->min_overflow_address)
                            all_heaps_min = g_heaps[i]->min_overflow_address;
                    }
                    // Second pass: give every heap that combined range.
                    for (i = 0; i < n_heaps; i++)
                    {
                        g_heaps[i]->max_overflow_address = all_heaps_max;
                        g_heaps[i]->min_overflow_address = all_heaps_min;
                    }
                }
            }

            dprintf(3, ("Starting all gc thread mark stack overflow processing"));
            gc_t_join.restart();
        }

        // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions
        // being visible. If there really was an overflow (process_mark_overflow returns true) then set the
        // global flag indicating that at least one object promotion may have occurred (the usual comment
        // about races applies). (Note it's OK to set this flag even if we're about to terminate the loop and
        // exit the method since we unconditionally set this variable on method entry anyway).
        if (process_mark_overflow(condemned_gen_number))
            s_fUnscannedPromotions = TRUE;

        // If we decided that no scan was required we can terminate the loop now.
        if (!s_fScanRequired)
            break;

        // Otherwise we must join with the other workers to ensure that all mark stack overflows have been
        // processed before we start scanning dependent handle tables (if overflows remain while we scan we
        // could miss noting the promotion of some primary objects).
        gc_t_join.join(this, gc_join_rescan_dependent_handles);
        if (gc_t_join.joined())
        {
            dprintf(3, ("Starting all gc thread for dependent handle promotion"));
            gc_t_join.restart();
        }

        // If the portion of the dependent handle table managed by this worker has handles that could still be
        // promoted perform a rescan. If the rescan resulted in at least one promotion note this fact since it
        // could require a rescan of handles on this or other workers.
        if (GCScan::GcDhUnpromotedHandlesExist(sc))
            if (GCScan::GcDhReScan(sc))
                s_fUnscannedPromotions = TRUE;
    }
}
#else //MULTIPLE_HEAPS
// Non-multiple heap version of scan_dependent_handles: much simpler without the need to keep multiple worker
// threads synchronized.
void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, BOOL initial_scan_p)
{
    UNREFERENCED_PARAMETER(initial_scan_p);

    // Whenever we call this method there may have been preceding object promotions. So set
    // fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set
    // based on the how the scanning proceeded).
    bool fUnscannedPromotions = true;

    // Loop until there are either no more dependent handles that can have their secondary promoted or we've
    // managed to perform a scan without promoting anything new.
    while (GCScan::GcDhUnpromotedHandlesExist(sc) && fUnscannedPromotions)
    {
        // On each iteration of the loop start with the assumption that no further objects have been promoted.
        fUnscannedPromotions = false;

        // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions
        // being visible. If there was an overflow (process_mark_overflow returned true) then additional
        // objects now appear to be promoted and we should set the flag.
        if (process_mark_overflow(condemned_gen_number))
            fUnscannedPromotions = true;

        // mark queue must be empty after process_mark_overflow
        mark_queue.verify_empty();

        // Perform the scan and set the flag if any promotions resulted.
        if (GCScan::GcDhReScan(sc))
            fUnscannedPromotions = true;
    }

    // Process any mark stack overflow that may have resulted from scanning handles (or if we didn't need to
    // scan any handles at all this is the processing of overflows that may have occurred prior to this method
    // invocation).
    process_mark_overflow(condemned_gen_number);
}
#endif //MULTIPLE_HEAPS

// Returns the aligned size of the object at the generation's allocation start.
// With USE_REGIONS this is always 0.
size_t gc_heap::get_generation_start_size (int gen_number)
{
#ifndef USE_REGIONS
    uint8_t* gen_start_obj = generation_allocation_start (generation_of (gen_number));
    int align_const = get_alignment_constant (gen_number <= max_generation);

    return Align (size (gen_start_obj), align_const);
#else
    return 0;
#endif //!USE_REGIONS
}

// Returns n_heaps when MULTIPLE_HEAPS is defined, otherwise 1.
inline
int gc_heap::get_num_heaps()
{
#ifndef MULTIPLE_HEAPS
    return 1;
#else
    return n_heaps;
#endif //!MULTIPLE_HEAPS
}

// Returns TRUE as soon as any heap satisfies either of:
//   - threshold is larger than the computed size of the next older generation
//     (capped at max_generation), or
//   - the heap's total_promoted_bytes exceeds threshold.
// Returns FALSE if no heap does.
BOOL gc_heap::decide_on_promotion_surv (size_t threshold)
{
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
        int i = 0;
#endif //MULTIPLE_HEAPS
        int older_gen_number = min ((int)(settings.condemned_generation + 1), (int)max_generation);
        dynamic_data* older_dd = hp->dynamic_data_of (older_gen_number);

        // Current size plus what has been allocated out of the budget so far.
        size_t older_gen_size = dd_current_size (older_dd) + (dd_desired_allocation (older_dd) - dd_new_allocation (older_dd));
        size_t promoted = hp->total_promoted_bytes;

        // Evaluate the decision once so the log line and the branch cannot disagree.
        bool promote_p = (threshold > (older_gen_size)) || (promoted > threshold);

        dprintf (6666, ("h%d promotion threshold: %zd, promoted bytes: %zd size n+1: %zd -> %s",
            i, threshold, promoted, older_gen_size,
            (promote_p ? "promote" : "don't promote")));

        if (promote_p)
        {
            return TRUE;
        }
    }

    return FALSE;
}

// When FEATURE_EVENT_TRACE is defined and informational_event_enabled_p is set,
// fires a GCMarkWithType event carrying the bytes promoted since the last call.
//   root_type              - root kind reported with the event.
//   current_promoted_bytes - in/out: refreshed from get_promoted_bytes().
//   last_promoted_bytes    - in/out: baseline; advanced to the current value.
// Otherwise this does nothing and leaves both references untouched.
inline
void gc_heap::fire_mark_event (int root_type, size_t& current_promoted_bytes, size_t& last_promoted_bytes)
{
#ifdef FEATURE_EVENT_TRACE
    if (!informational_event_enabled_p)
    {
        return;
    }

    current_promoted_bytes = get_promoted_bytes();
    size_t root_promoted = current_promoted_bytes - last_promoted_bytes;

    dprintf (3, ("h%d marked root %s: %zd (%zd - %zd)",
        heap_number, str_root_kinds[root_type], root_promoted,
        current_promoted_bytes, last_promoted_bytes));
    FIRE_EVENT(GCMarkWithType, heap_number, root_type, root_promoted);

    last_promoted_bytes = current_promoted_bytes;
#endif // FEATURE_EVENT_TRACE
}

#ifdef FEATURE_EVENT_TRACE
// When informational_event_enabled_p is set, takes a new timestamp and records
// the time elapsed since last_mark_time.
//   mark_time         - out: elapsed time passed through limit_time_to_uint32.
//   current_mark_time - out: the timestamp just taken.
//   last_mark_time    - in/out: previous timestamp; advanced to the new one.
// Otherwise all three references are left untouched.
inline
void gc_heap::record_mark_time (uint64_t& mark_time,
                                uint64_t& current_mark_time,
                                uint64_t& last_mark_time)
{
    if (!informational_event_enabled_p)
    {
        return;
    }

    current_mark_time = GetHighPrecisionTimeStamp();

    uint64_t elapsed = current_mark_time - last_mark_time;
    mark_time = limit_time_to_uint32 (elapsed);
    dprintf (3, ("%zd - %zd = %zd",
        current_mark_time, last_mark_time, elapsed));

    last_mark_time = current_mark_time;
}
#endif // FEATURE_EVENT_TRACE

#ifdef USE_REGIONS
// Debug-only consistency check (the body is empty unless _DEBUG is defined).
// For every generation on every heap, walks the generation's regions and
// asserts that each basic-region entry in map_region_to_generation covered by
// the region agrees with the region's own generation number, plan generation
// number, swept-in-plan state and demoted flag. Read-only segments are skipped.
void gc_heap::verify_region_to_generation_map()
{
#ifdef _DEBUG
    for (int gen_number = soh_gen0; gen_number < total_generation_count; gen_number++)
    {
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            gc_heap* hp = g_heaps[i];
#else //MULTIPLE_HEAPS
        {
            gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
            generation *gen = hp->generation_of (gen_number);
            for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region))
            {
                if (heap_segment_read_only_p (region))
                {
                    // the region to generation map doesn't cover read only segments
                    continue;
                }

                // Map entries covered by this region: [region start, region reserved).
                size_t region_index_start = get_basic_region_index_for_address (get_region_start (region));
                size_t region_index_end = get_basic_region_index_for_address (heap_segment_reserved (region));

                // Generations above soh_gen2 are expected to be recorded as soh_gen2 on the region.
                int gen_num = min (gen_number, (int)soh_gen2);
                assert (gen_num == heap_segment_gen_num (region));

                int plan_gen_num = heap_segment_plan_gen_num (region);
                bool is_demoted = (region->flags & heap_segment_flags_demoted) != 0;
                bool is_sweep_in_plan = heap_segment_swept_in_plan (region);

                for (size_t region_index = region_index_start; region_index < region_index_end; region_index++)
                {
                    region_info region_info_bits = map_region_to_generation[region_index];
                    assert ((region_info_bits & RI_GEN_MASK) == gen_num);
                    assert ((region_info_bits >> RI_PLAN_GEN_SHR) == plan_gen_num);
                    assert (((region_info_bits & RI_SIP) != 0) == is_sweep_in_plan);
                    assert (((region_info_bits & RI_DEMOTED) != 0) == is_demoted);
                }
            }
        }
    }
#endif //_DEBUG
}

// recompute ephemeral range - it may have become too large because of temporary allocation
// and deallocation of regions
//
// Recomputes ephemeral_low/ephemeral_high and gc_low/gc_high.
//   condemned_gen_number - generation being collected. When it is >= soh_gen2 the gc range is first set
//                          to [g_gc_lowest_address, g_gc_highest_address].
//   end_of_gc_p          - when true, both the gc range and the ephemeral range are set to
//                          [g_gc_lowest_address, g_gc_highest_address] and no regions are walked.
// When end_of_gc_p is false, the gen0 and gen1 regions of every heap are walked: the ephemeral range is
// widened to cover each region, and the gc range is widened as well for regions whose generation is
// <= condemned_gen_number.
void gc_heap::compute_gc_and_ephemeral_range (int condemned_gen_number, bool end_of_gc_p)
{
    // Start from empty ranges (low above everything, high below everything).
    ephemeral_low = MAX_PTR;
    ephemeral_high = nullptr;
    gc_low = MAX_PTR;
    gc_high = nullptr;
    if (condemned_gen_number >= soh_gen2 || end_of_gc_p)
    {
        gc_low = g_gc_lowest_address;
        gc_high = g_gc_highest_address;
    }
    if (end_of_gc_p)
    {
#if 1
        // simple and safe value
        ephemeral_low = g_gc_lowest_address;
#else
        // conservative value - should still avoid changing
        // ephemeral bounds in the write barrier while app is running
        // scan our address space for a region that is either free
        // or in an ephemeral generation
        uint8_t* addr = g_gc_lowest_address;
        while (true)
        {
            heap_segment* region = get_region_info (addr);
            if (is_free_region (region))
                break;
            if (heap_segment_gen_num (region) <= soh_gen1)
                break;
            addr += ((size_t)1) << min_segment_size_shr;
        }
        ephemeral_low = addr;
#endif
        ephemeral_high = g_gc_highest_address;
    }
    else
    {
        // Only gen0 and gen1 contribute to the ephemeral range.
        for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++)
        {
#ifdef MULTIPLE_HEAPS
            for (int i = 0; i < n_heaps; i++)
            {
                gc_heap* hp = g_heaps[i];
#else //MULTIPLE_HEAPS
            {
                gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
                generation *gen = hp->generation_of (gen_number);
                for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region))
                {
                    // Widen the ephemeral range to cover [region start, region reserved].
                    ephemeral_low = min ((uint8_t*)ephemeral_low, get_region_start (region));
                    ephemeral_high = max ((uint8_t*)ephemeral_high, heap_segment_reserved (region));
                    if (gen_number <= condemned_gen_number)
                    {
                        // This generation is being collected, so it also widens the gc range.
                        gc_low = min (gc_low, get_region_start (region));
                        gc_high = max (gc_high, heap_segment_reserved (region));
                    }
                }
            }
        }
    }
    dprintf (2, ("ephemeral_low = %p, ephemeral_high = %p, gc_low = %p, gc_high = %p", (uint8_t*)ephemeral_low, (uint8_t*)ephemeral_high, gc_low, gc_high));
}
#endif //USE_REGIONS

void gc_heap::mark_phase (int condemned_gen_number)
{
    assert (settings.concurrent == FALSE);

    ScanContext sc;
    sc.thread_number = heap_number;
    sc.thread_count = n_heaps;
    sc.promotion = TRUE;
    sc.concurrent = FALSE;

    dprintf (2, (ThreadStressLog::gcStartMarkMsg(), heap_number, condemned_gen_number));
    BOOL  full_p = (condemned_gen_number == max_generation);

    int gen_to_init = condemned_gen_number;
    if (condemned_gen_number == max_generation)
    {
        gen_to_init = total_generation_count - 1;
    }

    for (int gen_idx = 0; gen_idx <= gen_to_init; gen_idx++)
    {
        dynamic_data* dd = dynamic_data_of (gen_idx);
        dd_begin_data_size (dd) = generation_size (gen_idx) -
                                   dd_fragmentation (dd) -
#ifdef USE_REGIONS
                                   0;
#else
                                   get_generation_start_size (gen_idx);
#endif //USE_REGIONS
        dprintf (2, ("begin data size for gen%d is %zd", gen_idx, dd_begin_data_size (dd)));
        dd_survived_size (dd) = 0;
        dd_pinned_survived_size (dd) = 0;
        dd_artificial_pinned_survived_size (dd) = 0;
        dd_added_pinned_size (dd) = 0;
#ifdef SHORT_PLUGS
        dd_padding_size (dd) = 0;
#endif //SHORT_PLUGS
#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN)
        dd_num_npinned_plugs (dd) = 0;
#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN
    }

    if (gen0_must_clear_bricks > 0)
        gen0_must_clear_bricks--;

    size_t last_promoted_bytes = 0;
    size_t current_promoted_bytes = 0;
#if !defined(USE_REGIONS) || defined(_DEBUG)
    init_promoted_bytes();
#endif //!USE_REGIONS || _DEBUG
    reset_mark_stack();

#ifdef SNOOP_STATS
    memset (&snoop_stat, 0, sizeof(snoop_stat));
    snoop_stat.heap_index = heap_number;
#endif //SNOOP_STATS

#ifdef MH_SC_MARK
    if (full_p)
    {
        //initialize the mark stack
        for (int i = 0; i < max_snoop_level; i++)
        {
            ((uint8_t**)(mark_stack_array))[i] = 0;
        }

        mark_stack_busy() = 1;
    }
#endif //MH_SC_MARK

    static uint32_t num_sizedrefs = 0;

#ifdef MH_SC_MARK
    static BOOL do_mark_steal_p = FALSE;
#endif //MH_SC_MARK

#ifdef FEATURE_CARD_MARKING_STEALING
    reset_card_marking_enumerators();
#endif // FEATURE_CARD_MARKING_STEALING

#ifdef STRESS_REGIONS
    heap_segment* gen0_region = generation_start_segment (generation_of (0));
    while (gen0_region)
    {
        size_t gen0_region_size = heap_segment_allocated (gen0_region) - heap_segment_mem (gen0_region);

        if (gen0_region_size > 0)
        {
            if ((num_gen0_regions % pinning_seg_interval) == 0)
            {
                dprintf (REGIONS_LOG, ("h%d potentially creating pinning in region %zx",
                    heap_number, heap_segment_mem (gen0_region)));

                int align_const = get_alignment_constant (TRUE);
                // Pinning the first and the middle object in the region.
                uint8_t* boundary = heap_segment_mem (gen0_region);
                uint8_t* obj_to_pin = boundary;
                int num_pinned_objs = 0;
                while (obj_to_pin < heap_segment_allocated (gen0_region))
                {
                    if (obj_to_pin >= boundary && !((CObjectHeader*)obj_to_pin)->IsFree())
                    {
                        pin_by_gc (obj_to_pin);
                        num_pinned_objs++;
                        if (num_pinned_objs >= 2)
                            break;
                        boundary += (gen0_region_size / 2) + 1;
                    }
                    obj_to_pin += Align (size (obj_to_pin), align_const);
                }
            }
        }

        num_gen0_regions++;
        gen0_region = heap_segment_next (gen0_region);
    }
#endif //STRESS_REGIONS

#ifdef FEATURE_EVENT_TRACE
    static uint64_t current_mark_time = 0;
    static uint64_t last_mark_time = 0;
#endif //FEATURE_EVENT_TRACE

#ifdef USE_REGIONS
    special_sweep_p = false;
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
    gc_t_join.join(this, gc_join_begin_mark_phase);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
        maxgen_size_inc_p = false;

#ifdef USE_REGIONS
        region_count = global_region_allocator.get_used_region_count();
        grow_mark_list_piece();
        verify_region_to_generation_map();
        compute_gc_and_ephemeral_range (condemned_gen_number, false);
#endif //USE_REGIONS

        GCToEEInterface::BeforeGcScanRoots(condemned_gen_number, /* is_bgc */ false, /* is_concurrent */ false);

#ifdef FEATURE_SIZED_REF_HANDLES
        num_sizedrefs = GCToEEInterface::GetTotalNumSizedRefHandles();
#endif // FEATURE_SIZED_REF_HANDLES

#ifdef FEATURE_EVENT_TRACE
        informational_event_enabled_p = EVENT_ENABLED (GCMarkWithType);
        if (informational_event_enabled_p)
        {
            last_mark_time = GetHighPrecisionTimeStamp();
            // We may not have SizedRefs to mark so init it to 0.
            gc_time_info[time_mark_sizedref] = 0;
        }
#endif //FEATURE_EVENT_TRACE

#ifdef MULTIPLE_HEAPS
#ifdef MH_SC_MARK
        if (full_p)
        {
            size_t total_heap_size = get_total_heap_size();

            if (total_heap_size > (100 * 1024 * 1024))
            {
                do_mark_steal_p = TRUE;
            }
            else
            {
                do_mark_steal_p = FALSE;
            }
        }
        else
        {
            do_mark_steal_p = FALSE;
        }
#endif //MH_SC_MARK

        gc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    {
        //set up the mark lists from g_mark_list
        assert (g_mark_list);
#ifdef MULTIPLE_HEAPS
        mark_list_size = g_mark_list_total_size / n_heaps;
        mark_list = &g_mark_list [heap_number*mark_list_size];
#else
        mark_list = g_mark_list;
#endif //MULTIPLE_HEAPS
        //dont use the mark list for full gc
        //because multiple segments are more complex to handle and the list
        //is likely to overflow
        if (condemned_gen_number < max_generation)
            mark_list_end = &mark_list [mark_list_size-1];
        else
            mark_list_end = &mark_list [0];
        mark_list_index = &mark_list [0];

#ifdef USE_REGIONS
        if (g_mark_list_piece != nullptr)
        {
#ifdef MULTIPLE_HEAPS
            // two arrays with g_mark_list_piece_size entries per heap
            mark_list_piece_start = &g_mark_list_piece[heap_number * 2 * g_mark_list_piece_size];
            mark_list_piece_end = &mark_list_piece_start[g_mark_list_piece_size];
#endif //MULTIPLE_HEAPS
            survived_per_region = (size_t*)&g_mark_list_piece[heap_number * 2 * g_mark_list_piece_size];
            old_card_survived_per_region = (size_t*)&survived_per_region[g_mark_list_piece_size];
            size_t region_info_to_clear = region_count * sizeof (size_t);
            memset (survived_per_region, 0, region_info_to_clear);
            memset (old_card_survived_per_region, 0, region_info_to_clear);
        }
        else
        {
#ifdef MULTIPLE_HEAPS
            // disable use of mark list altogether
            mark_list_piece_start = nullptr;
            mark_list_piece_end = nullptr;
            mark_list_end = &mark_list[0];
#endif //MULTIPLE_HEAPS
            survived_per_region = nullptr;
            old_card_survived_per_region = nullptr;
        }
#endif // USE_REGIONS && MULTIPLE_HEAPS

#ifndef MULTIPLE_HEAPS
        shigh = (uint8_t*) 0;
        slow  = MAX_PTR;
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_SIZED_REF_HANDLES
        if ((condemned_gen_number == max_generation) && (num_sizedrefs > 0))
        {
            GCScan::GcScanSizedRefs(GCHeap::Promote, condemned_gen_number, max_generation, &sc);
            drain_mark_queue();
            fire_mark_event (ETW::GC_ROOT_SIZEDREF, current_promoted_bytes, last_promoted_bytes);

#ifdef MULTIPLE_HEAPS
            gc_t_join.join(this, gc_join_scan_sizedref_done);
            if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
            {
#ifdef FEATURE_EVENT_TRACE
                record_mark_time (gc_time_info[time_mark_sizedref], current_mark_time, last_mark_time);
#endif //FEATURE_EVENT_TRACE

#ifdef MULTIPLE_HEAPS
                dprintf(3, ("Done with marking all sized refs. Starting all gc thread for marking other strong roots"));
                gc_t_join.restart();
#endif //MULTIPLE_HEAPS
            }
        }
#endif // FEATURE_SIZED_REF_HANDLES

#if defined(FEATURE_BASICFREEZE) && !defined(USE_REGIONS)
        if (ro_segments_in_range)
        {
            dprintf(3,("Marking in range ro segments"));
            mark_ro_segments();
            // Should fire an ETW event here.
        }
#endif //FEATURE_BASICFREEZE && !USE_REGIONS

        dprintf(3,("Marking Roots"));

        GCScan::GcScanRoots(GCHeap::Promote,
                                condemned_gen_number, max_generation,
                                &sc);
        drain_mark_queue();
        fire_mark_event (ETW::GC_ROOT_STACK, current_promoted_bytes, last_promoted_bytes);

#ifdef BACKGROUND_GC
        if (gc_heap::background_running_p())
        {
            scan_background_roots (GCHeap::Promote, heap_number, &sc);
            drain_mark_queue();
            fire_mark_event (ETW::GC_ROOT_BGC, current_promoted_bytes, last_promoted_bytes);
        }
#endif //BACKGROUND_GC

#ifdef FEATURE_PREMORTEM_FINALIZATION
        dprintf(3, ("Marking finalization data"));
        finalize_queue->GcScanRoots(GCHeap::Promote, heap_number, 0);
        drain_mark_queue();
        fire_mark_event (ETW::GC_ROOT_FQ, current_promoted_bytes, last_promoted_bytes);
#endif // FEATURE_PREMORTEM_FINALIZATION

        dprintf(3,("Marking handle table"));
        GCScan::GcScanHandles(GCHeap::Promote,
                                    condemned_gen_number, max_generation,
                                    &sc);
        drain_mark_queue();
        fire_mark_event (ETW::GC_ROOT_HANDLES, current_promoted_bytes, last_promoted_bytes);

        if (!full_p)
        {
#ifdef USE_REGIONS
            save_current_survived();
#endif //USE_REGIONS

#ifdef FEATURE_CARD_MARKING_STEALING
            n_eph_soh = 0;
            n_gen_soh = 0;
            n_eph_loh = 0;
            n_gen_loh = 0;
#endif //FEATURE_CARD_MARKING_STEALING

#ifdef CARD_BUNDLE
#ifdef MULTIPLE_HEAPS
            if (gc_t_join.r_join(this, gc_r_join_update_card_bundle))
            {
#endif //MULTIPLE_HEAPS

#ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
                // If we are manually managing card bundles, every write to the card table should already be
                // accounted for in the card bundle table so there's nothing to update here.
                update_card_table_bundle();
#endif
                if (card_bundles_enabled())
                {
                    verify_card_bundles();
                }

#ifdef MULTIPLE_HEAPS
                gc_t_join.r_restart();
            }
#endif //MULTIPLE_HEAPS
#endif //CARD_BUNDLE

            card_fn mark_object_fn = &gc_heap::mark_object_simple;
#ifdef HEAP_ANALYZE
            heap_analyze_success = TRUE;
            if (heap_analyze_enabled)
            {
                internal_root_array_index = 0;
                current_obj = 0;
                current_obj_size = 0;
                mark_object_fn = &gc_heap::ha_mark_object_simple;
            }
#endif //HEAP_ANALYZE

#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
            if (!card_mark_done_soh)
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
            {
                dprintf (3, ("Marking cross generation pointers on heap %d", heap_number));
                mark_through_cards_for_segments(mark_object_fn, FALSE THIS_ARG);
#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
                card_mark_done_soh = true;
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
            }

#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
            if (!card_mark_done_uoh)
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
            {
                dprintf (3, ("Marking cross generation pointers for uoh objects on heap %d", heap_number));
                for (int i = uoh_start_generation; i < total_generation_count; i++)
                {
#ifndef ALLOW_REFERENCES_IN_POH
                    if (i != poh_generation)
#endif //ALLOW_REFERENCES_IN_POH
                        mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
                }

#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
                card_mark_done_uoh = true;
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
            }

#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
            // check the other heaps cyclically and try to help out where the marking isn't done
            for (int i = 0; i < gc_heap::n_heaps; i++)
            {
                int heap_number_to_look_at = (i + heap_number) % gc_heap::n_heaps;
                gc_heap* hp = gc_heap::g_heaps[heap_number_to_look_at];
                if (!hp->card_mark_done_soh)
                {
                    dprintf(3, ("Marking cross generation pointers on heap %d", hp->heap_number));
                    hp->mark_through_cards_for_segments(mark_object_fn, FALSE THIS_ARG);
                    hp->card_mark_done_soh = true;
                }

                if (!hp->card_mark_done_uoh)
                {
                    dprintf(3, ("Marking cross generation pointers for large objects on heap %d", hp->heap_number));
                    for (int i = uoh_start_generation; i < total_generation_count; i++)
                    {
#ifndef ALLOW_REFERENCES_IN_POH
                        if (i != poh_generation)
#endif //ALLOW_REFERENCES_IN_POH
                            hp->mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
                    }

                    hp->card_mark_done_uoh = true;
                }
            }
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING

            drain_mark_queue();

#ifdef USE_REGIONS
            update_old_card_survived();
#endif //USE_REGIONS

            fire_mark_event (ETW::GC_ROOT_OLDER, current_promoted_bytes, last_promoted_bytes);
        }
    }

#ifdef MH_SC_MARK
    if (do_mark_steal_p)
    {
        mark_steal();
        drain_mark_queue();
        fire_mark_event (ETW::GC_ROOT_STEAL, current_promoted_bytes, last_promoted_bytes);
    }
#endif //MH_SC_MARK

    // Dependent handles need to be scanned with a special algorithm (see the header comment on
    // scan_dependent_handles for more detail). We perform an initial scan without synchronizing with other
    // worker threads or processing any mark stack overflow. This is not guaranteed to complete the operation
    // but in a common case (where there are no dependent handles that are due to be collected) it allows us
    // to optimize away further scans. The call to scan_dependent_handles is what will cycle through more
    // iterations if required and will also perform processing of any mark stack overflow once the dependent
    // handle table has been fully promoted.
    GCScan::GcDhInitialScan(GCHeap::Promote, condemned_gen_number, max_generation, &sc);
    scan_dependent_handles(condemned_gen_number, &sc, true);

    // mark queue must be empty after scan_dependent_handles
    mark_queue.verify_empty();
    fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes);

#ifdef FEATURE_JAVAMARSHAL

#ifdef MULTIPLE_HEAPS
    dprintf(3, ("Joining for short weak handle scan"));
    gc_t_join.join(this, gc_join_bridge_processing);
    if (gc_t_join.joined())
    {
#endif //MULTIPLE_HEAPS
        global_bridge_list = GCScan::GcProcessBridgeObjects (condemned_gen_number, max_generation, &sc, &num_global_bridge_objs);

#ifdef MULTIPLE_HEAPS
        dprintf (3, ("Starting all gc thread after bridge processing"));
        gc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    {
        int thread = heap_number;
        // Each thread will receive an equal chunk of bridge objects, with the last thread
        // handling a few more objects from the remainder.
        size_t count_per_heap = num_global_bridge_objs / n_heaps;
        size_t start_index = thread * count_per_heap;
        size_t end_index = (thread == n_heaps - 1) ? num_global_bridge_objs : (thread + 1) * count_per_heap;

        for (size_t obj_idx = start_index; obj_idx < end_index; obj_idx++)
        {
            mark_object_simple (&global_bridge_list[obj_idx] THREAD_NUMBER_ARG);
        }

        drain_mark_queue();
        // using GC_ROOT_DH_HANDLES temporarily. add a new value for GC_ROOT_BRIDGE
        fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes);
    }
#endif //FEATURE_JAVAMARSHAL

#ifdef MULTIPLE_HEAPS
    dprintf(3, ("Joining for short weak handle scan"));
    gc_t_join.join(this, gc_join_null_dead_short_weak);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef FEATURE_EVENT_TRACE
        record_mark_time (gc_time_info[time_mark_roots], current_mark_time, last_mark_time);
#endif //FEATURE_EVENT_TRACE

        uint64_t promoted_bytes_global = 0;
#ifdef HEAP_ANALYZE
        heap_analyze_enabled = FALSE;
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            promoted_bytes_global += g_heaps[i]->get_promoted_bytes();
        }
#else
        promoted_bytes_global = get_promoted_bytes();
#endif //MULTIPLE_HEAPS

        GCToEEInterface::AnalyzeSurvivorsFinished (settings.gc_index, condemned_gen_number, promoted_bytes_global, GCHeap::ReportGenerationBounds);
#endif // HEAP_ANALYZE
        GCToEEInterface::AfterGcScanRoots (condemned_gen_number, max_generation, &sc);

#ifdef MULTIPLE_HEAPS
        if (!full_p)
        {
            // we used r_join and need to reinitialize states for it here.
            gc_t_join.r_init();
        }

        dprintf(3, ("Starting all gc thread for short weak handle scan"));
        gc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

#ifdef FEATURE_CARD_MARKING_STEALING
    reset_card_marking_enumerators();

    if (!full_p)
    {
        int generation_skip_ratio_soh = ((n_eph_soh > MIN_SOH_CROSS_GEN_REFS) ?
                                         (int)(((float)n_gen_soh / (float)n_eph_soh) * 100) : 100);
        int generation_skip_ratio_loh = ((n_eph_loh > MIN_LOH_CROSS_GEN_REFS) ?
                                         (int)(((float)n_gen_loh / (float)n_eph_loh) * 100) : 100);

        generation_skip_ratio = min (generation_skip_ratio_soh, generation_skip_ratio_loh);
#ifdef SIMPLE_DPRINTF
        dprintf (6666, ("h%d skip ratio soh: %d (n_gen_soh: %Id, n_eph_soh: %Id), loh: %d (n_gen_loh: %Id, n_eph_loh: %Id), size 0: %Id-%Id, 1: %Id-%Id, 2: %Id-%Id, 3: %Id-%Id",
            heap_number,
            generation_skip_ratio_soh, VolatileLoadWithoutBarrier (&n_gen_soh), VolatileLoadWithoutBarrier (&n_eph_soh),
            generation_skip_ratio_loh, VolatileLoadWithoutBarrier (&n_gen_loh), VolatileLoadWithoutBarrier (&n_eph_loh),
            generation_size (0), dd_fragmentation (dynamic_data_of (0)),
            generation_size (1), dd_fragmentation (dynamic_data_of (1)),
            generation_size (2), dd_fragmentation (dynamic_data_of (2)),
            generation_size (3), dd_fragmentation (dynamic_data_of (3))));
#endif //SIMPLE_DPRINTF
    }
#endif // FEATURE_CARD_MARKING_STEALING

    // null out the target of short weakref that were not promoted.
    GCScan::GcShortWeakPtrScan (condemned_gen_number, max_generation,&sc);

#ifdef MULTIPLE_HEAPS
    dprintf(3, ("Joining for finalization"));
    gc_t_join.join(this, gc_join_scan_finalization);
    if (gc_t_join.joined())
    {
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_EVENT_TRACE
        record_mark_time (gc_time_info[time_mark_short_weak], current_mark_time, last_mark_time);
#endif //FEATURE_EVENT_TRACE

#ifdef MULTIPLE_HEAPS
        dprintf(3, ("Starting all gc thread for Finalization"));
        gc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    //Handle finalization.
    size_t promoted_bytes_live = get_promoted_bytes();

#ifdef FEATURE_PREMORTEM_FINALIZATION
    dprintf (3, ("Finalize marking"));
    finalize_queue->ScanForFinalization (GCHeap::Promote, condemned_gen_number, __this);
    drain_mark_queue();
    fire_mark_event (ETW::GC_ROOT_NEW_FQ, current_promoted_bytes, last_promoted_bytes);
    GCToEEInterface::DiagWalkFReachableObjects(__this);

    // Scan dependent handles again to promote any secondaries associated with primaries that were promoted
    // for finalization. As before scan_dependent_handles will also process any mark stack overflow.
    scan_dependent_handles(condemned_gen_number, &sc, false);

    // mark queue must be empty after scan_dependent_handles
    mark_queue.verify_empty();
    fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes);
#endif //FEATURE_PREMORTEM_FINALIZATION

    total_promoted_bytes = get_promoted_bytes();

#ifdef MULTIPLE_HEAPS
    static VOLATILE(int32_t) syncblock_scan_p;
    dprintf(3, ("Joining for weak pointer deletion"));
    gc_t_join.join(this, gc_join_null_dead_long_weak);
    if (gc_t_join.joined())
    {
        dprintf(3, ("Starting all gc thread for weak pointer deletion"));
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_EVENT_TRACE
        record_mark_time (gc_time_info[time_mark_scan_finalization], current_mark_time, last_mark_time);
#endif //FEATURE_EVENT_TRACE

#ifdef USE_REGIONS
        sync_promoted_bytes();
        equalize_promoted_bytes(settings.condemned_generation);
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
        syncblock_scan_p = 0;
        gc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    // null out the target of long weakref that were not promoted.
    GCScan::GcWeakPtrScan (condemned_gen_number, max_generation, &sc);

#ifdef MULTIPLE_HEAPS
    size_t total_mark_list_size = sort_mark_list();
    // first thread to finish sorting will scan the sync syncblk cache
    if ((syncblock_scan_p == 0) && (Interlocked::Increment(&syncblock_scan_p) == 1))
#endif //MULTIPLE_HEAPS
    {
        // scan for deleted entries in the syncblk cache
        GCScan::GcWeakPtrScanBySingleThread(condemned_gen_number, max_generation, &sc);
    }

#ifdef MULTIPLE_HEAPS
    dprintf (3, ("Joining for sync block cache entry scanning"));
    gc_t_join.join(this, gc_join_null_dead_syncblk);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef FEATURE_EVENT_TRACE
        record_mark_time (gc_time_info[time_mark_long_weak], current_mark_time, last_mark_time);
        gc_time_info[time_plan] = last_mark_time;
#endif //FEATURE_EVENT_TRACE

        //decide on promotion
        if (!settings.promotion)
        {
            size_t m = 0;
            for (int n = 0; n <= condemned_gen_number;n++)
            {
#ifdef MULTIPLE_HEAPS
                m +=  (size_t)(dd_min_size (dynamic_data_of (n))*(n+1)*0.1);
#else
                m +=  (size_t)(dd_min_size (dynamic_data_of (n))*(n+1)*0.06);
#endif //MULTIPLE_HEAPS
            }

            settings.promotion = decide_on_promotion_surv (m);
        }

#ifdef MULTIPLE_HEAPS
#ifdef SNOOP_STATS
        if (do_mark_steal_p)
        {
            size_t objects_checked_count = 0;
            size_t zero_ref_count = 0;
            size_t objects_marked_count = 0;
            size_t check_level_count = 0;
            size_t busy_count = 0;
            size_t interlocked_count = 0;
            size_t partial_mark_parent_count = 0;
            size_t stolen_or_pm_count = 0;
            size_t stolen_entry_count = 0;
            size_t pm_not_ready_count = 0;
            size_t normal_count = 0;
            size_t stack_bottom_clear_count = 0;

            for (int i = 0; i < n_heaps; i++)
            {
                gc_heap* hp = g_heaps[i];
                hp->print_snoop_stat();
                objects_checked_count += hp->snoop_stat.objects_checked_count;
                zero_ref_count += hp->snoop_stat.zero_ref_count;
                objects_marked_count += hp->snoop_stat.objects_marked_count;
                check_level_count += hp->snoop_stat.check_level_count;
                busy_count += hp->snoop_stat.busy_count;
                interlocked_count += hp->snoop_stat.interlocked_count;
                partial_mark_parent_count += hp->snoop_stat.partial_mark_parent_count;
                stolen_or_pm_count += hp->snoop_stat.stolen_or_pm_count;
                stolen_entry_count += hp->snoop_stat.stolen_entry_count;
                pm_not_ready_count += hp->snoop_stat.pm_not_ready_count;
                normal_count += hp->snoop_stat.normal_count;
                stack_bottom_clear_count += hp->snoop_stat.stack_bottom_clear_count;
            }

            fflush (stdout);

            printf ("-------total stats-------\n");
            printf ("%8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s\n",
                "checked", "zero", "marked", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "normal", "clear");
            printf ("%8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d\n",
                objects_checked_count,
                zero_ref_count,
                objects_marked_count,
                check_level_count,
                busy_count,
                interlocked_count,
                partial_mark_parent_count,
                stolen_or_pm_count,
                stolen_entry_count,
                pm_not_ready_count,
                normal_count,
                stack_bottom_clear_count);
        }
#endif //SNOOP_STATS

        dprintf(3, ("Starting all threads for end of mark phase"));
        gc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

#if defined(MULTIPLE_HEAPS) && !defined(USE_REGIONS)
    merge_mark_lists (total_mark_list_size);
#endif //MULTIPLE_HEAPS && !USE_REGIONS

    finalization_promoted_bytes = total_promoted_bytes - promoted_bytes_live;

    mark_queue.verify_empty();

    dprintf(2,("---- End of mark phase ----"));
}

// Marks object `o` as pinned on behalf of the slot `ppObject` that refers to it,
// optionally reports the pin through event tracing, and bumps this heap's
// pinned-object counter.
inline
void gc_heap::pin_object (uint8_t* o, uint8_t** ppObject)
{
    // Trace the slot address and the object address it points at.
    dprintf (3, ("Pinning %zx->%zx", (size_t)ppObject, (size_t)o));

    set_pinned (o);

#ifdef FEATURE_EVENT_TRACE
    // Only fire the event when the PinObjectAtGCTime event is enabled.
    if (EVENT_ENABLED(PinObjectAtGCTime))
    {
        fire_etw_pin_object_event(o, ppObject);
    }
#endif // FEATURE_EVENT_TRACE

    ++num_pinned_objects;
}

// Returns the number of pinned objects recorded so far: the sum over all heaps
// when MULTIPLE_HEAPS is defined, otherwise the single heap's counter.
size_t gc_heap::get_total_pinned_objects()
{
#ifndef MULTIPLE_HEAPS
    return num_pinned_objects;
#else //!MULTIPLE_HEAPS
    size_t pinned_sum = 0;
    for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
    {
        pinned_sum += gc_heap::g_heaps[heap_index]->num_pinned_objects;
    }
    return pinned_sum;
#endif //!MULTIPLE_HEAPS
}

// Zeroes the pinned-object counter: on every heap when MULTIPLE_HEAPS is
// defined, otherwise on the single heap.
void gc_heap::reinit_pinned_objects()
{
#ifndef MULTIPLE_HEAPS
    num_pinned_objects = 0;
#else //!MULTIPLE_HEAPS
    for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
    {
        gc_heap* hp = gc_heap::g_heaps[heap_index];
        hp->num_pinned_objects = 0;
    }
#endif //!MULTIPLE_HEAPS
}

// Resets the pinned queue (via reset_pinned_queue) and re-initializes the
// overflow address range so that min is above max, i.e. no overflow recorded.
void gc_heap::reset_mark_stack ()
{
    reset_pinned_queue();

    // min starts at the highest possible address and max at 0.
    min_overflow_address = MAX_PTR;
    max_overflow_address = 0;
}

#ifdef FEATURE_STRUCTALIGN
//
// The word with left child, right child, and align info is laid out as follows:
//
//      |   upper short word   |   lower short word   |
//      |<------------> <----->|<------------> <----->|
//      |  left child   info hi| right child   info lo|
// x86: |    10 bits     6 bits|   10 bits      6 bits|
//
// where left/right child are signed values and concat(info hi, info lo) is unsigned.
//
// The "align info" encodes two numbers: the required alignment (a power of two)
// and the misalignment (the number of machine words the destination address needs
// to be adjusted by to provide alignment - so this number is always smaller than
// the required alignment).  Because the misalignment is smaller than the alignment,
// the two can be combined into a single value as the bitwise OR of the two numbers.
// Note that the actual pad is computed from the misalignment by adding the alignment
// iff the misalignment is non-zero and less than min_obj_size.
//

// log2 of the brick size in bytes (checked against brick_size by the static_assert below).
#if defined (TARGET_AMD64)
#define brick_bits (12)
#else
#define brick_bits (11)
#endif //TARGET_AMD64
static_assert(brick_size == (1 << brick_bits));

// The number of bits needed to represent the offset to a child node.
// "brick_bits + 1" allows us to represent a signed offset within a brick;
// LOG2_PTRSIZE bits are saved because the low bits of the offset are not stored.
#define child_bits (brick_bits + 1 - LOG2_PTRSIZE)

// The number of bits in each of the "info hi" / "info lo" (pad) fields, i.e. what is
// left of a short after the child offset bits.
#define pad_bits (sizeof(short) * 8 - child_bits)

// child_from_short: recovers the signed child offset from a packed short. The signed
//                   division drops the pad field while keeping the sign, and the final
//                   mask clears the low LOG2_PTRSIZE bits.
// pad_mask:         mask selecting the low pad_bits bits of a packed short.
// pad_from_short:   extracts the pad ("info") field of a packed short as a size_t.
#define child_from_short(w) (((signed short)(w) / (1 << (pad_bits - LOG2_PTRSIZE))) & ~((1 << LOG2_PTRSIZE) - 1))
#define pad_mask ((1 << pad_bits) - 1)
#define pad_from_short(w) ((size_t)(w) & pad_mask)
#else // FEATURE_STRUCTALIGN
// Without FEATURE_STRUCTALIGN the short holds only the child offset, so this is the identity.
#define child_from_short(w) (w)
#endif // FEATURE_STRUCTALIGN

inline
short node_left_child(uint8_t* node)
{
    return child_from_short(((plug_and_pair*)node)[-1].m_pair.left);
}

inline
void set_node_left_child(uint8_t* node, ptrdiff_t val)
{
    assert (val > -(ptrdiff_t)brick_size);
    assert (val < (ptrdiff_t)brick_size);
    assert (Aligned (val));
#ifdef FEATURE_STRUCTALIGN
    size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.left);
    ((plug_and_pair*)node)[-1].m_pair.left = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad;
#else // FEATURE_STRUCTALIGN
    ((plug_and_pair*)node)[-1].m_pair.left = (short)val;
#endif // FEATURE_STRUCTALIGN
    assert (node_left_child (node) == val);
}

inline
short node_right_child(uint8_t* node)
{
    return child_from_short(((plug_and_pair*)node)[-1].m_pair.right);
}

inline
void set_node_right_child(uint8_t* node, ptrdiff_t val)
{
    assert (val > -(ptrdiff_t)brick_size);
    assert (val < (ptrdiff_t)brick_size);
    assert (Aligned (val));
#ifdef FEATURE_STRUCTALIGN
    size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.right);
    ((plug_and_pair*)node)[-1].m_pair.right = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad;
#else // FEATURE_STRUCTALIGN
    ((plug_and_pair*)node)[-1].m_pair.right = (short)val;
#endif // FEATURE_STRUCTALIGN
    assert (node_right_child (node) == val);
}

#ifdef FEATURE_STRUCTALIGN
// Decodes the align info stored in the pad fields of the left/right shorts that
// precede `node`.
//   requiredAlignment - receives the highest set bit of the decoded align info.
//   pad               - receives the remainder (align info minus requiredAlignment),
//                       plus whatever AdjustmentForMinPadSize returns for it.
void node_aligninfo (uint8_t* node, int& requiredAlignment, ptrdiff_t& pad)
{
    // Extract the single-number aligninfo from the fields.
    short left = ((plug_and_pair*)node)[-1].m_pair.left;
    short right = ((plug_and_pair*)node)[-1].m_pair.right;
    // The left short carries the high pad bits, the right short the low pad bits.
    ptrdiff_t pad_shifted = (pad_from_short(left) << pad_bits) | pad_from_short(right);
    // The value is stored divided by DATA_ALIGNMENT (see set_node_aligninfo); scale it back.
    ptrdiff_t aligninfo = pad_shifted * DATA_ALIGNMENT;

    // Replicate the topmost bit into all lower bits.
    // (The 8/4/2/1 shift sequence smears a top bit across up to 16 bit positions.)
    ptrdiff_t x = aligninfo;
    x |= x >> 8;
    x |= x >> 4;
    x |= x >> 2;
    x |= x >> 1;

    // Clear all bits but the highest.
    requiredAlignment = (int)(x ^ (x >> 1));
    pad = aligninfo - requiredAlignment;
    pad += AdjustmentForMinPadSize(pad, requiredAlignment);
}

// Returns only the pad component of the node's align info; the required
// alignment that node_aligninfo also produces is discarded.
inline
ptrdiff_t node_alignpad (uint8_t* node)
{
    ptrdiff_t pad_result;
    int alignment_ignored;
    node_aligninfo (node, alignment_ignored, pad_result);
    return pad_result;
}

// Clears the pad ("align info") bits in both the left and right shorts that
// precede `node`, leaving the child offset bits untouched.
void clear_node_aligninfo (uint8_t* node)
{
    // ~pad_mask keeps every bit above the low pad_bits bits. It is used instead of
    // "~0 << pad_bits" because left-shifting a negative value is undefined
    // behavior before C++20.
    ((plug_and_pair*)node)[-1].m_pair.left &= ~pad_mask;
    ((plug_and_pair*)node)[-1].m_pair.right &= ~pad_mask;
}

// Encodes `requiredAlignment` and `pad` into the pad fields of the left/right
// shorts that precede `node`. The pad fields are asserted to be zero beforehand,
// and in _DEBUG builds the stored value is decoded again and checked.
void set_node_aligninfo (uint8_t* node, int requiredAlignment, ptrdiff_t pad)
{
    // Encode the alignment requirement and alignment offset as a single number
    // as described above.
    ptrdiff_t aligninfo = (size_t)requiredAlignment + (pad & (requiredAlignment-1));
    assert (Aligned (aligninfo));
    // Store the value in DATA_ALIGNMENT units so it fits in the two pad fields.
    ptrdiff_t aligninfo_shifted = aligninfo / DATA_ALIGNMENT;
    assert (aligninfo_shifted < (1 << (pad_bits + pad_bits)));

    // High pad_bits go into the left short...
    ptrdiff_t hi = aligninfo_shifted >> pad_bits;
    assert (pad_from_short(((plug_and_gap*)node)[-1].m_pair.left) == 0);
    ((plug_and_pair*)node)[-1].m_pair.left |= hi;

    // ...and low pad_bits go into the right short.
    ptrdiff_t lo = aligninfo_shifted & pad_mask;
    assert (pad_from_short(((plug_and_gap*)node)[-1].m_pair.right) == 0);
    ((plug_and_pair*)node)[-1].m_pair.right |= lo;

#ifdef _DEBUG
    // Round-trip check: decoding must give back exactly what was passed in.
    int requiredAlignment2;
    ptrdiff_t pad2;
    node_aligninfo (node, requiredAlignment2, pad2);
    assert (requiredAlignment == requiredAlignment2);
    assert (pad == pad2);
#endif // _DEBUG
}
#endif // FEATURE_STRUCTALIGN

inline
void loh_set_node_relocation_distance(uint8_t* node, ptrdiff_t val)
{
    ptrdiff_t* place = &(((loh_obj_and_pad*)node)[-1].reloc);
    *place = val;
}

inline
ptrdiff_t loh_node_relocation_distance(uint8_t* node)
{
    return (((loh_obj_and_pad*)node)[-1].reloc);
}

// Returns the relocation distance stored in the plug_and_reloc header before
// `node`, with the two low flag bits (bit 0 and bit 1) masked off.
inline
ptrdiff_t node_relocation_distance (uint8_t* node)
{
    ptrdiff_t raw_reloc = ((plug_and_reloc*)(node))[-1].reloc;
    return (raw_reloc & ~3);
}

// Stores `val` (whose two low bits must be clear) as the relocation distance in
// the plug_and_reloc header before `node`. Bit 0 of the existing word is kept;
// bit 1 (the "left" flag) and the previous distance are discarded.
inline
void set_node_relocation_distance(uint8_t* node, ptrdiff_t val)
{
    assert (val == (val & ~3));

    ptrdiff_t& reloc_word = ((plug_and_reloc*)node)[-1].reloc;
    // Keep only bit 0 of the old contents, then merge in the new distance.
    reloc_word = (reloc_word & 1) | val;
}

// Nonzero when bit 1 (the "left" flag) is set in the reloc word of the
// plug_and_reloc header that precedes the node.
#define node_left_p(node) (((plug_and_reloc*)(node))[-1].reloc & 2)

// Sets bit 1 (the "left" flag) in the node's reloc word.
// Note: the expansion already ends with a semicolon.
#define set_node_left(node) ((plug_and_reloc*)(node))[-1].reloc |= 2;

#ifndef FEATURE_STRUCTALIGN
void set_node_realigned(uint8_t* node)
{
    ((plug_and_reloc*)(node))[-1].reloc |= 1;
}

// Clears bit 0 (the "realigned" flag) in the node's reloc word when
// RESPECT_LARGE_ALIGNMENT is defined; otherwise does nothing.
void clear_node_realigned(uint8_t* node)
{
#ifndef RESPECT_LARGE_ALIGNMENT
    UNREFERENCED_PARAMETER(node);
#else //!RESPECT_LARGE_ALIGNMENT
    plug_and_reloc* header = ((plug_and_reloc*)(node)) - 1;
    header->reloc &= ~1;
#endif //!RESPECT_LARGE_ALIGNMENT
}
#endif // FEATURE_STRUCTALIGN

inline
size_t  node_gap_size (uint8_t* node)
{
    return ((plug_and_gap *)node)[-1].gap;
}

void set_gap_size (uint8_t* node, size_t size)
{
    assert (Aligned (size));

    // clear the 2 uint32_t used by the node.
    ((plug_and_gap *)node)[-1].reloc = 0;
    ((plug_and_gap *)node)[-1].lr =0;
    ((plug_and_gap *)node)[-1].gap = size;

    assert ((size == 0 )||(size >= sizeof(plug_and_reloc)));

}

// Links `new_node` into the tree rooted at `tree` and returns the resulting root.
// Child links are stored as byte offsets relative to the node holding them.
//   - sequence_number is a power of two: new_node becomes the new root and the
//     old tree becomes its left child.
//   - sequence_number is odd: new_node becomes the right child of last_node.
//   - otherwise: follow right links from the root (logcount(sequence_number) - 2)
//     times; new_node takes over that node's right subtree as its left child and
//     becomes that node's new right child.
uint8_t* gc_heap::insert_node (uint8_t* new_node, size_t sequence_number,
                   uint8_t* tree, uint8_t* last_node)
{
    dprintf (3, ("IN: %zx(%zx), T: %zx(%zx), L: %zx(%zx) [%zx]",
                 (size_t)new_node, brick_of(new_node),
                 (size_t)tree, brick_of(tree),
                 (size_t)last_node, brick_of(last_node),
                 sequence_number));

    // Case 1: new root.
    if (power_of_two_p (sequence_number))
    {
        set_node_left_child (new_node, (tree - new_node));
        dprintf (3, ("NT: %zx, LC->%zx", (size_t)new_node, (tree - new_node)));
        return new_node;
    }

    // Case 2: hang the new node off the previously inserted node.
    if (oddp (sequence_number))
    {
        set_node_right_child (last_node, (new_node - last_node));
        dprintf (3, ("%p RC->%zx", last_node, (new_node - last_node)));
        return tree;
    }

    // Case 3: splice the new node in along the right spine of the tree.
    uint8_t* spine_node = tree;
    size_t steps = logcount(sequence_number) - 2;
    for (size_t step = 0; step != steps; step++)
    {
        spine_node += node_right_child (spine_node);
    }

    int old_right_offset = node_right_child (spine_node);
    assert (old_right_offset); // should never be empty
    set_node_left_child (new_node, ((spine_node + old_right_offset ) - new_node));
    set_node_right_child (spine_node, (new_node - spine_node));

    dprintf (3, ("%p LC->%zx, %p RC->%zx",
        new_node, ((spine_node + old_right_offset ) - new_node),
        spine_node, (new_node - spine_node)));

    return tree;
}

size_t gc_heap::update_brick_table (uint8_t* tree, size_t current_brick,
                                    uint8_t* x, uint8_t* plug_end)
{
    dprintf (3, ("tree: %p, current b: %zx, x: %p, plug_end: %p",
        tree, current_brick, x, plug_end));

    if (tree != NULL)
    {
        dprintf (3, ("b- %zx->%zx pointing to tree %p",
            current_brick, (size_t)(tree - brick_address (current_brick)), tree));
        set_brick (current_brick, (tree - brick_address (current_brick)));
    }
    else
    {
        dprintf (3, ("b- %zx->-1", current_brick));
        set_brick (current_brick, -1);
    }
    size_t  b = 1 + current_brick;
    ptrdiff_t  offset = 0;
    size_t last_br = brick_of (plug_end-1);
    current_brick = brick_of (x-1);
    dprintf (3, ("ubt: %zx->%zx]->%zx]", b, last_br, current_brick));
    while (b <= current_brick)
    {
        if (b <= last_br)
        {
            set_brick (b, --offset);
        }
        else
        {
            set_brick (b,-1);
        }
        b++;
    }
    return brick_of (x);
}

#ifndef USE_REGIONS
// Plans the allocation start of `gen`: allocates a min_obj_size slot through
// `consing_gen` and records it as gen's plan allocation start and start size.
//   gen                   - generation whose planned start is being set.
//   consing_gen           - generation whose allocation pointer/limit are used
//                           for planning.
//   next_plug_to_allocate - if non-null, caps the space considered to be left
//                           after the start at the distance to this plug.
void gc_heap::plan_generation_start (generation* gen, generation* consing_gen, uint8_t* next_plug_to_allocate)
{
#ifdef HOST_64BIT
    // We should never demote big plugs to gen0.
    if (gen == youngest_generation)
    {
        heap_segment* seg = ephemeral_heap_segment;
        // Scan the pinned plug entries between the current bottom and top of the stack.
        size_t mark_stack_large_bos = mark_stack_bos;
        size_t large_plug_pos = 0;
        while (mark_stack_large_bos < mark_stack_tos)
        {
            if (mark_stack_array[mark_stack_large_bos].len > demotion_plug_len_th)
            {
                // Found a plug over the demotion threshold: dequeue every pinned plug up to
                // and including it, moving the allocation pointer past each one.
                while (mark_stack_bos <= mark_stack_large_bos)
                {
                    size_t entry = deque_pinned_plug();
                    size_t len = pinned_len (pinned_plug_of (entry));
                    uint8_t* plug = pinned_plug (pinned_plug_of(entry));
                    if (len > demotion_plug_len_th)
                    {
                        dprintf (2, ("ps(%d): S %p (%zd)(%p)", gen->gen_num, plug, len, (plug+len)));
                    }
                    // The entry's length field is overwritten with the gap between the
                    // allocation pointer and the plug.
                    pinned_len (pinned_plug_of (entry)) = plug - generation_allocation_pointer (consing_gen);
                    assert(mark_stack_array[entry].len == 0 ||
                            mark_stack_array[entry].len >= Align(min_obj_size));
                    // Continue allocating right after the plug, up to the segment's planned end.
                    generation_allocation_pointer (consing_gen) = plug + len;
                    generation_allocation_limit (consing_gen) = heap_segment_plan_allocated (seg);
                    set_allocator_next_pin (consing_gen);
                }
            }

            mark_stack_large_bos++;
        }
    }
#endif // HOST_64BIT

    generation_plan_allocation_start (gen) =
        allocate_in_condemned_generations (consing_gen, Align (min_obj_size), -1);
    generation_plan_allocation_start_size (gen) = Align (min_obj_size);
    // Space remaining before the allocation limit, or before the next plug if that is closer.
    size_t allocation_left = (size_t)(generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen));
    if (next_plug_to_allocate)
    {
        size_t dist_to_next_plug = (size_t)(next_plug_to_allocate - generation_allocation_pointer (consing_gen));
        if (allocation_left > dist_to_next_plug)
        {
            allocation_left = dist_to_next_plug;
        }
    }
    // A remainder smaller than a minimal object is folded into the generation start.
    if (allocation_left < Align (min_obj_size))
    {
        generation_plan_allocation_start_size (gen) += allocation_left;
        generation_allocation_pointer (consing_gen) += allocation_left;
    }

    dprintf (2, ("plan alloc gen%d(%p) start at %zx (ptr: %p, limit: %p, next: %p)", gen->gen_num,
        generation_plan_allocation_start (gen),
        generation_plan_allocation_start_size (gen),
        generation_allocation_pointer (consing_gen), generation_allocation_limit (consing_gen),
        next_plug_to_allocate));
}

// Plans the allocation start of 'gen' by allocating a minimum-sized object
// through allocate_in_expanded_heap on 'consing_gen', and records both the
// planned start address and its size on 'gen'.
//
// gen         - generation whose planned allocation start is being set.
// consing_gen - generation whose allocation pointer/limit are consumed.
void gc_heap::realloc_plan_generation_start (generation* gen, generation* consing_gen)
{
    BOOL adjacentp = FALSE;

    generation_plan_allocation_start (gen) =
        allocate_in_expanded_heap (consing_gen, Align(min_obj_size), adjacentp, 0,
#ifdef SHORT_PLUGS
                                   FALSE, NULL,
#endif //SHORT_PLUGS
                                   FALSE, -1 REQD_ALIGN_AND_OFFSET_ARG);

    generation_plan_allocation_start_size (gen) = Align (min_obj_size);

    // If what is left in the current allocation window is smaller than a minimum
    // object and the window does not end at the segment's planned end, fold the
    // leftover into the generation start so no undersized gap remains behind it.
    size_t allocation_left = (size_t)(generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen));
    if ((allocation_left < Align (min_obj_size)) &&
         (generation_allocation_limit (consing_gen)!=heap_segment_plan_allocated (generation_allocation_segment (consing_gen))))
    {
        generation_plan_allocation_start_size (gen) += allocation_left;
        generation_allocation_pointer (consing_gen) += allocation_left;
    }

    // Log the start that was just planned for 'gen' (the message is labelled with
    // gen->gen_num, so the address must come from 'gen', not from 'consing_gen').
    dprintf (2, ("plan re-alloc gen%d start at %p (ptr: %p, limit: %p)", gen->gen_num,
        generation_plan_allocation_start (gen),
        generation_allocation_pointer (consing_gen),
        generation_allocation_limit (consing_gen)));
}

// Guarantees that every generation from the condemned one down to gen0 has a
// planned allocation start, then records where planned allocation ends on the
// ephemeral segment. 'consing_gen' may be replaced along the way.
void gc_heap::plan_generation_starts (generation*& consing_gen)
{
    for (int gen_idx = settings.condemned_generation; gen_idx >= 0; gen_idx--)
    {
        if (gen_idx < max_generation)
        {
            consing_gen = ensure_ephemeral_heap_segment (consing_gen);
        }

        generation* current_gen = generation_of (gen_idx);
        if (generation_plan_allocation_start (current_gen) == 0)
        {
            // This generation has no planned start yet - give it one now.
            plan_generation_start (current_gen, consing_gen, 0);
            assert (generation_plan_allocation_start (current_gen));
        }
    }

    // All starts are planned, so the planned allocation size is now known.
    heap_segment_plan_allocated (ephemeral_heap_segment) =
        generation_allocation_pointer (consing_gen);
}

// If the pin queue is not empty, computes two ratios for the gen1 pins that are
// still queued and asks decide_on_gen1_pin_promotion whether to skip past them.
// When it says yes, every queued pin located below the current start of the
// youngest generation is dequeued and the allocation pointer of 'gen' is moved
// to just after it.
void gc_heap::advance_pins_for_demotion (generation* gen)
{
    uint8_t* gen0_start_before_plan = generation_allocation_start (youngest_generation);
    heap_segment* eph_seg = ephemeral_heap_segment;

    if (pinned_plug_que_empty_p())
    {
        return;
    }

    size_t promoted_gen1_pins = generation_pinned_allocation_compact_size (generation_of (max_generation));
    size_t remaining_gen1_pins = dd_pinned_survived_size (dynamic_data_of (max_generation - 1)) - promoted_gen1_pins;
    size_t skip_span = last_gen1_pin_end - generation_allocation_pointer (gen);
    float pin_frag_ratio = (float)remaining_gen1_pins / (float)skip_span;
    float pin_surv_ratio = (float)remaining_gen1_pins / (float)(dd_survived_size (dynamic_data_of (max_generation - 1)));

    if (decide_on_gen1_pin_promotion (pin_frag_ratio, pin_surv_ratio))
    {
        for (;;)
        {
            if (pinned_plug_que_empty_p() ||
                (pinned_plug (oldest_pin()) >= gen0_start_before_plan))
            {
                break;
            }

            size_t pin_index = deque_pinned_plug();
            size_t pin_size = pinned_len (pinned_plug_of (pin_index));
            uint8_t* pin_start = pinned_plug (pinned_plug_of(pin_index));

            // The length field is reused to remember the gap in front of the pin.
            pinned_len (pinned_plug_of (pin_index)) = pin_start - generation_allocation_pointer (gen);
            assert(mark_stack_array[pin_index].len == 0 ||
                    mark_stack_array[pin_index].len >= Align(min_obj_size));

            // Resume allocating right after the pin, up to the planned end of the segment.
            generation_allocation_pointer (gen) = pin_start + pin_size;
            generation_allocation_limit (gen) = heap_segment_plan_allocated (eph_seg);
            set_allocator_next_pin (gen);
            attribute_pin_higher_gen_alloc (pin_start, pin_size);

            dprintf (2, ("skipping gap %zu, pin %p (%zd)",
                pinned_len (pinned_plug_of (pin_index)), pin_start, pin_size));
        }
    }

    dprintf (2, ("ad_p_d: PL: %zd, SL: %zd, pfr: %d, psr: %d",
        remaining_gen1_pins, skip_span, (int)(pin_frag_ratio*100), (int)(pin_surv_ratio*100)));
}

// Checks whether address x has reached the allocation start of the next younger
// generation (active_old_gen_number - 1) and, for every such boundary crossed,
// updates the caller's bookkeeping and plans the start of the new generation.
//
// x                     - address being compared against the generation starts
//                         (presumably the plug currently being planned - confirm against caller).
// active_new_gen_number - in/out; decremented when the crossed boundary changes the
//                         generation survivors are planned into.
// active_old_gen_number - in/out; decremented for every boundary crossed.
// consing_gen           - in/out; generation whose allocation context is being consumed.
//                         May be replaced by ensure_ephemeral_heap_segment.
// allocate_in_condemned - out; set to TRUE once the new generation becomes max_generation - 1.
void gc_heap::process_ephemeral_boundaries (uint8_t* x,
                                            int& active_new_gen_number,
                                            int& active_old_gen_number,
                                            generation*& consing_gen,
                                            BOOL& allocate_in_condemned)
{
// A single x can cross several generation starts; each pass handles one boundary
// and jumps back here to test the next one.
retry:
    if ((active_old_gen_number > 0) &&
        (x >= generation_allocation_start (generation_of (active_old_gen_number - 1))))
    {
        dprintf (2, ("crossing gen%d, x is %p", active_old_gen_number - 1, x));

        if (!pinned_plug_que_empty_p())
        {
            dprintf (2, ("oldest pin: %p(%zd)",
                pinned_plug (oldest_pin()),
                (x - pinned_plug (oldest_pin()))));
        }

        // The new generation only moves down once the old generation number is at or
        // below max_generation - 1 (when promoting) or max_generation (when not promoting).
        if (active_old_gen_number <= (settings.promotion ? (max_generation - 1) : max_generation))
        {
            active_new_gen_number--;
        }

        active_old_gen_number--;
        assert ((!settings.promotion) || (active_new_gen_number>0));

        if (active_new_gen_number == (max_generation - 1))
        {
#ifdef FREE_USAGE_STATS
            if (settings.condemned_generation == max_generation)
            {
                // We need to do this before we skip the rest of the pinned plugs.
                generation* gen_2 = generation_of (max_generation);
                generation* gen_1 = generation_of (max_generation - 1);

                size_t total_num_pinned_free_spaces_left = 0;

                // We are about to allocate gen1, check to see how efficient fitting in gen2 pinned free spaces is.
                for (int j = 0; j < NUM_GEN_POWER2; j++)
                {
                    dprintf (1, ("[h%d][#%zd]2^%d: current: %zd, S: 2: %zd, 1: %zd(%zd)",
                        heap_number,
                        settings.gc_index,
                        (j + 10),
                        gen_2->gen_current_pinned_free_spaces[j],
                        gen_2->gen_plugs[j], gen_1->gen_plugs[j],
                        (gen_2->gen_plugs[j] + gen_1->gen_plugs[j])));

                    total_num_pinned_free_spaces_left += gen_2->gen_current_pinned_free_spaces[j];
                }

                // Efficiency = bytes allocated in pinned free spaces / (allocated + still free);
                // left at 0 when there was no pinned free space at all.
                float pinned_free_list_efficiency = 0;
                size_t total_pinned_free_space = generation_allocated_in_pinned_free (gen_2) + generation_pinned_free_obj_space (gen_2);
                if (total_pinned_free_space != 0)
                {
                    pinned_free_list_efficiency = (float)(generation_allocated_in_pinned_free (gen_2)) / (float)total_pinned_free_space;
                }

                dprintf (1, ("[h%d] gen2 allocated %zd bytes with %zd bytes pinned free spaces (effi: %d%%), %zd (%zd) left",
                            heap_number,
                            generation_allocated_in_pinned_free (gen_2),
                            total_pinned_free_space,
                            (int)(pinned_free_list_efficiency * 100),
                            generation_pinned_free_obj_space (gen_2),
                            total_num_pinned_free_spaces_left));
            }
#endif //FREE_USAGE_STATS

            //Go past all of the pinned plugs for this generation.
            // (i.e. dequeue every pin that is not located in the ephemeral segment)
            while (!pinned_plug_que_empty_p() &&
                   (!in_range_for_segment ((pinned_plug (oldest_pin())), ephemeral_heap_segment)))
            {
                size_t  entry = deque_pinned_plug();
                mark*  m = pinned_plug_of (entry);
                uint8_t*  plug = pinned_plug (m);
                size_t  len = pinned_len (m);
                // detect pinned block in different segment (later) than
                // allocation segment, skip those until the oldest pin is in the ephemeral seg.
                // adjust the allocation segment along the way (at the end it will
                // be the ephemeral segment).
                heap_segment* nseg = heap_segment_in_range (generation_allocation_segment (consing_gen));

                _ASSERTE(nseg != NULL);

                // Walk forward through the segments until the plug lies between the
                // allocation pointer and the end of the current allocation segment.
                while (!((plug >= generation_allocation_pointer (consing_gen))&&
                        (plug < heap_segment_allocated (nseg))))
                {
                    //adjust the end of the segment to be the end of the plug
                    assert (generation_allocation_pointer (consing_gen)>=
                            heap_segment_mem (nseg));
                    assert (generation_allocation_pointer (consing_gen)<=
                            heap_segment_committed (nseg));

                    heap_segment_plan_allocated (nseg) =
                        generation_allocation_pointer (consing_gen);
                    //switch allocation segment
                    nseg = heap_segment_next_rw (nseg);
                    generation_allocation_segment (consing_gen) = nseg;
                    //reset the allocation pointer and limits
                    generation_allocation_pointer (consing_gen) =
                        heap_segment_mem (nseg);
                }
                set_new_pin_info (m, generation_allocation_pointer (consing_gen));
                assert(pinned_len(m) == 0 || pinned_len(m) >= Align(min_obj_size));
                // Continue right after the pin; the limit is set equal to the pointer,
                // so the allocation window is empty at this point.
                generation_allocation_pointer (consing_gen) = plug + len;
                generation_allocation_limit (consing_gen) =
                    generation_allocation_pointer (consing_gen);
            }
            allocate_in_condemned = TRUE;
            consing_gen = ensure_ephemeral_heap_segment (consing_gen);
        }

        if (active_new_gen_number != max_generation)
        {
            if (active_new_gen_number == (max_generation - 1))
            {
                // Snapshot the max_generation pinned compact size before pins may be advanced.
                maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation));
                if (decide_promote_gen1_pins_p)
                    advance_pins_for_demotion (consing_gen);
            }

            plan_generation_start (generation_of (active_new_gen_number), consing_gen, x);

            dprintf (2, ("process eph: allocated gen%d start at %p",
                active_new_gen_number,
                generation_plan_allocation_start (generation_of (active_new_gen_number))));

            // demotion_low is only set once (while it still holds MAX_PTR): it becomes the
            // oldest remaining pin if that pin's current generation number is above 0.
            if ((demotion_low == MAX_PTR) && !pinned_plug_que_empty_p())
            {
                uint8_t* pplug = pinned_plug (oldest_pin());
                if (object_gennum (pplug) > 0)
                {
                    demotion_low = pplug;
                    dprintf (3, ("process eph: dlow->%p", demotion_low));
                }
            }

            assert (generation_plan_allocation_start (generation_of (active_new_gen_number)));
        }

        goto retry;
    }
}
#endif //!USE_REGIONS

#ifdef FEATURE_BASICFREEZE
// Sets the mark bit on every object of 'seg', walking from the first object
// up to the segment's allocated end.
inline
void gc_heap::seg_set_mark_bits (heap_segment* seg)
{
    for (uint8_t* obj = heap_segment_mem (seg);
         obj < heap_segment_allocated (seg);
         obj += Align (size (obj)))
    {
        set_marked (obj);
    }
}

// Clears the mark bit on every marked object of 'seg', walking from the first
// object up to the segment's allocated end.
inline
void gc_heap::seg_clear_mark_bits (heap_segment* seg)
{
    for (uint8_t* obj = heap_segment_mem (seg);
         obj < heap_segment_allocated (seg);
         obj += Align (size (obj)))
    {
        if (!marked (obj))
        {
            continue;
        }

        clear_marked (obj);
    }
}

// We have to do this for in range ro segments because these objects' life time isn't accurately
// expressed. The expectation is all objects on ro segs are live. So we just artifically mark
// all of them on the in range ro segs.
void gc_heap::mark_ro_segments()
{
#ifndef USE_REGIONS
    if ((settings.condemned_generation == max_generation) && ro_segments_in_range)
    {
        heap_segment* seg = generation_start_segment (generation_of (max_generation));

        while (seg)
        {
            if (!heap_segment_read_only_p (seg))
                break;

            if (heap_segment_in_range_p (seg))
            {
#ifdef BACKGROUND_GC
                if (settings.concurrent)
                {
                    seg_set_mark_array_bits_soh (seg);
                }
                else
#endif //BACKGROUND_GC
                {
                    seg_set_mark_bits (seg);
                }
            }
            seg = heap_segment_next (seg);
        }
    }
#endif //!USE_REGIONS
}

void gc_heap::sweep_ro_segments()
{
#ifndef USE_REGIONS
    if ((settings.condemned_generation == max_generation) && ro_segments_in_range)
    {
        heap_segment* seg = generation_start_segment (generation_of (max_generation));;

        while (seg)
        {
            if (!heap_segment_read_only_p (seg))
                break;

            if (heap_segment_in_range_p (seg))
            {
#ifdef BACKGROUND_GC
                if (settings.concurrent)
                {
                    seg_clear_mark_array_bits_soh (seg);
                }
                else
#endif //BACKGROUND_GC
                {
                    seg_clear_mark_bits (seg);
                }
            }
            seg = heap_segment_next (seg);
        }
    }
#endif //!USE_REGIONS
}
#endif // FEATURE_BASICFREEZE

#ifdef FEATURE_LOH_COMPACTION
// The LOH pin queue is empty when its bottom index has caught up with its top index.
inline
BOOL gc_heap::loh_pinned_plug_que_empty_p()
{
    const bool queue_drained = (loh_pinned_queue_bos == loh_pinned_queue_tos);
    return queue_drained;
}

void gc_heap::loh_set_allocator_next_pin()
{
    if (!(loh_pinned_plug_que_empty_p()))
    {
        mark*  oldest_entry = loh_oldest_pin();
        uint8_t* plug = pinned_plug (oldest_entry);
        generation* gen = large_object_generation;
        if ((plug >= generation_allocation_pointer (gen)) &&
            (plug <  generation_allocation_limit (gen)))
        {
            generation_allocation_limit (gen) = pinned_plug (oldest_entry);
        }
        else
            assert (!((plug < generation_allocation_pointer (gen)) &&
                      (plug >= heap_segment_mem (generation_allocation_segment (gen)))));
    }
}

// Removes the oldest entry from the LOH pin queue and returns its index.
size_t gc_heap::loh_deque_pinned_plug ()
{
    // Post-increment: hand back the current bottom index, then advance it.
    return loh_pinned_queue_bos++;
}

// Returns the address of the LOH pin queue entry at index 'bos'.
inline
mark* gc_heap::loh_pinned_plug_of (size_t bos)
{
    return (loh_pinned_queue + bos);
}

// Returns the entry at the bottom of the LOH pin queue without dequeuing it.
inline
mark* gc_heap::loh_oldest_pin()
{
    return &loh_pinned_queue[loh_pinned_queue_bos];
}

// Appends a pinned plug (start address + length) to the LOH pin queue, growing
// the queue first when it is full. Returns FALSE when the queue cannot be
// grown - the caller then must not compact - and TRUE otherwise.
BOOL gc_heap::loh_enque_pinned_plug (uint8_t* plug, size_t len)
{
    assert(len >= Align(min_obj_size, get_alignment_constant (FALSE)));

    const bool queue_full = (loh_pinned_queue_tos >= loh_pinned_queue_length);
    if (queue_full &&
        !grow_mark_stack (loh_pinned_queue, loh_pinned_queue_length, LOH_PIN_QUEUE_LENGTH))
    {
        return FALSE;
    }

    dprintf (3, (" P: %p(%zd)", plug, len));

    // Fill the slot at the top of the queue and publish it by bumping the top index.
    mark* slot = &loh_pinned_queue[loh_pinned_queue_tos];
    slot->first = plug;
    slot->len = len;
    loh_pinned_queue_tos++;

    loh_set_allocator_next_pin();
    return TRUE;
}

// Tells whether an object of 'size' bytes plus its LOH padding fits between
// 'alloc_pointer' and 'alloc_limit'. One padding object is always required;
// a second (tail) one is required unless 'end_p' is set.
inline
BOOL gc_heap::loh_size_fit_p (size_t size, uint8_t* alloc_pointer, uint8_t* alloc_limit, bool end_p)
{
    dprintf (1235, ("trying to fit %zd(%zd) between %p and %p (%zd)",
        size,
        (2* AlignQword (loh_padding_obj_size) +  size),
        alloc_pointer,
        alloc_limit,
        (alloc_limit - alloc_pointer)));

    // If it's at the end, we don't need to allocate the tail padding
    const size_t pad_count = end_p ? 1 : 2;
    const size_t required_pad = pad_count * AlignQword (loh_padding_obj_size);

    uint8_t* end_of_allocation = alloc_pointer + required_pad + size;
    return (end_of_allocation <= alloc_limit);
}

// Reserves room for an object of 'size' bytes, preceded by one LOH padding
// object, in the allocation context of the large object generation and returns
// the address the object itself is planned at (i.e. just past the padding).
//
// While the request does not fit in the current window (see loh_size_fit_p) the
// window is widened/moved, trying in this order:
//   1. the window ends at the oldest queued pin -> dequeue it and continue after it;
//   2. the limit is below the segment's plan_allocated -> raise the limit to it;
//   3. plan_allocated is below committed -> raise both to committed;
//   4. the request fits below reserved and the segment can be grown -> grow it;
//   5. otherwise close this segment and move to the next one.
// Running out of segments, or leaving a segment that still has an unconsumed
// pin in front of the allocation pointer, is a FATAL_GC_ERROR.
uint8_t* gc_heap::loh_allocate_in_condemned (size_t size)
{
    generation* gen = large_object_generation;
    dprintf (1235, ("E: p:%p, l:%p, s: %zd",
        generation_allocation_pointer (gen),
        generation_allocation_limit (gen),
        size));

retry:
    {
        heap_segment* seg = generation_allocation_segment (gen);
        // The tail padding is not needed when the window ends at the segment's planned end.
        if (!(loh_size_fit_p (size, generation_allocation_pointer (gen), generation_allocation_limit (gen),
                              (generation_allocation_limit (gen) == heap_segment_plan_allocated (seg)))))
        {
            // Case 1: the window is cut short by the oldest pin - step over that pin.
            if ((!(loh_pinned_plug_que_empty_p()) &&
                 (generation_allocation_limit (gen) ==
                  pinned_plug (loh_oldest_pin()))))
            {
                mark* m = loh_pinned_plug_of (loh_deque_pinned_plug());
                size_t len = pinned_len (m);
                uint8_t* plug = pinned_plug (m);
                dprintf (1235, ("AIC: %p->%p(%zd)", generation_allocation_pointer (gen), plug, plug - generation_allocation_pointer (gen)));
                // The entry's length field is overwritten with the size of the gap in front of the pin.
                pinned_len (m) = plug - generation_allocation_pointer (gen);
                generation_allocation_pointer (gen) = plug + len;

                generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
                loh_set_allocator_next_pin();
                dprintf (1235, ("s: p: %p, l: %p (%zd)",
                    generation_allocation_pointer (gen),
                    generation_allocation_limit (gen),
                    (generation_allocation_limit (gen) - generation_allocation_pointer (gen))));

                goto retry;
            }

            // Case 2: extend the window to the segment's planned end.
            if (generation_allocation_limit (gen) != heap_segment_plan_allocated (seg))
            {
                generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
                dprintf (1235, ("l->pa(%p)", generation_allocation_limit (gen)));
            }
            else
            {
                // Case 3: extend the planned end (and the window) to the committed end.
                if (heap_segment_plan_allocated (seg) != heap_segment_committed (seg))
                {
                    heap_segment_plan_allocated (seg) = heap_segment_committed (seg);
                    generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
                    dprintf (1235, ("l->c(%p)", generation_allocation_limit (gen)));
                }
                else
                {
                    // Case 4: the request fits within the reserved range - try to grow the
                    // segment far enough to hold the object plus its leading padding.
                    if (loh_size_fit_p (size, generation_allocation_pointer (gen), heap_segment_reserved (seg), true) &&
                        (grow_heap_segment (seg, (generation_allocation_pointer (gen) + size + AlignQword (loh_padding_obj_size)))))
                    {
                        dprintf (1235, ("growing seg from %p to %p\n", heap_segment_committed (seg),
                                         (generation_allocation_pointer (gen) + size)));

                        heap_segment_plan_allocated (seg) = heap_segment_committed (seg);
                        generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);

                        dprintf (1235, ("g: p: %p, l: %p (%zd)",
                            generation_allocation_pointer (gen),
                            generation_allocation_limit (gen),
                            (generation_allocation_limit (gen) - generation_allocation_pointer (gen))));
                    }
                    else
                    {
                        // Case 5: nothing more can be done with this segment - move on to the next one.
                        heap_segment* next_seg = heap_segment_next (seg);
                        assert (generation_allocation_pointer (gen)>=
                                heap_segment_mem (seg));
                        // Verify that all pinned plugs for this segment are consumed
                        if (!loh_pinned_plug_que_empty_p() &&
                            ((pinned_plug (loh_oldest_pin()) <
                              heap_segment_allocated (seg)) &&
                             (pinned_plug (loh_oldest_pin()) >=
                              generation_allocation_pointer (gen))))
                        {
                            LOG((LF_GC, LL_INFO10, "remaining pinned plug %zx while leaving segment on allocation",
                                         pinned_plug (loh_oldest_pin())));
                            dprintf (1, ("queue empty: %d", loh_pinned_plug_que_empty_p()));
                            FATAL_GC_ERROR();
                        }
                        assert (generation_allocation_pointer (gen)>=
                                heap_segment_mem (seg));
                        assert (generation_allocation_pointer (gen)<=
                                heap_segment_committed (seg));
                        // The segment being left ends, plan-wise, at the current allocation pointer.
                        heap_segment_plan_allocated (seg) = generation_allocation_pointer (gen);

                        if (next_seg)
                        {
                            // for LOH do we want to try starting from the first LOH every time though?
                            // Start with an empty window at the beginning of the next segment;
                            // the following retries widen it.
                            generation_allocation_segment (gen) = next_seg;
                            generation_allocation_pointer (gen) = heap_segment_mem (next_seg);
                            generation_allocation_limit (gen) = generation_allocation_pointer (gen);

                            dprintf (1235, ("n: p: %p, l: %p (%zd)",
                                generation_allocation_pointer (gen),
                                generation_allocation_limit (gen),
                                (generation_allocation_limit (gen) - generation_allocation_pointer (gen))));
                        }
                        else
                        {
                            dprintf (1, ("We ran out of space compacting, shouldn't happen"));
                            FATAL_GC_ERROR();
                        }
                    }
                }
            }
            // The window changed, so re-clip it at the oldest pin before trying again.
            loh_set_allocator_next_pin();

            dprintf (1235, ("r: p: %p, l: %p (%zd)",
                generation_allocation_pointer (gen),
                generation_allocation_limit (gen),
                (generation_allocation_limit (gen) - generation_allocation_pointer (gen))));

            goto retry;
        }
    }

    {
        // The request fits: consume padding + object from the window.
        assert (generation_allocation_pointer (gen)>=
                heap_segment_mem (generation_allocation_segment (gen)));
        uint8_t* result = generation_allocation_pointer (gen);
        size_t loh_pad = AlignQword (loh_padding_obj_size);

        generation_allocation_pointer (gen) += size + loh_pad;
        assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen));

        dprintf (1235, ("p: %p, l: %p (%zd)",
            generation_allocation_pointer (gen),
            generation_allocation_limit (gen),
            (generation_allocation_limit (gen) - generation_allocation_pointer (gen))));

        // The padding sits in front of the object, so the object starts loh_pad bytes in.
        assert (result + loh_pad);
        return result + loh_pad;
    }
}

// Reports whether LOH compaction was asked for, either through the
// always-compact setting or through a non-default compaction mode.
BOOL gc_heap::loh_compaction_requested()
{
    // If hard limit is specified GC will automatically decide if LOH needs to be compacted.
    if (loh_compaction_always_p)
    {
        return TRUE;
    }

    return (loh_compaction_mode != loh_compaction_default);
}

// When this GC compacts the LOH under the "compact once" mode and all heaps
// were compacted, switches the mode back to the default (no compaction).
inline
void gc_heap::check_loh_compact_mode (BOOL all_heaps_compacted_p)
{
    const bool one_shot_compaction = settings.loh_compaction &&
                                     (loh_compaction_mode == loh_compaction_once);

    if (one_shot_compaction && all_heaps_compacted_p)
    {
        // The one requested compaction is happening now, so the request is consumed.
        loh_compaction_mode = loh_compaction_default;
    }
}

// Plan phase of LOH compaction: walks every object of the large object
// generation and decides where each marked object will live.
//   - Pinned objects stay put and are recorded in the LOH pin queue.
//   - Other marked objects get a new address from loh_allocate_in_condemned.
// The chosen distance (new address - old address) is stored per object through
// loh_set_node_relocation_distance, and each segment's plan_allocated is set.
//
// Returns FALSE when the pin queue cannot be allocated or grown (the caller
// must then not compact), TRUE otherwise.
BOOL gc_heap::plan_loh()
{
#ifdef FEATURE_EVENT_TRACE
    uint64_t start_time = 0, end_time;
    if (informational_event_enabled_p)
    {
        memset (loh_compact_info, 0, (sizeof (etw_loh_compact_info) * get_num_heaps()));
        start_time = GetHighPrecisionTimeStamp();
    }
#endif //FEATURE_EVENT_TRACE

    // The pin queue is allocated on first use and kept afterwards.
    if (!loh_pinned_queue)
    {
        loh_pinned_queue = new (nothrow) (mark [LOH_PIN_QUEUE_LENGTH]);
        if (!loh_pinned_queue)
        {
            dprintf (1, ("Cannot allocate the LOH pinned queue (%zd bytes), no compaction",
                         LOH_PIN_QUEUE_LENGTH * sizeof (mark)));
            return FALSE;
        }

        loh_pinned_queue_length = LOH_PIN_QUEUE_LENGTH;
    }

    loh_pinned_queue_decay = LOH_PIN_DECAY;

    // Start with an empty queue.
    loh_pinned_queue_tos = 0;
    loh_pinned_queue_bos = 0;

    generation* gen        = large_object_generation;
    heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen));
    _ASSERTE(start_seg != NULL);
    heap_segment* seg      = start_seg;
    uint8_t* o             = get_uoh_start_object (seg, gen);

    dprintf (1235, ("before GC LOH size: %zd, free list: %zd, free obj: %zd\n",
        generation_size (loh_generation),
        generation_free_list_space (gen),
        generation_free_obj_space (gen)));

    // Every segment starts out planned as empty.
    while (seg)
    {
        heap_segment_plan_allocated (seg) = heap_segment_mem (seg);
        seg = heap_segment_next (seg);
    }

    seg = start_seg;

    // We don't need to ever realloc gen3 start so don't touch it.
    // Allocation begins at the start object with an empty window (limit == pointer).
    heap_segment_plan_allocated (seg) = o;
    generation_allocation_pointer (gen) = o;
    generation_allocation_limit (gen) = generation_allocation_pointer (gen);
    generation_allocation_segment (gen) = start_seg;

    // NOTE(review): free_space_start / free_space_end are assigned below but never read
    // in this function.
    uint8_t* free_space_start = o;
    uint8_t* free_space_end = o;
    uint8_t* new_address = 0;

    // Walk all objects of all segments in address order.
    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            seg = heap_segment_next (seg);
            if (seg == 0)
            {
                break;
            }

            o = heap_segment_mem (seg);
        }

        if (marked (o))
        {
            free_space_end = o;
            size_t size = AlignQword (size (o));
            dprintf (1235, ("%p(%zd) M", o, size));

            if (pinned (o))
            {
                // We don't clear the pinned bit yet so we can check in
                // compact phase how big a free object we should allocate
                // in front of the pinned object. We use the reloc address
                // field to store this.
                if (!loh_enque_pinned_plug (o, size))
                {
                    return FALSE;
                }
                // A pinned object does not move.
                new_address = o;
            }
            else
            {
                new_address = loh_allocate_in_condemned (size);
            }

            loh_set_node_relocation_distance (o, (new_address - o));
            dprintf (1235, ("lobj %p-%p -> %p-%p (%zd)", o, (o + size), new_address, (new_address + size), (new_address - o)));

            o = o + size;
            free_space_start = o;
            // Two marked objects are never expected back to back here.
            if (o < heap_segment_allocated (seg))
            {
                assert (!marked (o));
            }
        }
        else
        {
            // Skip the run of unmarked objects up to the next marked one (or the segment end).
            while (o < heap_segment_allocated (seg) && !marked (o))
            {
                dprintf (1235, ("%p(%zd) F (%d)", o, AlignQword (size (o)), ((method_table (o) == g_gc_pFreeObjectMethodTable) ? 1 : 0)));
                o = o + AlignQword (size (o));
            }
        }
    }

    // Drain the pins that allocation never reached: for each one record the gap in
    // front of it and move the allocation pointer past it.
    while (!loh_pinned_plug_que_empty_p())
    {
        mark* m = loh_pinned_plug_of (loh_deque_pinned_plug());
        size_t len = pinned_len (m);
        uint8_t* plug = pinned_plug (m);

        // detect pinned block in different segment (later) than
        // allocation segment
        heap_segment* nseg = heap_segment_rw (generation_allocation_segment (gen));

        // Advance the allocation segment until the plug lies between the allocation
        // pointer and the allocated end of that segment.
        while ((plug < generation_allocation_pointer (gen)) ||
               (plug >= heap_segment_allocated (nseg)))
        {
            assert ((plug < heap_segment_mem (nseg)) ||
                    (plug > heap_segment_reserved (nseg)));
            //adjust the end of the segment to be the end of the plug
            assert (generation_allocation_pointer (gen)>=
                    heap_segment_mem (nseg));
            assert (generation_allocation_pointer (gen)<=
                    heap_segment_committed (nseg));

            heap_segment_plan_allocated (nseg) =
                generation_allocation_pointer (gen);
            //switch allocation segment
            nseg = heap_segment_next_rw (nseg);
            generation_allocation_segment (gen) = nseg;
            //reset the allocation pointer and limits
            generation_allocation_pointer (gen) =
                heap_segment_mem (nseg);
        }

        dprintf (1235, ("SP: %p->%p(%zd)", generation_allocation_pointer (gen), plug, plug - generation_allocation_pointer (gen)));
        // The entry's length field now holds the size of the gap in front of the pin.
        pinned_len (m) = plug - generation_allocation_pointer (gen);
        generation_allocation_pointer (gen) = plug + len;
    }

    // Close the last allocation segment and reset the allocation context.
    heap_segment_plan_allocated (generation_allocation_segment (gen)) = generation_allocation_pointer (gen);
    generation_allocation_pointer (gen) = 0;
    generation_allocation_limit (gen) = 0;

#ifdef FEATURE_EVENT_TRACE
    if (informational_event_enabled_p)
    {
        end_time = GetHighPrecisionTimeStamp();
        loh_compact_info[heap_number].time_plan = limit_time_to_uint32 (end_time - start_time);
    }
#endif //FEATURE_EVENT_TRACE

    return TRUE;
}

// Compact phase of LOH compaction. Walks the large object generation in the
// same order as the plan phase and, for each marked object:
//   - clears its mark (and pinned) bit,
//   - copies it to its planned address unless it is pinned,
//   - threads the padding/gap in front of its final location via thread_gap.
// At each segment end the segment is either unlinked onto freeable_uoh_segment
// (when nothing was planned into it) or trimmed to its planned size.
void gc_heap::compact_loh()
{
    assert (loh_compaction_requested() || heap_hard_limit || conserve_mem_setting || (settings.reason == reason_induced_aggressive));

#ifdef FEATURE_EVENT_TRACE
    uint64_t start_time = 0, end_time;
    if (informational_event_enabled_p)
    {
        start_time = GetHighPrecisionTimeStamp();
    }
#endif //FEATURE_EVENT_TRACE

    generation* gen        = large_object_generation;
    heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen));
    _ASSERTE(start_seg != NULL);
    heap_segment* seg      = start_seg;
    heap_segment* prev_seg = 0;
    uint8_t* o             = get_uoh_start_object (seg, gen);

    // We don't need to ever realloc gen3 start so don't touch it.
    // NOTE(review): free_space_start / free_space_end are assigned below but never read
    // in this function.
    uint8_t* free_space_start = o;
    uint8_t* free_space_end = o;
    // The free list and free space counters are reset here; thread_gap is called
    // for each surviving object below.
    generation_allocator (gen)->clear();
    generation_free_list_space (gen) = 0;
    generation_free_obj_space (gen) = 0;

    // Replay the pin queue from the beginning.
    loh_pinned_queue_bos = 0;

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            // Done with this segment: finalize it before moving to the next one.
            heap_segment* next_seg = heap_segment_next (seg);

            // REGIONS TODO: for regions we can get rid of the start_seg. Just need
            // to update start region accordingly.
            // Nothing was planned into this segment (and it is neither the start segment
            // nor read-only): unlink it and push it onto the freeable list.
            if ((heap_segment_plan_allocated (seg) == heap_segment_mem (seg)) &&
                (seg != start_seg) && !heap_segment_read_only_p (seg))
            {
                dprintf (3, ("Preparing empty large segment %zx", (size_t)seg));
                assert (prev_seg);
                heap_segment_next (prev_seg) = next_seg;
                heap_segment_next (seg) = freeable_uoh_segment;
                freeable_uoh_segment = seg;
#ifdef USE_REGIONS
                update_start_tail_regions (gen, seg, prev_seg, next_seg);
#endif //USE_REGIONS
            }
            else
            {
                if (!heap_segment_read_only_p (seg))
                {
                    // We grew the segment to accommodate allocations.
                    if (heap_segment_plan_allocated (seg) > heap_segment_allocated (seg))
                    {
                        if ((heap_segment_plan_allocated (seg) - plug_skew)  > heap_segment_used (seg))
                        {
                            heap_segment_used (seg) = heap_segment_plan_allocated (seg) - plug_skew;
                        }
                    }

                    // The planned end becomes the real end of the segment.
                    heap_segment_allocated (seg) = heap_segment_plan_allocated (seg);
                    dprintf (3, ("Trimming seg to %p[", heap_segment_allocated (seg)));
                    decommit_heap_segment_pages (seg, 0);
                    dprintf (1236, ("CLOH: seg: %p, alloc: %p, used: %p, committed: %p",
                        seg,
                        heap_segment_allocated (seg),
                        heap_segment_used (seg),
                        heap_segment_committed (seg)));
                    //heap_segment_used (seg) = heap_segment_allocated (seg) - plug_skew;
                    dprintf (1236, ("CLOH: used is set to %p", heap_segment_used (seg)));
                }
                // Only segments that stay in the list can be the predecessor for unlinking.
                prev_seg = seg;
            }

            seg = next_seg;
            if (seg == 0)
                break;
            else
            {
                o = heap_segment_mem (seg);
            }
        }

        if (marked (o))
        {
            free_space_end = o;
            size_t size = AlignQword (size (o));

            // loh_pad: size of the gap/padding that ends where the object finally lives.
            // reloc:   final address of the object (unchanged for pinned objects).
            size_t loh_pad;
            uint8_t* reloc = o;
            clear_marked (o);

            if (pinned (o))
            {
                // We are relying on the fact the pinned objects are always looked at in the same order
                // in plan phase and in compact phase.
                mark* m = loh_pinned_plug_of (loh_deque_pinned_plug());
                uint8_t* plug = pinned_plug (m);
                assert (plug == o);

                // The plan phase stored the size of the gap in front of the pin here.
                loh_pad = pinned_len (m);
                clear_pinned (o);
            }
            else
            {
                loh_pad = AlignQword (loh_padding_obj_size);

                reloc += loh_node_relocation_distance (o);
                gcmemcopy (reloc, o, size, TRUE);
            }

            // Hand the space in front of the object's final location to thread_gap.
            thread_gap ((reloc - loh_pad), loh_pad, gen);

            o = o + size;
            free_space_start = o;
            if (o < heap_segment_allocated (seg))
            {
                assert (!marked (o));
            }
        }
        else
        {
            // Skip the run of unmarked objects up to the next marked one (or the segment end).
            while (o < heap_segment_allocated (seg) && !marked (o))
            {
                o = o + AlignQword (size (o));
            }
        }
    }

#ifdef FEATURE_EVENT_TRACE
    if (informational_event_enabled_p)
    {
        end_time = GetHighPrecisionTimeStamp();
        loh_compact_info[heap_number].time_compact = limit_time_to_uint32 (end_time - start_time);
    }
#endif //FEATURE_EVENT_TRACE

    // Every pin recorded by the plan phase must have been consumed by the walk above.
    assert (loh_pinned_plug_que_empty_p());

    dprintf (1235, ("after GC LOH size: %zd, free list: %zd, free obj: %zd\n\n",
        generation_size (loh_generation),
        generation_free_list_space (gen),
        generation_free_obj_space (gen)));
}

#ifdef FEATURE_EVENT_TRACE
// Relocates one reference slot of a surviving LOH object and updates the
// reference statistics gathered while relocating.
//   pval       - address of the reference slot to relocate.
//   total_refs - incremented once for every slot passed in.
//   zero_refs  - incremented when the slot currently holds a null reference.
inline
void gc_heap::loh_reloc_survivor_helper (uint8_t** pval, size_t& total_refs, size_t& zero_refs)
{
    total_refs++;

    if (*pval == nullptr)
    {
        zero_refs++;
    }

    reloc_survivor_helper (pval);
}
#endif //FEATURE_EVENT_TRACE

void gc_heap::relocate_in_loh_compact()
{
    generation* gen        = large_object_generation;
    heap_segment* seg      = heap_segment_rw (generation_start_segment (gen));
    uint8_t* o              = get_uoh_start_object (seg, gen);

#ifdef FEATURE_EVENT_TRACE
    size_t total_refs = 0;
    size_t zero_refs = 0;
    uint64_t start_time = 0, end_time;
    if (informational_event_enabled_p)
    {
        start_time = GetHighPrecisionTimeStamp();
    }
#endif //FEATURE_EVENT_TRACE

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            seg = heap_segment_next (seg);
            if (seg == 0)
            {
                break;
            }

            o = heap_segment_mem (seg);
        }

        if (marked (o))
        {
            size_t size = AlignQword (size (o));

            check_class_object_demotion (o);
            if (contain_pointers (o))
            {
#ifdef FEATURE_EVENT_TRACE
                if (informational_event_enabled_p)
                {
                    go_through_object_nostart (method_table (o), o, size(o), pval,
                    {
                        loh_reloc_survivor_helper (pval, total_refs, zero_refs);
                    });
                }
                else
#endif //FEATURE_EVENT_TRACE
                {
                    go_through_object_nostart (method_table (o), o, size(o), pval,
                    {
                        reloc_survivor_helper (pval);
                    });
                }
            }
            o = o + size;
            if (o < heap_segment_allocated (seg))
            {
                assert (!marked (o));
            }
        }
        else
        {
            while (o < heap_segment_allocated (seg) && !marked (o))
            {
                o = o + AlignQword (size (o));
            }
        }
    }

#ifdef FEATURE_EVENT_TRACE
    if (informational_event_enabled_p)
    {
        end_time = GetHighPrecisionTimeStamp();
        loh_compact_info[heap_number].time_relocate = limit_time_to_uint32 (end_time - start_time);
        loh_compact_info[heap_number].total_refs = total_refs;
        loh_compact_info[heap_number].zero_refs = zero_refs;
    }
#endif //FEATURE_EVENT_TRACE

    dprintf (1235, ("after GC LOH size: %zd, free list: %zd, free obj: %zd\n\n",
        generation_size (loh_generation),
        generation_free_list_space (gen),
        generation_free_obj_space (gen)));
}

void gc_heap::walk_relocation_for_loh (void* profiling_context, record_surv_fn fn)
{
    generation* gen        = large_object_generation;
    heap_segment* seg      = heap_segment_rw (generation_start_segment (gen));
    uint8_t* o             = get_uoh_start_object (seg, gen);

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            seg = heap_segment_next (seg);
            if (seg == 0)
            {
                break;
            }

            o = heap_segment_mem (seg);
        }

        if (marked (o))
        {
            size_t size = AlignQword (size (o));

            ptrdiff_t reloc = loh_node_relocation_distance (o);

            STRESS_LOG_PLUG_MOVE(o, (o + size), -reloc);

            fn (o, (o + size), reloc, profiling_context, !!settings.compaction, false);

            o = o + size;
            if (o < heap_segment_allocated (seg))
            {
                assert (!marked (o));
            }
        }
        else
        {
            while (o < heap_segment_allocated (seg) && !marked (o))
            {
                o = o + AlignQword (size (o));
            }
        }
    }
}

// Returns TRUE when the brick table entry covering o is 0, FALSE otherwise.
// With MULTIPLE_HEAPS the lookup goes through heap 0's brick table.
BOOL gc_heap::loh_object_p (uint8_t* o)
{
#ifdef MULTIPLE_HEAPS
    gc_heap* hp = gc_heap::g_heaps [0];
    const int entry = hp->brick_table[hp->brick_of (o)];
#else //MULTIPLE_HEAPS
    const int entry = brick_table[brick_of (o)];
#endif //MULTIPLE_HEAPS

    if (entry != 0)
    {
        return FALSE;
    }

    return TRUE;
}
#endif //FEATURE_LOH_COMPACTION

// Updates the caller's plug-tracking state so the current plug is treated as pinned.
//   last_npinned_plug_p    - out: set to FALSE.
//   last_pinned_plug_p     - out: set to TRUE.
//   pinned_plug_p          - out: set to TRUE.
//   ps                     - size to record for the artificially pinned plug.
//   artificial_pinned_size - out: receives ps.
void gc_heap::convert_to_pinned_plug (BOOL& last_npinned_plug_p,
                                      BOOL& last_pinned_plug_p,
                                      BOOL& pinned_plug_p,
                                      size_t ps,
                                      size_t& artificial_pinned_size)
{
    artificial_pinned_size = ps;

    pinned_plug_p = TRUE;
    last_pinned_plug_p = TRUE;
    last_npinned_plug_p = FALSE;
}

// Records gap / pre-plug / post-plug information for the plug that starts at plug_start
// and updates the caller's "what was the last plug" state.
//
// Because we have the artificial pinning, we can't guarantee that pinned and npinned
// plugs are always interleaved, so both last_npinned_plug_p and last_pinned_plug_p are
// consulted.
//
//   plug_start               - start of the current plug.
//   plug_end                 - end of the previous plug.
//   last_npinned_plug_p      - in/out: the previous plug was a non pinned plug.
//   last_pinned_plug_p       - in/out: the previous plug was a pinned plug.
//   last_pinned_plug         - in/out: start of the most recent pinned plug.
//   pinned_plug_p            - out: set to TRUE when the current plug is pinned.
//   last_object_in_last_plug - last object of the previous plug.
//   merge_with_last_pin_p    - out: set to TRUE when the previous plug was also pinned.
//   last_plug_len            - this is only for verification purpose (unused here).
void gc_heap::store_plug_gap_info (uint8_t* plug_start,
                                   uint8_t* plug_end,
                                   BOOL& last_npinned_plug_p,
                                   BOOL& last_pinned_plug_p,
                                   uint8_t*& last_pinned_plug,
                                   BOOL& pinned_plug_p,
                                   uint8_t* last_object_in_last_plug,
                                   BOOL& merge_with_last_pin_p,
                                   size_t last_plug_len)
{
    UNREFERENCED_PARAMETER(last_plug_len);

    // Captured before any of the flags are updated below.
    const bool follows_plug_p = (last_npinned_plug_p || last_pinned_plug_p);

    if (!follows_plug_p)
    {
        // No plug directly in front of this one: record the real gap.
        dprintf (3, ("Free: %zx", (plug_start - plug_end)));
        assert ((plug_start == plug_end) || ((size_t)(plug_start - plug_end) >= Align (min_obj_size)));
        set_gap_size (plug_start, plug_start - plug_end);
    }

    if (!pinned (plug_start))
    {
        if (last_pinned_plug_p)
        {
            // A non pinned plug right after a pinned one: save the post plug info.
            save_post_plug_info (last_pinned_plug, last_object_in_last_plug, plug_start);
            set_gap_size (plug_start, sizeof (gap_reloc_pair));

            verify_pins_with_post_plug_info("after saving post plug info");
        }

        last_npinned_plug_p = TRUE;
        last_pinned_plug_p = FALSE;
        return;
    }

    // The current plug is pinned.
    BOOL save_pre_plug_info_p = (follows_plug_p ? TRUE : FALSE);

    pinned_plug_p = TRUE;
    last_npinned_plug_p = FALSE;

    if (last_pinned_plug_p)
    {
        dprintf (3, ("last plug %p was also pinned, should merge", last_pinned_plug));
        merge_with_last_pin_p = TRUE;
        return;
    }

    last_pinned_plug_p = TRUE;
    last_pinned_plug = plug_start;

    enque_pinned_plug (last_pinned_plug, save_pre_plug_info_p, last_object_in_last_plug);

    if (save_pre_plug_info_p)
    {
#ifdef DOUBLY_LINKED_FL
        if (last_object_in_last_plug == generation_last_free_list_allocated(generation_of(max_generation)))
        {
            saved_pinned_plug_index = mark_stack_tos;
        }
#endif //DOUBLY_LINKED_FL
        set_gap_size (plug_start, sizeof (gap_reloc_pair));
    }
}

// Bumps the per-GC counter for the given data point when GC_CONFIG_DRIVEN is
// defined; otherwise does nothing.
void gc_heap::record_interesting_data_point (interesting_data_point idp)
{
#ifndef GC_CONFIG_DRIVEN
    UNREFERENCED_PARAMETER(idp);
#else
    interesting_data_per_gc[idp]++;
#endif //GC_CONFIG_DRIVEN
}

#ifdef USE_REGIONS
// Dequeues every pinned plug at the front of the pin queue that lies in the consing
// generation's current alloc region (at or after the allocation pointer), moving the
// allocation pointer past each one. Then finalizes the region's plan: attributes the
// skipped pin length, sets the region's plan gen number and its plan allocated.
//   consing_gen  - generation whose allocation segment/pointer is being advanced.
//   plan_gen_num - generation the alloc region is planned into.
void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num)
{
    heap_segment* alloc_region = generation_allocation_segment (consing_gen);
    size_t skipped_pins_len = 0;

    for (;;)
    {
        if (pinned_plug_que_empty_p())
        {
            break;
        }

        uint8_t* oldest_plug = pinned_plug (oldest_pin());

        // Exit when we detect the first pin that's not on the alloc seg anymore.
        if ((oldest_plug < generation_allocation_pointer (consing_gen)) ||
            (oldest_plug >= heap_segment_allocated (alloc_region)))
        {
            break;
        }

        mark* m = pinned_plug_of (deque_pinned_plug());
        uint8_t* plug = pinned_plug (m);
        // Read the length before set_new_pin_info is called on this pin.
        size_t len = pinned_len (m);

        skipped_pins_len += len;
        set_new_pin_info (m, generation_allocation_pointer (consing_gen));
        dprintf (REGIONS_LOG, ("pin %p b: %zx->%zx", plug, brick_of (plug),
            (size_t)(brick_table[brick_of (plug)])));

        generation_allocation_pointer (consing_gen) = plug + len;
    }

    dprintf (REGIONS_LOG, ("finished with alloc region %p, (%s) plan gen -> %d",
        heap_segment_mem (alloc_region),
        (heap_segment_swept_in_plan (alloc_region) ? "SIP" : "non SIP"),
        (heap_segment_swept_in_plan (alloc_region) ?
            heap_segment_plan_gen_num (alloc_region) : plan_gen_num)));

    attribute_pin_higher_gen_alloc (heap_segment_gen_num (alloc_region), plan_gen_num, skipped_pins_len);

    set_region_plan_gen_num_sip (alloc_region, plan_gen_num);
    heap_segment_plan_allocated (alloc_region) = generation_allocation_pointer (consing_gen);
}

// Decides which generation a region is planned into based on its pinned survival,
// and records that decision with set_region_plan_gen_num.
//   region                      - the region being planned.
//   no_pinned_surv_region_count - incremented when the region has no pinned survival.
//   promote_gen1_pins_p         - promote pins when the region is in gen (max_generation - 1).
//   large_pins_p                - promote pins regardless of the region's generation.
// A region with no pinned survival, or whose pins are not promoted, is planned in gen0.
void gc_heap::decide_on_demotion_pin_surv (heap_segment* region, int* no_pinned_surv_region_count, bool promote_gen1_pins_p, bool large_pins_p)
{
    int gen_num = heap_segment_gen_num (region);
    // NOTE(review): stored as int; confirm heap_segment_pinned_survived cannot exceed INT_MAX here.
    int pinned_surv = heap_segment_pinned_survived (region);
    int new_gen_num = 0;
    bool promote_pins_p = large_pins_p;

    if (pinned_surv != 0)
    {
        if ((gen_num == (max_generation - 1)) && promote_gen1_pins_p)
        {
            promote_pins_p = true;
        }

        if (promote_pins_p)
        {
            new_gen_num = get_plan_gen_num (gen_num);
        }

        attribute_pin_higher_gen_alloc (gen_num, new_gen_num, pinned_surv);
    }
    else
    {
        (*no_pinned_surv_region_count)++;
        dprintf (REGIONS_LOG, ("h%d gen%d region %Ix will be empty", heap_number, heap_segment_gen_num (region), heap_segment_mem (region)));
    }

    dprintf (REGIONS_LOG, ("h%d gen%d region pinned surv %d %s -> g%d",
        heap_number, gen_num, pinned_surv, (promote_pins_p ? "PROMOTE" : "DEMOTE"), new_gen_num));

    set_region_plan_gen_num (region, new_gen_num);
}

// Called when planning moves from current_plan_gen_num to next_plan_gen_num.
// Different generations cannot share the same region, so when the two numbers differ
// and the current alloc region has already been used, the remaining pins in that
// region are skipped and the consing generation is switched to a new alloc region.
// If no region can be obtained while promoting, special_sweep_p is turned on.
void gc_heap::process_last_np_surv_region (generation* consing_gen,
                                           int current_plan_gen_num,
                                           int next_plan_gen_num)
{
    heap_segment* alloc_region = generation_allocation_segment (consing_gen);
    uint8_t* consing_gen_alloc_ptr = generation_allocation_pointer (consing_gen);

    // in_range_for_segment is deliberately not used here because alloc pointer/limit can be
    // exactly the same as reserved. size_fit_p in allocate_in_condemned_generations can be
    // used to fit the exact size of a plug at the end of the segment which makes alloc
    // pointer/limit both reserved on exit of that method.
    assert ((consing_gen_alloc_ptr >= heap_segment_mem (alloc_region)) &&
            (consing_gen_alloc_ptr <= heap_segment_reserved (alloc_region)));

    dprintf (REGIONS_LOG, ("h%d PLN: (%s) plan gen%d->%d, consing alloc region: %p, ptr: %p (%Id) (consing gen: %d)",
        heap_number, (settings.promotion ? "promotion" : "no promotion"), current_plan_gen_num, next_plan_gen_num,
        heap_segment_mem (alloc_region),
        generation_allocation_pointer (consing_gen),
        (generation_allocation_pointer (consing_gen) - heap_segment_mem (alloc_region)),
        consing_gen->gen_num));

    // Same plan generation: the current alloc region can keep being used.
    if (current_plan_gen_num == next_plan_gen_num)
    {
        return;
    }

    // If we haven't needed to consume this alloc region at all, we can use it to allocate
    // the new gen.
    if (consing_gen_alloc_ptr == heap_segment_mem (alloc_region))
    {
        dprintf (REGIONS_LOG, ("h%d alloc region %p unused, using it to plan %d",
            heap_number, heap_segment_mem (alloc_region), next_plan_gen_num));
        return;
    }

    // Skip all the pins in this region since we cannot use it to plan the next gen.
    skip_pins_in_alloc_region (consing_gen, current_plan_gen_num);

    heap_segment* next_region = heap_segment_next_non_sip (alloc_region);

    if (next_region)
    {
        dprintf (REGIONS_LOG, ("h%d consing switching to next seg %p in gen%d to alloc in",
            heap_number, heap_segment_mem (next_region), heap_segment_gen_num (next_region)));
    }
    else
    {
        int gen_num = heap_segment_gen_num (alloc_region);

        if (gen_num > 0)
        {
            // Continue with the first region of the next younger generation.
            next_region = generation_start_segment (generation_of (gen_num - 1));
            dprintf (REGIONS_LOG, ("h%d consing switching to next gen%d seg %p",
                heap_number, heap_segment_gen_num (next_region), heap_segment_mem (next_region)));
        }
        else if (!settings.promotion)
        {
            assert (!"ran out of regions for non promotion case??");
        }
        else
        {
            assert (next_plan_gen_num == 0);
            next_region = get_new_region (0);

            if (!next_region)
            {
                dprintf (REGIONS_LOG, ("h%d couldn't get a region to plan gen0, special sweep on",
                    heap_number));
                special_sweep_p = true;
            }
            else
            {
                dprintf (REGIONS_LOG, ("h%d getting a new region for gen0 plan start seg to %p",
                    heap_number, heap_segment_mem (next_region)));

                regions_per_gen[0]++;
                new_gen0_regions_in_plns++;
            }
        }
    }

    if (!next_region)
    {
        assert (special_sweep_p);
        return;
    }

    init_alloc_info (consing_gen, next_region);

    dprintf (REGIONS_LOG, ("h%d consing(%d) alloc seg: %p(%p, %p), ptr: %p, planning gen%d",
        heap_number, consing_gen->gen_num,
        heap_segment_mem (generation_allocation_segment (consing_gen)),
        heap_segment_allocated (generation_allocation_segment (consing_gen)),
        heap_segment_plan_allocated (generation_allocation_segment (consing_gen)),
        generation_allocation_pointer (consing_gen), next_plan_gen_num));
}

// Plans the regions that are still left once the plan phase has finished allocating
// non pinned survivors.
//   current_plan_gen_num - 0, or -1 in the non promotion case (treated as 0 after the
//                          current alloc region has been taken care of).
//   consing_gen          - generation whose allocation segment/pointer tracks where
//                          planning currently is.
//
// Steps, in order:
//   1. (non promotion only) finish the current alloc region if it already has planned
//      allocations and move to the next region.
//   2. Optionally decide whether the remaining gen1 pins get promoted.
//   3. Drain the pin queue, calling decide_on_demotion_pin_surv on each region passed.
//   4. Plan the rest of the regions as empty regions in current_plan_gen_num.
//   5. Make sure each condemned generation ends up with at least one region; if new
//      regions can't be obtained, special_sweep_p is turned on.
void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* consing_gen)
{
    assert ((current_plan_gen_num == 0) || (!settings.promotion && (current_plan_gen_num == -1)));

    if (special_sweep_p)
    {
        assert (pinned_plug_que_empty_p());
    }

    dprintf (REGIONS_LOG, ("h%d PRR: (%s) plan %d: consing alloc seg: %p, ptr: %p",
        heap_number, (settings.promotion ? "promotion" : "no promotion"), current_plan_gen_num,
        heap_segment_mem (generation_allocation_segment (consing_gen)),
        generation_allocation_pointer (consing_gen)));

    if (current_plan_gen_num == -1)
    {
        assert (!settings.promotion);
        current_plan_gen_num = 0;

        // For the non promotion case we need to take care of the alloc region we are on right
        // now if there's already planned allocations in it. We cannot let it go through
        // decide_on_demotion_pin_surv which is only concerned with pinned surv.
        heap_segment* alloc_region = generation_allocation_segment (consing_gen);
        if (generation_allocation_pointer (consing_gen) > heap_segment_mem (alloc_region))
        {
            skip_pins_in_alloc_region (consing_gen, current_plan_gen_num);
            heap_segment* next_region = heap_segment_next_non_sip (alloc_region);

            if ((next_region == 0) && (heap_segment_gen_num (alloc_region) > 0))
            {
                next_region = generation_start_segment (generation_of (heap_segment_gen_num (alloc_region) - 1));
            }

            if (next_region)
            {
                init_alloc_info (consing_gen, next_region);
            }
            else
            {
                assert (pinned_plug_que_empty_p ());
                if (!pinned_plug_que_empty_p ())
                {
                    dprintf (REGIONS_LOG, ("we still have a pin at %Ix but no more regions!?", pinned_plug (oldest_pin ())));
                    GCToOSInterface::DebugBreak ();
                }

                // Instead of checking for this condition we just set the alloc region to 0 so it's easier to check
                // later.
                //
                // set generation_allocation_segment to 0, we know we don't have pins so we will not be going through the while loop below
                //
                generation_allocation_segment (consing_gen) = 0;
                generation_allocation_pointer (consing_gen) = 0;
                generation_allocation_limit (consing_gen) = 0;
            }
        }
    }

    // What has been planned doesn't change at this point. So at this point we know exactly which generation still doesn't
    // have any regions planned and this method is responsible to attempt to plan at least one region in each of those gens.
    // So we look at each of the remaining regions (that are non SIP, since SIP regions have already been planned) and decide
    // which generation it should be planned in.
    //
    // + if we are in a gen1 GC due to cards, we will decide if we need to promote based on the same criteria as segments. And
    //   we never demote large pins to gen0.
    //
    // + we will record how many regions have no survival at all - those will be empty and can be used to plan any non gen0 generation if needed.
    //
    //   Note! We could actually promote a region with non zero pinned survivors to whichever generation we'd like (eg, we could
    //   promote a gen0 region to gen2). However it means we'd need to set cards on those objects because we will not have a chance
    //   later. The benefit of doing this is small in general as when we get into this method, it's very rare we don't already
    //   have planned regions in higher generations. So I don't think it's worth the complexity for now. We may consider it
    //   for the future.
    //
    // + if after we are done walking the remaining regions, we still haven't successfully planned all the needed generations,
    //   we check to see if we have enough in the regions that will be empty (note that we call set_region_plan_gen_num on
    //   these regions which means they are planned in gen0. So we need to make sure at least gen0 has 1 region). If so
    //   thread_final_regions will naturally get one from there so we don't need to call set_region_plan_gen_num to replace the
    //   plan gen num.
    //
    // + if we don't have enough in regions that will be empty, we'll need to ask for new regions and if we can't, we fall back
    //   to the special sweep mode.
    //
    dprintf (REGIONS_LOG, ("h%d planned regions in g2: %d, g1: %d, g0: %d, before processing remaining regions",
        heap_number, planned_regions_per_gen[2], planned_regions_per_gen[1], planned_regions_per_gen[0]));

    // Each percentage is guarded by the survived size of the generation it divides by, so a
    // generation with no survivors reports 0 instead of dividing by zero.
    dprintf (REGIONS_LOG, ("h%d g2: surv %Id(p: %Id, %.2f%%), g1: surv %Id(p: %Id, %.2f%%), g0: surv %Id(p: %Id, %.2f%%)",
        heap_number,
        dd_survived_size (dynamic_data_of (2)), dd_pinned_survived_size (dynamic_data_of (2)),
        (dd_survived_size (dynamic_data_of (2)) ? ((double)dd_pinned_survived_size (dynamic_data_of (2)) * 100.0 / (double)dd_survived_size (dynamic_data_of (2))) : 0),
        dd_survived_size (dynamic_data_of (1)), dd_pinned_survived_size (dynamic_data_of (1)),
        (dd_survived_size (dynamic_data_of (1)) ? ((double)dd_pinned_survived_size (dynamic_data_of (1)) * 100.0 / (double)dd_survived_size (dynamic_data_of (1))) : 0),
        dd_survived_size (dynamic_data_of (0)), dd_pinned_survived_size (dynamic_data_of (0)),
        (dd_survived_size (dynamic_data_of (0)) ? ((double)dd_pinned_survived_size (dynamic_data_of (0)) * 100.0 / (double)dd_survived_size (dynamic_data_of (0))) : 0)));

    int to_be_empty_regions = 0;

    // If decide_promote_gen1_pins_p is true, We need to see if we should promote what's left in gen1 pins. We either promote
    // or demote all that's left. As a future performance improvement, we could sort these regions by the amount of
    // pinned survival and only promote the ones with excessive amounts of survival.
    //
    // First go through the remaining gen1 regions to see if we should demote the remaining pins
    heap_segment* current_region = generation_allocation_segment (consing_gen);
    bool actual_promote_gen1_pins_p = false;

    if (decide_promote_gen1_pins_p)
    {
        size_t gen1_pins_left = 0;
        size_t total_space_to_skip = 0;

        // Sum pinned survival and region size over the remaining non SIP gen1 regions;
        // stop at the first gen0 region.
        while (current_region)
        {
            int gen_num = heap_segment_gen_num (current_region);
            if (gen_num != 0)
            {
                assert (gen_num == (max_generation - 1));

                if (!heap_segment_swept_in_plan (current_region))
                {
                    gen1_pins_left += heap_segment_pinned_survived (current_region);
                    total_space_to_skip += get_region_size (current_region);
                }
            }
            else
            {
                break;
            }

            current_region = heap_segment_next (current_region);
        }

        float pin_frag_ratio = 0.0;
        float pin_surv_ratio = 0.0;

        // Both denominators must be non zero before the ratios are computed.
        if (total_space_to_skip)
        {
            size_t gen1_surv = dd_survived_size (dynamic_data_of (max_generation - 1));
            if (gen1_surv)
            {
                pin_frag_ratio = (float)gen1_pins_left / (float)total_space_to_skip;
                pin_surv_ratio = (float)gen1_pins_left / (float)gen1_surv;
                actual_promote_gen1_pins_p = decide_on_gen1_pin_promotion (pin_frag_ratio, pin_surv_ratio);
            }
        }

#ifdef SIMPLE_DPRINTF
        dprintf (REGIONS_LOG, ("h%d ad_p_d: PL: %zd, SL: %zd, pfr: %.3f, psr: %.3f, prmoote gen1 %d. gen1_pins_left %Id, total surv %Id (p:%Id), total_space %Id",
            heap_number, gen1_pins_left, total_space_to_skip, pin_frag_ratio, pin_surv_ratio, actual_promote_gen1_pins_p, gen1_pins_left,
            dd_survived_size (dynamic_data_of (max_generation - 1)), dd_pinned_survived_size (dynamic_data_of (max_generation - 1)), total_space_to_skip));
#endif
    }

    maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation));

    // Set once a pin of at least demotion_plug_len_th is seen in the region being
    // processed; reset each time we move on to the next region.
    bool large_pins_p = false;

    while (!pinned_plug_que_empty_p())
    {
        uint8_t* oldest_plug = pinned_plug (oldest_pin());

        heap_segment* nseg = heap_segment_rw (generation_allocation_segment (consing_gen));
        dprintf (3, ("h%d oldest pin: %p, consing alloc %p, ptr %p, limit %p",
            heap_number, oldest_plug, heap_segment_mem (nseg),
            generation_allocation_pointer (consing_gen),
            generation_allocation_limit (consing_gen)));

        // Advance region by region until we reach the one containing the oldest pin,
        // finalizing the plan of every region we leave behind.
        while ((oldest_plug < generation_allocation_pointer (consing_gen)) ||
               (oldest_plug >= heap_segment_allocated (nseg)))
        {
            assert ((oldest_plug < heap_segment_mem (nseg)) || (oldest_plug > heap_segment_reserved (nseg)));
            assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (nseg));
            assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (nseg));
            assert (!heap_segment_swept_in_plan (nseg));

            dprintf (3, ("h%d PRR: in loop, seg %p pa %p -> alloc ptr %p, plan gen %d->%d",
                heap_number, heap_segment_mem (nseg),
                heap_segment_plan_allocated (nseg),
                generation_allocation_pointer (consing_gen),
                heap_segment_plan_gen_num (nseg),
                current_plan_gen_num));

            heap_segment_plan_allocated (nseg) = generation_allocation_pointer (consing_gen);
            decide_on_demotion_pin_surv (nseg, &to_be_empty_regions, actual_promote_gen1_pins_p, large_pins_p);

            heap_segment* next_seg = heap_segment_next_non_sip (nseg);

            if ((next_seg == 0) && (heap_segment_gen_num (nseg) > 0))
            {
                next_seg = generation_start_segment (generation_of (heap_segment_gen_num (nseg) - 1));
                dprintf (3, ("h%d PRR: switching to next gen%d start %zx",
                    heap_number, heap_segment_gen_num (next_seg), (size_t)next_seg));
            }

            assert (next_seg != 0);
            nseg = next_seg;
            large_pins_p = false;

            generation_allocation_segment (consing_gen) = nseg;
            generation_allocation_pointer (consing_gen) = heap_segment_mem (nseg);
        }

        mark* m = pinned_plug_of (deque_pinned_plug());
        uint8_t* plug = pinned_plug (m);
        // Read before set_new_pin_info; pinned_len (m) is read again afterwards as the free size.
        size_t len = pinned_len (m);

        if (!large_pins_p)
        {
            large_pins_p = (len >= demotion_plug_len_th);
        }

        set_new_pin_info (m, generation_allocation_pointer (consing_gen));
        size_t free_size = pinned_len (m);
        update_planned_gen0_free_space (free_size, plug);
        dprintf (2, ("h%d plug %p-%p(%zu), free space before %p-%p(%zu)",
            heap_number, plug, (plug + len), len,
            generation_allocation_pointer (consing_gen), plug, free_size));

        generation_allocation_pointer (consing_gen) = plug + len;
        generation_allocation_limit (consing_gen) =
            generation_allocation_pointer (consing_gen);
    }

    current_region = generation_allocation_segment (consing_gen);

    if (special_sweep_p)
    {
        assert ((current_region == 0) || (heap_segment_next_rw (current_region) == 0));
        return;
    }

    dprintf (REGIONS_LOG, ("after going through the rest of regions - regions in g2: %d, g1: %d, g0: %d, to be empty %d now",
        planned_regions_per_gen[2], planned_regions_per_gen[1], planned_regions_per_gen[0], to_be_empty_regions));

    // We may not have gone through the while loop above so we could get an alloc region that's SIP (which normally would be
    // filtered out by get_next_alloc_seg in allocate_in_condemned_generations. But we are not allocating in condemned anymore
    // so make sure we skip if it's SIP.
    current_region = heap_segment_non_sip (current_region);
    dprintf (REGIONS_LOG, ("now current region is %p", (current_region ? heap_segment_mem (current_region) : 0)));

    if (current_region)
    {
        decide_on_demotion_pin_surv (current_region, &to_be_empty_regions, actual_promote_gen1_pins_p, large_pins_p);

        if (!heap_segment_swept_in_plan (current_region))
        {
            heap_segment_plan_allocated (current_region) = generation_allocation_pointer (consing_gen);
            dprintf (REGIONS_LOG, ("h%d setting alloc seg %p plan alloc to %p",
                heap_number, heap_segment_mem (current_region),
                heap_segment_plan_allocated (current_region)));
        }

        dprintf (REGIONS_LOG, ("before going through the rest of empty regions - regions in g2: %d, g1: %d, g0: %d, to be empty %d now",
            planned_regions_per_gen[2], planned_regions_per_gen[1], planned_regions_per_gen[0], to_be_empty_regions));

        heap_segment* region_no_pins = heap_segment_next (current_region);
        int region_no_pins_gen_num = heap_segment_gen_num (current_region);

        // Every remaining non SIP region, in this generation and all younger ones, has no
        // pins left: plan it as an empty region.
        do
        {
            region_no_pins = heap_segment_non_sip (region_no_pins);

            if (region_no_pins)
            {
                set_region_plan_gen_num (region_no_pins, current_plan_gen_num);
                to_be_empty_regions++;

                heap_segment_plan_allocated (region_no_pins) = heap_segment_mem (region_no_pins);
                dprintf (REGIONS_LOG, ("h%d setting empty seg %p(no pins) plan gen to 0, plan alloc to %p",
                    heap_number, heap_segment_mem (region_no_pins),
                    heap_segment_plan_allocated (region_no_pins)));

                region_no_pins = heap_segment_next (region_no_pins);
            }

            if (!region_no_pins)
            {
                if (region_no_pins_gen_num > 0)
                {
                    region_no_pins_gen_num--;
                    region_no_pins = generation_start_segment (generation_of (region_no_pins_gen_num));
                }
                else
                    break;
            }
        } while (region_no_pins);
    }

    if (to_be_empty_regions)
    {
        if (planned_regions_per_gen[0] == 0)
        {
            dprintf (REGIONS_LOG, ("we didn't seem to find any gen to plan gen0 yet we have empty regions?!"));
        }
        assert (planned_regions_per_gen[0]);
    }

    int saved_planned_regions_per_gen[max_generation + 1];
    memcpy (saved_planned_regions_per_gen, planned_regions_per_gen, sizeof (saved_planned_regions_per_gen));

    // Because all the "to be empty regions" were planned in gen0, we should subtract them if we want to repurpose them.
    assert (saved_planned_regions_per_gen[0] >= to_be_empty_regions);
    saved_planned_regions_per_gen[0] -= to_be_empty_regions;

    // Count the condemned generations that still have no planned region.
    int plan_regions_needed = 0;
    for (int gen_idx = settings.condemned_generation; gen_idx >= 0; gen_idx--)
    {
        if (saved_planned_regions_per_gen[gen_idx] == 0)
        {
            dprintf (REGIONS_LOG, ("g%d has 0 planned regions!!!", gen_idx));
            plan_regions_needed++;
        }
    }

    dprintf (REGIONS_LOG, ("we still need %d regions, %d will be empty", plan_regions_needed, to_be_empty_regions));
    if (plan_regions_needed > to_be_empty_regions)
    {
        dprintf (REGIONS_LOG, ("h%d %d regions will be empty but we still need %d regions!!", heap_number, to_be_empty_regions, plan_regions_needed));

        plan_regions_needed -= to_be_empty_regions;

        // The empty regions can't cover the shortfall: try to get new regions for the rest.
        while (plan_regions_needed && get_new_region (0))
        {
            new_regions_in_prr++;
            plan_regions_needed--;
        }

        if (plan_regions_needed > 0)
        {
            dprintf (REGIONS_LOG, ("h%d %d regions short for having at least one region per gen, special sweep on",
                heap_number, plan_regions_needed));
            special_sweep_p = true;
        }
    }

#ifdef _DEBUG
    {
        dprintf (REGIONS_LOG, ("regions in g2: %d[%d], g1: %d[%d], g0: %d[%d]",
            planned_regions_per_gen[2], regions_per_gen[2],
            planned_regions_per_gen[1], regions_per_gen[1],
            planned_regions_per_gen[0], regions_per_gen[0]));

        int total_regions = 0;
        int total_planned_regions = 0;
        for (int i = max_generation; i >= 0; i--)
        {
            total_regions += regions_per_gen[i];
            total_planned_regions += planned_regions_per_gen[i];
        }

        if (total_regions != total_planned_regions)
        {
            dprintf (REGIONS_LOG, ("planned %d regions, saw %d total",
                total_planned_regions, total_regions));
        }
    }
#endif //_DEBUG
}

// Makes sure g_mark_list_piece can hold two arrays of region_count entries for
// every heap (region_count * 2 * get_num_heaps() entries in total).
//
// When the current total is too small the old array is freed and a new one is
// allocated with a per-heap count of max (2 * previous per-heap count, region_count);
// the previous contents are not carried over. If the allocation fails,
// g_mark_list_piece ends up null and both recorded sizes become 0.
//
// In every case the per-heap size is recomputed from the total at the end, so it
// stays consistent when the heap count differs from the one used at allocation time.
void gc_heap::grow_mark_list_piece()
{
    const bool too_small_p = (g_mark_list_piece_total_size < region_count * 2 * get_num_heaps());

    if (too_small_p)
    {
        // The old array is released before the replacement is requested.
        delete[] g_mark_list_piece;

        // Grow to at least twice the previous per-heap entry count.
        size_t alloc_count = max ((g_mark_list_piece_size * 2), region_count);

        // Each heap needs two arrays of alloc_count entries.
        g_mark_list_piece = new (nothrow) uint8_t * *[alloc_count * 2 * get_num_heaps()];

        // A failed (null) allocation is recorded as a size of 0.
        g_mark_list_piece_size = (g_mark_list_piece != nullptr) ? alloc_count : 0;
        g_mark_list_piece_total_size = g_mark_list_piece_size * 2 * get_num_heaps();
    }

    // Re-derive the per-heap size: the number of heaps may have changed while the
    // total was still big enough to skip the reallocation above.
    g_mark_list_piece_size = g_mark_list_piece_total_size / (2 * get_num_heaps());
}

// Snapshots the per-region survived counters: copies region_count entries of
// survived_per_region into old_card_survived_per_region. Does nothing when
// survived_per_region is null. Debug builds additionally log every non-zero
// entry and the value of promoted_bytes (heap_number).
void gc_heap::save_current_survived()
{
    if (survived_per_region == nullptr)
    {
        return;
    }

    memcpy (old_card_survived_per_region, survived_per_region, (region_count * sizeof (size_t)));

#ifdef _DEBUG
    for (size_t idx = 0; idx < region_count; idx++)
    {
        if (survived_per_region[idx] == 0)
        {
            continue;
        }

        dprintf (REGIONS_LOG, ("region#[%3zd]: %zd", idx, survived_per_region[idx]));
    }

    dprintf (REGIONS_LOG, ("global reported %zd", promoted_bytes (heap_number)));
#endif //_DEBUG
}

// Replaces each entry of old_card_survived_per_region with the difference between
// the current survived_per_region value and the value previously stored there
// (see save_current_survived, which stores the earlier snapshot). Does nothing
// when survived_per_region is null. Regions with a non-zero survived count are logged.
void gc_heap::update_old_card_survived()
{
    if (survived_per_region == nullptr)
    {
        return;
    }

    for (size_t idx = 0; idx < region_count; idx++)
    {
        // The slot holds the earlier snapshot on entry and the delta on exit.
        size_t& card_survived = old_card_survived_per_region[idx];
        card_survived = survived_per_region[idx] - card_survived;

        if (survived_per_region[idx] == 0)
        {
            continue;
        }

        dprintf (REGIONS_LOG, ("region#[%3zd]: %zd (card: %zd)",
            idx, survived_per_region[idx], card_survived));
    }
}

// Adds free_size to gen0_pinned_free_space and, if no large chunk has been found
// yet, sets gen0_large_chunk_found when free_size is at least END_SPACE_AFTER_GC_FL.
//   free_size - size of the free space being accounted for.
//   plug      - address reported in the log message only.
void gc_heap::update_planned_gen0_free_space (size_t free_size, uint8_t* plug)
{
    gen0_pinned_free_space += free_size;

    // Once a large chunk has been seen the flag is left alone.
    if (gen0_large_chunk_found)
    {
        return;
    }

    gen0_large_chunk_found = (free_size >= END_SPACE_AFTER_GC_FL);
    if (!gen0_large_chunk_found)
    {
        return;
    }

    dprintf (3, ("h%d found large pin free space: %zd at %p",
        heap_number, free_size, plug));
}

// REGIONS TODO: I wrote this in the same spirit as ephemeral_gen_fit_p but we really should
// take committed into consideration instead of reserved. We could also avoid going through
// the regions again and do this update in plan phase.
void gc_heap::get_gen0_end_plan_space()
{
    end_gen0_region_space = 0;
    for (int gen_idx = settings.condemned_generation; gen_idx >= 0; gen_idx--)
    {
        generation* gen = generation_of (gen_idx);
        heap_segment* region = heap_segment_rw (generation_start_segment (gen));
        while (region)
        {
            if (heap_segment_plan_gen_num (region) == 0)
            {
                size_t end_plan_space = heap_segment_reserved (region) - heap_segment_plan_allocated (region);
                if (!gen0_large_chunk_found)
                {
                    gen0_large_chunk_found = (end_plan_space >= END_SPACE_AFTER_GC_FL);

                    if (gen0_large_chunk_found)
                    {
                        dprintf (REGIONS_LOG, ("h%d found large end space: %zd in region %p",
                            heap_number, end_plan_space, heap_segment_mem (region)));
                    }
                }

                dprintf (REGIONS_LOG, ("h%d found end space: %zd in region %p, total %zd->%zd",
                    heap_number, end_plan_space, heap_segment_mem (region), end_gen0_region_space,
                    (end_gen0_region_space + end_plan_space)));
                end_gen0_region_space += end_plan_space;
            }

            region = heap_segment_next (region);
        }
    }
}

// Returns the sum, over every segment in gen0's segment list, of the space between
// the segment's allocated pointer and its end. The end is heap_segment_reserved
// when type is memory_type_reserved and heap_segment_committed for any other type.
size_t gc_heap::get_gen0_end_space(memory_type type)
{
    size_t total_end_space = 0;

    for (heap_segment* seg = generation_start_segment (generation_of (0));
         seg != nullptr;
         seg = heap_segment_next (seg))
    {
        // TODO -
        // This method can also be called concurrently by full GC notification but
        // there's no synchronization between checking for ephemeral_heap_segment and
        // getting alloc_allocated so for now we just always use heap_segment_allocated.
        //uint8_t* allocated = ((seg == ephemeral_heap_segment) ?
        //                      alloc_allocated : heap_segment_allocated (seg));
        uint8_t* allocated = heap_segment_allocated (seg);

        uint8_t* end;
        if (type == memory_type_reserved)
        {
            end = heap_segment_reserved (seg);
        }
        else
        {
            end = heap_segment_committed (seg);
        }

        total_end_space += end - allocated;
        dprintf (REGIONS_LOG, ("h%d gen0 seg %p, end %p-%p=%zx, end_space->%zd",
            heap_number, heap_segment_mem (seg),
            end, allocated,
            (end - allocated),
            total_end_space));
    }

    return total_end_space;
}
#endif //USE_REGIONS

// Finds where the next marked object at or after x starts, bounded by end.
//
//   x               - address to start searching from.
//   end             - upper bound of the search.
//   use_mark_list   - when set, the answer comes from the mark list instead of
//                     walking objects one by one.
//   mark_list_next  - in/out cursor into the mark list; advanced past every entry
//                     that is <= x (only touched when use_mark_list is set).
//   mark_list_index - one past the last mark list entry to consider.
//
// Returns the address of the next marked object, or end when there is none.
// While current_c_gc_state is c_gc_state_marking (BACKGROUND_GC builds) the range
// that gets skipped is also handed to the background GC helpers: in one batch via
// bgc_clear_batch_mark_array_bits on the mark list path, or object by object via
// background_object_marked (o, TRUE) on the object walk path.
inline
uint8_t* gc_heap::find_next_marked (uint8_t* x, uint8_t* end,
                                    BOOL use_mark_list,
                                    uint8_t**& mark_list_next,
                                    uint8_t** mark_list_index)
{
    if (!use_mark_list)
    {
        // No mark list: step over unmarked objects one at a time.
        uint8_t* cursor = x;
#ifdef BACKGROUND_GC
        if (current_c_gc_state == c_gc_state_marking)
        {
            assert (gc_heap::background_running_p());
            while ((cursor < end) && !marked (cursor))
            {
                dprintf (4, ("-%zx-", (size_t)cursor));
                assert ((size (cursor) > 0));
                background_object_marked (cursor, TRUE);
                cursor += Align (size (cursor));
                Prefetch (cursor);
            }
        }
        else
#endif //BACKGROUND_GC
        {
            while ((cursor < end) && !marked (cursor))
            {
                dprintf (4, ("-%zx-", (size_t)cursor));
                assert ((size (cursor) > 0));
                cursor += Align (size (cursor));
                Prefetch (cursor);
            }
        }
        assert (cursor <= end);
        return cursor;
    }

    // Mark list path. Remember where we started so the skipped range can be
    // reported below (only read in BACKGROUND_GC builds).
    uint8_t* scan_start = x;

    // Drop every mark list entry that is at or before the starting address.
    while ((mark_list_next < mark_list_index) &&
           (*mark_list_next <= x))
    {
        mark_list_next++;
    }

    // Default to end; use the next entry if there is one. With MULTIPLE_HEAPS the
    // entry must also be below end; otherwise it is taken without that comparison.
    uint8_t* next_marked = end;
    if ((mark_list_next < mark_list_index)
#ifdef MULTIPLE_HEAPS
        && (*mark_list_next < end) //for multiple segments
#endif //MULTIPLE_HEAPS
        )
    {
        next_marked = *mark_list_next;
    }

#ifdef BACKGROUND_GC
    if (current_c_gc_state == c_gc_state_marking)
    {
        assert(gc_heap::background_running_p());
        bgc_clear_batch_mark_array_bits (scan_start, next_marked);
    }
#endif //BACKGROUND_GC

    return next_marked;
}

#ifdef FEATURE_EVENT_TRACE
// Zero-fills the whole bucket_info table so the per-bucket counts and sizes
// accumulated by add_plug_in_condemned_info start from 0.
void gc_heap::init_bucket_info()
{
    const size_t table_bytes = sizeof (bucket_info);
    memset (bucket_info, 0, table_bytes);
}

// Records one plug of plug_size bytes in bucket_info. The bucket is the one that
// gen's allocator reports from first_suitable_bucket (plug_size); its count is
// incremented and plug_size is added to its size.
void gc_heap::add_plug_in_condemned_info (generation* gen, size_t plug_size)
{
    uint32_t bucket_index = generation_allocator (gen)->first_suitable_bucket (plug_size);

    auto& bucket = bucket_info[bucket_index];
    bucket.count++;
    bucket.size += plug_size;
}
#endif //FEATURE_EVENT_TRACE

// Stores the segment's current heap_segment_allocated value in
// heap_segment_saved_allocated. Without MULTIPLE_HEAPS an already non-zero saved
// value is kept, so only the first call after the saved value was zeroed takes
// effect; with MULTIPLE_HEAPS the saved value is overwritten on every call.
inline void save_allocated(heap_segment* seg)
{
#ifndef MULTIPLE_HEAPS
    if (heap_segment_saved_allocated (seg) != nullptr)
    {
        return;
    }
#endif // !MULTIPLE_HEAPS

    heap_segment_saved_allocated (seg) = heap_segment_allocated (seg);
}

void gc_heap::plan_phase (int condemned_gen_number)
{
    size_t old_gen2_allocated = 0;
    size_t old_gen2_size = 0;

    if (condemned_gen_number == (max_generation - 1))
    {
        old_gen2_allocated = generation_free_list_allocated (generation_of (max_generation));
        old_gen2_size = generation_size (max_generation);
    }

    assert (settings.concurrent == FALSE);

    dprintf (2,(ThreadStressLog::gcStartPlanMsg(), heap_number,
                condemned_gen_number, settings.promotion ? 1 : 0));

    generation*  condemned_gen1 = generation_of (condemned_gen_number);

    BOOL use_mark_list = FALSE;
#ifdef GC_CONFIG_DRIVEN
    dprintf (3, ("total number of marked objects: %zd (%zd)",
                 (mark_list_index - &mark_list[0]), (mark_list_end - &mark_list[0])));

    if (mark_list_index >= (mark_list_end + 1))
    {
        mark_list_index = mark_list_end + 1;
#ifndef MULTIPLE_HEAPS // in Server GC, we check for mark list overflow in sort_mark_list
        mark_list_overflow = true;
#endif
    }
#else //GC_CONFIG_DRIVEN
    dprintf (3, ("mark_list length: %zd",
                 (mark_list_index - &mark_list[0])));
#endif //GC_CONFIG_DRIVEN

    if ((condemned_gen_number < max_generation) &&
        (mark_list_index <= mark_list_end))
    {
#ifndef MULTIPLE_HEAPS
#ifdef USE_VXSORT
        do_vxsort (mark_list, mark_list_index - mark_list, slow, shigh);
#else //USE_VXSORT
        _sort (&mark_list[0], mark_list_index - 1, 0);
#endif //USE_VXSORT

        dprintf (3, ("using mark list at GC #%zd", (size_t)settings.gc_index));
        //verify_qsort_array (&mark_list[0], mark_list_index-1);
#endif //!MULTIPLE_HEAPS
        use_mark_list = TRUE;
        get_gc_data_per_heap()->set_mechanism_bit(gc_mark_list_bit);
    }
    else
    {
        dprintf (3, ("mark_list not used"));
    }

#ifdef FEATURE_BASICFREEZE
    sweep_ro_segments();
#endif //FEATURE_BASICFREEZE

#ifndef MULTIPLE_HEAPS
    int condemned_gen_index = get_stop_generation_index (condemned_gen_number);
    for (; condemned_gen_index <= condemned_gen_number; condemned_gen_index++)
    {
        generation* current_gen = generation_of (condemned_gen_index);
        if (shigh != (uint8_t*)0)
        {
            heap_segment* seg = heap_segment_rw (generation_start_segment (current_gen));
            _ASSERTE(seg != NULL);

            heap_segment* fseg = seg;
            do
            {
                heap_segment_saved_allocated(seg) = 0;
                if (in_range_for_segment (slow, seg))
                {
                    uint8_t* start_unmarked = 0;
#ifdef USE_REGIONS
                    start_unmarked = heap_segment_mem (seg);
#else //USE_REGIONS
                    if (seg == fseg)
                    {
                        uint8_t* o = generation_allocation_start (current_gen);
                        o += get_soh_start_obj_len (o);
                        if (slow > o)
                        {
                            start_unmarked = o;
                            assert ((slow - o) >= (int)Align (min_obj_size));
                        }
                    }
                    else
                    {
                        assert (condemned_gen_number == max_generation);
                        start_unmarked = heap_segment_mem (seg);
                    }
#endif //USE_REGIONS

                    if (start_unmarked)
                    {
                        size_t unmarked_size = slow - start_unmarked;

                        if (unmarked_size > 0)
                        {
#ifdef BACKGROUND_GC
                            if (current_c_gc_state == c_gc_state_marking)
                            {
                                bgc_clear_batch_mark_array_bits (start_unmarked, slow);
                            }
#endif //BACKGROUND_GC
                            make_unused_array (start_unmarked, unmarked_size);
                        }
                    }
                }
                if (in_range_for_segment (shigh, seg))
                {
#ifdef BACKGROUND_GC
                    if (current_c_gc_state == c_gc_state_marking)
                    {
                        bgc_clear_batch_mark_array_bits ((shigh + Align (size (shigh))), heap_segment_allocated (seg));
                    }
#endif //BACKGROUND_GC
                    save_allocated(seg);
                    heap_segment_allocated (seg) = shigh + Align (size (shigh));
                }
                // test if the segment is in the range of [slow, shigh]
                if (!((heap_segment_reserved (seg) >= slow) &&
                    (heap_segment_mem (seg) <= shigh)))
                {
#ifdef BACKGROUND_GC
                    if (current_c_gc_state == c_gc_state_marking)
                    {
#ifdef USE_REGIONS
                        bgc_clear_batch_mark_array_bits (heap_segment_mem (seg), heap_segment_allocated (seg));
#else //USE_REGIONS
                        // This cannot happen with segments as we'd only be on the ephemeral segment if BGC is in
                        // progress and it's guaranteed shigh/slow would be in range of the ephemeral segment.
                        assert (!"cannot happen with segments");
#endif //USE_REGIONS
                    }
#endif //BACKGROUND_GC
                    save_allocated(seg);
                    // shorten it to minimum
                    heap_segment_allocated (seg) =  heap_segment_mem (seg);
                }
                seg = heap_segment_next_rw (seg);
            } while (seg);
        }
        else
        {
            heap_segment* seg = heap_segment_rw (generation_start_segment (current_gen));

            _ASSERTE(seg != NULL);

            heap_segment* sseg = seg;
            do
            {
                heap_segment_saved_allocated(seg) = 0;
                uint8_t* start_unmarked = heap_segment_mem (seg);
#ifndef USE_REGIONS
                // shorten it to minimum
                if (seg == sseg)
                {
                    // no survivors make all generations look empty
                    uint8_t* o = generation_allocation_start (current_gen);
                    o += get_soh_start_obj_len (o);
                    start_unmarked = o;
                }
#endif //!USE_REGIONS

#ifdef BACKGROUND_GC
                if (current_c_gc_state == c_gc_state_marking)
                {
                    bgc_clear_batch_mark_array_bits (start_unmarked, heap_segment_allocated (seg));
                }
#endif //BACKGROUND_GC
                save_allocated(seg);
                heap_segment_allocated (seg) = start_unmarked;

                seg = heap_segment_next_rw (seg);
            } while (seg);
        }
    }
#endif //MULTIPLE_HEAPS

    heap_segment*  seg1 = heap_segment_rw (generation_start_segment (condemned_gen1));

    _ASSERTE(seg1 != NULL);

    uint8_t*  end = heap_segment_allocated (seg1);
    uint8_t*  first_condemned_address = get_soh_start_object (seg1, condemned_gen1);
    uint8_t*  x = first_condemned_address;

#ifdef USE_REGIONS
    memset (regions_per_gen, 0, sizeof (regions_per_gen));
    memset (planned_regions_per_gen, 0, sizeof (planned_regions_per_gen));
    memset (sip_maxgen_regions_per_gen, 0, sizeof (sip_maxgen_regions_per_gen));
    memset (reserved_free_regions_sip, 0, sizeof (reserved_free_regions_sip));
    int pinned_survived_region = 0;
    uint8_t** mark_list_index = nullptr;
    uint8_t** mark_list_next = nullptr;
    if (use_mark_list)
        mark_list_next = get_region_mark_list (use_mark_list, x, end, &mark_list_index);
#else // USE_REGIONS
    assert (!marked (x));
    uint8_t** mark_list_next = &mark_list[0];
#endif //USE_REGIONS
    uint8_t*  plug_end = x;
    uint8_t*  tree = 0;
    size_t  sequence_number = 0;
    uint8_t*  last_node = 0;
    size_t  current_brick = brick_of (x);
    BOOL  allocate_in_condemned = ((condemned_gen_number == max_generation)||
                                   (settings.promotion == FALSE));
    int  active_old_gen_number = condemned_gen_number;
    int  active_new_gen_number = (allocate_in_condemned ? condemned_gen_number:
                                  (1 + condemned_gen_number));

    generation*  older_gen = 0;
    generation* consing_gen = condemned_gen1;
    alloc_list  r_free_list [MAX_SOH_BUCKET_COUNT];

    size_t r_free_list_space = 0;
    size_t r_free_obj_space = 0;
    size_t r_older_gen_free_list_allocated = 0;
    size_t r_older_gen_condemned_allocated = 0;
    size_t r_older_gen_end_seg_allocated = 0;
    uint8_t*  r_allocation_pointer = 0;
    uint8_t*  r_allocation_limit = 0;
    uint8_t* r_allocation_start_region = 0;
    heap_segment*  r_allocation_segment = 0;
#ifdef FREE_USAGE_STATS
    size_t r_older_gen_free_space[NUM_GEN_POWER2];
#endif //FREE_USAGE_STATS

    if ((condemned_gen_number < max_generation))
    {
        older_gen = generation_of (min ((int)max_generation, 1 + condemned_gen_number));
        generation_allocator (older_gen)->copy_to_alloc_list (r_free_list);

        r_free_list_space = generation_free_list_space (older_gen);
        r_free_obj_space = generation_free_obj_space (older_gen);
#ifdef FREE_USAGE_STATS
        memcpy (r_older_gen_free_space, older_gen->gen_free_spaces, sizeof (r_older_gen_free_space));
#endif //FREE_USAGE_STATS
        generation_allocate_end_seg_p (older_gen) = FALSE;

#ifdef DOUBLY_LINKED_FL
        if (older_gen->gen_num == max_generation)
        {
            generation_set_bgc_mark_bit_p (older_gen) = FALSE;
            generation_last_free_list_allocated (older_gen) = 0;
        }
#endif //DOUBLY_LINKED_FL

        r_older_gen_free_list_allocated = generation_free_list_allocated (older_gen);
        r_older_gen_condemned_allocated = generation_condemned_allocated (older_gen);
        r_older_gen_end_seg_allocated = generation_end_seg_allocated (older_gen);
        r_allocation_limit = generation_allocation_limit (older_gen);
        r_allocation_pointer = generation_allocation_pointer (older_gen);
        r_allocation_start_region = generation_allocation_context_start_region (older_gen);
        r_allocation_segment = generation_allocation_segment (older_gen);

#ifdef USE_REGIONS
        if (older_gen->gen_num == max_generation)
        {
            check_seg_gen_num (r_allocation_segment);
        }
#endif //USE_REGIONS

        heap_segment* start_seg = heap_segment_rw (generation_start_segment (older_gen));

        _ASSERTE(start_seg != NULL);

#ifdef USE_REGIONS
        heap_segment* skip_seg = 0;

        assert (generation_allocation_pointer (older_gen) == 0);
        assert (generation_allocation_limit (older_gen) == 0);
#else //USE_REGIONS
        heap_segment* skip_seg = ephemeral_heap_segment;
        if (start_seg != ephemeral_heap_segment)
        {
            assert (condemned_gen_number == (max_generation - 1));
        }
#endif //USE_REGIONS
        if (start_seg != skip_seg)
        {
            while (start_seg && (start_seg != skip_seg))
            {
                assert (heap_segment_allocated (start_seg) >=
                        heap_segment_mem (start_seg));
                assert (heap_segment_allocated (start_seg) <=
                        heap_segment_reserved (start_seg));
                heap_segment_plan_allocated (start_seg) =
                    heap_segment_allocated (start_seg);
                start_seg = heap_segment_next_rw (start_seg);
            }
        }
    }

    //reset all of the segment's plan_allocated
    {
        int condemned_gen_index1 = get_stop_generation_index (condemned_gen_number);
        for (; condemned_gen_index1 <= condemned_gen_number; condemned_gen_index1++)
        {
            generation* current_gen = generation_of (condemned_gen_index1);
            heap_segment*  seg2 = heap_segment_rw (generation_start_segment (current_gen));
            _ASSERTE(seg2 != NULL);

            while (seg2)
            {
#ifdef USE_REGIONS
                regions_per_gen[condemned_gen_index1]++;
                dprintf (REGIONS_LOG, ("h%d PS: gen%d %p-%p (%d, surv: %d), %d regions",
                    heap_number, condemned_gen_index1,
                    heap_segment_mem (seg2), heap_segment_allocated (seg2),
                    (heap_segment_allocated (seg2) - heap_segment_mem (seg2)),
                    (int)heap_segment_survived (seg2), regions_per_gen[condemned_gen_index1]));
#endif //USE_REGIONS

                heap_segment_plan_allocated (seg2) =
                    heap_segment_mem (seg2);
                seg2 = heap_segment_next_rw (seg2);
            }
        }
    }

    int  condemned_gn = condemned_gen_number;

    int bottom_gen = 0;
    init_free_and_plug();

    while (condemned_gn >= bottom_gen)
    {
        generation*  condemned_gen2 = generation_of (condemned_gn);
        generation_allocator (condemned_gen2)->clear();
        generation_free_list_space (condemned_gen2) = 0;
        generation_free_obj_space (condemned_gen2) = 0;
        generation_allocation_size (condemned_gen2) = 0;
        generation_condemned_allocated (condemned_gen2) = 0;
        generation_sweep_allocated (condemned_gen2) = 0;
        generation_free_list_allocated(condemned_gen2) = 0;
        generation_end_seg_allocated (condemned_gen2) = 0;
        generation_pinned_allocation_sweep_size (condemned_gen2) = 0;
        generation_pinned_allocation_compact_size (condemned_gen2) = 0;
#ifdef FREE_USAGE_STATS
        generation_pinned_free_obj_space (condemned_gen2) = 0;
        generation_allocated_in_pinned_free (condemned_gen2) = 0;
        generation_allocated_since_last_pin (condemned_gen2) = 0;
#endif //FREE_USAGE_STATS

#ifndef USE_REGIONS
        generation_plan_allocation_start (condemned_gen2) = 0;
#endif //!USE_REGIONS
        generation_allocation_segment (condemned_gen2) =
            heap_segment_rw (generation_start_segment (condemned_gen2));

        _ASSERTE(generation_allocation_segment(condemned_gen2) != NULL);

#ifdef USE_REGIONS
        generation_allocation_pointer (condemned_gen2) =
            heap_segment_mem (generation_allocation_segment (condemned_gen2));
#else //USE_REGIONS
        if (generation_start_segment (condemned_gen2) != ephemeral_heap_segment)
        {
            generation_allocation_pointer (condemned_gen2) =
                heap_segment_mem (generation_allocation_segment (condemned_gen2));
        }
        else
        {
            generation_allocation_pointer (condemned_gen2) = generation_allocation_start (condemned_gen2);
        }
#endif //USE_REGIONS
        generation_allocation_limit (condemned_gen2) = generation_allocation_pointer (condemned_gen2);
        generation_allocation_context_start_region (condemned_gen2) = generation_allocation_pointer (condemned_gen2);

        condemned_gn--;
    }

    BOOL allocate_first_generation_start = FALSE;

    if (allocate_in_condemned)
    {
        allocate_first_generation_start = TRUE;
    }

    dprintf(3,( " From %zx to %zx", (size_t)x, (size_t)end));

    // Normally we always demote pins left after plan allocation, but if we are doing a gen1 only because of cards, it means
    // we need to decide if we will promote these pins from gen1.
    decide_promote_gen1_pins_p = (settings.promotion &&
        (settings.condemned_generation == (max_generation - 1)) &&
        gen_to_condemn_reasons.is_only_condition(gen_low_card_p));

#ifdef USE_REGIONS
    if (should_sweep_in_plan (seg1))
    {
        sweep_region_in_plan (seg1, use_mark_list, mark_list_next, mark_list_index);
        x = end;
    }
#else
    demotion_low = MAX_PTR;
    demotion_high = heap_segment_allocated (ephemeral_heap_segment);

    total_ephemeral_size = 0;
#endif //!USE_REGIONS

    print_free_and_plug ("BP");

#ifndef USE_REGIONS
    for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++)
    {
        generation* temp_gen = generation_of (gen_idx);

        dprintf (2, ("gen%d start %p, plan start %p",
            gen_idx,
            generation_allocation_start (temp_gen),
            generation_plan_allocation_start (temp_gen)));
    }
#endif //!USE_REGIONS

#ifdef FEATURE_EVENT_TRACE
    // When verbose level is enabled we want to record some info about gen2 FL usage during gen1 GCs.
    // We record the bucket info for the largest FL items and plugs that we have to allocate in condemned.
    bool record_fl_info_p = (EVENT_ENABLED (GCFitBucketInfo) && (condemned_gen_number == (max_generation - 1)));
    size_t recorded_fl_info_size = 0;
    if (record_fl_info_p)
        init_bucket_info();
    bool fire_pinned_plug_events_p = EVENT_ENABLED(PinPlugAtGCTime);
#endif //FEATURE_EVENT_TRACE

    size_t last_plug_len = 0;

#ifdef DOUBLY_LINKED_FL
    gen2_removed_no_undo = 0;
    saved_pinned_plug_index = INVALID_SAVED_PINNED_PLUG_INDEX;
#endif //DOUBLY_LINKED_FL

    while (1)
    {
        if (x >= end)
        {
            if (!use_mark_list)
            {
                assert (x == end);
            }

#ifdef USE_REGIONS
            if (heap_segment_swept_in_plan (seg1))
            {
                assert (heap_segment_gen_num (seg1) == active_old_gen_number);
                dynamic_data* dd_active_old = dynamic_data_of (active_old_gen_number);
                dd_survived_size (dd_active_old) += heap_segment_survived (seg1);
                dprintf (REGIONS_LOG, ("region %p-%p SIP",
                    heap_segment_mem (seg1), heap_segment_allocated (seg1)));
            }
            else
#endif //USE_REGIONS
            {
                assert (heap_segment_allocated (seg1) == end);
                save_allocated(seg1);
                heap_segment_allocated (seg1) = plug_end;
                current_brick = update_brick_table (tree, current_brick, x, plug_end);
                dprintf (REGIONS_LOG, ("region %p-%p(%p) non SIP",
                    heap_segment_mem (seg1), heap_segment_allocated (seg1),
                    heap_segment_plan_allocated (seg1)));
                dprintf (3, ("end of seg: new tree, sequence# 0"));
                sequence_number = 0;
                tree = 0;
            }

#ifdef USE_REGIONS
            heap_segment_pinned_survived (seg1) = pinned_survived_region;
            dprintf (REGIONS_LOG, ("h%d setting seg %p pin surv: %d",
                heap_number, heap_segment_mem (seg1), pinned_survived_region));
            pinned_survived_region = 0;
            if (heap_segment_mem (seg1) == heap_segment_allocated (seg1))
            {
                num_regions_freed_in_sweep++;
            }
#endif //USE_REGIONS

            if (heap_segment_next_rw (seg1))
            {
                seg1 = heap_segment_next_rw (seg1);
                end = heap_segment_allocated (seg1);
                plug_end = x = heap_segment_mem (seg1);
                current_brick = brick_of (x);
#ifdef USE_REGIONS
                if (use_mark_list)
                    mark_list_next = get_region_mark_list (use_mark_list, x, end, &mark_list_index);

                if (should_sweep_in_plan (seg1))
                {
                    sweep_region_in_plan (seg1, use_mark_list, mark_list_next, mark_list_index);
                    x = end;
                }
#endif //USE_REGIONS
                dprintf(3,( " From %zx to %zx", (size_t)x, (size_t)end));
                continue;
            }
            else
            {
#ifdef USE_REGIONS
                // We have a few tasks here when we run out of regions to go through for the
                // active_old_gen_number -
                //
                // + decide on which pins to skip
                // + set the planned gen for the regions we process here
                // + set the consing gen's alloc ptr/limit
                // + decide on the new active_old_gen_number (which is just the current one - 1)
                // + decide on the new active_new_gen_number (which depends on settings.promotion)
                //
                // Important differences between process_last_np_surv_region and process_ephemeral_boundaries
                // - it's guaranteed we would ask to allocate gen1 start for promotion and gen0
                //   start for non promotion case.
                // - consing_gen is never changed. In fact we really don't need consing_gen, we just
                //   need the alloc ptr/limit pair and the alloc seg.
                //   TODO : should just get rid of consing_gen.
                // These make things more regular and easier to keep track of.
                //
                // Also I'm doing everything here instead of having to have separate code to go
                // through the left over pins after the main loop in plan phase.
                int saved_active_new_gen_number = active_new_gen_number;
                BOOL saved_allocate_in_condemned = allocate_in_condemned;

                dprintf (REGIONS_LOG, ("h%d finished planning gen%d regions into gen%d, alloc_in_condemned: %d",
                    heap_number, active_old_gen_number, active_new_gen_number, allocate_in_condemned));

                if (active_old_gen_number <= (settings.promotion ? (max_generation - 1) : max_generation))
                {
                    dprintf (REGIONS_LOG, ("h%d active old: %d, new: %d->%d, allocate_in_condemned %d->1",
                        heap_number, active_old_gen_number,
                        active_new_gen_number, (active_new_gen_number - 1),
                        allocate_in_condemned));
                    active_new_gen_number--;
                    allocate_in_condemned = TRUE;
                }

                if (active_new_gen_number >= 0)
                {
                    process_last_np_surv_region (consing_gen, saved_active_new_gen_number, active_new_gen_number);
                }

                if (active_old_gen_number == 0)
                {
                    // We need to process the pins on the remaining regions if any.
                    process_remaining_regions (active_new_gen_number, consing_gen);
                    break;
                }
                else
                {
                    active_old_gen_number--;

                    seg1 = heap_segment_rw (generation_start_segment (generation_of (active_old_gen_number)));
                    end = heap_segment_allocated (seg1);
                    plug_end = x = heap_segment_mem (seg1);
                    current_brick = brick_of (x);

                    if (use_mark_list)
                        mark_list_next = get_region_mark_list (use_mark_list, x, end, &mark_list_index);

                    if (should_sweep_in_plan (seg1))
                    {
                        sweep_region_in_plan (seg1, use_mark_list, mark_list_next, mark_list_index);
                        x = end;
                    }

                    dprintf (REGIONS_LOG,("h%d switching to gen%d start region %p, %p-%p",
                        heap_number, active_old_gen_number, heap_segment_mem (seg1), x, end));
                    continue;
                }
#else //USE_REGIONS
                break;
#endif //USE_REGIONS
            }
        }

        BOOL last_npinned_plug_p = FALSE;
        BOOL last_pinned_plug_p = FALSE;

        // last_pinned_plug is the beginning of the last pinned plug. If we merge a plug into a pinned
        // plug we do not change the value of last_pinned_plug. This happens with artificially pinned plugs -
        // it can be merged with a previous pinned plug and a pinned plug after it can be merged with it.
        uint8_t* last_pinned_plug = 0;
        size_t num_pinned_plugs_in_plug = 0;

        uint8_t* last_object_in_plug = 0;

        while ((x < end) && marked (x))
        {
            uint8_t*  plug_start = x;
            uint8_t*  saved_plug_end = plug_end;
            BOOL   pinned_plug_p = FALSE;
            BOOL   npin_before_pin_p = FALSE;
            BOOL   saved_last_npinned_plug_p = last_npinned_plug_p;
            uint8_t*  saved_last_object_in_plug = last_object_in_plug;
            BOOL   merge_with_last_pin_p = FALSE;

            size_t added_pinning_size = 0;
            size_t artificial_pinned_size = 0;

            store_plug_gap_info (plug_start, plug_end, last_npinned_plug_p, last_pinned_plug_p,
                                 last_pinned_plug, pinned_plug_p, last_object_in_plug,
                                 merge_with_last_pin_p, last_plug_len);

#ifdef FEATURE_STRUCTALIGN
            int requiredAlignment = ((CObjectHeader*)plug_start)->GetRequiredAlignment();
            size_t alignmentOffset = OBJECT_ALIGNMENT_OFFSET;
#endif // FEATURE_STRUCTALIGN

            {
                uint8_t* xl = x;
                while ((xl < end) && marked (xl) && (pinned (xl) == pinned_plug_p))
                {
                    assert (xl < end);
                    if (pinned(xl))
                    {
                        clear_pinned (xl);
                    }
#ifdef FEATURE_STRUCTALIGN
                    else
                    {
                        int obj_requiredAlignment = ((CObjectHeader*)xl)->GetRequiredAlignment();
                        if (obj_requiredAlignment > requiredAlignment)
                        {
                            requiredAlignment = obj_requiredAlignment;
                            alignmentOffset = xl - plug_start + OBJECT_ALIGNMENT_OFFSET;
                        }
                    }
#endif // FEATURE_STRUCTALIGN

                    clear_marked (xl);

                    dprintf(4, ("+%zx+", (size_t)xl));
                    assert ((size (xl) > 0));
                    assert ((size (xl) <= loh_size_threshold));

                    last_object_in_plug = xl;

                    xl = xl + Align (size (xl));
                    Prefetch (xl);
                }

                BOOL next_object_marked_p = ((xl < end) && marked (xl));

                if (pinned_plug_p)
                {
                    // If it is pinned we need to extend to the next marked object as we can't use part of
                    // a pinned object to make the artificial gap (unless the last 3 ptr sized words are all
                    // references but for now I am just using the next non pinned object for that).
                    if (next_object_marked_p)
                    {
                        clear_marked (xl);
                        last_object_in_plug = xl;
                        size_t extra_size = Align (size (xl));
                        xl = xl + extra_size;
                        added_pinning_size = extra_size;
                    }
                }
                else
                {
                    if (next_object_marked_p)
                        npin_before_pin_p = TRUE;
                }

                assert (xl <= end);
                x = xl;
            }
            dprintf (3, ( "%zx[", (size_t)plug_start));
            plug_end = x;
            size_t ps = plug_end - plug_start;
            last_plug_len = ps;
            dprintf (3, ( "%zx[(%zx)", (size_t)x, ps));
            uint8_t*  new_address = 0;

            if (!pinned_plug_p)
            {
                if (allocate_in_condemned &&
                    (settings.condemned_generation == max_generation) &&
                    (ps > OS_PAGE_SIZE))
                {
                    ptrdiff_t reloc = plug_start - generation_allocation_pointer (consing_gen);
                    //reloc should be >= 0 except when we relocate
                    //across segments and the dest seg is higher than the src

                    if ((ps > (8*OS_PAGE_SIZE)) &&
                        (reloc > 0) &&
                        ((size_t)reloc < (ps/16)))
                    {
                        dprintf (3, ("Pinning %zx; reloc would have been: %zx",
                                     (size_t)plug_start, reloc));
                        // The last plug couldn't have been a npinned plug or it would have
                        // included this plug.
                        assert (!saved_last_npinned_plug_p);

                        if (last_pinned_plug)
                        {
                            dprintf (3, ("artificially pinned plug merged with last pinned plug"));
                            merge_with_last_pin_p = TRUE;
                        }
                        else
                        {
                            enque_pinned_plug (plug_start, FALSE, 0);
                            last_pinned_plug = plug_start;
                        }

                        convert_to_pinned_plug (last_npinned_plug_p, last_pinned_plug_p, pinned_plug_p,
                                                ps, artificial_pinned_size);
                    }
                }
            }

#ifndef USE_REGIONS
            if (allocate_first_generation_start)
            {
                allocate_first_generation_start = FALSE;
                plan_generation_start (condemned_gen1, consing_gen, plug_start);
                assert (generation_plan_allocation_start (condemned_gen1));
            }

            if (seg1 == ephemeral_heap_segment)
            {
                process_ephemeral_boundaries (plug_start, active_new_gen_number,
                                              active_old_gen_number,
                                              consing_gen,
                                              allocate_in_condemned);
            }
#endif //!USE_REGIONS

            dprintf (3, ("adding %zd to gen%d surv", ps, active_old_gen_number));

            dynamic_data* dd_active_old = dynamic_data_of (active_old_gen_number);
            dd_survived_size (dd_active_old) += ps;

            BOOL convert_to_pinned_p = FALSE;
            BOOL allocated_in_older_p = FALSE;

            if (!pinned_plug_p)
            {
#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN)
                dd_num_npinned_plugs (dd_active_old)++;
#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN

                add_gen_plug (active_old_gen_number, ps);

                if (allocate_in_condemned)
                {
                    verify_pins_with_post_plug_info("before aic");

                    new_address =
                        allocate_in_condemned_generations (consing_gen,
                                                           ps,
                                                           active_old_gen_number,
#ifdef SHORT_PLUGS
                                                           &convert_to_pinned_p,
                                                           (npin_before_pin_p ? plug_end : 0),
                                                           seg1,
#endif //SHORT_PLUGS
                                                           plug_start REQD_ALIGN_AND_OFFSET_ARG);
                    verify_pins_with_post_plug_info("after aic");
                }
                else
                {
                    new_address = allocate_in_older_generation (older_gen, ps, active_old_gen_number, plug_start REQD_ALIGN_AND_OFFSET_ARG);

                    if (new_address != 0)
                    {
                        allocated_in_older_p = TRUE;
                        if (settings.condemned_generation == (max_generation - 1))
                        {
                            dprintf (3, (" NA: %p-%p -> %zx, %zx (%zx)",
                                plug_start, plug_end,
                                (size_t)new_address, (size_t)new_address + (plug_end - plug_start),
                                (size_t)(plug_end - plug_start)));
                        }
                    }
                    else
                    {
                        if (generation_allocator(older_gen)->discard_if_no_fit_p())
                        {
                            allocate_in_condemned = TRUE;
                        }

                        new_address = allocate_in_condemned_generations (consing_gen, ps, active_old_gen_number,
#ifdef SHORT_PLUGS
                                                                         &convert_to_pinned_p,
                                                                         (npin_before_pin_p ? plug_end : 0),
                                                                         seg1,
#endif //SHORT_PLUGS
                                                                         plug_start REQD_ALIGN_AND_OFFSET_ARG);
                    }
                }

#ifdef FEATURE_EVENT_TRACE
                if (record_fl_info_p && !allocated_in_older_p)
                {
                    add_plug_in_condemned_info (older_gen, ps);
                    recorded_fl_info_size += ps;
                }
#endif //FEATURE_EVENT_TRACE

                if (convert_to_pinned_p)
                {
                    assert (last_npinned_plug_p != FALSE);
                    assert (last_pinned_plug_p == FALSE);
                    convert_to_pinned_plug (last_npinned_plug_p, last_pinned_plug_p, pinned_plug_p,
                                            ps, artificial_pinned_size);
                    enque_pinned_plug (plug_start, FALSE, 0);
                    last_pinned_plug = plug_start;
                }
                else
                {
                    if (!new_address)
                    {
                        //verify that we are at the end of the ephemeral segment
                        assert (generation_allocation_segment (consing_gen) ==
                                ephemeral_heap_segment);
                        //verify that we are near the end
                        assert ((generation_allocation_pointer (consing_gen) + Align (ps)) <
                                heap_segment_allocated (ephemeral_heap_segment));
                        assert ((generation_allocation_pointer (consing_gen) + Align (ps)) >
                                (heap_segment_allocated (ephemeral_heap_segment) + Align (min_obj_size)));
                    }
                    else
                    {
                        dprintf (3, (ThreadStressLog::gcPlanPlugMsg(),
                            (size_t)(node_gap_size (plug_start)),
                            plug_start, plug_end, (size_t)new_address, (size_t)(plug_start - new_address),
                                (size_t)new_address + ps, ps,
                                (is_plug_padded (plug_start) ? 1 : 0), x,
                                (allocated_in_older_p ? "O" : "C")));

#ifdef SHORT_PLUGS
                        if (is_plug_padded (plug_start))
                        {
                            dprintf (3, ("%p was padded", plug_start));
                            dd_padding_size (dd_active_old) += Align (min_obj_size);
                        }
#endif //SHORT_PLUGS
                    }
                }
            }

            if (pinned_plug_p)
            {
#ifdef FEATURE_EVENT_TRACE
                if (fire_pinned_plug_events_p)
                {
                    FIRE_EVENT(PinPlugAtGCTime, plug_start, plug_end,
                               (merge_with_last_pin_p ? 0 : (uint8_t*)node_gap_size (plug_start)));
                }
#endif //FEATURE_EVENT_TRACE

                if (merge_with_last_pin_p)
                {
                    merge_with_last_pinned_plug (last_pinned_plug, ps);
                }
                else
                {
                    assert (last_pinned_plug == plug_start);
                    set_pinned_info (plug_start, ps, consing_gen);
                }

                new_address = plug_start;

                dprintf (3, (ThreadStressLog::gcPlanPinnedPlugMsg(),
                            (size_t)(node_gap_size (plug_start)), (size_t)plug_start,
                            (size_t)plug_end, ps,
                            (merge_with_last_pin_p ? 1 : 0)));

                dprintf (3, ("adding %zd to gen%d pinned surv", plug_end - plug_start, active_old_gen_number));

                size_t pinned_plug_size = plug_end - plug_start;
#ifdef USE_REGIONS
                pinned_survived_region += (int)pinned_plug_size;
#endif //USE_REGIONS

                dd_pinned_survived_size (dd_active_old) += pinned_plug_size;
                dd_added_pinned_size (dd_active_old) += added_pinning_size;
                dd_artificial_pinned_survived_size (dd_active_old) += artificial_pinned_size;

#ifndef USE_REGIONS
                if (decide_promote_gen1_pins_p && (active_old_gen_number == (max_generation - 1)))
                {
                    last_gen1_pin_end = plug_end;
                }
#endif //!USE_REGIONS
            }

#ifdef _DEBUG
            // detect forward allocation in the same segment
            assert (!((new_address > plug_start) &&
                (new_address < heap_segment_reserved (seg1))));
#endif //_DEBUG

            if (!merge_with_last_pin_p)
            {
                if (current_brick != brick_of (plug_start))
                {
                    current_brick = update_brick_table (tree, current_brick, plug_start, saved_plug_end);
                    sequence_number = 0;
                    tree = 0;
                }

                set_node_relocation_distance (plug_start, (new_address - plug_start));
                if (last_node && (node_relocation_distance (last_node) ==
                                  (node_relocation_distance (plug_start) +
                                   (ptrdiff_t)node_gap_size (plug_start))))
                {
                    //dprintf(3,( " Lb"));
                    dprintf (3, ("%p Lb", plug_start));
                    set_node_left (plug_start);
                }
                if (0 == sequence_number)
                {
                    dprintf (2, ("sn: 0, tree is set to %p", plug_start));
                    tree = plug_start;
                }

                verify_pins_with_post_plug_info("before insert node");

                tree = insert_node (plug_start, ++sequence_number, tree, last_node);
                dprintf (3, ("tree is %p (b: %zx) after insert_node(lc: %p, rc: %p)",
                    tree, brick_of (tree),
                    (tree + node_left_child (tree)), (tree + node_right_child (tree))));
                last_node = plug_start;

#ifdef _DEBUG
                // If the plug right before us is a pinned plug, we should save this gap info.
                if (!pinned_plug_p)
                {
                    if (mark_stack_tos > 0)
                    {
                        mark& m = mark_stack_array[mark_stack_tos - 1];
                        if (m.has_post_plug_info())
                        {
                            uint8_t* post_plug_info_start = m.saved_post_plug_info_start;
                            size_t* current_plug_gap_start = (size_t*)(plug_start - sizeof (plug_and_gap));
                            if ((uint8_t*)current_plug_gap_start == post_plug_info_start)
                            {
                                dprintf (3, ("Ginfo: %zx, %zx, %zx",
                                    *current_plug_gap_start, *(current_plug_gap_start + 1),
                                    *(current_plug_gap_start + 2)));
                                memcpy (&(m.saved_post_plug_debug), current_plug_gap_start, sizeof (gap_reloc_pair));
                            }
                        }
                    }
                }
#endif //_DEBUG

                verify_pins_with_post_plug_info("after insert node");
            }
        }

        if (num_pinned_plugs_in_plug > 1)
        {
            dprintf (3, ("more than %zd pinned plugs in this plug", num_pinned_plugs_in_plug));
        }

        x = find_next_marked (x, end, use_mark_list, mark_list_next, mark_list_index);
    }

#ifndef USE_REGIONS
    while (!pinned_plug_que_empty_p())
    {
        if (settings.promotion)
        {
            uint8_t* pplug = pinned_plug (oldest_pin());
            if (in_range_for_segment (pplug, ephemeral_heap_segment))
            {
                consing_gen = ensure_ephemeral_heap_segment (consing_gen);
                //allocate all of the generation gaps
                while (active_new_gen_number > 0)
                {
                    active_new_gen_number--;

                    if (active_new_gen_number == (max_generation - 1))
                    {
                        maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation));
                        if (decide_promote_gen1_pins_p)
                            advance_pins_for_demotion (consing_gen);
                    }

                    generation* gen = generation_of (active_new_gen_number);
                    plan_generation_start (gen, consing_gen, 0);

                    if (demotion_low == MAX_PTR)
                    {
                        demotion_low = pplug;
                        dprintf (3, ("end plan: dlow->%p", demotion_low));
                    }

                    dprintf (2, ("(%d)gen%d plan start: %zx",
                                  heap_number, active_new_gen_number, (size_t)generation_plan_allocation_start (gen)));
                    assert (generation_plan_allocation_start (gen));
                }
            }
        }

        if (pinned_plug_que_empty_p())
            break;

        size_t  entry = deque_pinned_plug();
        mark*  m = pinned_plug_of (entry);
        uint8_t*  plug = pinned_plug (m);
        size_t  len = pinned_len (m);

        // detect pinned block in different segment (later) than
        // allocation segment
        heap_segment* nseg = heap_segment_rw (generation_allocation_segment (consing_gen));

        while ((plug < generation_allocation_pointer (consing_gen)) ||
               (plug >= heap_segment_allocated (nseg)))
        {
            assert ((plug < heap_segment_mem (nseg)) ||
                    (plug > heap_segment_reserved (nseg)));
            //adjust the end of the segment to be the end of the plug
            assert (generation_allocation_pointer (consing_gen)>=
                    heap_segment_mem (nseg));
            assert (generation_allocation_pointer (consing_gen)<=
                    heap_segment_committed (nseg));

            heap_segment_plan_allocated (nseg) =
                generation_allocation_pointer (consing_gen);
            //switch allocation segment
            nseg = heap_segment_next_rw (nseg);
            generation_allocation_segment (consing_gen) = nseg;
            //reset the allocation pointer and limits
            generation_allocation_pointer (consing_gen) =
                heap_segment_mem (nseg);
        }

        set_new_pin_info (m, generation_allocation_pointer (consing_gen));
        dprintf (2, ("pin %p b: %zx->%zx", plug, brick_of (plug),
            (size_t)(brick_table[brick_of (plug)])));

        generation_allocation_pointer (consing_gen) = plug + len;
        generation_allocation_limit (consing_gen) =
            generation_allocation_pointer (consing_gen);
        //Add the size of the pinned plug to the right pinned allocations
        //find out which gen this pinned plug came from
        int frgn = object_gennum (plug);
        if ((frgn != (int)max_generation) && settings.promotion)
        {
            generation_pinned_allocation_sweep_size ((generation_of (frgn +1))) += len;
        }
    }

    plan_generation_starts (consing_gen);
#endif //!USE_REGIONS

    descr_generations ("AP");

    print_free_and_plug ("AP");

    {
#ifdef SIMPLE_DPRINTF
        for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++)
        {
            generation* temp_gen = generation_of (gen_idx);
            dynamic_data* temp_dd = dynamic_data_of (gen_idx);

            int added_pinning_ratio = 0;
            int artificial_pinned_ratio = 0;

            if (dd_pinned_survived_size (temp_dd) != 0)
            {
                added_pinning_ratio = (int)((float)dd_added_pinned_size (temp_dd) * 100 / (float)dd_pinned_survived_size (temp_dd));
                artificial_pinned_ratio = (int)((float)dd_artificial_pinned_survived_size (temp_dd) * 100 / (float)dd_pinned_survived_size (temp_dd));
            }

            size_t padding_size =
#ifdef SHORT_PLUGS
                dd_padding_size (temp_dd);
#else
                0;
#endif //SHORT_PLUGS
            dprintf (2, ("gen%d: NON PIN alloc: %zd, pin com: %zd, sweep: %zd, surv: %zd, pinsurv: %zd(%d%% added, %d%% art), np surv: %zd, pad: %zd",
                gen_idx,
                generation_allocation_size (temp_gen),
                generation_pinned_allocation_compact_size (temp_gen),
                generation_pinned_allocation_sweep_size (temp_gen),
                dd_survived_size (temp_dd),
                dd_pinned_survived_size (temp_dd),
                added_pinning_ratio,
                artificial_pinned_ratio,
                (dd_survived_size (temp_dd) - dd_pinned_survived_size (temp_dd)),
                padding_size));

#ifndef USE_REGIONS
            dprintf (1, ("gen%d: %p, %p(%zd)",
                gen_idx,
                generation_allocation_start (temp_gen),
                generation_plan_allocation_start (temp_gen),
                (size_t)(generation_plan_allocation_start (temp_gen) - generation_allocation_start (temp_gen))));
#endif //USE_REGIONS
        }
#endif //SIMPLE_DPRINTF
    }

    if (settings.condemned_generation == (max_generation - 1 ))
    {
        generation* older_gen = generation_of (settings.condemned_generation + 1);
        size_t rejected_free_space = generation_free_obj_space (older_gen) - r_free_obj_space;
        size_t free_list_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated;
        size_t end_seg_allocated = generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated;
        size_t condemned_allocated = generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated;

        size_t growth = end_seg_allocated + condemned_allocated;

        if (growth > 0)
        {
            dprintf (1, ("gen2 grew %zd (end seg alloc: %zd, condemned alloc: %zd",
                         growth, end_seg_allocated, condemned_allocated));

            maxgen_size_inc_p = true;
        }
        else
        {
            dprintf (1, ("gen2 didn't grow (end seg alloc: %zd, , condemned alloc: %zd, gen1 c alloc: %zd",
                         end_seg_allocated, condemned_allocated,
                         generation_condemned_allocated (generation_of (max_generation - 1))));
        }

        dprintf (2, ("older gen's free alloc: %zd->%zd, seg alloc: %zd->%zd, condemned alloc: %zd->%zd",
                    r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen),
                    r_older_gen_end_seg_allocated, generation_end_seg_allocated (older_gen),
                    r_older_gen_condemned_allocated, generation_condemned_allocated (older_gen)));

        dprintf (2, ("this GC did %zd free list alloc(%zd bytes free space rejected)",
            free_list_allocated, rejected_free_space));

        maxgen_size_increase* maxgen_size_info = &(get_gc_data_per_heap()->maxgen_size_info);
        maxgen_size_info->free_list_allocated = free_list_allocated;
        maxgen_size_info->free_list_rejected = rejected_free_space;
        maxgen_size_info->end_seg_allocated = end_seg_allocated;
        maxgen_size_info->condemned_allocated = condemned_allocated;
        maxgen_size_info->pinned_allocated = maxgen_pinned_compact_before_advance;
        maxgen_size_info->pinned_allocated_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)) - maxgen_pinned_compact_before_advance;

#ifdef FREE_USAGE_STATS
        int free_list_efficiency = 0;
        if ((free_list_allocated + rejected_free_space) != 0)
            free_list_efficiency = (int)(((float) (free_list_allocated) / (float)(free_list_allocated + rejected_free_space)) * (float)100);

        size_t running_free_list_efficiency = generation_allocator_efficiency_percent(older_gen);

        dprintf (1, ("gen%d free list alloc effi: %d%%, current effi: %zu%%",
                    older_gen->gen_num,
                    free_list_efficiency, running_free_list_efficiency));

        dprintf (1, ("gen2 free list change"));
        for (int j = 0; j < NUM_GEN_POWER2; j++)
        {
            dprintf (1, ("[h%d][#%zd]: 2^%d: F: %zd->%zd(%zd), P: %zd",
                heap_number,
                settings.gc_index,
                (j + 10), r_older_gen_free_space[j], older_gen->gen_free_spaces[j],
                (ptrdiff_t)(r_older_gen_free_space[j] - older_gen->gen_free_spaces[j]),
                (generation_of(max_generation - 1))->gen_plugs[j]));
        }
#endif //FREE_USAGE_STATS
    }

    size_t fragmentation =
        generation_fragmentation (generation_of (condemned_gen_number),
                                  consing_gen,
                                  heap_segment_allocated (ephemeral_heap_segment));

    dprintf (2,("Fragmentation: %zd", fragmentation));
    dprintf (2,("---- End of Plan phase ----"));

    // We may update write barrier code.  We assume here EE has been suspended if we are on a GC thread.
    assert(IsGCInProgress());

    BOOL should_expand = FALSE;
    BOOL should_compact= FALSE;

#ifndef USE_REGIONS
    ephemeral_promotion = FALSE;
#endif //!USE_REGIONS

#ifdef HOST_64BIT
    if ((!settings.concurrent) &&
#ifdef USE_REGIONS
        !special_sweep_p &&
#endif //USE_REGIONS
        !provisional_mode_triggered &&
        ((condemned_gen_number < max_generation) &&
         ((settings.gen0_reduction_count > 0) || (settings.entry_memory_load >= 95))))
    {
        dprintf (GTC_LOG, ("gen0 reduction count is %d, condemning %d, mem load %d",
                     settings.gen0_reduction_count,
                     condemned_gen_number,
                     settings.entry_memory_load));
        should_compact = TRUE;

        get_gc_data_per_heap()->set_mechanism (gc_heap_compact,
            ((settings.gen0_reduction_count > 0) ? compact_fragmented_gen0 : compact_high_mem_load));

#ifndef USE_REGIONS
        if ((condemned_gen_number >= (max_generation - 1)) &&
            dt_low_ephemeral_space_p (tuning_deciding_expansion))
        {
            dprintf (GTC_LOG, ("Not enough space for all ephemeral generations with compaction"));
            should_expand = TRUE;
        }
#endif //!USE_REGIONS
    }
    else
#endif // HOST_64BIT
    {
        should_compact = decide_on_compacting (condemned_gen_number, fragmentation, should_expand);
    }

    if (condemned_gen_number == max_generation)
    {
#ifdef FEATURE_LOH_COMPACTION
        if (settings.loh_compaction)
        {
            should_compact = TRUE;
            get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_loh_forced);
        }
        else
#endif //FEATURE_LOH_COMPACTION
        {
            GCToEEInterface::DiagWalkUOHSurvivors(__this, loh_generation);
            sweep_uoh_objects (loh_generation);
        }

        GCToEEInterface::DiagWalkUOHSurvivors(__this, poh_generation);
        sweep_uoh_objects (poh_generation);
    }
    else
    {
        settings.loh_compaction = FALSE;
    }

#ifdef MULTIPLE_HEAPS
#ifndef USE_REGIONS
    new_heap_segment = NULL;
#endif //!USE_REGIONS

    if (should_compact && should_expand)
        gc_policy = policy_expand;
    else if (should_compact)
        gc_policy = policy_compact;
    else
        gc_policy = policy_sweep;

    //vote for result of should_compact
    dprintf (3, ("Joining for compaction decision"));
    gc_t_join.join(this, gc_join_decide_on_compaction);
    if (gc_t_join.joined())
    {
#ifndef USE_REGIONS
        //safe place to delete large heap segments
        if (condemned_gen_number == max_generation)
        {
            for (int i = 0; i < n_heaps; i++)
            {
                g_heaps [i]->rearrange_uoh_segments ();
            }
        }
#endif //!USE_REGIONS
        if (maxgen_size_inc_p && provisional_mode_triggered
#ifdef BACKGROUND_GC
            && !is_bgc_in_progress()
#endif //BACKGROUND_GC
            )
        {
            pm_trigger_full_gc = true;
            dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2"));
        }
        else
        {
#ifdef USE_REGIONS
            bool joined_special_sweep_p = false;
#else
            settings.demotion = FALSE;
#endif //USE_REGIONS
            int pol_max = policy_sweep;
#ifdef GC_CONFIG_DRIVEN
            BOOL is_compaction_mandatory = FALSE;
#endif //GC_CONFIG_DRIVEN

            int i;
            for (i = 0; i < n_heaps; i++)
            {
                if (pol_max < g_heaps[i]->gc_policy)
                    pol_max = policy_compact;
#ifdef USE_REGIONS
                joined_special_sweep_p |= g_heaps[i]->special_sweep_p;
#else
                // set the demotion flag if any of the heaps has demotion
                if (g_heaps[i]->demotion_high >= g_heaps[i]->demotion_low)
                {
                    (g_heaps[i]->get_gc_data_per_heap())->set_mechanism_bit (gc_demotion_bit);
                    settings.demotion = TRUE;
                }
#endif //USE_REGIONS

#ifdef GC_CONFIG_DRIVEN
                if (!is_compaction_mandatory)
                {
                    int compact_reason = (g_heaps[i]->get_gc_data_per_heap())->get_mechanism (gc_heap_compact);
                    if (compact_reason >= 0)
                    {
                        if (gc_heap_compact_reason_mandatory_p[compact_reason])
                            is_compaction_mandatory = TRUE;
                    }
                }
#endif //GC_CONFIG_DRIVEN
            }

#ifdef GC_CONFIG_DRIVEN
            if (!is_compaction_mandatory)
            {
                // If compaction is not mandatory we can feel free to change it to a sweeping GC.
                // Note that we may want to change this to only checking every so often instead of every single GC.
                if (should_do_sweeping_gc (pol_max >= policy_compact))
                {
                    pol_max = policy_sweep;
                }
                else
                {
                    if (pol_max == policy_sweep)
                        pol_max = policy_compact;
                }
            }
#endif //GC_CONFIG_DRIVEN

            for (i = 0; i < n_heaps; i++)
            {
#ifdef USE_REGIONS
                g_heaps[i]->special_sweep_p = joined_special_sweep_p;
                if (joined_special_sweep_p)
                {
                    g_heaps[i]->gc_policy = policy_sweep;
                }
                else
#endif //USE_REGIONS
                if (pol_max > g_heaps[i]->gc_policy)
                    g_heaps[i]->gc_policy = pol_max;
#ifndef USE_REGIONS
                //get the segment while we are serialized
                if (g_heaps[i]->gc_policy == policy_expand)
                {
                    g_heaps[i]->new_heap_segment =
                        g_heaps[i]->soh_get_segment_to_expand();
                    if (!g_heaps[i]->new_heap_segment)
                    {
                        set_expand_in_full_gc (condemned_gen_number);
                        //we are out of memory, cancel the expansion
                        g_heaps[i]->gc_policy = policy_compact;
                    }
                }
#endif //!USE_REGIONS
            }

            BOOL is_full_compacting_gc = FALSE;

            if ((gc_policy >= policy_compact) && (condemned_gen_number == max_generation))
            {
                full_gc_counts[gc_type_compacting]++;
                is_full_compacting_gc = TRUE;
            }

            for (i = 0; i < n_heaps; i++)
            {
#ifndef USE_REGIONS
                if (g_gc_card_table!= g_heaps[i]->card_table)
                {
                    g_heaps[i]->copy_brick_card_table();
                }
#endif //!USE_REGIONS
                if (is_full_compacting_gc)
                {
                    g_heaps[i]->loh_alloc_since_cg = 0;
                }
            }
        }

#ifdef FEATURE_EVENT_TRACE
        if (informational_event_enabled_p)
        {
            gc_time_info[time_sweep] = GetHighPrecisionTimeStamp();
            gc_time_info[time_plan] = gc_time_info[time_sweep] - gc_time_info[time_plan];
        }
#endif //FEATURE_EVENT_TRACE

        dprintf(3, ("Starting all gc threads after compaction decision"));
        gc_t_join.restart();
    }

    should_compact = (gc_policy >= policy_compact);
    should_expand  = (gc_policy >= policy_expand);

#else //MULTIPLE_HEAPS
#ifndef USE_REGIONS
    //safe place to delete large heap segments
    if (condemned_gen_number == max_generation)
    {
        rearrange_uoh_segments ();
    }
#endif //!USE_REGIONS
    if (maxgen_size_inc_p && provisional_mode_triggered
#ifdef BACKGROUND_GC
        && !is_bgc_in_progress()
#endif //BACKGROUND_GC
        )
    {
        pm_trigger_full_gc = true;
        dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2"));
    }
    else
    {
#ifndef USE_REGIONS
        // for regions it was already set when we set plan_gen_num for regions.
        settings.demotion = ((demotion_high >= demotion_low) ? TRUE : FALSE);
        if (settings.demotion)
            get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit);
#endif //!USE_REGIONS

#ifdef GC_CONFIG_DRIVEN
        BOOL is_compaction_mandatory = FALSE;
        int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact);
        if (compact_reason >= 0)
            is_compaction_mandatory = gc_heap_compact_reason_mandatory_p[compact_reason];

        if (!is_compaction_mandatory)
        {
            if (should_do_sweeping_gc (should_compact))
                should_compact = FALSE;
            else
                should_compact = TRUE;
        }
#endif //GC_CONFIG_DRIVEN

        if (should_compact && (condemned_gen_number == max_generation))
        {
            full_gc_counts[gc_type_compacting]++;
            loh_alloc_since_cg = 0;
        }
    }

#ifdef FEATURE_EVENT_TRACE
    if (informational_event_enabled_p)
    {
        gc_time_info[time_sweep] = GetHighPrecisionTimeStamp();
        gc_time_info[time_plan] = gc_time_info[time_sweep] - gc_time_info[time_plan];
    }
#endif //FEATURE_EVENT_TRACE

#ifdef USE_REGIONS
    if (special_sweep_p)
    {
        should_compact = FALSE;
    }
#endif //!USE_REGIONS
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_LOH_COMPACTION
    loh_compacted_p = FALSE;
#endif //FEATURE_LOH_COMPACTION

    if (condemned_gen_number == max_generation)
    {
#ifdef FEATURE_LOH_COMPACTION
        if (settings.loh_compaction)
        {
            if (should_compact && plan_loh())
            {
                loh_compacted_p = TRUE;
            }
            else
            {
                GCToEEInterface::DiagWalkUOHSurvivors(__this, loh_generation);
                sweep_uoh_objects (loh_generation);
            }
        }
        else
        {
            if (loh_pinned_queue)
            {
                loh_pinned_queue_decay--;

                if (!loh_pinned_queue_decay)
                {
                    delete[] loh_pinned_queue;
                    loh_pinned_queue = 0;
                }
            }
        }
#endif //FEATURE_LOH_COMPACTION
    }

    if (!pm_trigger_full_gc && pm_stress_on && provisional_mode_triggered)
    {
        if ((settings.condemned_generation == (max_generation - 1)) &&
            ((settings.gc_index % 5) == 0)
#ifdef BACKGROUND_GC
            && !is_bgc_in_progress()
#endif //BACKGROUND_GC
            )
        {
            pm_trigger_full_gc = true;
        }
    }

    if (settings.condemned_generation == (max_generation - 1))
    {
        if (provisional_mode_triggered)
        {
            if (should_expand)
            {
                should_expand = FALSE;
                dprintf (GTC_LOG, ("h%d in PM cannot expand", heap_number));
            }
        }

        if (pm_trigger_full_gc)
        {
            should_compact = FALSE;
            dprintf (GTC_LOG, ("h%d PM doing sweeping", heap_number));
        }
    }

    if (should_compact)
    {
        dprintf (2,( "**** Doing Compacting GC ****"));

#if defined(USE_REGIONS) && defined(BACKGROUND_GC)
        if (should_update_end_mark_size())
        {
            background_soh_size_end_mark += generation_end_seg_allocated (older_gen) -
                                            r_older_gen_end_seg_allocated;
        }
#endif //USE_REGIONS && BACKGROUND_GC

#ifndef USE_REGIONS
        if (should_expand)
        {
#ifndef MULTIPLE_HEAPS
            heap_segment* new_heap_segment = soh_get_segment_to_expand();
#endif //!MULTIPLE_HEAPS
            if (new_heap_segment)
            {
                consing_gen = expand_heap(condemned_gen_number,
                                          consing_gen,
                                          new_heap_segment);
            }

            // If we couldn't get a new segment, or we were able to
            // reserve one but no space to commit, we couldn't
            // expand heap.
            if (ephemeral_heap_segment != new_heap_segment)
            {
                set_expand_in_full_gc (condemned_gen_number);
                should_expand = FALSE;
            }
        }
#endif //!USE_REGIONS

        generation_allocation_limit (condemned_gen1) =
            generation_allocation_pointer (condemned_gen1);
        if ((condemned_gen_number < max_generation))
        {
            generation_allocator (older_gen)->commit_alloc_list_changes();

            // Fix the allocation area of the older generation
            fix_older_allocation_area (older_gen);

#ifdef FEATURE_EVENT_TRACE
            if (record_fl_info_p)
            {
                // For plugs allocated in condemned we kept track of each one but only fire the
                // event for buckets with non zero items.
                uint16_t non_zero_buckets = 0;
                for (uint16_t bucket_index = 0; bucket_index < NUM_GEN2_ALIST; bucket_index++)
                {
                    if (bucket_info[bucket_index].count != 0)
                    {
                        if (bucket_index != non_zero_buckets)
                        {
                            bucket_info[non_zero_buckets].set (bucket_index,
                                                            bucket_info[bucket_index].count,
                                                            bucket_info[bucket_index].size);
                        }
                        else
                        {
                            bucket_info[bucket_index].index = bucket_index;
                        }
                        non_zero_buckets++;
                    }
                }

                if (non_zero_buckets)
                {
                    FIRE_EVENT(GCFitBucketInfo,
                            (uint16_t)etw_bucket_kind::plugs_in_condemned,
                            recorded_fl_info_size,
                            non_zero_buckets,
                            (uint32_t)(sizeof (etw_bucket_info)),
                            (void *)bucket_info);
                    init_bucket_info();
                }

                // We want to get an idea of the sizes of free items in the top 25% of the free list
                // for gen2 (to be accurate - we stop as soon as the size we count exceeds 25%. This
                // is just so that if we have a really big free item we will still count that one).
                // The idea is we want to see if they are all in a few big ones or many smaller ones.
                // To limit the amount of time we spend counting, we stop once we have counted the
                // top percentage, or exceeded max_etw_item_count items.
                size_t max_size_to_count = generation_free_list_space (older_gen) / 4;
                non_zero_buckets =
                    generation_allocator (older_gen)->count_largest_items (bucket_info,
                                                                        max_size_to_count,
                                                                        max_etw_item_count,
                                                                        &recorded_fl_info_size);
                if (non_zero_buckets)
                {
                    FIRE_EVENT(GCFitBucketInfo,
                            (uint16_t)etw_bucket_kind::largest_fl_items,
                            recorded_fl_info_size,
                            non_zero_buckets,
                            (uint32_t)(sizeof (etw_bucket_info)),
                            (void *)bucket_info);
                }
            }
#endif //FEATURE_EVENT_TRACE
        }
#ifndef USE_REGIONS
        assert (generation_allocation_segment (consing_gen) ==
                ephemeral_heap_segment);
#endif //!USE_REGIONS

        GCToEEInterface::DiagWalkSurvivors(__this, true);

        relocate_phase (condemned_gen_number, first_condemned_address);
        compact_phase (condemned_gen_number, first_condemned_address,
                       (!settings.demotion && settings.promotion));
        fix_generation_bounds (condemned_gen_number, consing_gen);
        assert (generation_allocation_limit (youngest_generation) ==
                generation_allocation_pointer (youngest_generation));

#ifndef USE_REGIONS
        if (condemned_gen_number >= (max_generation -1))
        {
#ifdef MULTIPLE_HEAPS
            gc_t_join.join(this, gc_join_rearrange_segs_compaction);
            if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
            {
#ifdef MULTIPLE_HEAPS
                for (int i = 0; i < n_heaps; i++)
                {
                    g_heaps [i]->rearrange_heap_segments (TRUE);
                }
#else //MULTIPLE_HEAPS
                rearrange_heap_segments (TRUE);
#endif //MULTIPLE_HEAPS

#ifdef MULTIPLE_HEAPS
                gc_t_join.restart();
#endif //MULTIPLE_HEAPS
            }

            if (should_expand)
            {
                //fix the start_segment for the ephemeral generations
                for (int i = 0; i < max_generation; i++)
                {
                    generation* gen = generation_of (i);
                    generation_start_segment (gen) = ephemeral_heap_segment;
                    generation_allocation_segment (gen) = ephemeral_heap_segment;
                }
            }
        }
#endif //!USE_REGIONS

        {
#ifdef USE_REGIONS
            end_gen0_region_committed_space = get_gen0_end_space (memory_type_committed);
            dprintf(REGIONS_LOG, ("h%d computed the end_gen0_region_committed_space value to be %zd", heap_number, end_gen0_region_committed_space));
#endif //USE_REGIONS
#ifdef MULTIPLE_HEAPS
            dprintf(3, ("Joining after end of compaction"));
            gc_t_join.join(this, gc_join_adjust_handle_age_compact);
            if (gc_t_join.joined())
            {
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_EVENT_TRACE
                if (informational_event_enabled_p)
                {
                    uint64_t current_time = GetHighPrecisionTimeStamp();
                    gc_time_info[time_compact] = current_time - gc_time_info[time_compact];
                }
#endif //FEATURE_EVENT_TRACE

#ifdef _DEBUG
                verify_committed_bytes ();
#endif // _DEBUG

#ifdef MULTIPLE_HEAPS
                //join all threads to make sure they are synchronized
                dprintf(3, ("Restarting after Promotion granted"));
                gc_t_join.restart();
            }
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_PREMORTEM_FINALIZATION
            finalize_queue->UpdatePromotedGenerations (condemned_gen_number,
                                                       (!settings.demotion && settings.promotion));
#endif // FEATURE_PREMORTEM_FINALIZATION

            ScanContext sc;
            sc.thread_number = heap_number;
            sc.thread_count = n_heaps;
            sc.promotion = FALSE;
            sc.concurrent = FALSE;
            // new generations bounds are set can call this guy
            if (settings.promotion && !settings.demotion)
            {
                dprintf (2, ("Promoting EE roots for gen %d",
                             condemned_gen_number));
                GCScan::GcPromotionsGranted(condemned_gen_number, max_generation, &sc);
            }
            else if (settings.demotion)
            {
                dprintf (2, ("Demoting EE roots for gen %d",
                             condemned_gen_number));
                GCScan::GcDemote (condemned_gen_number, max_generation, &sc);
            }
        }

        {
            reset_pinned_queue_bos();
#ifndef USE_REGIONS
            unsigned int  gen_number = (unsigned int)min ((int)max_generation, 1 + condemned_gen_number);
            generation*  gen = generation_of (gen_number);
            uint8_t*  low = generation_allocation_start (generation_of (gen_number-1));
            uint8_t*  high =  heap_segment_allocated (ephemeral_heap_segment);
#endif //!USE_REGIONS

            while (!pinned_plug_que_empty_p())
            {
                mark*  m = pinned_plug_of (deque_pinned_plug());
                size_t len = pinned_len (m);
                uint8_t*  arr = (pinned_plug (m) - len);
                dprintf(3,("free [%zx %zx[ pin",
                            (size_t)arr, (size_t)arr + len));
                if (len != 0)
                {
                    assert (len >= Align (min_obj_size));
                    make_unused_array (arr, len);
                    // fix fully contained bricks + first one
                    // if the array goes beyond the first brick
                    size_t start_brick = brick_of (arr);
                    size_t end_brick = brick_of (arr + len);
                    if (end_brick != start_brick)
                    {
                        dprintf (3,
                                    ("Fixing bricks [%zx, %zx[ to point to unused array %zx",
                                    start_brick, end_brick, (size_t)arr));
                        set_brick (start_brick,
                                    arr - brick_address (start_brick));
                        size_t brick = start_brick+1;
                        while (brick < end_brick)
                        {
                            set_brick (brick, start_brick - brick);
                            brick++;
                        }
                    }

#ifdef USE_REGIONS
                    int gen_number = object_gennum_plan (arr);
                    generation* gen = generation_of (gen_number);
#else
                    //when we take an old segment to make the new
                    //ephemeral segment. we can have a bunch of
                    //pinned plugs out of order going to the new ephemeral seg
                    //and then the next plugs go back to max_generation
                    if ((heap_segment_mem (ephemeral_heap_segment) <= arr) &&
                        (heap_segment_reserved (ephemeral_heap_segment) > arr))
                    {
                        while ((low <= arr) && (high > arr))
                        {
                            gen_number--;
                            assert ((gen_number >= 1) || (demotion_low != MAX_PTR) ||
                                    settings.demotion || !settings.promotion);
                            dprintf (3, ("new free list generation %d", gen_number));

                            gen = generation_of (gen_number);
                            if (gen_number >= 1)
                                low = generation_allocation_start (generation_of (gen_number-1));
                            else
                                low = high;
                        }
                    }
                    else
                    {
                        dprintf (3, ("new free list generation %d", max_generation));
                        gen_number = max_generation;
                        gen = generation_of (gen_number);
                    }
#endif //USE_REGIONS

                    dprintf(3,("h%d threading %p (%zd) before pin in gen %d",
                        heap_number, arr, len, gen_number));
                    thread_gap (arr, len, gen);
                    add_gen_free (gen_number, len);
                }
            }
        }

        clear_gen1_cards();
    }
    else
    {
        //force promotion for sweep
        settings.promotion = TRUE;
        settings.compaction = FALSE;

#ifdef USE_REGIONS
        // This should be set for segs too actually. We should always reset demotion
        // if we sweep.
        settings.demotion = FALSE;
#endif //USE_REGIONS

        ScanContext sc;
        sc.thread_number = heap_number;
        sc.thread_count = n_heaps;
        sc.promotion = FALSE;
        sc.concurrent = FALSE;

        dprintf (2, ("**** Doing Mark and Sweep GC****"));

        if ((condemned_gen_number < max_generation))
        {
#ifdef FREE_USAGE_STATS
            memcpy (older_gen->gen_free_spaces, r_older_gen_free_space, sizeof (r_older_gen_free_space));
#endif //FREE_USAGE_STATS
            generation_allocator (older_gen)->copy_from_alloc_list (r_free_list);
            generation_free_list_space (older_gen) = r_free_list_space;
            generation_free_obj_space (older_gen) = r_free_obj_space;

#ifdef DOUBLY_LINKED_FL
            if (condemned_gen_number == (max_generation - 1))
            {
                dprintf (2, ("[h%d] no undo, FL %zd-%zd -> %zd, FO %zd+%zd=%zd",
                    heap_number,
                    generation_free_list_space (older_gen), gen2_removed_no_undo,
                    (generation_free_list_space (older_gen) - gen2_removed_no_undo),
                    generation_free_obj_space (older_gen), gen2_removed_no_undo,
                    (generation_free_obj_space (older_gen) + gen2_removed_no_undo)));

                generation_free_list_space (older_gen) -= gen2_removed_no_undo;
                generation_free_obj_space (older_gen) += gen2_removed_no_undo;
            }
#endif //DOUBLY_LINKED_FL

            generation_free_list_allocated (older_gen) = r_older_gen_free_list_allocated;
            generation_end_seg_allocated (older_gen) = r_older_gen_end_seg_allocated;
            generation_condemned_allocated (older_gen) = r_older_gen_condemned_allocated;
            generation_sweep_allocated (older_gen) += dd_survived_size (dynamic_data_of (condemned_gen_number));
            generation_allocation_limit (older_gen) = r_allocation_limit;
            generation_allocation_pointer (older_gen) = r_allocation_pointer;
            generation_allocation_context_start_region (older_gen) = r_allocation_start_region;
            generation_allocation_segment (older_gen) = r_allocation_segment;
#ifdef USE_REGIONS
            if (older_gen->gen_num == max_generation)
            {
                check_seg_gen_num (r_allocation_segment);
            }
#endif //USE_REGIONS
        }

        if ((condemned_gen_number < max_generation))
        {
            // Fix the allocation area of the older generation
            fix_older_allocation_area (older_gen);
        }

        GCToEEInterface::DiagWalkSurvivors(__this, false);

        make_free_lists (condemned_gen_number);
        size_t total_recovered_sweep_size = recover_saved_pinned_info();
        if (total_recovered_sweep_size > 0)
        {
            generation_free_obj_space (generation_of (max_generation)) -= total_recovered_sweep_size;
            dprintf (2, ("h%d: deduct %zd for pin, fo->%zd",
                heap_number, total_recovered_sweep_size,
                generation_free_obj_space (generation_of (max_generation))));
        }

#ifdef USE_REGIONS
        end_gen0_region_committed_space = get_gen0_end_space (memory_type_committed);
        dprintf(REGIONS_LOG, ("h%d computed the end_gen0_region_committed_space value to be %zd", heap_number, end_gen0_region_committed_space));
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
        dprintf(3, ("Joining after end of sweep"));
        gc_t_join.join(this, gc_join_adjust_handle_age_sweep);
        if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
        {
#ifdef FEATURE_EVENT_TRACE
            if (informational_event_enabled_p)
            {
                uint64_t current_time = GetHighPrecisionTimeStamp();
                gc_time_info[time_sweep] = current_time - gc_time_info[time_sweep];
            }
#endif //FEATURE_EVENT_TRACE

#ifdef USE_REGIONS
            if (!special_sweep_p)
#endif //USE_REGIONS
            {
                GCScan::GcPromotionsGranted(condemned_gen_number,
                                                max_generation, &sc);
            }

#ifndef USE_REGIONS
            if (condemned_gen_number >= (max_generation -1))
            {
#ifdef MULTIPLE_HEAPS
                for (int i = 0; i < n_heaps; i++)
                {
                    g_heaps[i]->rearrange_heap_segments(FALSE);
                }
#else
                rearrange_heap_segments(FALSE);
#endif //MULTIPLE_HEAPS
            }
#endif //!USE_REGIONS

#ifdef USE_REGIONS
            verify_region_to_generation_map ();
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
            //join all threads to make sure they are synchronized
            dprintf(3, ("Restarting after Promotion granted"));
            gc_t_join.restart();
#endif //MULTIPLE_HEAPS
        }

#ifdef FEATURE_PREMORTEM_FINALIZATION
#ifdef USE_REGIONS
        if (!special_sweep_p)
#endif //USE_REGIONS
        {
            finalize_queue->UpdatePromotedGenerations (condemned_gen_number, TRUE);
        }
#endif // FEATURE_PREMORTEM_FINALIZATION

#ifdef USE_REGIONS
        if (!special_sweep_p)
#endif //USE_REGIONS
        {
            clear_gen1_cards();
        }
    }

    //verify_partial();
}

/*****************************
Called after compact phase to fix all generation gaps.

Makes the planned layout the current one for the condemned generations and
then syncs the ephemeral segment's allocated pointer (and alloc_allocated)
to its plan_allocated value.

condemned_gen_number - generation being collected. In the non-regions build
                       every generation from this one down to 0 has its
                       allocation pointers reset to the planned start.
consing_gen          - only read by an assert in the non-regions build, hence
                       the UNREFERENCED_PARAMETER when _DEBUG is not defined.
********************************/
void gc_heap::fix_generation_bounds (int condemned_gen_number,
                                     generation* consing_gen)
{
#ifndef _DEBUG
    UNREFERENCED_PARAMETER(consing_gen);
#endif //_DEBUG

    int gen_number = condemned_gen_number;
    dprintf (2, ("---- thread regions gen%d GC ----", gen_number));

#ifdef USE_REGIONS
    // For ephemeral GCs, we handle up till the generation_allocation_segment as that's the last one we
    // changed in the older gen.
    if (settings.promotion && (condemned_gen_number < max_generation))
    {
        int older_gen_number = condemned_gen_number + 1;
        generation* older_gen = generation_of (older_gen_number);
        heap_segment* last_alloc_region = generation_allocation_segment (older_gen);

        dprintf (REGIONS_LOG, ("fix till we see alloc region which is %p", heap_segment_mem (last_alloc_region)));

        // Walk the older generation's regions from its start segment and commit the planned
        // allocated pointer on each one, stopping after last_alloc_region (inclusive).
        heap_segment* region = heap_segment_rw (generation_start_segment (older_gen));
        while (region)
        {
            heap_segment_allocated (region) = heap_segment_plan_allocated (region);
            if (region == last_alloc_region)
                break;
            region = heap_segment_next (region);
        }
    }

    thread_final_regions (true);

    // The ephemeral segment is re-read from gen0's start segment after the regions were threaded.
    ephemeral_heap_segment = generation_start_segment (generation_of (0));
    alloc_allocated = heap_segment_allocated (ephemeral_heap_segment);
#else //USE_REGIONS
    assert (generation_allocation_segment (consing_gen) ==
            ephemeral_heap_segment);

    int bottom_gen = 0;

    // Process the condemned generation and every younger one, oldest first.
    while (gen_number >= bottom_gen)
    {
        generation*  gen = generation_of (gen_number);
        dprintf(3,("Fixing generation pointers for %d", gen_number));
        if ((gen_number < max_generation) && ephemeral_promotion)
        {
            // With ephemeral promotion, the saved plan start of this generation is turned into
            // an unused array and its size is accounted as free object space in max_generation.
            size_t saved_eph_start_size = saved_ephemeral_plan_start_size[gen_number];

            make_unused_array (saved_ephemeral_plan_start[gen_number],
                               saved_eph_start_size);
            generation_free_obj_space (generation_of (max_generation)) += saved_eph_start_size;
            dprintf (2, ("[h%d] EP %p(%zd)", heap_number, saved_ephemeral_plan_start[gen_number],
                saved_ephemeral_plan_start_size[gen_number]));
        }
        // Move the generation to its planned start and format the planned start gap as an unused array.
        reset_allocation_pointers (gen, generation_plan_allocation_start (gen));
        make_unused_array (generation_allocation_start (gen), generation_plan_allocation_start_size (gen));
        dprintf(3,(" start %zx", (size_t)generation_allocation_start (gen)));
        gen_number--;
    }
#ifdef MULTIPLE_HEAPS
    if (ephemeral_promotion)
    {
        //we are creating a generation fault. set the cards.
        // and we are only doing this for multiple heaps because in the single heap scenario the
        // new ephemeral generations will be empty and there'll be no need to set cards for the
        // old ephemeral generations that got promoted into max_generation.
        heap_segment* old_ephemeral_seg = seg_mapping_table_segment_of (saved_ephemeral_plan_start[max_generation-1]);

        assert (in_range_for_segment (saved_ephemeral_plan_start[max_generation-1], old_ephemeral_seg));
        // Set every card from the one covering the saved gen1 plan start up to (but not including)
        // the card of the card-aligned plan_allocated of that segment.
        size_t end_card = card_of (align_on_card (heap_segment_plan_allocated (old_ephemeral_seg)));
        size_t card = card_of (saved_ephemeral_plan_start[max_generation-1]);
        while (card != end_card)
        {
            set_card (card);
            card++;
        }
    }
#endif //MULTIPLE_HEAPS

#ifdef BACKGROUND_GC
    if (should_update_end_mark_size())
    {
        background_soh_size_end_mark = generation_size (max_generation);
    }
#endif //BACKGROUND_GC
#endif //!USE_REGIONS

    {
        // NOTE(review): in the USE_REGIONS build this overwrites the alloc_allocated value assigned
        // just above from heap_segment_allocated - confirm that using plan_allocated here is intended.
        alloc_allocated = heap_segment_plan_allocated(ephemeral_heap_segment);
        //reset the allocated size
#ifdef _DEBUG
        // When promoting without demotion, the youngest generation's start object is expected to
        // end exactly at the ephemeral segment's planned allocated pointer.
        uint8_t* start = get_soh_start_object (ephemeral_heap_segment, youngest_generation);
        if (settings.promotion && !settings.demotion)
        {
            assert ((start + get_soh_start_obj_len (start)) ==
                    heap_segment_plan_allocated(ephemeral_heap_segment));
        }
#endif //_DEBUG
        heap_segment_allocated(ephemeral_heap_segment)=
            heap_segment_plan_allocated(ephemeral_heap_segment);
    }
}

#ifndef USE_REGIONS
// Returns the address that limits generation gen_number: the allocation start
// of a younger generation, or the reserved end of the ephemeral segment when
// there is no such younger generation.
//
// With settings.promotion set the bounding generation is (gen_number - 2),
// otherwise it is (gen_number - 1).
uint8_t* gc_heap::generation_limit (int gen_number)
{
    // Highest generation number that is still bounded by the segment end.
    const int last_gen_bounded_by_seg = (settings.promotion ? 1 : 0);

    if (gen_number > last_gen_bounded_by_seg)
    {
        const int bounding_gen_number = gen_number - (last_gen_bounded_by_seg + 1);
        return generation_allocation_start (generation_of (bounding_gen_number));
    }

    return heap_segment_reserved (ephemeral_heap_segment);
}
#endif //!USE_REGIONS

// Makes sure the ephemeral segment has committed memory for
// (condemned_gen_number + 1) aligned min-size objects past its current
// allocated pointer, growing the segment when the committed range is too short.
//
// Returns FALSE only when the segment had to grow and grow_heap_segment failed.
// When USE_REGIONS is defined this does nothing and always returns TRUE.
BOOL gc_heap::ensure_gap_allocation (int condemned_gen_number)
{
#ifndef USE_REGIONS
    const size_t gap_size = Align (min_obj_size) * (condemned_gen_number + 1);
    uint8_t* const gap_end = heap_segment_allocated (ephemeral_heap_segment) + gap_size;

    // The gaps are expected to always fit in the reserved range.
    assert (gap_end <= heap_segment_reserved (ephemeral_heap_segment));

    const bool needs_commit_p = (gap_end > heap_segment_committed (ephemeral_heap_segment));
    if (needs_commit_p && !grow_heap_segment (ephemeral_heap_segment, gap_end))
    {
        return FALSE;
    }
#endif //!USE_REGIONS
    return TRUE;
}

// Takes Align (size) bytes from the end of the ephemeral segment by advancing
// its allocated pointer, and returns the address where that range starts.
// No memory is committed here; the asserts expect the range to already lie
// within the segment's reserved and committed limits.
uint8_t* gc_heap::allocate_at_end (size_t size)
{
    const size_t aligned_size = Align (size);
    uint8_t* const alloc_start = heap_segment_allocated (ephemeral_heap_segment);
    uint8_t* const alloc_end = alloc_start + aligned_size;

    // only called to allocate a min obj so can't overflow here.
    assert (alloc_end <= heap_segment_reserved (ephemeral_heap_segment));
    // ensure_gap_allocation took care of it
    assert (alloc_end <= heap_segment_committed (ephemeral_heap_segment));

    heap_segment_allocated (ephemeral_heap_segment) = alloc_end;
    return alloc_start;
}

#ifdef USE_REGIONS
// Find the first non empty region and also does the following in the process -
// + decommit end of region if its generation number is soh_gen2 or higher (see the
//   gen_num >= soh_gen2 check below);
// + set the region gen_num to the new one;
//
// For empty regions, we always return empty regions to free. Note that I'm returning
// gen0 empty regions as well, however, returning a region to free does not decommit.
//
// If this is called for a compacting GC, we know we always take the planned generation
// on the region (and set the new allocated); else this is called for sweep in which case
// it's more complicated -
//
// + if we are in the special sweep mode, we don't change the old gen number at all
// + if we are not in special sweep we need to promote all regions, including the SIP ones
//   because we make the assumption that this is the case for sweep for handles.
//
// Parameters:
//   region               - region to start the walk from. May be null, in which case 0 is returned.
//   compact_p            - true when called for a compacting GC (asserted against settings.compaction),
//                          false when called for sweep.
//   num_returned_regions - incremented once for every empty region handed to return_free_region.
//
// Returns the first region that is not empty, or 0 if every remaining region was empty.
heap_segment* gc_heap::find_first_valid_region (heap_segment* region, bool compact_p, int* num_returned_regions)
{
    check_seg_gen_num (generation_allocation_segment (generation_of (max_generation)));

    dprintf (REGIONS_LOG, ("  FFVR region %zx(%p), gen%d",
        (size_t)region, (region ? heap_segment_mem (region) : 0),
        (region ? heap_segment_gen_num (region) : 0)));

    if (!region)
        return 0;

    heap_segment* current_region = region;

    do
    {
        int gen_num = heap_segment_gen_num (current_region);
        int plan_gen_num = -1;
        if (compact_p)
        {
            assert (settings.compaction);
            plan_gen_num = heap_segment_plan_gen_num (current_region);
            dprintf (REGIONS_LOG, ("  gen%d->%d", gen_num, plan_gen_num));
        }
        else
        {
            plan_gen_num = (special_sweep_p ? gen_num : get_plan_gen_num (gen_num));
            dprintf (REGIONS_LOG, ("  gen%d->%d, special_sweep_p %d, swept_in_plan %d",
                gen_num, plan_gen_num, (int)special_sweep_p,
                (int)heap_segment_swept_in_plan (current_region)));
        }

        // A region is empty when nothing is allocated past its start. For compaction this is
        // judged by the planned allocated, otherwise by the current allocated.
        uint8_t* allocated = (compact_p ?
                              heap_segment_plan_allocated (current_region) :
                              heap_segment_allocated (current_region));
        if (heap_segment_mem (current_region) == allocated)
        {
            // Step to the next region before giving this one back; the loop continues from there.
            heap_segment* region_to_delete = current_region;
            current_region = heap_segment_next (current_region);
            return_free_region (region_to_delete);
            (*num_returned_regions)++;

            dprintf (REGIONS_LOG, ("  h%d gen%d return region %p to free, current->%p(%p)",
                heap_number, gen_num, heap_segment_mem (region_to_delete),
                current_region, (current_region ? heap_segment_mem (current_region) : 0)));
            if (!current_region)
                return 0;
        }
        else
        {
            if (compact_p)
            {
                dprintf (REGIONS_LOG, ("  gen%d setting region %p alloc %p to plan %p",
                    gen_num, heap_segment_mem (current_region),
                    heap_segment_allocated (current_region),
                    heap_segment_plan_allocated (current_region)));

                // A swept-in-plan region is expected to already have allocated == plan allocated;
                // any other region takes its planned allocated now.
                if (heap_segment_swept_in_plan (current_region))
                {
                    assert (heap_segment_allocated (current_region) ==
                            heap_segment_plan_allocated (current_region));
                }
                else
                {
                    heap_segment_allocated (current_region) = heap_segment_plan_allocated (current_region);
                }
            }
            else
            {
                // Set this so we keep plan gen and gen the same.
                set_region_plan_gen_num (current_region, plan_gen_num);
            }

            // Note this checks the generation the region was in before it is updated below.
            if (gen_num >= soh_gen2)
            {
                dprintf (REGIONS_LOG, ("  gen%d decommit end of region %p(%p)",
                    gen_num, current_region, heap_segment_mem (current_region)));
                decommit_heap_segment_pages (current_region, 0);
            }

            dprintf (REGIONS_LOG, ("  set region %p(%p) gen num to %d",
                current_region, heap_segment_mem (current_region), plan_gen_num));
            set_region_gen_num (current_region, plan_gen_num);
            break;
        }
    } while (current_region);

    assert (current_region);

    // For a swept-in-plan region, thread the free list that was built on the region onto its
    // (new) generation and account for its free list and free object space there.
    if (heap_segment_swept_in_plan (current_region))
    {
        int gen_num = heap_segment_gen_num (current_region);
        dprintf (REGIONS_LOG, ("threading SIP region %p surv %zd onto gen%d",
            heap_segment_mem (current_region), heap_segment_survived (current_region), gen_num));

        generation* gen = generation_of (gen_num);
        generation_allocator (gen)->thread_sip_fl (current_region);
        generation_free_list_space (gen) += heap_segment_free_list_size (current_region);
        generation_free_obj_space (gen) += heap_segment_free_obj_size (current_region);
    }

    // Take this opportunity to make sure all the regions left with flags only for this GC are reset.
    clear_region_sweep_in_plan (current_region);
    clear_region_demoted (current_region);

    return current_region;
}

// Rebuilds the region lists of generations 0 through max_generation from the regions that are
// left after this GC.
//
// Regions in the condemned generations are walked with find_first_valid_region (which returns
// empty ones to free) and each remaining region is appended to the list of the generation given
// by its plan gen num. A generation that ends up with no region gets a new one from
// get_free_region.
//
// compact_p is passed through to find_first_valid_region; true means this is called for a
// compacting GC.
void gc_heap::thread_final_regions (bool compact_p)
{
    int num_returned_regions = 0;
    int num_new_regions = 0;

    // Give back any regions that were recorded in reserved_free_regions_sip.
    for (int i = 0; i < max_generation; i++)
    {
        if (reserved_free_regions_sip[i])
        {
            return_free_region (reserved_free_regions_sip[i]);
        }
    }

    int condemned_gen_number = settings.condemned_generation;
    // Head and tail of the list being built for each generation; zeroed means "no region yet".
    generation_region_info generation_final_regions[max_generation + 1];
    memset (generation_final_regions, 0, sizeof (generation_final_regions));

    // Step 1: we initialize all the regions for generations we are not condemning with their
    // current head and tail as we know these regions will for sure exist.
    for (int gen_idx = max_generation; gen_idx > condemned_gen_number; gen_idx--)
    {
        generation* gen = generation_of (gen_idx);
        // Note this needs to be the first rw region as we will not be changing any ro regions and
        // we will work on thread rw regions here.
        generation_final_regions[gen_idx].head = heap_segment_rw (generation_start_segment (gen));
        generation_final_regions[gen_idx].tail = generation_tail_region (gen);
    }

#ifdef BACKGROUND_GC
    // Remember the max_generation tail as it is before Step 2, so that the regions threaded after
    // it can be added to background_soh_size_end_mark further down.
    heap_segment* max_gen_tail_region = 0;
    if (should_update_end_mark_size())
    {
        max_gen_tail_region = generation_final_regions[max_generation].tail;
    }
#endif //BACKGROUND_GC

    // Step 2: for each region in the condemned generations, we thread it onto its planned generation
    // in our generation_final_regions array.
    for (int gen_idx = condemned_gen_number; gen_idx >= 0; gen_idx--)
    {
        heap_segment* current_region = heap_segment_rw (generation_start_segment (generation_of (gen_idx)));
        dprintf (REGIONS_LOG, ("gen%d start from %p", gen_idx, heap_segment_mem (current_region)));

        while ((current_region = find_first_valid_region (current_region, compact_p, &num_returned_regions)))
        {
            assert (!compact_p ||
                    (heap_segment_plan_gen_num (current_region) == heap_segment_gen_num (current_region)));
            int new_gen_num = heap_segment_plan_gen_num (current_region);
            generation* new_gen = generation_of (new_gen_num);
            // Save the old next link now; threading this region below may overwrite it later.
            heap_segment* next_region = heap_segment_next (current_region);
            if (generation_final_regions[new_gen_num].head)
            {
                assert (generation_final_regions[new_gen_num].tail);
                // The new gen already exists, just thread this region onto it.
                dprintf (REGIONS_LOG, ("gen%d exists, tail region %p next -> %p",
                    new_gen_num, heap_segment_mem (generation_final_regions[new_gen_num].tail),
                    heap_segment_mem (current_region)));
                heap_segment_next (generation_final_regions[new_gen_num].tail) = current_region;
                generation_final_regions[new_gen_num].tail = current_region;
            }
            else
            {
                // First region for this generation; it is both head and tail.
                generation_final_regions[new_gen_num].head = current_region;
                generation_final_regions[new_gen_num].tail = current_region;
            }

            current_region = next_region;
        }
    }

    // Step 3: all the tail regions' next needs to be set to 0.
    for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++)
    {
        generation* gen = generation_of (gen_idx);
        if (generation_final_regions[gen_idx].tail)
        {
            heap_segment_next (generation_final_regions[gen_idx].tail) = 0;
            //if (heap_segment_next (generation_final_regions[gen_idx].tail) != 0)
            //{
            //    dprintf (REGIONS_LOG, ("tail->next is %zx",
            //        heap_segment_next (generation_final_regions[gen_idx].tail)));
            //    GCToOSInterface::DebugBreak();
            //}
        }
    }

#ifdef BACKGROUND_GC
    // Everything that now follows the previously recorded max_generation tail was threaded on
    // in Step 2; add the allocated size of each of those regions to background_soh_size_end_mark.
    if (max_gen_tail_region)
    {
        max_gen_tail_region = heap_segment_next (max_gen_tail_region);

        while (max_gen_tail_region)
        {
            background_soh_size_end_mark += heap_segment_allocated (max_gen_tail_region) -
                                            heap_segment_mem (max_gen_tail_region);

            max_gen_tail_region = heap_segment_next (max_gen_tail_region);
        }
    }
#endif //BACKGROUND_GC

    // Step 4: if a generation doesn't have any regions, we need to get a new one for it;
    // otherwise we just set the head region as the start region for that generation.
    for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++)
    {
        bool condemned_p = (gen_idx <= condemned_gen_number);
        // Generations that were not condemned got their head in Step 1.
        assert (condemned_p || generation_final_regions[gen_idx].head);

        generation* gen = generation_of (gen_idx);
        heap_segment* start_region = 0;

        if (generation_final_regions[gen_idx].head)
        {
            // Only condemned generations get a new start region; for the others just the tail
            // is updated since regions may have been appended to them in Step 2.
            if (condemned_p)
            {
                start_region = generation_final_regions[gen_idx].head;
                thread_start_region (gen, start_region);
            }
            generation_tail_region (gen) = generation_final_regions[gen_idx].tail;
            dprintf (REGIONS_LOG, ("setting gen%d start %p, tail %p",
                gen_idx,
                heap_segment_mem (heap_segment_rw (generation_start_segment (gen))),
                heap_segment_mem (generation_tail_region (gen))));
        }
        else
        {
            start_region = get_free_region (gen_idx);
            assert (start_region);
            num_new_regions++;
            thread_start_region (gen, start_region);
            dprintf (REGIONS_LOG, ("creating new gen%d at %p", gen_idx, heap_segment_mem (start_region)));
        }

        if (condemned_p)
        {
            // start_region is always set on this path: either from the head above or from
            // get_free_region.
            uint8_t* gen_start = heap_segment_mem (start_region);
            reset_allocation_pointers (gen, gen_start);
        }
    }

    int net_added_regions = num_new_regions - num_returned_regions;
    dprintf (REGIONS_LOG, ("TFR: added %d, returned %d, net %d", num_new_regions, num_returned_regions, net_added_regions));

    // TODO: For sweeping GCs by design we will need to get a new region for gen0 unless we are doing a special sweep.
    // This means we need to know when we decided to sweep that we can get a new region (if needed). If we can't, we
    // need to turn special sweep on.
    if ((settings.compaction || special_sweep_p) && (net_added_regions > 0))
    {
        // Record how many regions were added on net before asserting, so the count is kept
        // in new_regions_in_threading.
        new_regions_in_threading += net_added_regions;

        assert (!"we shouldn't be getting new regions in TFR!");
    }

    verify_regions (true, false);
}

// Makes region the first rw region of gen and also its tail region.
//
// If gen has a tail ro region, region is linked right after it; otherwise
// region becomes the generation's start segment.
void gc_heap::thread_start_region (generation* gen, heap_segment* region)
{
    heap_segment* tail_ro_region = generation_tail_ro_region (gen);

    if (tail_ro_region == nullptr)
    {
        // No ro regions in front - the generation starts directly at this region.
        generation_start_segment (gen) = region;
        dprintf (REGIONS_LOG, ("start region of gen%d -> %zx(%p)", gen->gen_num,
            (size_t)region, heap_segment_mem (region)));
    }
    else
    {
        // Keep the ro regions in front and hang this region off the last of them.
        heap_segment_next (tail_ro_region) = region;
        dprintf (REGIONS_LOG,("gen%d tail ro %zx(%p) next -> %zx(%p)",
            gen->gen_num, (size_t)tail_ro_region, heap_segment_mem (tail_ro_region),
            (size_t)region, heap_segment_mem (region)));
    }

    generation_tail_region (gen) = region;
    dprintf (REGIONS_LOG, ("tail region of gen%d -> %zx(%p)", gen->gen_num,
        (size_t)region, heap_segment_mem (region)));
}

// Gets a region for gen_number from get_free_region and, if one was obtained,
// sets the LOH/POH flag on it where applicable and appends it to the tail of
// that generation's region list.
//
// Returns the new region, or null if no region could be obtained.
heap_segment* gc_heap::get_new_region (int gen_number, size_t size)
{
    heap_segment* region = get_free_region (gen_number, size);
    if (!region)
    {
        return region;
    }

    if (gen_number == loh_generation)
    {
        region->flags |= heap_segment_flags_loh;
    }
    else if (gen_number == poh_generation)
    {
        region->flags |= heap_segment_flags_poh;
    }
    else
    {
        // A region for any other generation must not carry the LOH/POH flags.
        assert ((region->flags & (heap_segment_flags_loh | heap_segment_flags_poh)) == 0);
    }

    // Link the region in after the current tail and make it the new tail.
    generation* gen = generation_of (gen_number);
    heap_segment_next (generation_tail_region (gen)) = region;
    generation_tail_region (gen) = region;

    verify_regions (gen_number, false, settings.concurrent);

    return region;
}

// Asks global_region_allocator for an address range (a large region when uoh_p
// is set, a basic region otherwise) and builds a heap_segment over it for heap
// hp and generation gen_num.
//
// Returns null if the range could not be allocated or if make_heap_segment
// fails; in the latter case the range is handed back with delete_region.
heap_segment* gc_heap::allocate_new_region (gc_heap* hp, int gen_num, bool uoh_p, size_t size)
{
    // size parameter should be non-zero only for large regions
    assert (uoh_p || size == 0);

    uint8_t* region_start = 0;
    uint8_t* region_end = 0;
    bool got_range_p;

    // REGIONS TODO: allocate POH regions on the right
    if (uoh_p)
    {
        got_range_p = global_region_allocator.allocate_large_region (gen_num, &region_start, &region_end,
                                                                     allocate_forward, size, on_used_changed);
    }
    else
    {
        got_range_p = global_region_allocator.allocate_basic_region (gen_num, &region_start, &region_end,
                                                                     on_used_changed);
    }

    if (!got_range_p)
    {
        return 0;
    }

    heap_segment* new_segment = make_heap_segment (region_start, (region_end - region_start), hp, gen_num);

    dprintf (REGIONS_LOG, ("got a new region %zx %p->%p", (size_t)new_segment, region_start, region_end));

    if (new_segment == nullptr)
    {
        // Could not set up the segment, so don't hold on to the range.
        global_region_allocator.delete_region (region_start);
    }

    return new_segment;
}

// Fixes up gen's start/tail bookkeeping for the removal of region_to_delete from its region list.
//
// + If region_to_delete is the first rw region of gen, whatever precedes it (the tail ro region if
//   there is one, otherwise the generation's start segment itself) is pointed at next_region.
// + If region_to_delete is the tail region of gen, prev_region becomes the new tail.
//
// Parameters:
//   gen              - generation region_to_delete belongs to.
//   region_to_delete - region being removed.
//   prev_region      - region before region_to_delete; asserted null when region_to_delete is the
//                      first rw region.
//   next_region      - region after region_to_delete; asserted null when region_to_delete is the tail.
void gc_heap::update_start_tail_regions (generation* gen,
                                         heap_segment* region_to_delete,
                                         heap_segment* prev_region,
                                         heap_segment* next_region)
{
    if (region_to_delete == heap_segment_rw (generation_start_segment (gen)))
    {
        assert (!prev_region);
        heap_segment* tail_ro_region = generation_tail_ro_region (gen);

        // Note next_region can be null here, so it must not be dereferenced for logging.
        if (tail_ro_region)
        {
            heap_segment_next (tail_ro_region) = next_region;
            dprintf (REGIONS_LOG, ("gen%d tail ro %zx(%p) next updated to %zx(%p)",
                gen->gen_num, (size_t)tail_ro_region, heap_segment_mem (tail_ro_region),
                (size_t)next_region, (next_region ? heap_segment_mem (next_region) : 0)));
        }
        else
        {
            generation_start_segment (gen) = next_region;
            dprintf (REGIONS_LOG, ("start region of gen%d updated to %zx(%p)", gen->gen_num,
                (size_t)next_region, (next_region ? heap_segment_mem (next_region) : 0)));
        }
    }

    if (region_to_delete == generation_tail_region (gen))
    {
        assert (!next_region);
        generation_tail_region (gen) = prev_region;
        // prev_region can be null as well (region_to_delete was also the first rw region).
        dprintf (REGIONS_LOG, ("tail region of gen%d updated to %zx(%p)", gen->gen_num,
            (size_t)prev_region, (prev_region ? heap_segment_mem (prev_region) : 0)));
    }

    verify_regions (false, settings.concurrent);
}

// Decides whether region should be swept in plan (SIP) instead of going through the normal plan,
// and sets the region's plan gen num accordingly.
//
// There's one complication with deciding whether we can make a region SIP or not - if the plan_gen_num of
// a generation is not maxgen, and if we want to make every region in that generation maxgen, we need to
// make sure we can get a new region for this generation so we can guarantee each generation has at least
// one region. If we can't get a new region, we need to make sure we leave at least one region in that gen
// to guarantee our invariant.
//
// This new region we get needs to be temporarily recorded instead of being on the free_regions list because
// we can't use it for other purposes.
//
// Returns true if region is to be swept in plan. Always returns false when special regions are not
// enabled or when the GC reason is reason_induced_aggressive.
//
// Side effects (once past the early returns): clears the swept_in_plan flag on region, may set its
// plan gen num, may increment sip_maxgen_regions_per_gen and may record a region from get_free_region
// in reserved_free_regions_sip.
inline
bool gc_heap::should_sweep_in_plan (heap_segment* region)
{
    if (!enable_special_regions_p)
    {
        return false;
    }

    if (settings.reason == reason_induced_aggressive)
    {
        return false;
    }
    bool sip_p = false;
    int gen_num = get_region_gen_num (region);
    int new_gen_num = get_plan_gen_num (gen_num);
    heap_segment_swept_in_plan (region) = false;

    dprintf (REGIONS_LOG, ("checking if region %p should be SIP", heap_segment_mem (region)));

#ifdef STRESS_REGIONS
    // Only do this for testing or it would keep too much swept.
    if (0)
    {
        num_condemned_regions++;
        if ((num_condemned_regions % sip_seg_interval) == 0)
        {
            set_region_plan_gen_num (region, new_gen_num);
            sip_p = true;
        }

        if ((num_condemned_regions % sip_seg_maxgen_interval) == 0)
        {
            set_region_plan_gen_num (region, max_generation);
            sip_maxgen_regions_per_gen[gen_num]++;
            sip_p = true;
        }
    }
    else
#endif //STRESS_REGIONS
    {
        // Both ratios below are percentages of the basic region size.
        size_t basic_region_size = (size_t)1 << min_segment_size_shr;
        assert (heap_segment_gen_num (region) == heap_segment_plan_gen_num (region));

        uint8_t surv_ratio = (uint8_t)(((double)heap_segment_survived (region) * 100.0) / (double)basic_region_size);
        dprintf (2222, ("SSIP: region %p surv %hu / %zd = %d%%(%d)",
            heap_segment_mem (region),
            heap_segment_survived (region),
            basic_region_size,
            surv_ratio, sip_surv_ratio_th));
        if (surv_ratio >= sip_surv_ratio_th)
        {
            set_region_plan_gen_num (region, new_gen_num);
            sip_p = true;
        }

        // When promoting into a generation below max_generation, a high enough old card survival
        // sends the region to max_generation instead.
        if (settings.promotion && (new_gen_num < max_generation))
        {
            int old_card_surv_ratio =
                (int)(((double)heap_segment_old_card_survived (region) * 100.0) / (double)basic_region_size);
            // Log the threshold that is actually compared against below.
            dprintf (2222, ("SSIP: region %p old card surv %d / %zd = %d%%(%d)",
                heap_segment_mem (region),
                heap_segment_old_card_survived (region),
                basic_region_size,
                old_card_surv_ratio, sip_old_card_surv_ratio_th));
            if (old_card_surv_ratio >= sip_old_card_surv_ratio_th)
            {
                set_region_plan_gen_num (region, max_generation, true);
                sip_maxgen_regions_per_gen[gen_num]++;
                sip_p = true;
            }
        }
    }

    if (sip_p)
    {
        // Every region of gen_num is now planned into max_generation, so gen_num needs a
        // replacement region to keep the "each generation has at least one region" invariant.
        if ((new_gen_num < max_generation) &&
            (sip_maxgen_regions_per_gen[gen_num] == regions_per_gen[gen_num]))
        {
            assert (get_region_gen_num (region) == 0);
            assert (new_gen_num < max_generation);

            heap_segment* reserved_free_region = get_free_region (gen_num);
            if (reserved_free_region)
            {
                dprintf (REGIONS_LOG, ("all regions in gen%d -> SIP 2, get a new region for it %p",
                    gen_num, heap_segment_mem (reserved_free_region)));
                reserved_free_regions_sip[gen_num] = reserved_free_region;
            }
            else
            {
                // If we cannot get another region, simply revert our decision.
                // Note the region remains SIP; only its plan gen goes back to new_gen_num.
                sip_maxgen_regions_per_gen[gen_num]--;
                set_region_plan_gen_num (region, new_gen_num, true);
            }
        }
    }

    dprintf (REGIONS_LOG, ("region %p %s SIP", heap_segment_mem (region),
        (sip_p ? "is" : "is not")));
    return sip_p;
}

// Accounts for the free object at obj of size s on this region.
//
// Objects of at least min_free_list bytes are appended to the region's own
// free list (free_list_head/free_list_tail) and counted in free_list_size;
// smaller ones are only counted in free_obj_size.
void heap_segment::thread_free_obj (uint8_t* obj, size_t s)
{
    //dprintf (REGIONS_LOG, ("threading SIP free obj %zx-%zx(%zd)", obj, (obj + s), s));
    if (s < min_free_list)
    {
        free_obj_size += s;
        return;
    }

    // obj becomes the last item on the list.
    free_list_slot (obj) = 0;

    if (!free_list_head)
    {
        free_list_head = obj;
    }
    else
    {
        assert (free_list_tail);
        free_list_slot (free_list_tail) = obj;
    }

    free_list_tail = obj;
    free_list_size += s;
}

// For a region that we sweep in plan, we need to do the following -
//
// + set the swept_in_plan_p for this region.
// + update allocated for this region.
// + build bricks.
// + build free objects. We keep a list of them which will then be threaded onto the appropriate generation's
//   free list. This can be optimized, both gen0 and gen2 GCs are easy to handle - need to see how easy it is
//   to handle gen1 GCs as the commit/repair there is complicated.
//
// in plan_phase we also need to make sure to not call update_brick_table when handling end of this region,
// and the plan gen num is set accordingly.
//
// Parameters:
//   region          - the region to sweep.
//   use_mark_list,
//   mark_list_next,
//   mark_list_index - passed through to find_next_marked when skipping over unmarked space;
//                     mark_list_next is taken by reference.
void gc_heap::sweep_region_in_plan (heap_segment* region,
                                    BOOL use_mark_list,
                                    uint8_t**& mark_list_next,
                                    uint8_t** mark_list_index)
{
    set_region_sweep_in_plan (region);

    region->init_free_list();

    uint8_t* x = heap_segment_mem (region);
    uint8_t* last_marked_obj_start = 0;
    uint8_t* last_marked_obj_end = 0;
    uint8_t* end = heap_segment_allocated (region);
    dprintf (2222, ("h%d region %p->%p SIP, gen %d->%d, %s mark list(%p->%p, %p->%p)",
        heap_number, x, end, heap_segment_gen_num (region), heap_segment_plan_gen_num (region),
        (use_mark_list ? "using" : "not using"),
        (uint8_t*)mark_list_next, (mark_list_next ? *mark_list_next : 0),
        (uint8_t*)mark_list_index, (mark_list_index ? *mark_list_index : 0)));

#ifdef _DEBUG
    // Debug-only bookkeeping: survived is checked against the recorded survived below; the
    // saved_* values are only written, presumably to be inspected in a debugger.
    size_t survived = 0;
    uint8_t* saved_last_unmarked_obj_start = 0;
    uint8_t* saved_last_unmarked_obj_end = 0;
    size_t saved_obj_brick = 0;
    size_t saved_next_obj_brick = 0;
#endif //_DEBUG

    // Walk the region from its start to its current allocated.
    while (x < end)
    {
        uint8_t* obj = x;
        size_t obj_brick = (size_t)obj / brick_size;
        uint8_t* next_obj = 0;
        if (marked (obj))
        {
            // Live object: clear its pinned/marked bits and step over it.
            if (pinned(obj))
            {
                clear_pinned (obj);
            }
            clear_marked (obj);

            size_t s = size (obj);
            next_obj = obj + Align (s);
            last_marked_obj_start = obj;
            last_marked_obj_end = next_obj;
#ifdef _DEBUG
            survived += s;
#endif //_DEBUG
            dprintf (4444, ("M: %p-%p(%zd)", obj, next_obj, s));
        }
        else
        {
            // Unmarked space: jump to whatever find_next_marked returns for [x, end).
            next_obj = find_next_marked (x, end, use_mark_list, mark_list_next, mark_list_index);

#ifdef _DEBUG
            saved_last_unmarked_obj_start = obj;
            saved_last_unmarked_obj_end = next_obj;
#endif //_DEBUG

            // The gap becomes a free object, unless it runs to the end of the region - that
            // trailing part is cut off when allocated is updated below.
            if ((next_obj > obj) && (next_obj != end))
            {
                size_t free_obj_size = next_obj - obj;
                make_unused_array (obj, free_obj_size);
                region->thread_free_obj (obj, free_obj_size);
                dprintf (4444, ("UM threading: %p-%p(%zd)", obj, next_obj, (next_obj - obj)));
            }
        }

        size_t next_obj_brick = (size_t)next_obj / brick_size;

#ifdef _DEBUG
        saved_obj_brick = obj_brick;
        saved_next_obj_brick = next_obj_brick;
#endif //_DEBUG

        // Only fix bricks when this object/gap ends in a different brick than it starts in.
        if (next_obj_brick != obj_brick)
        {
            fix_brick_to_highest (obj, next_obj);
        }

        x = next_obj;
    }

    if (last_marked_obj_start)
    {
        // We only need to make sure we fix the brick the last marked object's end is in.
        // Note this brick could have been fixed already.
        size_t last_marked_obj_start_b = brick_of (last_marked_obj_start);
        size_t last_marked_obj_end_b = brick_of (last_marked_obj_end - 1);
        dprintf (REGIONS_LOG, ("last live obj %p(%p)-%p, fixing its brick(s) %zx-%zx",
            last_marked_obj_start, method_table (last_marked_obj_start), last_marked_obj_end,
            last_marked_obj_start_b, last_marked_obj_end_b));

        if (last_marked_obj_start_b == last_marked_obj_end_b)
        {
            // The object lies within one brick: the entry is the object's offset in that brick.
            set_brick (last_marked_obj_start_b,
                    (last_marked_obj_start - brick_address (last_marked_obj_start_b)));
        }
        else
        {
            // The object spans bricks: the entry for the end brick is the (negative) brick
            // distance back to the brick the object starts in.
            set_brick (last_marked_obj_end_b,
                    (last_marked_obj_start_b - last_marked_obj_end_b));
        }
    }
    else
    {
        // Nothing was marked; the region ends up empty.
        last_marked_obj_end = heap_segment_mem (region);
    }

#ifdef _DEBUG
    size_t region_index = get_basic_region_index_for_address (heap_segment_mem (region));
    dprintf (REGIONS_LOG, ("region #%zd %p survived %zd, %s recorded %d",
        region_index, heap_segment_mem (region), survived,
        ((survived == heap_segment_survived (region)) ? "same as" : "diff from"),
        heap_segment_survived (region)));
#ifdef MULTIPLE_HEAPS
    assert (survived <= heap_segment_survived (region));
#else
    assert (survived == heap_segment_survived (region));
#endif //MULTIPLE_HEAPS
#endif //_DEBUG

    // The region now ends right after its last marked object; allocated and plan allocated are
    // set to the same value.
    assert (last_marked_obj_end);
    save_allocated(region);
    heap_segment_allocated (region) = last_marked_obj_end;
    heap_segment_plan_allocated (region) = heap_segment_allocated (region);

    // If the region is planned into a lower-numbered generation than the one it is in, add its
    // survived bytes to that generation's allocation size.
    int plan_gen_num = heap_segment_plan_gen_num (region);
    if (plan_gen_num < heap_segment_gen_num (region))
    {
        generation_allocation_size (generation_of (plan_gen_num)) += heap_segment_survived (region);
        dprintf (REGIONS_LOG, ("sip: g%d alloc size is now %zd", plan_gen_num,
            generation_allocation_size (generation_of (plan_gen_num))));
    }
}

// Sets the card covering parent_loc when the object *pval refers to lives in a
// region whose plan gen num is lower than parent_gen_num. References that are
// not in the heap range are ignored.
inline
void gc_heap::check_demotion_helper_sip (uint8_t** pval, int parent_gen_num, uint8_t* parent_loc)
{
    uint8_t* child = *pval;

    if (is_in_heap_range (child))
    {
        assert (child != nullptr);

        int child_plan_gen = get_region_plan_gen_num (child);
        if (parent_gen_num > child_plan_gen)
        {
            set_card (card_of (parent_loc));
        }

        dprintf (3, ("SCS %d, %d", child_plan_gen, parent_gen_num));
    }
}

// Starting at region, walks the region list looking for the next region that was not swept in
// plan (SIP). Every SIP region passed on the way has the references in its non-free objects
// relocated here, with cards set as needed for the updated references.
//
// Returns the first non SIP region found, or 0 if the end of the list is reached (which includes
// the case where region is null).
heap_segment* gc_heap::relocate_advance_to_non_sip (heap_segment* region)
{
    THREAD_FROM_HEAP;

    heap_segment* current_region = region;
    dprintf (REGIONS_LOG, ("Relocate searching for next non SIP, starting from %p",
        (region ? heap_segment_mem (region) : 0)));

    while (current_region)
    {
        if (heap_segment_swept_in_plan (current_region))
        {
            int gen_num = heap_segment_gen_num (current_region);
            int plan_gen_num = heap_segment_plan_gen_num (current_region);
            // The region's plan gen is higher than the default plan gen for its generation, so
            // the card check below compares against the region's own plan gen
            // (check_demotion_helper_sip) instead of using check_demotion_helper.
            bool use_sip_demotion = (plan_gen_num > get_plan_gen_num (gen_num));

            dprintf (REGIONS_LOG, ("region %p is SIP, relocating, gen %d, plan gen: %d(supposed to be %d) %s",
                heap_segment_mem (current_region), gen_num, plan_gen_num, get_plan_gen_num (gen_num),
                (use_sip_demotion ? "Sd" : "d")));
            uint8_t* x = heap_segment_mem (current_region);
            uint8_t* end = heap_segment_allocated (current_region);

            // For SIP regions, we go linearly in the region and relocate each object's references.
            while (x < end)
            {
                size_t s = size (x);
                assert (s > 0);
                uint8_t* next_obj = x + Align (s);
                Prefetch (next_obj);
                // Free objects are stepped over without any relocation work.
                if (!(((CObjectHeader*)x)->IsFree()))
                {
                    //relocate_obj_helper (x, s);
                    if (contain_pointers (x))
                    {
                        dprintf (3, ("$%zx$", (size_t)x));

                        go_through_object_nostart (method_table(x), x, s, pval,
                        {
                            uint8_t* child = *pval;
                            //reloc_survivor_helper (pval);
                            relocate_address (pval THREAD_NUMBER_ARG);
                            if (use_sip_demotion)
                                check_demotion_helper_sip (pval, plan_gen_num, (uint8_t*)pval);
                            else
                                check_demotion_helper (pval, (uint8_t*)pval);

                            if (child)
                            {
                                dprintf (4444, ("SIP %p(%p)->%p->%p(%p)",
                                    x, (uint8_t*)pval, child, *pval, method_table (child)));
                            }
                        });
                    }
                    check_class_object_demotion (x);
                }
                x = next_obj;
            }
        }
        else
        {
            // gen_num and plan_gen_num are only used for logging here.
            int gen_num = heap_segment_gen_num (current_region);
            int plan_gen_num = heap_segment_plan_gen_num (current_region);

            dprintf (REGIONS_LOG, ("region %p is not SIP, relocating, gen %d, plan gen: %d",
                heap_segment_mem (current_region), gen_num, plan_gen_num));
            return current_region;
        }

        current_region = heap_segment_next (current_region);
    }

    return 0;
}

#ifdef STRESS_REGIONS
// Stress-only helper: assigns object to the pinning handle at
// ph_index_per_heap, then advances that index, wrapping to 0 once it reaches
// PINNING_HANDLE_INITIAL_LENGTH.
void gc_heap::pin_by_gc (uint8_t* object)
{
    heap_segment* obj_region = region_of (object);
    HndAssignHandleGC(pinning_handles_for_alloc[ph_index_per_heap], object);
    dprintf (REGIONS_LOG, ("h%d pinning object at %zx on eph seg %zx (ph#%d)",
        heap_number, object, heap_segment_mem (obj_region), ph_index_per_heap));

    // Move on to the next handle slot for the following call.
    if (++ph_index_per_heap == PINNING_HANDLE_INITIAL_LENGTH)
    {
        ph_index_per_heap = 0;
    }
}
#endif //STRESS_REGIONS
#endif //USE_REGIONS

void gc_heap::make_free_lists (int condemned_gen_number)
{
    //Promotion has to happen in sweep case.
    assert (settings.promotion);

    make_free_args args = {};
    int stop_gen_idx = get_stop_generation_index (condemned_gen_number);
    for (int i = condemned_gen_number; i >= stop_gen_idx; i--)
    {
        generation* condemned_gen = generation_of (i);
        heap_segment* current_heap_segment = get_start_segment (condemned_gen);

#ifdef USE_REGIONS
    if (!current_heap_segment)
        continue;
#endif //USE_REGIONS

        uint8_t* start_address = get_soh_start_object (current_heap_segment, condemned_gen);
        size_t current_brick = brick_of (start_address);

        _ASSERTE(current_heap_segment != NULL);

        uint8_t* end_address = heap_segment_allocated (current_heap_segment);
        size_t  end_brick = brick_of (end_address - 1);

        int current_gen_num = i;
#ifdef USE_REGIONS
        args.free_list_gen_number = (special_sweep_p ? current_gen_num : get_plan_gen_num (current_gen_num));
#else
        args.free_list_gen_number = get_plan_gen_num (current_gen_num);
#endif //USE_REGIONS
        args.free_list_gen = generation_of (args.free_list_gen_number);
        args.highest_plug = 0;

#ifdef USE_REGIONS
        dprintf (REGIONS_LOG, ("starting at gen%d %p -> %p", i, start_address, end_address));
#else
        args.current_gen_limit = (((current_gen_num == max_generation)) ?
                                  MAX_PTR :
                                  (generation_limit (args.free_list_gen_number)));
#endif //USE_REGIONS

#ifndef USE_REGIONS
        if ((start_address >= end_address) && (condemned_gen_number < max_generation))
        {
            break;
        }
#endif //!USE_REGIONS

        while (1)
        {
            if ((current_brick > end_brick))
            {
#ifndef USE_REGIONS
                if (args.current_gen_limit == MAX_PTR)
                {
                    //We had an empty segment
                    //need to allocate the generation start
                    generation* gen = generation_of (max_generation);

                    heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen));

                    _ASSERTE(start_seg != NULL);

                    uint8_t* gap = heap_segment_mem (start_seg);

                    generation_allocation_start (gen) = gap;
                    heap_segment_allocated (start_seg) = gap + Align (min_obj_size);
                    make_unused_array (gap, Align (min_obj_size));
                    reset_allocation_pointers (gen, gap);
                    dprintf (3, ("Start segment empty, fixing generation start of %d to: %zx",
                                max_generation, (size_t)gap));
                    args.current_gen_limit = generation_limit (args.free_list_gen_number);
                }
#endif //!USE_REGIONS

                if (heap_segment_next_non_sip (current_heap_segment))
                {
                    current_heap_segment = heap_segment_next_non_sip (current_heap_segment);
                }
                else
                {
                    break;
                }

                current_brick = brick_of (heap_segment_mem (current_heap_segment));
                end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
                continue;
            }
            {
                int brick_entry =  brick_table [ current_brick ];
                if ((brick_entry >= 0))
                {
                    make_free_list_in_brick (brick_address (current_brick) + brick_entry-1, &args);
                    dprintf(3,("Fixing brick entry %zx to %zx",
                            current_brick, (size_t)args.highest_plug));
                    set_brick (current_brick,
                            (args.highest_plug - brick_address (current_brick)));
                }
                else
                {
                    if ((brick_entry > -32768))
                    {
#ifdef _DEBUG
                        ptrdiff_t offset = brick_of (args.highest_plug) - current_brick;
                        if ((brick_entry != -32767) && (! ((offset == brick_entry))))
                        {
                            assert ((brick_entry == -1));
                        }
#endif //_DEBUG
                        //init to -1 for faster find_first_object
                        set_brick (current_brick, -1);
                    }
                }
            }
            current_brick++;
        }
    }

    {
#ifdef USE_REGIONS
        check_seg_gen_num (generation_allocation_segment (generation_of (max_generation)));

        thread_final_regions (false);

        generation* gen_gen0 = generation_of (0);
        ephemeral_heap_segment = generation_start_segment (gen_gen0);
        alloc_allocated = heap_segment_allocated (ephemeral_heap_segment);
#else //USE_REGIONS
        int bottom_gen = 0;
        args.free_list_gen_number--;
        while (args.free_list_gen_number >= bottom_gen)
        {
            uint8_t*  gap = 0;
            generation* gen2 = generation_of (args.free_list_gen_number);
            gap = allocate_at_end (Align(min_obj_size));
            generation_allocation_start (gen2) = gap;
            reset_allocation_pointers (gen2, gap);
            dprintf(3,("Fixing generation start of %d to: %zx",
                       args.free_list_gen_number, (size_t)gap));
            _ASSERTE(gap != NULL);
            make_unused_array (gap, Align (min_obj_size));

            args.free_list_gen_number--;
        }

        //reset the allocated size
        uint8_t* start2 = generation_allocation_start (youngest_generation);
        alloc_allocated = start2 + Align (size (start2));
#endif //USE_REGIONS
    }
}

// Walks the plug tree rooted at `tree` in order (left subtree, this plug, right
// subtree) and threads the gap in front of every plug onto the free list of
// args->free_list_gen.
//
// tree - address of the plug at the root of the (sub)tree; asserted non-NULL.
// args - sweep state shared across the walk:
//          highest_plug            reset to 0 on entry and set to each plug as it is
//                                  visited, so it ends up naming the last plug visited.
//          free_list_gen(_number)  generation that currently receives the gaps.
//          current_gen_limit       (segments only) limit that, once reached by an
//                                  ephemeral plug, moves the walk to the next younger
//                                  generation; MAX_PTR means the start of the current
//                                  generation still has to be fixed.
void gc_heap::make_free_list_in_brick (uint8_t* tree, make_free_args* args)
{
    assert ((tree != NULL));
    {
        // Both child offsets are read before the gap in front of this plug is turned
        // into a free object further down.
        // NOTE(review): presumably because the node info lives in that gap - confirm.
        int  right_node = node_right_child (tree);
        int left_node = node_left_child (tree);
        args->highest_plug = 0;
        if (! (0 == tree))
        {
            if (! (0 == left_node))
            {
                make_free_list_in_brick (tree + left_node, args);
            }
            {
                // The gap is the gap_size bytes immediately preceding the plug.
                uint8_t*  plug = tree;
                size_t  gap_size = node_gap_size (tree);
                uint8_t*  gap = (plug - gap_size);
                args->highest_plug = tree;
                dprintf (3,("plug: %p (highest p: %p), free %zx len %zd in %d",
                        plug, args->highest_plug, (size_t)gap, gap_size, args->free_list_gen_number));
#ifdef SHORT_PLUGS
                // Drop the padded flag if it is still set on this plug.
                if (is_plug_padded (plug))
                {
                    dprintf (3, ("%p padded", plug));
                    clear_plug_padded (plug);
                }
#endif //SHORT_PLUGS

#ifdef DOUBLY_LINKED_FL
                // These 2 checks should really just be merged into one.
                if (is_plug_bgc_mark_bit_set (plug))
                {
                    dprintf (3333, ("cbgcm: %p", plug));
                    clear_plug_bgc_mark_bit (plug);
                }
                if (is_free_obj_in_compact_bit_set (plug))
                {
                    dprintf (3333, ("cfoc: %p", plug));
                    clear_free_obj_in_compact_bit (plug);
                }
#endif //DOUBLY_LINKED_FL

#ifndef USE_REGIONS
            // Generation boundary handling. The check is repeated via the goto below
            // until the plug is below the limit of the generation being filled, so a
            // single plug can step through more than one generation.
            gen_crossing:
                {
                    if ((args->current_gen_limit == MAX_PTR) ||
                        ((plug >= args->current_gen_limit) &&
                         ephemeral_pointer_p (plug)))
                    {
                        dprintf(3,(" Crossing Generation boundary at %zx",
                               (size_t)args->current_gen_limit));
                        // With a MAX_PTR limit we stay in the current generation and
                        // only fix its start; otherwise move to the next younger one.
                        if (!(args->current_gen_limit == MAX_PTR))
                        {
                            args->free_list_gen_number--;
                            args->free_list_gen = generation_of (args->free_list_gen_number);
                        }
                        dprintf(3,( " Fixing generation start of %d to: %zx",
                                args->free_list_gen_number, (size_t)gap));

                        // The generation now starts at the beginning of this gap.
                        reset_allocation_pointers (args->free_list_gen, gap);
                        args->current_gen_limit = generation_limit (args->free_list_gen_number);

                        // Carve a min-size free object off the front of the gap to act
                        // as the generation start. If the gap cannot be split into two
                        // valid free objects, the whole gap is used instead.
                        if ((gap_size >= (2*Align (min_obj_size))))
                        {
                            dprintf(3,(" Splitting the gap in two %zd left",
                                   gap_size));
                            make_unused_array (gap, Align(min_obj_size));
                            gap_size = (gap_size - Align(min_obj_size));
                            gap = (gap + Align(min_obj_size));
                        }
                        else
                        {
                            make_unused_array (gap, gap_size);
                            gap_size = 0;
                        }
                        goto gen_crossing;
                    }
                }
#endif //!USE_REGIONS

                // Whatever is left of the gap goes to the generation's free list
                // (thread_gap ignores a zero-sized gap).
                thread_gap (gap, gap_size, args->free_list_gen);
                add_gen_free (args->free_list_gen->gen_num, gap_size);
            }
            if (! (0 == right_node))
            {
                make_free_list_in_brick (tree + right_node, args);
            }
        }
    }
}

// Turns the gap [gap_start, gap_start + size) into a free object and accounts for it
// in `gen`. Gaps of at least min_free_list bytes are threaded onto the generation's
// free list; smaller gaps are only counted as free object space.
// A zero-sized gap is ignored.
void gc_heap::thread_gap (uint8_t* gap_start, size_t size, generation*  gen)
{
#ifndef USE_REGIONS
    assert (generation_allocation_start (gen));
#endif //!USE_REGIONS

    if (size == 0)
    {
        return;
    }

#ifndef USE_REGIONS
    assert ((heap_segment_rw (generation_start_segment (gen)) != ephemeral_heap_segment) ||
            (gap_start > generation_allocation_start (gen)));
#endif //!USE_REGIONS

    // The beginning of a segment gap is not aligned
    assert (size >= Align (min_obj_size));

    // Cards are cleared only for non-concurrent GCs and generations other than the
    // youngest; the memory reset is requested only for max_generation.
    const BOOL clear_cards_p = (!settings.concurrent && (gen != youngest_generation));
    const BOOL reset_memory_p = (gen->gen_num == max_generation);
    make_unused_array (gap_start, size, clear_cards_p, reset_memory_p);
    dprintf (3, ("fr: [%zx, %zx[", (size_t)gap_start, (size_t)gap_start+size));

    if (size < min_free_list)
    {
        // Too small to be worth threading; just account for it.
        generation_free_obj_space (gen) += size;
        return;
    }

    generation_free_list_space (gen) += size;
    generation_allocator (gen)->thread_item (gap_start, size);
}

// Threads the gap [gap_start, gap_start + size) at the front of the free list of
// `gen` and adds it to the generation's free list space. Gaps smaller than
// min_free_list are left untouched and unaccounted.
void gc_heap::uoh_thread_gap_front (uint8_t* gap_start, size_t size, generation*  gen)
{
#ifndef USE_REGIONS
    assert (generation_allocation_start (gen));
#endif //!USE_REGIONS

    if (size < min_free_list)
    {
        return;
    }

    generation_free_list_space (gen) += size;
    generation_allocator (gen)->thread_item_front (gap_start, size);
}

// Formats the range [x, x + size) as free object(s).
//
// x      - start of the range.
// size   - length of the range in bytes; asserted to be at least Align (min_obj_size).
// clearp - when set, the cards covering [x, x + Align (size)) are cleared.
// resetp - when set, reset_memory is called on the range before it is formatted
//          (skipped under BGC_SERVO_TUNING once free-list tuning has been triggered).
//
// On 64-bit hosts a range that does not fit in one free object is covered by a chain
// of free objects; clear_unused_array mirrors the same chunking, so the two functions
// must be kept in sync.
void gc_heap::make_unused_array (uint8_t* x, size_t size, BOOL clearp, BOOL resetp)
{
    dprintf (3, (ThreadStressLog::gcMakeUnusedArrayMsg(),
        (size_t)x, (size_t)(x+size)));
    assert (size >= Align (min_obj_size));

//#if defined (VERIFY_HEAP) && defined (BACKGROUND_GC)
//    check_batch_mark_array_bits (x, x+size);
//#endif //VERIFY_HEAP && BACKGROUND_GC

    if (resetp)
    {
#ifdef BGC_SERVO_TUNING
        // Don't do this for servo tuning because it makes it even harder to regulate WS.
        if (!(bgc_tuning::enable_fl_tuning && bgc_tuning::fl_tuning_triggered))
#endif //BGC_SERVO_TUNING
        {
            reset_memory (x, size);
        }
    }
    // Format the first (and usually only) free object at x.
    ((CObjectHeader*)x)->SetFree(size);

#ifdef HOST_64BIT

#if BIGENDIAN
#error "This won't work on big endian platforms"
#endif

    // Size the first free object actually covers once the part beyond
    // free_object_base_size is truncated to 32 bits.
    size_t size_as_object = (uint32_t)(size - free_object_base_size) + free_object_base_size;

    if (size_as_object < size)
    {
        //
        // If the size is more than 4GB, we need to create multiple objects because of
        // the Array::m_NumComponents is uint32_t and the high 32 bits of unused array
        // size is ignored in regular object size computation.
        //
        uint8_t * tmp = x + size_as_object;
        size_t remaining_size = size - size_as_object;

        // Emit fixed-size free objects until what is left fits in a single one.
        while (remaining_size > UINT32_MAX)
        {
            // Make sure that there will be at least Align(min_obj_size) left
            size_t current_size = UINT32_MAX - get_alignment_constant (FALSE)
                - Align (min_obj_size, get_alignment_constant (FALSE));

            ((CObjectHeader*)tmp)->SetFree(current_size);

            remaining_size -= current_size;
            tmp += current_size;
        }

        // The final free object covers the rest of the range.
        ((CObjectHeader*)tmp)->SetFree(remaining_size);
    }
#endif //HOST_64BIT

    if (clearp)
        clear_card_for_addresses (x, x + Align(size));
}

// Clear memory set by make_unused_array.
//
// x    - start of the range that was previously passed to make_unused_array.
// size - the size that was passed to make_unused_array for that range; on 64-bit
//        hosts it is used to recompute where each free object header of the chain
//        was written. It is unused on other hosts.
void gc_heap::clear_unused_array (uint8_t* x, size_t size)
{
    // Also clear the sync block
    *(((PTR_PTR)x)-1) = 0;

    ((CObjectHeader*)x)->UnsetFree();

#ifdef HOST_64BIT

#if BIGENDIAN
#error "This won't work on big endian platforms"
#endif

    // The memory could have been cleared in the meantime. We have to mirror the algorithm
    // from make_unused_array since we cannot depend on the object sizes in memory.
    size_t size_as_object = (uint32_t)(size - free_object_base_size) + free_object_base_size;

    if (size_as_object < size)
    {
        // More than one free object was created; visit each header in the chain using
        // the same chunk sizes make_unused_array used.
        uint8_t * tmp = x + size_as_object;
        size_t remaining_size = size - size_as_object;

        while (remaining_size > UINT32_MAX)
        {
            size_t current_size = UINT32_MAX - get_alignment_constant (FALSE)
                - Align (min_obj_size, get_alignment_constant (FALSE));

            ((CObjectHeader*)tmp)->UnsetFree();

            remaining_size -= current_size;
            tmp += current_size;
        }

        // Header of the last free object in the chain.
        ((CObjectHeader*)tmp)->UnsetFree();
    }
#else
    UNREFERENCED_PARAMETER(size);
#endif //HOST_64BIT
}

// Searches the plug tree rooted at `tree` for the node that governs `old_address`.
//
// The walk descends right while the current node is below old_address and left
// while it is above, stopping on an exact match or when the needed child is missing.
// Returns:
//   - the node the walk stopped on, if it is at or below old_address;
//   - otherwise the last node the walk descended right from (a node below
//     old_address), if there is one;
//   - otherwise the node the walk stopped on, which is then above old_address, so
//     callers have to handle a result greater than old_address.
inline
uint8_t* tree_search (uint8_t* tree, uint8_t* old_address)
{
    uint8_t* candidate = 0;

    for (;;)
    {
        if (tree == old_address)
        {
            break;
        }

        const bool descend_right = (tree < old_address);
        const int child_offset = (descend_right ? node_right_child (tree) : node_left_child (tree));
        if (child_offset == 0)
        {
            break;
        }

        if (descend_right)
        {
            // Remember the closest node seen so far that lies below old_address.
            assert (candidate < tree);
            candidate = tree;
        }

        tree = tree + child_offset;
        Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left);
    }

    if ((tree > old_address) && candidate)
    {
        return candidate;
    }

    return tree;
}

// Rewrites *pold_address with the relocated address of the location it refers to.
//
// pold_address - slot holding the address to relocate; it is always dereferenced, so
//                it must not be null. The slot is left untouched when the address is
//                outside the range being relocated, or when its brick has no plug
//                tree and the LOH compaction path below does not apply.
void gc_heap::relocate_address (uint8_t** pold_address THREAD_NUMBER_DCL)
{
    uint8_t* old_address = *pold_address;
#ifdef USE_REGIONS
    if (!is_in_gc_range (old_address) || !should_check_brick_for_reloc (old_address))
    {
        return;
    }
#else //USE_REGIONS
    // Outside this heap's [gc_low, gc_high) range. Without MULTIPLE_HEAPS that is the
    // end of it; with MULTIPLE_HEAPS the address may still fall in the range of the
    // heap that owns it, in which case we fall through to the brick lookup.
    if (!((old_address >= gc_low) && (old_address < gc_high)))
#ifdef MULTIPLE_HEAPS
    {
        UNREFERENCED_PARAMETER(thread);
        if (old_address == 0)
            return;
        gc_heap* hp = heap_of (old_address);
        if ((hp == this) ||
            !((old_address >= hp->gc_low) && (old_address < hp->gc_high)))
            return;
    }
#else //MULTIPLE_HEAPS
        return ;
#endif //MULTIPLE_HEAPS
#endif //USE_REGIONS
    // Look up the brick covering old_address. A zero entry means there is no plug
    // tree to consult for this brick.
    size_t  brick = brick_of (old_address);
    int    brick_entry =  brick_table [ brick ];
    uint8_t*  new_address = old_address;
    if (! ((brick_entry == 0)))
    {
    retry:
        {
            // Negative entries are offsets back to an earlier brick; follow them until
            // a brick with a positive entry (one that holds a tree root) is reached.
            while (brick_entry < 0)
            {
                brick = (brick + brick_entry);
                brick_entry =  brick_table [ brick ];
            }
            uint8_t* old_loc = old_address;

            // The tree root is at (entry - 1) bytes into the brick.
            uint8_t* node = tree_search ((brick_address (brick) + brick_entry-1),
                                      old_loc);
            if ((node <= old_loc))
                // The plug found starts at or below the address: apply its distance.
                new_address = (old_address + node_relocation_distance (node));
            else
            {
                // Every plug in this tree starts above the address.
                if (node_left_p (node))
                {
                    // The node's left flag is set: the node's gap size is added on top
                    // of its relocation distance.
                    dprintf(3,(" L: %zx", (size_t)node));
                    new_address = (old_address +
                                   (node_relocation_distance (node) +
                                    node_gap_size (node)));
                }
                else
                {
                    // The governing plug must be in an earlier brick; step back one
                    // brick and search again.
                    brick = brick - 1;
                    brick_entry =  brick_table [ brick ];
                    goto retry;
                }
            }
        }

        dprintf (4, (ThreadStressLog::gcRelocateReferenceMsg(), pold_address, old_address, new_address));
        *pold_address = new_address;
        return;
    }

#ifdef FEATURE_LOH_COMPACTION
    // No plug tree for this brick. If the LOH was compacted in this GC the address may
    // refer to an object in a LOH segment, whose relocation distance is looked up
    // through loh_node_relocation_distance instead.
    if (settings.loh_compaction)
    {
        heap_segment* pSegment = seg_mapping_table_segment_of ((uint8_t*)old_address);
#ifdef USE_REGIONS
        // pSegment could be 0 for regions, see comment for is_in_condemned.
        if (!pSegment)
        {
            return;
        }
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
        if (heap_segment_heap (pSegment)->loh_compacted_p)
#else
        if (loh_compacted_p)
#endif
        {
            // Only LOH segments qualify; read-only (frozen) segments are excluded when
            // FEATURE_BASICFREEZE is defined.
            size_t flags = pSegment->flags;
            if ((flags & heap_segment_flags_loh)
#ifdef FEATURE_BASICFREEZE
                && !(flags & heap_segment_flags_readonly)
#endif
                )
            {
                new_address = old_address + loh_node_relocation_distance (old_address);
                dprintf (4, (ThreadStressLog::gcRelocateReferenceMsg(), pold_address, old_address, new_address));
                *pold_address = new_address;
            }
        }
    }
#endif //FEATURE_LOH_COMPACTION
}

// Inline fast path: only objects for which is_collectible holds need the out-of-line
// demotion check. Does nothing when COLLECTIBLE_CLASS is not defined.
inline void
gc_heap::check_class_object_demotion (uint8_t* obj)
{
#ifdef COLLECTIBLE_CLASS
    if (!is_collectible(obj))
    {
        return;
    }

    check_class_object_demotion_internal (obj);
#else
    UNREFERENCED_PARAMETER(obj);
#endif //COLLECTIBLE_CLASS
}

#ifdef COLLECTIBLE_CLASS
// Out-of-line part of check_class_object_demotion. When demotion is in effect, makes
// sure the card for `obj` is set if the class object returned by get_class_object
// ends up demoted. With MULTIPLE_HEAPS the card is set without checking.
NOINLINE void
gc_heap::check_class_object_demotion_internal (uint8_t* obj)
{
    if (!settings.demotion)
    {
        return;
    }

#ifdef MULTIPLE_HEAPS
    // We set the card without checking the demotion range 'cause at this point
    // the handle that points to the loader allocator object may or may not have
    // been relocated by other GC threads.
    set_card (card_of (obj));
#else
    THREAD_FROM_HEAP;
    uint8_t* class_obj = get_class_object (obj);
    dprintf (3, ("%p: got classobj %p", obj, class_obj));

    // Relocate a local copy of the class object address, then run the demotion
    // check against the relocated value with `obj` as the parent.
    uint8_t* relocated_class_obj = class_obj;
    relocate_address (&relocated_class_obj THREAD_NUMBER_ARG);

    check_demotion_helper (&relocated_class_obj, obj);
#endif //MULTIPLE_HEAPS
}

#endif //COLLECTIBLE_CLASS

// Sets the card covering parent_obj when the (already relocated) reference stored in
// *pval points at a demoted object.
//
// pval       - slot holding the relocated child reference. This may be a saved copy
//              or a temporary rather than the real field, so it is only read for its
//              value.
// parent_obj - address whose card has to be set if the child is demoted.
//
// Regions:  the child must be in the heap range and its region must be demoted.
// Segments: the child must fall in [demotion_low, demotion_high) of this heap or,
//           with MULTIPLE_HEAPS and demotion active, of the heap that owns the child.
inline void
gc_heap::check_demotion_helper (uint8_t** pval, uint8_t* parent_obj)
{
#ifdef USE_REGIONS
    uint8_t* child_object = *pval;
    if (!is_in_heap_range (child_object))
        return;
    int child_object_plan_gen = get_region_plan_gen_num (child_object);
    bool child_obj_demoted_p = is_region_demoted (child_object);

    if (child_obj_demoted_p)
    {
        set_card (card_of (parent_obj));
    }

    dprintf (3, ("SC %d (%s)", child_object_plan_gen, (child_obj_demoted_p ? "D" : "ND")));
#else //USE_REGIONS
    // detect if we are demoting an object
    if ((*pval < demotion_high) &&
        (*pval >= demotion_low))
    {
        // Log the card that is actually being set (parent_obj's), not the card of the
        // slot we read the value from - the two differ for saved/temporary slots.
        dprintf(3, ("setting card %zx:%zx",
                    card_of(parent_obj),
                    (size_t)parent_obj));

        set_card (card_of (parent_obj));
    }
#ifdef MULTIPLE_HEAPS
    else if (settings.demotion)
    {
        // The child may live on another heap; check that heap's demotion range.
        dprintf (4, ("Demotion active, computing heap_of object"));
        gc_heap* hp = heap_of (*pval);
        if ((*pval < hp->demotion_high) &&
            (*pval >= hp->demotion_low))
        {
            dprintf(3, ("setting card %zx:%zx",
                        card_of(parent_obj),
                        (size_t)parent_obj));

            set_card (card_of (parent_obj));
        }
    }
#endif //MULTIPLE_HEAPS
#endif //USE_REGIONS
}

// Relocates the reference stored in the slot `pval`, then runs the demotion check on
// the relocated value using the slot's own address as the parent.
inline void
gc_heap::reloc_survivor_helper (uint8_t** pval)
{
    THREAD_FROM_HEAP;

    uint8_t* slot_address = (uint8_t*)pval;
    relocate_address (pval THREAD_NUMBER_ARG);
    check_demotion_helper (pval, slot_address);
}

// Relocates every reference field of the object at `x`, then performs the class
// object demotion check for it.
//
// x - address of the object.
// s - size of the object, as passed on to go_through_object_nostart.
inline void
gc_heap::relocate_obj_helper (uint8_t* x, size_t s)
{
    THREAD_FROM_HEAP;
    if (contain_pointers (x))
    {
        dprintf (3, ("o$%zx$", (size_t)x));

        // The block below runs once per reference slot (pval) of the object; the old
        // value is captured first so the log can show old -> new.
        go_through_object_nostart (method_table(x), x, s, pval,
                            {
                                uint8_t* child = *pval;
                                reloc_survivor_helper (pval);
                                if (child)
                                {
                                    dprintf (3, ("%p->%p->%p", (uint8_t*)pval, child, *pval));
                                }
                            });

    }
    // Done regardless of whether the object contains pointers.
    check_class_object_demotion (x);
}

// Relocates a reference whose value lives in a saved copy rather than in the object.
//
// address_to_set_card - address used as the parent when deciding which card to set.
// address_to_reloc    - slot that actually holds the reference value; it is the one
//                       that gets relocated and checked for demotion.
inline
void gc_heap::reloc_ref_in_shortened_obj (uint8_t** address_to_set_card, uint8_t** address_to_reloc)
{
    THREAD_FROM_HEAP;

    // Remember the pre-relocation value for logging only.
    uint8_t* value_before_reloc = 0;
    if (address_to_reloc)
    {
        value_before_reloc = *address_to_reloc;
    }

    relocate_address (address_to_reloc THREAD_NUMBER_ARG);

    if (address_to_reloc)
    {
        dprintf (3, ("SR %p: %p->%p", (uint8_t*)address_to_reloc, value_before_reloc, *address_to_reloc));
    }

    uint8_t* card_parent = (uint8_t*)address_to_set_card;
    check_demotion_helper (address_to_reloc, card_parent);
}

// Computes where the pre-plug info area of a pinned plug (the sizeof (plug_and_gap)
// bytes right before the plug) ends up after relocation, and records that address
// on the pinned plug entry.
void gc_heap::relocate_pre_plug_info (mark* pinned_plug_entry)
{
    THREAD_FROM_HEAP;
    uint8_t* plug = pinned_plug (pinned_plug_entry);

    // We relocate an address one pointer past the start of the pre-plug area and
    // subtract that pointer back afterwards; relocating the start itself may fail.
    // Consider this scenario:
    // gen1 start | 3-ptr sized NP | PP
    // 0          | 0x18           | 0x30
    // Asking for the reloc address of 0x10 would AV in relocate_address: the first
    // plug seen in the brick is 0x18, so 0x10 sends us back a brick, whose entry is 0,
    // and then tree_search AVs when it does node_right_child (tree).
    uint8_t* reloc_probe = plug - sizeof (plug_and_gap);
    reloc_probe += sizeof (uint8_t*);

    // Pre-relocation value, kept for logging only.
    uint8_t* probe_before_reloc = reloc_probe;
    relocate_address (&reloc_probe THREAD_NUMBER_ARG);

    dprintf (3, ("PreR %p: %p->%p, set reloc: %p",
        (uint8_t*)&reloc_probe, probe_before_reloc, reloc_probe, (reloc_probe - sizeof (uint8_t*))));

    pinned_plug_entry->set_pre_plug_info_reloc_start (reloc_probe - sizeof (uint8_t*));
}

// Relocates the references of the last object of a plug whose tail is covered by
// plug info that was saved on a pinned plug entry.
//
// x                 - the object.
// s                 - its size, as passed on to go_through_object_nostart.
// end               - address from which on the object's slots must be read from the
//                     saved copy instead of from the object itself.
// pinned_plug_entry - entry holding the saved info.
// is_pinned         - TRUE: use the entry's post-plug info;
//                     FALSE: use its pre-plug info (the pre-plug reloc start is
//                     computed first).
inline
void gc_heap::relocate_shortened_obj_helper (uint8_t* x, size_t s, uint8_t* end, mark* pinned_plug_entry, BOOL is_pinned)
{
    THREAD_FROM_HEAP;
    uint8_t* plug = pinned_plug (pinned_plug_entry);

    if (!is_pinned)
    {
        //// Temporary - we just wanna make sure we are doing things right when padding is needed.
        //if ((x + s) < plug)
        //{
        //    dprintf (3, ("obj %zx needed padding: end %zx is %d bytes from pinned obj %zx",
        //        x, (x + s), (plug- (x + s)), plug));
        //    GCToOSInterface::DebugBreak();
        //}

        relocate_pre_plug_info (pinned_plug_entry);
    }

    verify_pins_with_post_plug_info("after relocate_pre_plug_info");

    // saved_plug_info_start:  heap address where the saved area begins.
    // saved_info_to_relocate: the saved copy whose slots get relocated.
    uint8_t* saved_plug_info_start = 0;
    uint8_t** saved_info_to_relocate = 0;

    if (is_pinned)
    {
        saved_plug_info_start = (uint8_t*)(pinned_plug_entry->get_post_plug_info_start());
        saved_info_to_relocate = (uint8_t**)(pinned_plug_entry->get_post_plug_reloc_info());
    }
    else
    {
        saved_plug_info_start = (plug - sizeof (plug_and_gap));
        saved_info_to_relocate = (uint8_t**)(pinned_plug_entry->get_pre_plug_reloc_info());
    }

    uint8_t** current_saved_info_to_relocate = 0;
    uint8_t* child = 0;

    dprintf (3, ("x: %p, pp: %p, end: %p", x, plug, end));

    if (contain_pointers (x))
    {
        dprintf (3,("s$%zx$", (size_t)x));

        // The block below runs once per reference slot (pval). Slots at or past `end`
        // are mapped to the slot at the same pointer-sized index in the saved copy:
        // the saved slot is the one relocated, while pval is used for the card.
        // All other slots are relocated in place.
        go_through_object_nostart (method_table(x), x, s, pval,
        {
            dprintf (3, ("obj %p, member: %p->%p", x, (uint8_t*)pval, *pval));

            if ((uint8_t*)pval >= end)
            {
                current_saved_info_to_relocate = saved_info_to_relocate + ((uint8_t*)pval - saved_plug_info_start) / sizeof (uint8_t**);
                child = *current_saved_info_to_relocate;
                reloc_ref_in_shortened_obj (pval, current_saved_info_to_relocate);
                dprintf (3, ("last part: R-%p(saved: %p)->%p ->%p",
                    (uint8_t*)pval, current_saved_info_to_relocate, child, *current_saved_info_to_relocate));
            }
            else
            {
                reloc_survivor_helper (pval);
            }
        });
    }

    check_class_object_demotion (x);
}

// Relocates the references of every object in the plug [plug, plug_end), walking the
// objects back to back by their aligned sizes.
void gc_heap::relocate_survivor_helper (uint8_t* plug, uint8_t* plug_end)
{
    for (uint8_t* obj = plug; obj < plug_end; )
    {
        size_t s = size (obj);
        uint8_t* following_obj = obj + Align (s);

        // Start pulling in the next object while this one is being processed.
        Prefetch (following_obj);
        relocate_obj_helper (obj, s);
        assert (s > 0);

        obj = following_obj;
    }
}

// Debug-only consistency check of the post-plug info saved for pinned plugs.
//
// Runs only when both _DEBUG and VERIFY_HEAP are defined, the heap verify level
// includes HEAPVERIFY_GC and verify_pinned_queue_p is set. It is skipped for heap
// expansion: that case is not handled right now because the new reloc info is not
// saved.
//
// For every mark stack entry that has post-plug info, is post-short and whose saved
// debug gap is not 1, the node info in front of the object following the pin is
// compared against the entry's saved debug copy; any mismatch is a fatal GC error.
//
// msg - label logged once the verification has passed.
void gc_heap::verify_pins_with_post_plug_info (const char* msg)
{
#if defined (_DEBUG) && defined (VERIFY_HEAP)
    if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)
    {
        if (!verify_pinned_queue_p)
            return;

        if (settings.heap_expansion)
            return;

        for (size_t i = 0; i < mark_stack_tos; i++)
        {
            mark& m = mark_stack_array[i];

            mark* pinned_plug_entry = pinned_plug_of(i);

            if (pinned_plug_entry->has_post_plug_info() &&
                pinned_plug_entry->post_short_p() &&
                (pinned_plug_entry->saved_post_plug_debug.gap != 1))
            {
                // The object after the pin starts right past the post-plug info area.
                uint8_t* next_obj = pinned_plug_entry->get_post_plug_info_start() + sizeof (plug_and_gap);
                // object after pin
                dprintf (3, ("OFP: %p, G: %zx, R: %zx, LC: %d, RC: %d",
                    next_obj, node_gap_size (next_obj), node_relocation_distance (next_obj),
                    (int)node_left_child (next_obj), (int)node_right_child (next_obj)));

                // Walk the saved debug copy word by word: gap first, then reloc.
                size_t* post_plug_debug = (size_t*)(&m.saved_post_plug_debug);

                if (node_gap_size (next_obj) != *post_plug_debug)
                {
                    dprintf (1, ("obj: %p gap should be %zx but it is %zx",
                        next_obj, *post_plug_debug, (size_t)(node_gap_size (next_obj))));
                    FATAL_GC_ERROR();
                }
                post_plug_debug++;
                // can't do node_relocation_distance here as it clears the left bit.
                //if (node_relocation_distance (next_obj) != *post_plug_debug)
                // Compare the raw word stored 3 size_t's before the object instead.
                if (*((size_t*)(next_obj - 3 * sizeof (size_t))) != *post_plug_debug)
                {
                    dprintf (1, ("obj: %p reloc should be %zx but it is %zx",
                        next_obj, *post_plug_debug, (size_t)(node_relocation_distance (next_obj))));
                    FATAL_GC_ERROR();
                }
                // A positive left child offset is treated as corruption here.
                if (node_left_child (next_obj) > 0)
                {
                    dprintf (1, ("obj: %p, vLC: %d\n", next_obj, (int)(node_left_child (next_obj))));
                    FATAL_GC_ERROR();
                }
            }
        }

        dprintf (3, ("%s verified", msg));
    }
#else
    UNREFERENCED_PARAMETER(msg);
#endif // _DEBUG && VERIFY_HEAP
}

#ifdef COLLECTIBLE_CLASS
// Pinned plugs do not record enough about a short collectible object to do a precise
// demotion check (we don't want to burn another ptr size space for that), so while
// demotion is in effect the card for the object is simply always set.
inline void
gc_heap::unconditional_set_card_collectible (uint8_t* obj)
{
    if (!settings.demotion)
    {
        return;
    }

    set_card (card_of (obj));
}
#endif //COLLECTIBLE_CLASS

// Relocates the references of the objects in a plug whose end is covered by plug info
// saved on a pinned plug entry.
//
// plug              - start of the plug.
// plug_end          - end of the plug as seen by the caller; it is extended by
//                     sizeof (gap_reloc_pair) before the walk.
//                     NOTE(review): presumably the caller's end excludes that part of
//                     the last object - confirm.
// pinned_plug_entry - entry holding the saved info. If `plug` is the pinned plug
//                     itself the post-plug info applies, otherwise the pre-plug info.
void gc_heap::relocate_shortened_survivor_helper (uint8_t* plug, uint8_t* plug_end, mark* pinned_plug_entry)
{
    uint8_t*  x = plug;
    uint8_t* p_plug = pinned_plug (pinned_plug_entry);
    BOOL is_pinned = (plug == p_plug);
    // Whether the last object may be "short", i.e. too small to be walked normally.
    BOOL check_short_obj_p = (is_pinned ? pinned_plug_entry->post_short_p() : pinned_plug_entry->pre_short_p());

    plug_end += sizeof (gap_reloc_pair);

    //dprintf (3, ("%s %p is shortened, and last object %s overwritten", (is_pinned ? "PP" : "NP"), plug, (check_short_obj_p ? "is" : "is not")));
    dprintf (3, ("%s %p-%p short, LO: %s OW", (is_pinned ? "PP" : "NP"), plug, plug_end, (check_short_obj_p ? "is" : "is not")));

    verify_pins_with_post_plug_info("begin reloc short surv");

    while (x < plug_end)
    {
        // Short last object: fewer than min_pre_pin_obj_size bytes remain. The object
        // is not walked; the entry's per-slot bits say which saved slots hold
        // references, and only those are relocated.
        if (check_short_obj_p && ((DWORD)(plug_end - x) < (DWORD)min_pre_pin_obj_size))
        {
            dprintf (3, ("last obj %p is short", x));

            if (is_pinned)
            {
#ifdef COLLECTIBLE_CLASS
                if (pinned_plug_entry->post_short_collectible_p())
                    unconditional_set_card_collectible (x);
#endif //COLLECTIBLE_CLASS

                // Relocate the saved references based on bits set.
                uint8_t** saved_plug_info_start = (uint8_t**)(pinned_plug_entry->get_post_plug_info_start());
                uint8_t** saved_info_to_relocate = (uint8_t**)(pinned_plug_entry->get_post_plug_reloc_info());
                for (size_t i = 0; i < pinned_plug_entry->get_max_short_bits(); i++)
                {
                    if (pinned_plug_entry->post_short_bit_p (i))
                    {
                        reloc_ref_in_shortened_obj ((saved_plug_info_start + i), (saved_info_to_relocate + i));
                    }
                }
            }
            else
            {
#ifdef COLLECTIBLE_CLASS
                if (pinned_plug_entry->pre_short_collectible_p())
                    unconditional_set_card_collectible (x);
#endif //COLLECTIBLE_CLASS

                relocate_pre_plug_info (pinned_plug_entry);

                // Relocate the saved references based on bits set.
                uint8_t** saved_plug_info_start = (uint8_t**)(p_plug - sizeof (plug_and_gap));
                uint8_t** saved_info_to_relocate = (uint8_t**)(pinned_plug_entry->get_pre_plug_reloc_info());
                for (size_t i = 0; i < pinned_plug_entry->get_max_short_bits(); i++)
                {
                    if (pinned_plug_entry->pre_short_bit_p (i))
                    {
                        reloc_ref_in_shortened_obj ((saved_plug_info_start + i), (saved_info_to_relocate + i));
                    }
                }
            }

            // The short object is the last one in the plug.
            break;
        }

        size_t s = size (x);
        uint8_t* next_obj = x + Align (s);
        Prefetch (next_obj);

        if (next_obj >= plug_end)
        {
            // Last object of the plug: its final sizeof (plug_and_gap) bytes are
            // treated as living in the saved copy.
            dprintf (3, ("object %p is at the end of the plug %p->%p",
                next_obj, plug, plug_end));

            verify_pins_with_post_plug_info("before reloc short obj");

            relocate_shortened_obj_helper (x, s, (x + Align (s) - sizeof (plug_and_gap)), pinned_plug_entry, is_pinned);
        }
        else
        {
            // Any other object is intact and relocated the regular way.
            relocate_obj_helper (x, s);
        }

        assert (s > 0);
        x = next_obj;
    }

    verify_pins_with_post_plug_info("end reloc short surv");
}

// Relocates the references of all objects in the plug [plug, plug_end).
//
// check_last_object_p - when set, the plug's end is covered by saved plug info and
//                       the shortened-plug path is taken with pinned_plug_entry;
//                       otherwise the plug is walked normally and pinned_plug_entry
//                       is not used.
void gc_heap::relocate_survivors_in_plug (uint8_t* plug, uint8_t* plug_end,
                                          BOOL check_last_object_p,
                                          mark* pinned_plug_entry)
{
    dprintf (3,("RP: [%zx(%zx->%zx),%zx(%zx->%zx)[",
        (size_t)plug, brick_of (plug), (size_t)brick_table[brick_of (plug)],
        (size_t)plug_end, brick_of (plug_end), (size_t)brick_table[brick_of (plug_end)]));

    if (!check_last_object_p)
    {
        relocate_survivor_helper (plug, plug_end);
        return;
    }

    relocate_shortened_survivor_helper (plug, plug_end, pinned_plug_entry);
}

// Walks the plug tree rooted at `tree` in order (left subtree, this plug, right
// subtree) and relocates the references in the plugs it finds.
//
// A plug's end is only known once the next plug - and therefore the gap in front of
// it - is reached, so each visit relocates the *previous* plug (args->last_plug) and
// then records the current one. The plug still pending in args->last_plug after the
// walk is left for the caller to relocate.
//
// tree - address of the plug at the root of the (sub)tree; asserted non-NULL.
// args - state carried across visits: last_plug, whether it is shortened by post-plug
//        info (is_shortened), and the pinned plug entry most recently fetched.
void gc_heap::relocate_survivors_in_brick (uint8_t* tree, relocate_args* args)
{
    assert ((tree != NULL));

    dprintf (3, ("tree: %p, args->last_plug: %p, left: %p, right: %p, gap(t): %zx",
        tree, args->last_plug,
        (tree + node_left_child (tree)),
        (tree + node_right_child (tree)),
        node_gap_size (tree)));

    if (node_left_child (tree))
    {
        relocate_survivors_in_brick (tree + node_left_child (tree), args);
    }
    {
        uint8_t*  plug = tree;
        BOOL   has_post_plug_info_p = FALSE;
        BOOL   has_pre_plug_info_p = FALSE;

        // If this plug is the oldest pinned plug, fetch its entry, which also tells
        // whether pre- and/or post-plug info was saved for it.
        if (tree == oldest_pinned_plug)
        {
            args->pinned_plug_entry = get_oldest_pinned_entry (&has_pre_plug_info_p,
                                                               &has_post_plug_info_p);
            assert (tree == pinned_plug (args->pinned_plug_entry));

            dprintf (3, ("tree is the oldest pin: %p", tree));
        }
        if (args->last_plug)
        {
            // The previous plug ends where the gap in front of this plug begins.
            size_t  gap_size = node_gap_size (tree);
            uint8_t*  gap = (plug - gap_size);
            dprintf (3, ("tree: %p, gap: %p (%zx)", tree, gap, gap_size));
            assert (gap_size >= Align (min_obj_size));
            uint8_t*  last_plug_end = gap;

            // The previous plug needs the shortened path if it was shortened by its
            // own post-plug info or if this (pinned) plug saved pre-plug info.
            BOOL check_last_object_p = (args->is_shortened || has_pre_plug_info_p);

            {
                relocate_survivors_in_plug (args->last_plug, last_plug_end, check_last_object_p, args->pinned_plug_entry);
            }
        }
        else
        {
            // No previous plug, so there cannot be pre-plug info either.
            assert (!has_pre_plug_info_p);
        }

        // This plug becomes the pending one for the next visit.
        args->last_plug = plug;
        args->is_shortened = has_post_plug_info_p;
        if (has_post_plug_info_p)
        {
            dprintf (3, ("setting %p as shortened", plug));
        }
        dprintf (3, ("last_plug: %p(shortened: %d)", plug, (args->is_shortened ? 1 : 0)));
    }
    if (node_right_child (tree))
    {
        relocate_survivors_in_brick (tree + node_right_child (tree), args);
    }
}

// Refreshes oldest_pinned_plug: the plug of the oldest pin, or 0 when the pinned plug
// queue is empty.
inline
void gc_heap::update_oldest_pinned_plug()
{
    if (pinned_plug_que_empty_p())
    {
        oldest_pinned_plug = 0;
    }
    else
    {
        oldest_pinned_plug = pinned_plug (oldest_pin());
    }
}

// Returns the segment at which a walk of `gen` should start: the generation's
// (read-write) start segment, or - with regions - whatever heap_segment_non_sip
// yields for it, which may be null.
heap_segment* gc_heap::get_start_segment (generation* gen)
{
    heap_segment* gen_first_segment = heap_segment_rw (generation_start_segment (gen));
#ifdef USE_REGIONS
    heap_segment* first_non_sip_segment = heap_segment_non_sip (gen_first_segment);
    if (first_non_sip_segment != gen_first_segment)
    {
        dprintf (REGIONS_LOG, ("h%d skipped gen%d SIP regions, start %p->%p",
            heap_number,
            (first_non_sip_segment ? heap_segment_gen_num (first_non_sip_segment) : -1),
            heap_segment_mem (gen_first_segment),
            (first_non_sip_segment ? heap_segment_mem (first_non_sip_segment) : 0)));
    }

    return first_non_sip_segment;
#else //USE_REGIONS
    return gen_first_segment;
#endif //USE_REGIONS
}

// Drives relocate_survivors_in_brick / relocate_survivors_in_plug over the
// generations from condemned_gen_number down to the index returned by
// get_stop_generation_index (condemned_gen_number).
//
// For each generation the segments are walked brick by brick. Every brick with a
// non-negative brick table entry has its plug tree handed to
// relocate_survivors_in_brick. Plugs are processed one step behind (the previous
// plug is remembered in args.last_plug), so the last plug of each segment is
// flushed here once the bricks of that segment are exhausted.
//
// first_condemned_address is only consulted by an assert in non-regions builds.
void gc_heap::relocate_survivors (int condemned_gen_number,
                                  uint8_t* first_condemned_address)
{
    // Reset the pinned plug queue position and cache its oldest plug before walking.
    reset_pinned_queue_bos();
    update_oldest_pinned_plug();

    int stop_gen_idx = get_stop_generation_index (condemned_gen_number);

#ifndef USE_REGIONS
    assert (first_condemned_address == generation_allocation_start (generation_of (condemned_gen_number)));
#endif //!USE_REGIONS

    for (int i = condemned_gen_number; i >= stop_gen_idx; i--)
    {
        generation* condemned_gen = generation_of (i);
        heap_segment* current_heap_segment = heap_segment_rw (generation_start_segment (condemned_gen));
#ifdef USE_REGIONS
        // Nothing to do for this generation if no region is left after
        // relocate_advance_to_non_sip.
        current_heap_segment = relocate_advance_to_non_sip (current_heap_segment);
        if (!current_heap_segment)
            continue;
#endif //USE_REGIONS
        uint8_t*  start_address = get_soh_start_object (current_heap_segment, condemned_gen);
        size_t  current_brick = brick_of (start_address);

        _ASSERTE(current_heap_segment != NULL);

        uint8_t*  end_address = heap_segment_allocated (current_heap_segment);

        // end_brick is the brick containing the last allocated byte of the segment.
        size_t  end_brick = brick_of (end_address - 1);
        relocate_args args;
        args.is_shortened = FALSE;
        args.pinned_plug_entry = 0;
        args.last_plug = 0;

        while (1)
        {
            if (current_brick > end_brick)
            {
                // Past the last brick of this segment: flush the plug that is still
                // pending, using the segment's allocated end as the plug end.
                if (args.last_plug)
                {
                    {
                        assert (!(args.is_shortened));
                        relocate_survivors_in_plug (args.last_plug,
                                                    heap_segment_allocated (current_heap_segment),
                                                    args.is_shortened,
                                                    args.pinned_plug_entry);
                    }

                    args.last_plug = 0;
                }

                // Move on to the next segment, or finish this generation when there is none.
                heap_segment* next_heap_segment = heap_segment_next (current_heap_segment);
                if (next_heap_segment)
                {
#ifdef USE_REGIONS
                    next_heap_segment = relocate_advance_to_non_sip (next_heap_segment);
#endif //USE_REGIONS
                    if (next_heap_segment)
                    {
                        current_heap_segment = next_heap_segment;
                        current_brick = brick_of (heap_segment_mem (current_heap_segment));
                        end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
                        continue;
                    }
                    else
                        break;
                }
                else
                {
                    break;
                }
            }
            {
                int brick_entry =  brick_table [ current_brick ];

                // Only non-negative entries are followed; the tree root is at
                // brick_address (current_brick) + brick_entry - 1.
                if (brick_entry >= 0)
                {
                    relocate_survivors_in_brick (brick_address (current_brick) +
                                                brick_entry -1,
                                                &args);
                }
            }
            current_brick++;
        }
    }
}

// Reports the plug [plug, plug + size) to args->fn.
//
// The relocation distance passed to the callback is the plug's
// node_relocation_distance when settings.compaction is set and 0 otherwise.
//
// When check_last_object_p is set, size is extended by sizeof (gap_reloc_pair)
// and the pinned plug entry's saved post-plug info (args->is_shortened) or
// pre-plug info is swapped before the callback and swapped again afterwards -
// presumably so the callback sees the plug's original contents.
void gc_heap::walk_plug (uint8_t* plug, size_t size, BOOL check_last_object_p, walk_relocate_args* args)
{
    if (check_last_object_p)
    {
        mark* pin_entry = args->pinned_plug_entry;
        size += sizeof (gap_reloc_pair);

        if (!args->is_shortened)
        {
            assert (pin_entry->has_pre_plug_info());
            pin_entry->swap_pre_plug_and_saved_for_profiler();
        }
        else
        {
            assert (pin_entry->has_post_plug_info());
            pin_entry->swap_post_plug_and_saved_for_profiler();
        }
    }

    uint8_t* plug_end = plug + size;
    ptrdiff_t plug_reloc = node_relocation_distance (plug);
    STRESS_LOG_PLUG_MOVE(plug, plug_end, -plug_reloc);

    ptrdiff_t reported_reloc = 0;
    if (settings.compaction)
    {
        reported_reloc = plug_reloc;
    }

    (args->fn) (plug, plug_end, reported_reloc, args->profiling_context, !!settings.compaction, false);

    if (check_last_object_p)
    {
        // Undo the swap done before the callback.
        mark* pin_entry = args->pinned_plug_entry;

        if (!args->is_shortened)
        {
            pin_entry->swap_pre_plug_and_saved_for_profiler();
        }
        else
        {
            pin_entry->swap_post_plug_and_saved_for_profiler();
        }
    }
}

// In-order walk (left child, node, right child) of the plug tree rooted at tree,
// reporting plugs through walk_plug.
//
// A plug's end is only known when the next plug is reached (it ends where that
// plug's gap begins), so each visit reports the *previous* plug remembered in
// args->last_plug and then records the current plug in its place. The plug that
// is still pending when the walk ends is left in args for the caller to report.
void gc_heap::walk_relocation_in_brick (uint8_t* tree, walk_relocate_args* args)
{
    assert ((tree != NULL));
    if (node_left_child (tree))
    {
        walk_relocation_in_brick (tree + node_left_child (tree), args);
    }

    uint8_t*  plug = tree;
    BOOL   has_pre_plug_info_p = FALSE;
    BOOL   has_post_plug_info_p = FALSE;

    // If this node is the oldest pinned plug, fetch its queue entry. This also
    // deques the entry and advances oldest_pinned_plug.
    if (tree == oldest_pinned_plug)
    {
        args->pinned_plug_entry = get_oldest_pinned_entry (&has_pre_plug_info_p,
                                                           &has_post_plug_info_p);
        assert (tree == pinned_plug (args->pinned_plug_entry));
    }

    if (args->last_plug != 0)
    {
        // The previous plug ends where the gap in front of this plug starts.
        size_t gap_size = node_gap_size (tree);
        uint8_t*  gap = (plug - gap_size);
        uint8_t*  last_plug_end = gap;
        size_t last_plug_size = (last_plug_end - args->last_plug);
        dprintf (3, ("tree: %p, last_plug: %p, gap: %p(%zx), last_plug_end: %p, size: %zx",
            tree, args->last_plug, gap, gap_size, last_plug_end, last_plug_size));

        // The previous plug needs the saved-info handling in walk_plug when it was
        // marked shortened (post-plug info of the previous pin) or when this
        // pinned plug has pre-plug info.
        BOOL check_last_object_p = (args->is_shortened || has_pre_plug_info_p);
        if (!check_last_object_p)
        {
            assert (last_plug_size >= Align (min_obj_size));
        }

        walk_plug (args->last_plug, last_plug_size, check_last_object_p, args);
    }
    else
    {
        // No previous plug is pending, so there must be no pre-plug info.
        assert (!has_pre_plug_info_p);
    }

    // The current plug becomes the pending one; it is reported on the next visit.
    dprintf (3, ("set args last plug to plug: %p", plug));
    args->last_plug = plug;
    args->is_shortened = has_post_plug_info_p;

    if (node_right_child (tree))
    {
        walk_relocation_in_brick (tree + node_right_child (tree), args);
    }
}

// Reports every plug in the generations from settings.condemned_generation down
// to the index returned by get_stop_generation_index to fn, passing
// profiling_context through to the callback.
//
// Segments are walked brick by brick; each brick with a non-negative brick table
// entry has its plug tree handed to walk_relocation_in_brick. Plugs are reported
// one step behind, so the plug still pending at the end of a segment is reported
// here with the segment's allocated end as its end.
//
// With USE_REGIONS, regions handled by walk_relocation_sip are reported by that
// function and skipped by the brick walk.
void gc_heap::walk_relocation (void* profiling_context, record_surv_fn fn)
{
    int condemned_gen_number = settings.condemned_generation;
    int stop_gen_idx = get_stop_generation_index (condemned_gen_number);

    // Reset the pinned plug queue position and cache its oldest plug before walking.
    reset_pinned_queue_bos();
    update_oldest_pinned_plug();

    for (int i = condemned_gen_number; i >= stop_gen_idx; i--)
    {
        generation* condemned_gen = generation_of (i);
        heap_segment*  current_heap_segment = heap_segment_rw (generation_start_segment (condemned_gen));
#ifdef USE_REGIONS
        // Nothing left to walk by brick in this generation if no region remains.
        current_heap_segment = walk_relocation_sip (current_heap_segment, profiling_context, fn);
        if (!current_heap_segment)
            continue;
#endif // USE_REGIONS
        uint8_t*  start_address = get_soh_start_object (current_heap_segment, condemned_gen);
        size_t  current_brick = brick_of (start_address);

        _ASSERTE(current_heap_segment != NULL);
        // end_brick is the brick containing the last allocated byte of the segment.
        size_t end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
        walk_relocate_args args;
        args.is_shortened = FALSE;
        args.pinned_plug_entry = 0;
        args.last_plug = 0;
        args.profiling_context = profiling_context;
        args.fn = fn;

        while (1)
        {
            if (current_brick > end_brick)
            {
                // Past the last brick of this segment: report the pending plug,
                // which extends to the segment's allocated end.
                if (args.last_plug)
                {
                    walk_plug (args.last_plug,
                            (heap_segment_allocated (current_heap_segment) - args.last_plug),
                            args.is_shortened,
                            &args);
                    args.last_plug = 0;
                }
                // Advance to the next segment; stop this generation when there is none.
                current_heap_segment = heap_segment_next_rw (current_heap_segment);
#ifdef USE_REGIONS
                current_heap_segment = walk_relocation_sip (current_heap_segment, profiling_context, fn);
#endif // USE_REGIONS
                if (current_heap_segment)
                {
                    current_brick = brick_of (heap_segment_mem (current_heap_segment));
                    end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
                    continue;
                }
                else
                {
                    break;
                }
            }
            {
                int brick_entry =  brick_table [ current_brick ];
                // Only non-negative entries are followed; the tree root is at
                // brick_address (current_brick) + brick_entry - 1.
                if (brick_entry >= 0)
                {
                    walk_relocation_in_brick (brick_address (current_brick) +
                                            brick_entry - 1,
                                            &args);
                }
            }
            current_brick++;
        }
    }
}

#ifdef USE_REGIONS
// Consumes the leading run of regions, starting at current_heap_segment, for
// which heap_segment_swept_in_plan is true. Each such region is scanned object by
// object and every maximal run of non-free objects is reported to fn as a plug
// with relocation distance 0 and both trailing flags false.
//
// Returns the first region that is not swept in plan, or null when the list is
// exhausted.
heap_segment* gc_heap::walk_relocation_sip (heap_segment* current_heap_segment, void* profiling_context, record_surv_fn fn)
{
    for (;
         current_heap_segment && heap_segment_swept_in_plan (current_heap_segment);
         current_heap_segment = heap_segment_next_rw (current_heap_segment))
    {
        uint8_t* region_start = heap_segment_mem (current_heap_segment);
        uint8_t* region_end = heap_segment_allocated (current_heap_segment);

        // Start of the run of non-free objects currently being accumulated, if any.
        uint8_t* run_start = nullptr;

        for (uint8_t* cur = region_start; cur < region_end; cur += Align (size (cur)))
        {
            if (!((CObjectHeader*)cur)->IsFree())
            {
                if (run_start == nullptr)
                {
                    run_start = cur;
                }
            }
            else if (run_start != nullptr)
            {
                // A free object terminates the current run.
                fn (run_start, cur, 0, profiling_context, false, false);
                run_start = nullptr;
            }
        }

        // A run that reaches the end of the region is closed there.
        if (run_start != nullptr)
        {
            fn (run_start, region_end, 0, profiling_context, false, false);
        }
    }

    return current_heap_segment;
}
#endif // USE_REGIONS

// Dispatches a survivor walk according to type:
//   walk_for_gc  -> walk_survivors_relocation
//   walk_for_bgc -> walk_survivors_for_bgc (only when both BACKGROUND_GC and
//                   FEATURE_EVENT_TRACE are defined)
// Any other value trips an assert.
void gc_heap::walk_survivors (record_surv_fn fn, void* context, walk_surv_type type)
{
    if (type == walk_for_gc)
    {
        walk_survivors_relocation (context, fn);
        return;
    }

#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE)
    if (type == walk_for_bgc)
    {
        walk_survivors_for_bgc (context, fn);
        return;
    }
#endif //BACKGROUND_GC && FEATURE_EVENT_TRACE

    assert (!"unknown type!");
}

#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE)
// Reports survivors to fn for a background GC (asserts settings.concurrent).
//
// Every segment of every generation from get_start_generation_index() up to
// total_generation_count is scanned linearly. Each maximal run of objects whose
// method table is not the free object method table is reported as one plug with
// relocation distance 0, compacting == false and bgc == true.
void gc_heap::walk_survivors_for_bgc (void* profiling_context, record_surv_fn fn)
{
    assert(settings.concurrent);

    for (int gen_idx = get_start_generation_index(); gen_idx < total_generation_count; gen_idx++)
    {
        int align_const = get_alignment_constant (gen_idx == max_generation);

        for (heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (gen_idx)));
             seg != nullptr;
             seg = heap_segment_next (seg))
        {
            uint8_t* cur = heap_segment_mem (seg);
            uint8_t* seg_end = heap_segment_allocated (seg);

            while (cur < seg_end)
            {
                // Step over free objects one at a time.
                if (method_table(cur) == g_gc_pFreeObjectMethodTable)
                {
                    cur += Align (size (cur), align_const);
                    continue;
                }

                // cur survived: build a fake plug that starts here and extends
                // over the following non-free objects, then send the event.
                uint8_t* run_start = cur;

                while (method_table(cur) != g_gc_pFreeObjectMethodTable)
                {
                    cur += Align (size (cur), align_const);
                    if (cur >= seg_end)
                    {
                        break;
                    }
                }

                fn (run_start,
                    cur,                // end of the run
                    0,                  // Reloc distance == 0 as this is non-compacting
                    profiling_context,
                    false,              // Non-compacting
                    true);              // BGC
            }
        }
    }
}
#endif //BACKGROUND_GC && FEATURE_EVENT_TRACE

// Relocate phase for this heap.
//
// Steps, in the order they appear below:
//  1. (MULTIPLE_HEAPS) join all heaps; the joined thread records the relocate
//     start timestamp (when informational events are enabled), verifies the
//     region-to-generation map (USE_REGIONS) and restarts the others.
//  2. Roots are scanned with GCHeap::Relocate, plus the background GC roots when
//     a background GC is running.
//  3. With FEATURE_CARD_MARKING_STEALING: survivors, finalization data and the
//     handle table are relocated *before* the card scans.
//  4. If condemned_gen_number is not max_generation, cross generation pointers
//     are relocated through the cards for SOH segments and for UOH objects;
//     otherwise LOH (and POH when ALLOW_REFERENCES_IN_POH) objects are relocated
//     directly.
//  5. Without FEATURE_CARD_MARKING_STEALING: survivors, finalization data and
//     the handle table are relocated here, after the card scans.
//  6. (MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING) this thread then looks at
//     the other heaps cyclically and does the card scans for any heap that has
//     not finished them.
//
// condemned_gen_number: generation being collected.
// first_condemned_address: forwarded to relocate_survivors.
void gc_heap::relocate_phase (int condemned_gen_number,
                              uint8_t* first_condemned_address)
{
    ScanContext sc;
    sc.thread_number = heap_number;
    sc.thread_count = n_heaps;
    sc.promotion = FALSE;
    sc.concurrent = FALSE;

#ifdef MULTIPLE_HEAPS
    //join all threads to make sure they are synchronized
    dprintf(3, ("Joining after end of plan"));
    gc_t_join.join(this, gc_join_begin_relocate_phase);
    if (gc_t_join.joined())
    {
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_EVENT_TRACE
        if (informational_event_enabled_p)
        {
            // Start timestamp; compact_phase turns this into a duration.
            gc_time_info[time_relocate] = GetHighPrecisionTimeStamp();
        }
#endif //FEATURE_EVENT_TRACE

#ifdef USE_REGIONS
        verify_region_to_generation_map();
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
        //join all threads to make sure they are synchronized
        dprintf(3, ("Restarting for relocation"));
        gc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    dprintf (2, (ThreadStressLog::gcStartRelocateMsg(), heap_number));

    dprintf(3,("Relocating roots"));
    GCScan::GcScanRoots(GCHeap::Relocate,
                            condemned_gen_number, max_generation, &sc);

    verify_pins_with_post_plug_info("after reloc stack");

#ifdef BACKGROUND_GC
    if (gc_heap::background_running_p())
    {
        scan_background_roots (GCHeap::Relocate, heap_number, &sc);
    }
#endif //BACKGROUND_GC

#ifdef FEATURE_CARD_MARKING_STEALING
    // for card marking stealing, do the other relocations *before* we scan the older generations
    // this gives us a chance to make up for imbalance in these phases later
    {
        dprintf(3, ("Relocating survivors"));
        relocate_survivors(condemned_gen_number,
            first_condemned_address);
    }

#ifdef FEATURE_PREMORTEM_FINALIZATION
    dprintf(3, ("Relocating finalization data"));
    finalize_queue->RelocateFinalizationData(condemned_gen_number,
        __this);
#endif // FEATURE_PREMORTEM_FINALIZATION

    {
        dprintf(3, ("Relocating handle table"));
        GCScan::GcScanHandles(GCHeap::Relocate,
            condemned_gen_number, max_generation, &sc);
    }
#endif // FEATURE_CARD_MARKING_STEALING

    if (condemned_gen_number != max_generation)
    {
#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
        // Another heap may already have done this heap's SOH card scan.
        if (!card_mark_done_soh)
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
        {
            dprintf (3, ("Relocating cross generation pointers on heap %d", heap_number));
            mark_through_cards_for_segments(&gc_heap::relocate_address, TRUE THIS_ARG);
            verify_pins_with_post_plug_info("after reloc cards");
#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
            card_mark_done_soh = true;
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
        }
    }
    if (condemned_gen_number != max_generation)
    {
#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
        // Another heap may already have done this heap's UOH card scan.
        if (!card_mark_done_uoh)
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
        {
            dprintf (3, ("Relocating cross generation pointers for uoh objects on heap %d", heap_number));
            for (int i = uoh_start_generation; i < total_generation_count; i++)
            {
#ifndef ALLOW_REFERENCES_IN_POH
                if (i != poh_generation)
#endif //ALLOW_REFERENCES_IN_POH
                    mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, i, TRUE THIS_ARG);
            }

#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
            card_mark_done_uoh = true;
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
        }
    }
    else
    {
        // max_generation is condemned: UOH objects are relocated directly
        // instead of through the cards.
#ifdef FEATURE_LOH_COMPACTION
        if (loh_compacted_p)
        {
            assert (settings.condemned_generation == max_generation);
            relocate_in_loh_compact();
        }
        else
#endif //FEATURE_LOH_COMPACTION
        {
            relocate_in_uoh_objects (loh_generation);
        }

#ifdef ALLOW_REFERENCES_IN_POH
        relocate_in_uoh_objects (poh_generation);
#endif
    }
#ifndef FEATURE_CARD_MARKING_STEALING
    // Without card marking stealing these relocations run here, after the card
    // scans above (the stealing configuration runs them before the scans so that
    // the scans can make up for imbalance between heaps).
    {
        dprintf(3,("Relocating survivors"));
        relocate_survivors (condemned_gen_number,
                            first_condemned_address);
    }

#ifdef FEATURE_PREMORTEM_FINALIZATION
    dprintf(3,("Relocating finalization data"));
    finalize_queue->RelocateFinalizationData (condemned_gen_number,
                                              __this);
#endif // FEATURE_PREMORTEM_FINALIZATION

    {
        dprintf(3,("Relocating handle table"));
        GCScan::GcScanHandles(GCHeap::Relocate,
                                  condemned_gen_number, max_generation, &sc);
    }
#endif // !FEATURE_CARD_MARKING_STEALING


#if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
    if (condemned_gen_number != max_generation)
    {
        // check the other heaps cyclically and try to help out where the relocation isn't done
        for (int i = 0; i < gc_heap::n_heaps; i++)
        {
            int heap_number_to_look_at = (i + heap_number) % gc_heap::n_heaps;
            gc_heap* hp = gc_heap::g_heaps[heap_number_to_look_at];
            if (!hp->card_mark_done_soh)
            {
                dprintf(3, ("Relocating cross generation pointers on heap %d", hp->heap_number));
                hp->mark_through_cards_for_segments(&gc_heap::relocate_address, TRUE THIS_ARG);
                hp->card_mark_done_soh = true;
            }

            if (!hp->card_mark_done_uoh)
            {
                dprintf(3, ("Relocating cross generation pointers for uoh objects on heap %d", hp->heap_number));
                // Distinct index name: this loop is nested inside the heap loop,
                // whose index is `i`.
                for (int uoh_gen_idx = uoh_start_generation; uoh_gen_idx < total_generation_count; uoh_gen_idx++)
                {
#ifndef ALLOW_REFERENCES_IN_POH
                    if (uoh_gen_idx != poh_generation)
#endif //ALLOW_REFERENCES_IN_POH
                        hp->mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, uoh_gen_idx, TRUE THIS_ARG);
                }
                hp->card_mark_done_uoh = true;
            }
        }
    }
#endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING

    dprintf(2, (ThreadStressLog::gcEndRelocateMsg(), heap_number));
}

// Checks whether tree is the plug of the oldest entry in the pinned plug queue.
// If it is, the entry's pre/post plug info flags are stored through the out
// parameters, the queue is advanced when deque_p is set, and the entry is
// returned.
//
// If the queue is empty or tree is not the oldest pinned plug, NULL is returned
// and the out parameters are left untouched - the caller is responsible for
// setting the right values to begin with.
//
// POPO TODO: We are keeping this temporarily as this is also used by realloc
// where it passes FALSE to deque_p, change it to use the same optimization
// as relocate. Not as essential since realloc is already a slow path.
mark* gc_heap::get_next_pinned_entry (uint8_t* tree,
                                      BOOL* has_pre_plug_info_p,
                                      BOOL* has_post_plug_info_p,
                                      BOOL deque_p)
{
    if (pinned_plug_que_empty_p())
    {
        return NULL;
    }

    mark* front_entry = oldest_pin();
    if (pinned_plug (front_entry) != tree)
    {
        return NULL;
    }

    *has_pre_plug_info_p =  front_entry->has_pre_plug_info();
    *has_post_plug_info_p = front_entry->has_post_plug_info();

    if (deque_p)
    {
        deque_pinned_plug();
    }

    dprintf (3, ("found a pinned plug %p, pre: %d, post: %d",
        tree,
        (*has_pre_plug_info_p ? 1 : 0),
        (*has_post_plug_info_p ? 1 : 0)));

    return front_entry;
}

// Stores the pre/post plug info flags of the oldest pinned plug entry through the
// out parameters and returns that entry. This also deques the entry and updates
// oldest_pinned_plug. The queue is not checked for emptiness here.
mark* gc_heap::get_oldest_pinned_entry (BOOL* has_pre_plug_info_p,
                                        BOOL* has_post_plug_info_p)
{
    mark* const front_entry = oldest_pin();

    *has_pre_plug_info_p =  front_entry->has_pre_plug_info();
    *has_post_plug_info_p = front_entry->has_post_plug_info();

    // Advance the queue, then refresh the cached oldest plug to match.
    deque_pinned_plug();
    update_oldest_pinned_plug();

    return front_entry;
}

// Card maintenance for a range of len bytes that now lives at dest: with
// copy_cards_p the cards are copied from the src range, otherwise the cards
// covering [dest, dest + len) are cleared.
inline
void gc_heap::copy_cards_range (uint8_t* dest, uint8_t* src, size_t len, BOOL copy_cards_p)
{
    if (!copy_cards_p)
    {
        clear_card_for_addresses (dest, dest + len);
        return;
    }

    copy_cards_for_addresses (dest, src, len);
}

// Copies a plug of len bytes from src to dest and carries the associated GC
// bookkeeping along with it. Does nothing when dest == src.
//
// The raw copy covers [src - plug_skew, src - plug_skew + len). Around it:
//  - BACKGROUND_GC: mark bits are copied while current_c_gc_state is
//    c_gc_state_marking.
//  - DOUBLY_LINKED_FL: the plug's bgc mark bit and (for len <=
//    min_free_item_no_prev) its free-obj-in-compact bit are read and cleared on
//    src before the copy and acted upon at dest after it.
//  - FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP: the destination is marked dirty.
//  - Cards are copied or cleared according to copy_cards_p (see copy_cards_range).
//
// POPO TODO: We should actually just recover the artificially made gaps here..because when we copy
// we always copy the earlier plugs first which means we won't need the gap sizes anymore. This way
// we won't need to individually recover each overwritten part of plugs.
inline
void  gc_heap::gcmemcopy (uint8_t* dest, uint8_t* src, size_t len, BOOL copy_cards_p)
{
    if (dest != src)
    {
#ifdef BACKGROUND_GC
        if (current_c_gc_state == c_gc_state_marking)
        {
            //TODO: should look to see whether we should consider changing this
            // to copy a consecutive region of the mark array instead.
            copy_mark_bits_for_addresses (dest, src, len);
        }
#endif //BACKGROUND_GC

#ifdef DOUBLY_LINKED_FL
        // Remember and clear the plug-level bits on the source before the copy.
        BOOL set_bgc_mark_bits_p = is_plug_bgc_mark_bit_set (src);
        if (set_bgc_mark_bits_p)
        {
            clear_plug_bgc_mark_bit (src);
        }

        BOOL make_free_obj_p = FALSE;
        if (len <= min_free_item_no_prev)
        {
            make_free_obj_p = is_free_obj_in_compact_bit_set (src);

            if (make_free_obj_p)
            {
                clear_free_obj_in_compact_bit (src);
            }
        }
#endif //DOUBLY_LINKED_FL

        //dprintf(3,(" Memcopy [%p->%p, %p->%p[", (size_t)src, (size_t)dest, (size_t)src+len, (size_t)dest+len));
        dprintf(3,(ThreadStressLog::gcMemCopyMsg(), (size_t)src, (size_t)dest, (size_t)src+len, (size_t)dest+len));
        // Both ends are shifted back by plug_skew so the copy starts plug_skew
        // bytes in front of the plug.
        memcopy (dest - plug_skew, src - plug_skew, len);

#ifdef DOUBLY_LINKED_FL
        if (set_bgc_mark_bits_p)
        {
            // Background-mark every object of the copied plug at its new location.
            uint8_t* dest_o = dest;
            uint8_t* dest_end_o = dest + len;
            while (dest_o < dest_end_o)
            {
                uint8_t* next_o = dest_o + Align (size (dest_o));
                background_mark (dest_o, background_saved_lowest_address, background_saved_highest_address);

                dest_o = next_o;
            }
            dprintf (3333, ("[h%d] GM: %p(%zx-%zx)->%p(%zx-%zx)",
                heap_number, dest,
                (size_t)(&mark_array [mark_word_of (dest)]),
                (size_t)(mark_array [mark_word_of (dest)]),
                dest_end_o,
                (size_t)(&mark_array [mark_word_of (dest_o)]),
                (size_t)(mark_array [mark_word_of (dest_o)])));
        }

        if (make_free_obj_p)
        {
            // The size of the filler free object is read from the copied data at
            // dest + min_free_item_no_prev; the filler is created right after
            // the plug.
            size_t* filler_free_obj_size_location = (size_t*)(dest + min_free_item_no_prev);
            size_t filler_free_obj_size = *filler_free_obj_size_location;
            make_unused_array ((dest + len), filler_free_obj_size);
            dprintf (3333, ("[h%d] smallobj, %p(%zd): %p->%p", heap_number,
                filler_free_obj_size_location, filler_free_obj_size, (dest + len), (dest + len + filler_free_obj_size)));
        }
#endif //DOUBLY_LINKED_FL

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        if (SoftwareWriteWatch::IsEnabledForGCHeap())
        {
            // The ranges [src - plug_skew .. src[ and [src + len - plug_skew .. src + len[ are ObjHeaders, which don't have GC
            // references, and are not relevant for write watch. The latter range actually corresponds to the ObjHeader for the
            // object at (src + len), so it can be ignored anyway.
            SoftwareWriteWatch::SetDirtyRegion(dest, len - plug_skew);
        }
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        copy_cards_range (dest, src, len, copy_cards_p);
    }
}

// Moves one plug to its planned location and keeps the brick table consistent
// with the compacted layout.
//
//  plug                - start of the plug at its current location.
//  size                - size of the plug; extended by sizeof (gap_reloc_pair)
//                        when check_last_object_p is set.
//  check_last_object_p - the plug's saved pre/post plug info must be swapped in
//                        around the copy (post info when args->is_shortened,
//                        pre info otherwise) and swapped back afterwards.
//  args                - compaction state carried from plug to plug
//                        (relocation distance, pinned plug entry, the brick
//                        currently being filled and the last plug placed in it).
void gc_heap::compact_plug (uint8_t* plug, size_t size, BOOL check_last_object_p, compact_args* args)
{
    args->print();
    uint8_t* reloc_plug = plug + args->last_plug_relocation;

    if (check_last_object_p)
    {
        size += sizeof (gap_reloc_pair);
        mark* entry = args->pinned_plug_entry;

        if (args->is_shortened)
        {
            assert (entry->has_post_plug_info());
            entry->swap_post_plug_and_saved();
        }
        else
        {
            assert (entry->has_pre_plug_info());
            entry->swap_pre_plug_and_saved();
        }
    }

    assert (node_relocation_distance (plug) == args->last_plug_relocation);

    // Any padding planned in front of the relocated plug is turned into an unused
    // array so the space before reloc_plug stays walkable.
#ifdef FEATURE_STRUCTALIGN
    ptrdiff_t alignpad = node_alignpad(plug);
    if (alignpad)
    {
        make_unused_array (reloc_plug - alignpad, alignpad);
        if (brick_of (reloc_plug - alignpad) != brick_of (reloc_plug))
        {
            // The alignment padding is straddling one or more bricks;
            // it has to be the last "object" of its first brick.
            fix_brick_to_highest (reloc_plug - alignpad, reloc_plug);
        }
    }
#else // FEATURE_STRUCTALIGN
    size_t unused_arr_size = 0;
    BOOL  already_padded_p = FALSE;
#ifdef SHORT_PLUGS
    if (is_plug_padded (plug))
    {
        already_padded_p = TRUE;
        clear_plug_padded (plug);
        unused_arr_size = Align (min_obj_size);
    }
#endif //SHORT_PLUGS
    if (node_realigned (plug))
    {
        unused_arr_size += switch_alignment_size (already_padded_p);
    }

    if (unused_arr_size != 0)
    {
        make_unused_array (reloc_plug - unused_arr_size, unused_arr_size);

        if (brick_of (reloc_plug - unused_arr_size) != brick_of (reloc_plug))
        {
            dprintf (3, ("fix B for padding: %zd: %p->%p",
                unused_arr_size, (reloc_plug - unused_arr_size), reloc_plug));
            // The alignment padding is straddling one or more bricks;
            // it has to be the last "object" of its first brick.
            fix_brick_to_highest (reloc_plug - unused_arr_size, reloc_plug);
        }
    }
#endif // FEATURE_STRUCTALIGN

#ifdef SHORT_PLUGS
    if (is_plug_padded (plug))
    {
        make_unused_array (reloc_plug - Align (min_obj_size), Align (min_obj_size));

        if (brick_of (reloc_plug - Align (min_obj_size)) != brick_of (reloc_plug))
        {
            // The alignment padding is straddling one or more bricks;
            // it has to be the last "object" of its first brick.
            fix_brick_to_highest (reloc_plug - Align (min_obj_size), reloc_plug);
        }
    }
#endif //SHORT_PLUGS

    gcmemcopy (reloc_plug, plug, size, args->copy_cards_p);

    if (args->check_gennum_p)
    {
        // src_gennum == -1 means the source generation has to be looked up from
        // the plug itself.
        int src_gennum = args->src_gennum;
        if (src_gennum == -1)
        {
            src_gennum = object_gennum (plug);
        }

        int dest_gennum = object_gennum_plan (reloc_plug);

        // Account the plug to the destination generation when it lands in a
        // higher-numbered generation than it came from.
        if (src_gennum < dest_gennum)
        {
            generation_allocation_size (generation_of (dest_gennum)) += size;
        }
    }

    size_t current_reloc_brick = args->current_compacted_brick;

    if (brick_of (reloc_plug) != current_reloc_brick)
    {
        // The relocated plug starts in a new brick: finalize the previous brick
        // so that it points at the last plug that was placed in it.
        dprintf (3, ("last reloc B: %zx, current reloc B: %zx",
            current_reloc_brick, brick_of (reloc_plug)));

        if (args->before_last_plug)
        {
            dprintf (3,(" fixing last brick %zx to point to last plug %p(%zx)",
                     current_reloc_brick,
                     args->before_last_plug,
                     (args->before_last_plug - brick_address (current_reloc_brick))));

            {
                set_brick (current_reloc_brick,
                        args->before_last_plug - brick_address (current_reloc_brick));
            }
        }
        current_reloc_brick = brick_of (reloc_plug);
    }
    size_t end_brick = brick_of (reloc_plug + size-1);
    if (end_brick != current_reloc_brick)
    {
        // The plug is straddling one or more bricks
        // It has to be the last plug of its first brick
        dprintf (3,("plug spanning multiple bricks, fixing first brick %zx to %zx(%zx)",
                 current_reloc_brick, (size_t)reloc_plug,
                 (reloc_plug - brick_address (current_reloc_brick))));

        {
            set_brick (current_reloc_brick,
                    reloc_plug - brick_address (current_reloc_brick));
        }
        // update all intervening brick
        size_t brick = current_reloc_brick + 1;
        dprintf (3,("setting intervening bricks %zu->%zu to -1",
            brick, (end_brick - 1)));
        while (brick < end_brick)
        {
            set_brick (brick, -1);
            brick++;
        }
        // code last brick offset as a plug address
        args->before_last_plug = brick_address (end_brick) -1;
        current_reloc_brick = end_brick;
        dprintf (3, ("setting before last to %p, last brick to %zx",
            args->before_last_plug, current_reloc_brick));
    }
    else
    {
        dprintf (3, ("still in the same brick: %zx", end_brick));
        args->before_last_plug = reloc_plug;
    }
    args->current_compacted_brick = current_reloc_brick;

    if (check_last_object_p)
    {
        // Undo the swap done before the copy.
        mark* entry = args->pinned_plug_entry;

        if (args->is_shortened)
        {
            entry->swap_post_plug_and_saved();
        }
        else
        {
            entry->swap_pre_plug_and_saved();
        }
    }
}

// In-order walk (left child, node, right child) of the plug tree rooted at tree,
// compacting plugs through compact_plug.
//
// A plug's end is only known when the next plug is reached (it ends where that
// plug's gap begins), so each visit compacts the *previous* plug remembered in
// args->last_plug and then records the current plug, together with its
// relocation distance, in its place. The plug that is still pending when the
// walk ends is left in args for the caller.
void gc_heap::compact_in_brick (uint8_t* tree, compact_args* args)
{
    assert (tree != NULL);
    // The node's children and relocation distance are read up front, before any
    // plug is compacted by the recursive calls below.
    int   left_node = node_left_child (tree);
    int   right_node = node_right_child (tree);
    ptrdiff_t relocation = node_relocation_distance (tree);

    args->print();

    if (left_node)
    {
        dprintf (3, ("B: L: %d->%p", left_node, (tree + left_node)));
        compact_in_brick ((tree + left_node), args);
    }

    uint8_t*  plug = tree;
    BOOL   has_pre_plug_info_p = FALSE;
    BOOL   has_post_plug_info_p = FALSE;

    // If this node is the oldest pinned plug, fetch its queue entry. This also
    // deques the entry and advances oldest_pinned_plug.
    if (tree == oldest_pinned_plug)
    {
        args->pinned_plug_entry = get_oldest_pinned_entry (&has_pre_plug_info_p,
                                                           &has_post_plug_info_p);
        assert (tree == pinned_plug (args->pinned_plug_entry));
    }

    if (args->last_plug != 0)
    {
        // The previous plug ends where the gap in front of this plug starts.
        size_t gap_size = node_gap_size (tree);
        uint8_t*  gap = (plug - gap_size);
        uint8_t*  last_plug_end = gap;
        size_t last_plug_size = (last_plug_end - args->last_plug);
        assert ((last_plug_size & (sizeof(PTR_PTR) - 1)) == 0);
        dprintf (3, ("tree: %p, last_plug: %p, gap: %p(%zx), last_plug_end: %p, size: %zx",
            tree, args->last_plug, gap, gap_size, last_plug_end, last_plug_size));

        // The previous plug needs the saved-info handling in compact_plug when it
        // was marked shortened (post-plug info of the previous pin) or when this
        // pinned plug has pre-plug info.
        BOOL check_last_object_p = (args->is_shortened || has_pre_plug_info_p);
        if (!check_last_object_p)
        {
            assert (last_plug_size >= Align (min_obj_size));
        }

        compact_plug (args->last_plug, last_plug_size, check_last_object_p, args);
    }
    else
    {
        // No previous plug is pending, so there must be no pre-plug info.
        assert (!has_pre_plug_info_p);
    }

    // The current plug becomes the pending one; it is compacted on the next visit.
    dprintf (3, ("set args last plug to plug: %p, reloc: %zx", plug, relocation));
    args->last_plug = plug;
    args->last_plug_relocation = relocation;
    args->is_shortened = has_post_plug_info_p;

    if (right_node)
    {
        dprintf (3, ("B: R: %d->%p", right_node, (tree + right_node)));
        compact_in_brick ((tree + right_node), args);
    }
}

// Walks the pinned plug queue from its reset position, calling
// recover_plug_info on every entry and dequeuing it, until the queue is empty.
//
// This returns the recovered size for gen2 plugs as that's what we need
// mostly - would be nice to make it work for all generations.
size_t gc_heap::recover_saved_pinned_info()
{
    size_t gen2_recovered_size = 0;

    for (reset_pinned_queue_bos(); !pinned_plug_que_empty_p(); deque_pinned_plug())
    {
        mark* pin_entry = oldest_pin();
        size_t entry_recovered_size = pin_entry->recover_plug_info();

        if (entry_recovered_size > 0)
        {
            // Only plugs that live in max_generation count towards the total.
            uint8_t* plug = pinned_plug (pin_entry);
            if (object_gennum (plug) == max_generation)
            {
                dprintf (3, ("recovered %p(%zd) from pin", plug, entry_recovered_size));
                gen2_recovered_size += entry_recovered_size;
            }
        }

#ifdef GC_CONFIG_DRIVEN
        // Classify the pin by which kinds of saved plug info it carried.
        const bool pre_info_p = !!pin_entry->has_pre_plug_info();
        const bool post_info_p = !!pin_entry->has_post_plug_info();

        if (pre_info_p && post_info_p)
        {
            record_interesting_data_point (idp_pre_and_post_pin);
        }
        else if (pre_info_p)
        {
            record_interesting_data_point (idp_pre_pin);
        }
        else if (post_info_p)
        {
            record_interesting_data_point (idp_post_pin);
        }
#endif //GC_CONFIG_DRIVEN
    }

    return gen2_recovered_size;
}

void gc_heap::compact_phase (int condemned_gen_number,
                             uint8_t*  first_condemned_address,
                             BOOL clear_cards)
{
#ifdef MULTIPLE_HEAPS
    dprintf(3, ("Joining after end of relocation"));
    gc_t_join.join(this, gc_join_relocate_phase_done);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef FEATURE_EVENT_TRACE
        if (informational_event_enabled_p)
        {
            gc_time_info[time_compact] = GetHighPrecisionTimeStamp();
            gc_time_info[time_relocate] = gc_time_info[time_compact] - gc_time_info[time_relocate];
        }
#endif //FEATURE_EVENT_TRACE

#ifdef MULTIPLE_HEAPS
        dprintf(3, ("Restarting for compaction"));
        gc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    dprintf (2, (ThreadStressLog::gcStartCompactMsg(), heap_number,
        first_condemned_address, brick_of (first_condemned_address)));

#ifdef FEATURE_LOH_COMPACTION
    if (loh_compacted_p)
    {
        compact_loh();
    }
#endif //FEATURE_LOH_COMPACTION

    reset_pinned_queue_bos();
    update_oldest_pinned_plug();
    BOOL reused_seg = expand_reused_seg_p();
    if (reused_seg)
    {
        for (int i = 1; i <= max_generation; i++)
        {
            generation_allocation_size (generation_of (i)) = 0;
        }
    }

    int stop_gen_idx = get_stop_generation_index (condemned_gen_number);
    for (int i = condemned_gen_number; i >= stop_gen_idx; i--)
    {
        generation* condemned_gen = generation_of (i);
        heap_segment* current_heap_segment = get_start_segment (condemned_gen);
#ifdef USE_REGIONS
        if (!current_heap_segment)
            continue;

        size_t   current_brick = brick_of (heap_segment_mem (current_heap_segment));
#else
        size_t   current_brick = brick_of (first_condemned_address);
#endif //USE_REGIONS

        uint8_t*  end_address = heap_segment_allocated (current_heap_segment);

#ifndef USE_REGIONS
        if ((first_condemned_address >= end_address) && (condemned_gen_number < max_generation))
        {
            return;
        }
#endif //!USE_REGIONS

        size_t  end_brick = brick_of (end_address-1);
        compact_args args;
        args.last_plug = 0;
        args.before_last_plug = 0;
        args.current_compacted_brick = ~((size_t)1);
        args.is_shortened = FALSE;
        args.pinned_plug_entry = 0;
        args.copy_cards_p =  (condemned_gen_number >= 1) || !clear_cards;
        args.check_gennum_p = reused_seg;
        if (args.check_gennum_p)
        {
            args.src_gennum = ((current_heap_segment == ephemeral_heap_segment) ? -1 : 2);
        }
#ifdef USE_REGIONS
        assert (!args.check_gennum_p);
#endif //USE_REGIONS

        while (1)
        {
            if (current_brick > end_brick)
            {
                if (args.last_plug != 0)
                {
                    dprintf (3, ("compacting last plug: %p", args.last_plug))
                    compact_plug (args.last_plug,
                                  (heap_segment_allocated (current_heap_segment) - args.last_plug),
                                  args.is_shortened,
                                  &args);
                }

                heap_segment* next_heap_segment = heap_segment_next_non_sip (current_heap_segment);
                if (next_heap_segment)
                {
                    current_heap_segment = next_heap_segment;
                    current_brick = brick_of (heap_segment_mem (current_heap_segment));
                    end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
                    args.last_plug = 0;
                    if (args.check_gennum_p)
                    {
                        args.src_gennum = ((current_heap_segment == ephemeral_heap_segment) ? -1 : 2);
                    }
                    continue;
                }
                else
                {
                    if (args.before_last_plug !=0)
                    {
                        dprintf (3, ("Fixing last brick %zx to point to plug %zx",
                                    args.current_compacted_brick, (size_t)args.before_last_plug));
                        assert (args.current_compacted_brick != ~1u);
                        set_brick (args.current_compacted_brick,
                                   args.before_last_plug - brick_address (args.current_compacted_brick));
                    }
                    break;
                }
            }
            {
                int  brick_entry =  brick_table [ current_brick ];
                dprintf (3, ("B: %zx(%zx)->%p",
                    current_brick, (size_t)brick_entry, (brick_address (current_brick) + brick_entry - 1)));

                if (brick_entry >= 0)
                {
                    compact_in_brick ((brick_address (current_brick) + brick_entry -1),
                                      &args);

                }
            }
            current_brick++;
        }
    }

    recover_saved_pinned_info();

    concurrent_print_time_delta ("compact end");

    dprintf (2, (ThreadStressLog::gcEndCompactMsg(), heap_number));
}

#ifdef MULTIPLE_HEAPS

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4702) // C4702: unreachable code: gc_thread_function may not return
#endif //_MSC_VER
void gc_heap::gc_thread_stub (void* arg)
{
    gc_heap* heap = (gc_heap*)arg;
    if (!gc_thread_no_affinitize_p)
    {
        // We are about to set affinity for GC threads. It is a good place to set up NUMA and
        // CPU groups because the process mask, processor number, and group number are all
        // readily available.
        set_thread_affinity_for_heap (heap->heap_number, heap_select::find_proc_no_from_heap_no (heap->heap_number));
    }

    // server GC threads run at a higher priority than normal.
    GCToOSInterface::BoostThreadPriority();
    void* tmp = _alloca (256*heap->heap_number);
    heap->gc_thread_function();
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif //_MSC_VER

#endif //MULTIPLE_HEAPS

#ifdef BACKGROUND_GC

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4702) // C4702: unreachable code: gc_thread_function may not return
#endif //_MSC_VER
void gc_heap::bgc_thread_stub (void* arg)
{
    gc_heap* heap = (gc_heap*)arg;

#ifdef STRESS_DYNAMIC_HEAP_COUNT
    // We should only do this every so often; otherwise we'll never be able to do a BGC
    int r = (int)gc_rand::get_rand (30);
    bool wait_p = (r < 10);

    if (wait_p)
    {
        GCToOSInterface::Sleep (100);
    }
    dprintf (6666, ("h%d %s", heap->heap_number, (wait_p ? "waited" : "did not wait")));
#endif

    heap->bgc_thread = GCToEEInterface::GetThread();
    assert(heap->bgc_thread != nullptr);
    heap->bgc_thread_function();
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif //_MSC_VER

void gc_heap::background_drain_mark_list (int thread)
{
#ifndef MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(thread);
#endif //!MULTIPLE_HEAPS

    size_t saved_c_mark_list_index = c_mark_list_index;

    if (saved_c_mark_list_index)
    {
        concurrent_print_time_delta ("SML");
    }
    while (c_mark_list_index != 0)
    {
        size_t current_index = c_mark_list_index - 1;
        uint8_t* o = c_mark_list [current_index];
        background_mark_object (o THREAD_NUMBER_ARG);
        c_mark_list_index--;
    }
    if (saved_c_mark_list_index)
    {
        concurrent_print_time_delta ("EML");
    }

    fire_drain_mark_list_event (saved_c_mark_list_index);
}


// The background GC version of scan_dependent_handles (see that method for a more in-depth comment).
#ifdef MULTIPLE_HEAPS
// Since we only scan dependent handles while we are stopped we'll never interfere with FGCs scanning
// them. So we can use the same static variables.
// Multi-heap variant: every BGC worker thread runs this loop in lock-step, joining on
// bgc_t_join twice per scanning iteration (once to decide whether another scan is required,
// once before rescanning). The loop exits after the first join at which s_fScanRequired is
// computed as false. sc is this worker's scan context and is passed to the GCScan::GcDh*
// helpers; sc->concurrent is forwarded to background_process_mark_overflow.
void gc_heap::background_scan_dependent_handles (ScanContext *sc)
{
    // Whenever we call this method there may have been preceding object promotions. So set
    // s_fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set
    // based on how the scanning proceeded).
    s_fUnscannedPromotions = TRUE;

    // We don't know how many times we need to loop yet. In particular we can't base the loop condition on
    // the state of this thread's portion of the dependent handle table. That's because promotions on other
    // threads could cause handle promotions to become necessary here. Even if there are definitely no more
    // promotions possible in this thread's handles, we still have to stay in lock-step with those worker
    // threads that haven't finished yet (each GC worker thread has to join exactly the same number of times
    // as all the others or they'll get out of step).
    while (true)
    {
        // The various worker threads are all currently racing in this code. We need to work out if at least
        // one of them thinks it has work to do this cycle. Each thread needs to rescan its portion of the
        // dependent handle table when both of the following conditions apply:
        //  1) At least one (arbitrary) object might have been promoted since the last scan (because if this
        //     object happens to correspond to a primary in one of our handles we might potentially have to
        //     promote the associated secondary).
        //  2) The table for this thread has at least one handle with a secondary that isn't promoted yet.
        //
        // The first condition is represented by s_fUnscannedPromotions. This is always non-zero for the first
        // iteration of this loop (see comment above) and in subsequent cycles each thread updates this
        // whenever a mark stack overflow occurs or scanning their dependent handles results in a secondary
        // being promoted. This value is cleared back to zero in a synchronized fashion in the join that
        // follows below. Note that we can't read this outside of the join since on any iteration apart from
        // the first threads will be racing between reading this value and completing their previous
        // iteration's table scan.
        //
        // The second condition is tracked by the dependent handle code itself on a per worker thread basis
        // (and updated by the GcDhReScan() method). We call GcDhUnpromotedHandlesExist() on each thread to
        // determine the local value and collect the results into the s_fUnpromotedHandles variable in what is
        // effectively an OR operation. As per s_fUnscannedPromotions we can't read the final result until
        // we're safely joined.
        if (GCScan::GcDhUnpromotedHandlesExist(sc))
            s_fUnpromotedHandles = TRUE;

        // Synchronize all the threads so we can read our state variables safely. The following shared
        // variable (indicating whether we should scan the tables or terminate the loop) will be set by a
        // single thread inside the join.
        bgc_t_join.join(this, gc_join_scan_dependent_handles);
        if (bgc_t_join.joined())
        {
            // We're synchronized so it's safe to read our shared state variables. We update another shared
            // variable to indicate to all threads whether we'll be scanning for another cycle or terminating
            // the loop. We scan if there has been at least one object promotion since last time and at least
            // one thread has a dependent handle table with a potential handle promotion possible.
            s_fScanRequired = s_fUnscannedPromotions && s_fUnpromotedHandles;

            // Reset our shared state variables (ready to be set again on this scan or with a good initial
            // value for the next call if we're terminating the loop).
            s_fUnscannedPromotions = FALSE;
            s_fUnpromotedHandles = FALSE;

            if (!s_fScanRequired)
            {
                // We are about to terminate the loop: merge the per-heap background overflow state
                // (a flag with regions, a min/max address range otherwise) and publish the merged
                // value to every heap before the final overflow processing below.
#ifdef USE_REGIONS
                BOOL all_heaps_background_overflow_p = FALSE;
#else //USE_REGIONS
                uint8_t* all_heaps_max = 0;
                uint8_t* all_heaps_min = MAX_PTR;
#endif //USE_REGIONS
                int i;
                for (i = 0; i < n_heaps; i++)
                {
#ifdef USE_REGIONS
                    // in the regions case, compute the OR of all the per-heap flags
                    if (g_heaps[i]->background_overflow_p)
                        all_heaps_background_overflow_p = TRUE;
#else //USE_REGIONS
                    if (all_heaps_max < g_heaps[i]->background_max_overflow_address)
                        all_heaps_max = g_heaps[i]->background_max_overflow_address;
                    if (all_heaps_min > g_heaps[i]->background_min_overflow_address)
                        all_heaps_min = g_heaps[i]->background_min_overflow_address;
#endif //USE_REGIONS
                }
                for (i = 0; i < n_heaps; i++)
                {
#ifdef USE_REGIONS
                    g_heaps[i]->background_overflow_p = all_heaps_background_overflow_p;
#else //USE_REGIONS
                    g_heaps[i]->background_max_overflow_address = all_heaps_max;
                    g_heaps[i]->background_min_overflow_address = all_heaps_min;
#endif //USE_REGIONS
                }
            }

            dprintf(2, ("Starting all gc thread mark stack overflow processing"));
            bgc_t_join.restart();
        }

        // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions
        // being visible. If there really was an overflow (background_process_mark_overflow returns true) then
        // set the global flag indicating that at least one object promotion may have occurred (the usual
        // comment about races applies). (Note it's OK to set this flag even if we're about to terminate the
        // loop and exit the method since we unconditionally set this variable on method entry anyway).
        if (background_process_mark_overflow (sc->concurrent))
            s_fUnscannedPromotions = TRUE;

        // If we decided that no scan was required we can terminate the loop now.
        if (!s_fScanRequired)
            break;

        // Otherwise we must join with the other workers to ensure that all mark stack overflows have been
        // processed before we start scanning dependent handle tables (if overflows remain while we scan we
        // could miss noting the promotion of some primary objects).
        bgc_t_join.join(this, gc_join_rescan_dependent_handles);
        if (bgc_t_join.joined())
        {
            dprintf(3, ("Starting all gc thread for dependent handle promotion"));
            bgc_t_join.restart();
        }

        // If the portion of the dependent handle table managed by this worker has handles that could still be
        // promoted perform a rescan. If the rescan resulted in at least one promotion note this fact since it
        // could require a rescan of handles on this or other workers.
        if (GCScan::GcDhUnpromotedHandlesExist(sc))
            if (GCScan::GcDhReScan(sc))
                s_fUnscannedPromotions = TRUE;
    }
}
#else
// Single-heap variant. Keeps rescanning the dependent handle table for as long as
// unpromoted handles remain and the previous pass (or the work preceding this call)
// may have promoted something. Always finishes with one more round of mark stack
// overflow processing.
void gc_heap::background_scan_dependent_handles (ScanContext *sc)
{
    // Promotions may have happened before we were called, so the first pass is
    // always considered worthwhile.
    bool promotions_pending = true;

    while (GCScan::GcDhUnpromotedHandlesExist(sc) && promotions_pending)
    {
        // Drain any mark stack overflow first: the handle scan relies on all earlier
        // promotions being visible. An overflow that was actually processed means
        // additional objects now appear promoted.
        const bool overflow_processed = (background_process_mark_overflow (sc->concurrent) != 0);

        // Then rescan the handles; this reports whether any promotion resulted.
        const bool rescan_promoted = (GCScan::GcDhReScan (sc) != 0);

        // Another pass is only useful if either step promoted something.
        promotions_pending = (overflow_processed || rescan_promoted);
    }

    // Final processing of any overflowed mark stack.
    background_process_mark_overflow (sc->concurrent);
}
#endif //MULTIPLE_HEAPS

// If the GC described by `settings` condemned less than max_generation while a
// background GC is running, puts the saved BGC settings back into `settings` and
// refreshes GCHeap::GcCondemnedGeneration to match. Otherwise does nothing.
void gc_heap::recover_bgc_settings()
{
    if (settings.condemned_generation >= max_generation)
    {
        return;
    }

    if (!gc_heap::background_running_p())
    {
        return;
    }

    dprintf (2, ("restoring bgc settings"));
    settings = saved_bgc_settings;
    GCHeap::GcCondemnedGeneration = gc_heap::settings.condemned_generation;
}

// Must be called on the BGC thread (asserted). When g_fSuspensionPending is
// positive, switches to preemptive mode and - if that call reports a toggle -
// immediately switches back to cooperative mode.
// NOTE(review): presumably the brief preemptive window is what lets a pending
// foreground GC proceed - confirm against the EE interface contract.
void gc_heap::allow_fgc()
{
    assert (bgc_thread == GCToEEInterface::GetThread());

    if (!(g_fSuspensionPending > 0))
    {
        return;
    }

    const bool toggled_p = GCToEEInterface::EnablePreemptiveGC();
    if (toggled_p)
    {
        GCToEEInterface::DisablePreemptiveGC();
    }
}

// Returns non-zero when a background GC is running, or when the inspected heap's
// current_bgc_state is bgc_initialized.
BOOL gc_heap::is_bgc_in_progress()
{
    if (background_running_p())
    {
        return TRUE;
    }

#ifdef MULTIPLE_HEAPS
    // Every heap enters and leaves the bgc_initialized state during the VM suspension
    // at the start of a BGC, so any heap is representative; heap 0 is used.
    gc_heap* representative_heap = g_heaps[0];
#else
    gc_heap* representative_heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS

    return (representative_heap->current_bgc_state == bgc_initialized);
}

void gc_heap::clear_commit_flag()
{
    for (int i = get_start_generation_index(); i < total_generation_count; i++)
    {
        generation* gen = generation_of (i);
        heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));
        while (seg)
        {
            if (seg->flags & heap_segment_flags_ma_committed)
            {
                seg->flags &= ~heap_segment_flags_ma_committed;
            }

            if (seg->flags & heap_segment_flags_ma_pcommitted)
            {
                seg->flags &= ~heap_segment_flags_ma_pcommitted;
            }

            seg = heap_segment_next (seg);
        }
    }
}

// Applies clear_commit_flag to every heap (MULTIPLE_HEAPS) or to the single heap.
void gc_heap::clear_commit_flag_global()
{
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* hp = g_heaps[heap_idx];
        hp->clear_commit_flag();
    }
#else
    clear_commit_flag();
#endif //MULTIPLE_HEAPS
}

// Debug-only check that every mark word of mark_array_addr covering [begin, end) is
// zero. A non-zero word is logged and reported through FATAL_GC_ERROR. In non-debug
// builds the function does nothing.
void gc_heap::verify_mark_array_cleared (uint8_t* begin, uint8_t* end, uint32_t* mark_array_addr)
{
#ifdef _DEBUG
    const size_t first_word = mark_word_of (begin);
    const size_t limit_word = mark_word_of (end);

    for (size_t word_idx = first_word; word_idx < limit_word; word_idx++)
    {
        if (!mark_array_addr[word_idx])
        {
            continue;
        }

        uint8_t* addr = mark_word_address (word_idx);
#ifdef USE_REGIONS
        heap_segment* region = region_of (addr);
        dprintf (1, ("The mark bits at 0x%zx:0x%x(addr: 0x%p, r: %zx(%p)) were not cleared",
                        word_idx, mark_array_addr[word_idx], addr,
                        (size_t)region, heap_segment_mem (region)));
#else
        dprintf (1, ("The mark bits at 0x%zx:0x%x(addr: 0x%p) were not cleared",
                        word_idx, mark_array_addr[word_idx], addr));
#endif //USE_REGIONS
        FATAL_GC_ERROR();
    }
#else // _DEBUG
    UNREFERENCED_PARAMETER(begin);
    UNREFERENCED_PARAMETER(end);
    UNREFERENCED_PARAMETER(mark_array_addr);
#endif //_DEBUG
}

// Returns the address at which mark array coverage for seg begins: with regions
// this is always heap_segment_mem (seg); otherwise it is heap_segment_mem (seg) for
// read-only segments and the address of the heap_segment structure itself for all
// other segments.
uint8_t* gc_heap::get_start_address (heap_segment* seg)
{
#ifdef USE_REGIONS
    return heap_segment_mem (seg);
#else
    if (heap_segment_read_only_p(seg))
    {
        return heap_segment_mem (seg);
    }

    return (uint8_t*)seg;
#endif //USE_REGIONS
}

// Commits the mark array for the part of a new segment that overlaps hp's saved
// background GC address range, and records the result in seg->flags.
//
//  hp                 - heap whose saved BGC range and mark array are used.
//  seg                - the new segment.
//  new_card_table     - card table to compare against hp->card_table; 0 means
//                       g_gc_card_table.
//  new_lowest_address - lowest address the new card table covers; 0 means
//                       g_gc_lowest_address (only consulted when the tables differ).
//
// Returns FALSE if any commit fails (seg->flags is then left untouched), TRUE
// otherwise - including when the segment doesn't overlap the BGC range at all.
BOOL gc_heap::commit_mark_array_new_seg (gc_heap* hp,
                                         heap_segment* seg,
                                         uint32_t* new_card_table,
                                         uint8_t* new_lowest_address)
{
    uint8_t* seg_start = get_start_address (seg);
    uint8_t* seg_end = heap_segment_reserved (seg);

    uint8_t* bgc_lowest = hp->background_saved_lowest_address;
    uint8_t* bgc_highest = hp->background_saved_highest_address;

    // No overlap with the BGC range - nothing to commit.
    if ((bgc_highest < seg_start) || (bgc_lowest > seg_end))
    {
        return TRUE;
    }

    size_t flag_to_set = 0;
    if ((seg_start >= bgc_lowest) && (seg_end <= bgc_highest))
    {
        dprintf (GC_TABLE_LOG, ("completely in bgc range: seg %p-%p, bgc: %p-%p",
                                seg_start, seg_end, bgc_lowest, bgc_highest));
        flag_to_set = heap_segment_flags_ma_committed;
    }
    else
    {
        dprintf (GC_TABLE_LOG, ("partially in bgc range: seg %p-%p, bgc: %p-%p",
                                seg_start, seg_end, bgc_lowest, bgc_highest));
        flag_to_set = heap_segment_flags_ma_pcommitted;
#ifdef USE_REGIONS
        assert (!"Region should not have its mark array partially committed.");
#endif
    }

    // Clip the segment to the BGC range.
    uint8_t* range_start = max (bgc_lowest, seg_start);
    uint8_t* range_end = min (bgc_highest, seg_end);

    if (!commit_mark_array_by_range (range_start, range_end, hp->mark_array))
    {
        return FALSE;
    }

    if (new_card_table == 0)
    {
        new_card_table = g_gc_card_table;
    }

    if (hp->card_table != new_card_table)
    {
        // The card table differs from the one hp currently uses, so the same range
        // is committed in the mark array that belongs to the new table as well.
        if (new_lowest_address == 0)
        {
            new_lowest_address = g_gc_lowest_address;
        }

        uint32_t* ct = &new_card_table[card_word (gcard_of (new_lowest_address))];
        uint32_t* ma = (uint32_t*)((uint8_t*)card_table_mark_array (ct) - size_mark_array_of (0, new_lowest_address));

        dprintf (GC_TABLE_LOG, ("table realloc-ed: %p->%p, MA: %p->%p",
                                hp->card_table, new_card_table,
                                hp->mark_array, ma));

        if (!commit_mark_array_by_range (range_start, range_end, ma))
        {
            return FALSE;
        }
    }

    seg->flags |= flag_to_set;
    return TRUE;
}

// Commits the pages of mark_array_addr that hold the mark words for [begin, end).
// The committed span is widened outward to page boundaries. Returns TRUE on
// success and FALSE if virtual_commit fails.
BOOL gc_heap::commit_mark_array_by_range (uint8_t* begin, uint8_t* end, uint32_t* mark_array_addr)
{
    const size_t first_word = mark_word_of (begin);
    const size_t limit_word = mark_word_of (align_on_mark_word (end));

    uint8_t* page_start = align_lower_page ((uint8_t*)&mark_array_addr[first_word]);
    uint8_t* page_end = align_on_page ((uint8_t*)&mark_array_addr[limit_word]);
    const size_t bytes_to_commit = (size_t)(page_end - page_start);

#ifdef SIMPLE_DPRINTF
    dprintf (GC_TABLE_LOG, ("range: %p->%p mark word: %zx->%zx(%zd), mark array: %p->%p(%zd), commit %p->%p(%zd)",
                            begin, end,
                            first_word, limit_word,
                            (limit_word - first_word) * sizeof (uint32_t),
                            &mark_array_addr[first_word],
                            &mark_array_addr[limit_word],
                            (size_t)(&mark_array_addr[limit_word] - &mark_array_addr[first_word]),
                            page_start, page_end,
                            bytes_to_commit));
#endif //SIMPLE_DPRINTF

    if (!virtual_commit (page_start, bytes_to_commit, recorded_committed_mark_array_bucket))
    {
        dprintf (GC_TABLE_LOG, ("failed to commit %zd bytes", (limit_word - first_word) * sizeof (uint32_t)));
        return FALSE;
    }

    // Only [begin, end) can be checked for being cleared: the first and last pages
    // may also be in use by other segments or by the card bundle, so they are not
    // necessarily all zero.
    verify_mark_array_cleared (begin, end, mark_array_addr);
    return TRUE;
}

// Commits, in new_mark_array_addr, the part of seg that overlaps the saved
// background GC address range of the heap owning it. Returns TRUE when there is no
// overlap or the commit succeeds, FALSE when the commit fails.
BOOL gc_heap::commit_mark_array_with_check (heap_segment* seg, uint32_t* new_mark_array_addr)
{
    uint8_t* seg_start = get_start_address (seg);
    uint8_t* seg_end = heap_segment_reserved (seg);

#ifdef MULTIPLE_HEAPS
    uint8_t* bgc_lowest = heap_segment_heap (seg)->background_saved_lowest_address;
    uint8_t* bgc_highest = heap_segment_heap (seg)->background_saved_highest_address;
#else
    uint8_t* bgc_lowest = background_saved_lowest_address;
    uint8_t* bgc_highest = background_saved_highest_address;
#endif //MULTIPLE_HEAPS

    // Segment lies entirely outside the BGC range - nothing to do.
    if ((bgc_highest < seg_start) || (bgc_lowest > seg_end))
    {
        return TRUE;
    }

    // Clip to the BGC range before committing.
    uint8_t* range_start = max (bgc_lowest, seg_start);
    uint8_t* range_end = min (bgc_highest, seg_end);

    return (commit_mark_array_by_range (range_start, range_end, new_mark_array_addr) ? TRUE : FALSE);
}

// Commits the mark array (mark_array_addr) for the whole of seg, from
// get_start_address (seg) up to its reserved end. Returns the result of
// commit_mark_array_by_range.
BOOL gc_heap::commit_mark_array_by_seg (heap_segment* seg, uint32_t* mark_array_addr)
{
    uint8_t* seg_reserved_end = heap_segment_reserved (seg);

    dprintf (GC_TABLE_LOG, ("seg: %p->%p; MA: %p",
        seg,
        seg_reserved_end,
        mark_array_addr));

    return commit_mark_array_by_range (get_start_address (seg), seg_reserved_end, mark_array_addr);
}

// Walks every in-range segment of every generation of this heap and commits the
// mark array for each one not yet flagged heap_segment_flags_ma_committed,
// updating the segment's commit flags on success. Returns FALSE as soon as a
// commit fails, TRUE once all segments have been handled.
BOOL gc_heap::commit_mark_array_bgc_init()
{
    dprintf (GC_TABLE_LOG, ("BGC init commit: lowest: %p, highest: %p, mark_array: %p",
                            lowest_address, highest_address, mark_array));

    for (int gen_idx = get_start_generation_index(); gen_idx < total_generation_count; gen_idx++)
    {
        heap_segment* first_seg = heap_segment_in_range (generation_start_segment (generation_of (gen_idx)));

        for (heap_segment* seg = first_seg; seg; seg = heap_segment_next (seg))
        {
            dprintf (GC_TABLE_LOG, ("h%d gen%d seg: %p(%p-%p), flags: %zd",
                heap_number, gen_idx, seg, heap_segment_mem (seg), heap_segment_allocated (seg), seg->flags));

            // Already fully committed - skip.
            if (seg->flags & heap_segment_flags_ma_committed)
            {
                continue;
            }

            if (!heap_segment_read_only_p (seg))
            {
                // Normal segments are by design completely in range, so the mark array
                // for the whole segment is committed.
                if (!commit_mark_array_by_seg (seg, mark_array))
                {
                    return FALSE;
                }

                if (seg->flags & heap_segment_flags_ma_pcommitted)
                {
                    seg->flags &= ~heap_segment_flags_ma_pcommitted;
                }
                seg->flags |= heap_segment_flags_ma_committed;
                continue;
            }

            // Read-only segments may be only partially in range, in which case they
            // never get the fully-committed flag and are revisited at the start of
            // every BGC. That isn't being made more efficient right now - ro segments
            // are currently only used by NativeAOT.
            const bool fully_in_range_p = ((heap_segment_mem (seg) >= lowest_address) &&
                                           (heap_segment_reserved (seg) <= highest_address));
            if (fully_in_range_p)
            {
                if (!commit_mark_array_by_seg (seg, mark_array))
                {
                    return FALSE;
                }
                seg->flags |= heap_segment_flags_ma_committed;
            }
            else
            {
                uint8_t* clipped_start = max (lowest_address, heap_segment_mem (seg));
                uint8_t* clipped_end = min (highest_address, heap_segment_reserved (seg));
                if (!commit_mark_array_by_range (clipped_start, clipped_end, mark_array))
                {
                    return FALSE;
                }
                seg->flags |= heap_segment_flags_ma_pcommitted;
            }
        }
    }

    return TRUE;
}

// Commits a new mark array (new_mark_array_addr) for all existing in-range segments
// of this heap. Because this is for a new array, the segments' commit flags are
// neither consulted nor modified here. Returns FALSE on the first failed commit.
BOOL gc_heap::commit_new_mark_array (uint32_t* new_mark_array_addr)
{
    dprintf (GC_TABLE_LOG, ("committing existing segs on MA %p", new_mark_array_addr));

    for (int gen_idx = get_start_generation_index(); gen_idx < total_generation_count; gen_idx++)
    {
        heap_segment* first_seg = heap_segment_in_range (generation_start_segment (generation_of (gen_idx)));

        for (heap_segment* seg = first_seg; seg; seg = heap_segment_next (seg))
        {
            if (!commit_mark_array_with_check (seg, new_mark_array_addr))
            {
                return FALSE;
            }
        }
    }

#if defined(MULTIPLE_HEAPS) && !defined(USE_REGIONS)
    // new_heap_segment, when set, is committed as well.
    if (new_heap_segment && !commit_mark_array_with_check (new_heap_segment, new_mark_array_addr))
    {
        return FALSE;
    }
#endif //MULTIPLE_HEAPS && !USE_REGIONS

    return TRUE;
}

// Runs commit_new_mark_array (new_mark_array) on every heap (MULTIPLE_HEAPS) or on
// the single heap. Returns FALSE as soon as one of them fails, TRUE otherwise.
BOOL gc_heap::commit_new_mark_array_global (uint32_t* new_mark_array)
{
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* hp = g_heaps[heap_idx];
        if (!hp->commit_new_mark_array (new_mark_array))
        {
            return FALSE;
        }
    }

    return TRUE;
#else
    return (commit_new_mark_array (new_mark_array) ? TRUE : FALSE);
#endif //MULTIPLE_HEAPS
}

// Decommits the mark array pages that cover seg, provided the segment is flagged as
// having its mark array committed (fully or partially). Only whole pages lying
// strictly inside the segment's mark words are released. A failed decommit is
// logged and asserted on.
void gc_heap::decommit_mark_array_by_seg (heap_segment* seg)
{
    // When BGC is disabled (the finalize watchdog does this at shutdown) the mark
    // array may already have been set to NULL.
    if (mark_array == NULL)
    {
        return;
    }

    dprintf (GC_TABLE_LOG, ("decommitting seg %p(%zx), MA: %p", seg, seg->flags, mark_array));

    const size_t seg_flags = seg->flags;

    // Nothing was committed for this segment.
    if (!(seg_flags & heap_segment_flags_ma_committed) &&
        !(seg_flags & heap_segment_flags_ma_pcommitted))
    {
        return;
    }

    uint8_t* range_start = get_start_address (seg);
    uint8_t* range_end = heap_segment_reserved (seg);

    if (seg_flags & heap_segment_flags_ma_pcommitted)
    {
        // Partially committed: clip to this heap's [lowest_address, highest_address].
        range_start = max (lowest_address, range_start);
        range_end = min (highest_address, range_end);
    }

    const size_t first_word = mark_word_of (range_start);
    const size_t limit_word = mark_word_of (align_on_mark_word (range_end));

    // Round inward (start up, end down) so only whole pages inside the range are released.
    uint8_t* page_start = align_on_page ((uint8_t*)&mark_array[first_word]);
    uint8_t* page_end = align_lower_page ((uint8_t*)&mark_array[limit_word]);
    const size_t bytes_to_decommit = (size_t)(page_end - page_start);

#ifdef SIMPLE_DPRINTF
    dprintf (GC_TABLE_LOG, ("seg: %p mark word: %zx->%zx(%zd), mark array: %p->%p(%zd), decommit %p->%p(%zd)",
                            seg,
                            first_word, limit_word,
                            (limit_word - first_word) * sizeof (uint32_t),
                            &mark_array[first_word],
                            &mark_array[limit_word],
                            (size_t)(&mark_array[limit_word] - &mark_array[first_word]),
                            page_start, page_end,
                            bytes_to_decommit));
#endif //SIMPLE_DPRINTF

    // After rounding inward there may be no whole page left to release.
    if ((page_start < page_end) &&
        !virtual_decommit (page_start, bytes_to_decommit, recorded_committed_mark_array_bucket))
    {
        dprintf (GC_TABLE_LOG, ("decommit on %p for %zd bytes failed",
                                page_start, bytes_to_decommit));
        assert (!"decommit failed");
    }

    dprintf (GC_TABLE_LOG, ("decommitted [%zx for address [%p", first_word, seg));
}

// True only when the current GC condemns (max_generation - 1) and the concurrent GC
// state is c_gc_state_planning.
bool gc_heap::should_update_end_mark_size()
{
    if (settings.condemned_generation != (max_generation - 1))
    {
        return false;
    }

    return (current_c_gc_state == c_gc_state_planning);
}

// Performs the mark phase of a background GC (BGC) for this heap.
//
// Order of work, as laid out below:
//  1. Per-heap BGC mark state is reset, then stack roots and the finalize
//     queue are scanned with background_promote_callback while sc.concurrent
//     is TRUE.
//  2. Write watch is reset and the EE is restarted (restart_vm). Sized ref
//     handles, the handle table, the background mark list, dirtied pages and
//     mark overflow are then processed with the concurrent flag set.
//  3. The EE is suspended again (bgc_suspend_EE, issued by heap 0) and roots,
//     the finalize queue, handles and written pages are scanned once more
//     with sc.concurrent FALSE, followed by dependent handles, short weak
//     pointers, finalization, long weak pointers and the syncblk scan.
//  4. Per-generation dynamic data is reset, heap_segment_background_allocated
//     is recorded for every SOH segment, and allocation contexts are repaired.
//
// With MULTIPLE_HEAPS the function synchronizes with the other heaps through
// bgc_t_join; the blocks guarded by bgc_t_join.joined() hold the work that is
// done once per join before bgc_t_join.restart() is called.
void gc_heap::background_mark_phase ()
{
    verify_mark_array_cleared();

    ScanContext sc;
    sc.thread_number = heap_number;
    sc.thread_count = n_heaps;
    sc.promotion = TRUE;
    sc.concurrent = FALSE;

    THREAD_FROM_HEAP;
    BOOL cooperative_mode = TRUE;
#ifndef MULTIPLE_HEAPS
    const int thread = heap_number;
#endif //!MULTIPLE_HEAPS

    dprintf(2,("-(GC%zu)BMark-", VolatileLoad(&settings.gc_index)));

    assert (settings.concurrent);

    if (gen0_must_clear_bricks > 0)
        gen0_must_clear_bricks--;

    // Reset the per-heap counters and overflow tracking used by this BGC.
    background_soh_alloc_count = 0;
    bgc_overflow_count = 0;

    bpromoted_bytes (heap_number) = 0;
    // Function-local static: written inside the gc_join_restart_ee block below
    // and read afterwards to decide whether sized ref handles need scanning.
    static uint32_t num_sizedrefs = 0;

#ifdef USE_REGIONS
    background_overflow_p = FALSE;
#else
    background_min_overflow_address = MAX_PTR;
    background_max_overflow_address = 0;
    background_min_soh_overflow_address = MAX_PTR;
    background_max_soh_overflow_address = 0;
#endif //USE_REGIONS
    processed_eph_overflow_p = FALSE;

    //set up the mark lists from g_mark_list
    assert (g_mark_list);
    mark_list = g_mark_list;
    //dont use the mark list for full gc
    //because multiple segments are more complex to handle and the list
    //is likely to overflow
    mark_list_end = &mark_list [0];
    mark_list_index = &mark_list [0];

    c_mark_list_index = 0;

#ifndef MULTIPLE_HEAPS
    shigh = (uint8_t*) 0;
    slow  = MAX_PTR;
#endif //MULTIPLE_HEAPS

    // Initial root scan: stack roots and the finalize queue are scanned with
    // background_promote_callback and sc.concurrent set to TRUE.
    dprintf(3,("BGC: stack marking"));
    sc.concurrent = TRUE;

    GCScan::GcScanRoots(background_promote_callback,
                            max_generation, max_generation,
                            &sc);

    dprintf(3,("BGC: finalization marking"));
    finalize_queue->GcScanRoots(background_promote_callback, heap_number, 0);

    background_soh_size_end_mark = 0;

    // Snapshot the current size of each UOH generation as both the begin size
    // and the current size for this BGC.
    for (int uoh_gen_idx = uoh_start_generation; uoh_gen_idx < total_generation_count; uoh_gen_idx++)
    {
        size_t uoh_size = generation_size (uoh_gen_idx);
        int uoh_idx = uoh_gen_idx - uoh_start_generation;
        bgc_begin_uoh_size[uoh_idx] = uoh_size;
        bgc_uoh_current_size[uoh_idx] = uoh_size;
    }

    dprintf (GTC_LOG, ("BM: h%d: soh: %zd, loh: %zd, poh: %zd",
        heap_number, generation_sizes (generation_of (max_generation)),
        bgc_uoh_current_size[loh_generation - uoh_start_generation], bgc_uoh_current_size[poh_generation - uoh_start_generation]));

    //concurrent_print_time_delta ("copying stack roots");
    concurrent_print_time_delta ("CS");

    FIRE_EVENT(BGC1stNonConEnd);

#ifndef USE_REGIONS
    saved_overflow_ephemeral_seg = 0;
#endif //!USE_REGIONS
    current_bgc_state = bgc_reset_ww;

    // we don't need a join here - just whichever thread that gets here
    // first can change the states and call restart_vm.
    // this is not true - we can't let the EE run when we are scanning stack.
    // since we now allow reset ww to run concurrently and have a join for it,
    // we can do restart ee on the 1st thread that got here. Make sure we handle the
    // sizedref handles correctly.
#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_restart_ee);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef USE_REGIONS
        // There's no need to distribute a second time if we just did an ephemeral GC, and we don't want to
        // age the free regions twice.
        if (!do_ephemeral_gc_p)
        {
            distribute_free_regions ();
            age_free_regions ("BGC");
        }
#endif //USE_REGIONS

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        // Resetting write watch for software write watch is pretty fast, much faster than for hardware write watch. Reset
        // can be done while the runtime is suspended or after the runtime is restarted, the preference was to reset while
        // the runtime is suspended. The reset for hardware write watch is done after the runtime is restarted below.
        concurrent_print_time_delta ("CRWW begin");

#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            g_heaps[i]->reset_write_watch (FALSE);
        }
#else
        reset_write_watch (FALSE);
#endif //MULTIPLE_HEAPS

        concurrent_print_time_delta ("CRWW");
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

#ifdef FEATURE_SIZED_REF_HANDLES
        num_sizedrefs = GCToEEInterface::GetTotalNumSizedRefHandles();
#endif // FEATURE_SIZED_REF_HANDLES

        // this c_write is not really necessary because restart_vm
        // has an instruction that will flush the cpu cache (interlocked
        // or whatever) but we don't want to rely on that.
        dprintf (GTC_LOG, ("setting cm_in_progress"));
        c_write (cm_in_progress, TRUE);

        assert (dont_restart_ee_p);
        dont_restart_ee_p = FALSE;
        last_alloc_reset_suspended_end_time = GetHighPrecisionTimeStamp();

        // Let managed threads run again; marking from here until
        // bgc_suspend_EE below is done with the EE running.
        restart_vm();
        GCToOSInterface::YieldThread (0);
#ifdef MULTIPLE_HEAPS
        dprintf(3, ("Starting all gc threads for gc"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_after_reset);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
        disable_preemptive (true);

#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        // When software write watch is enabled, resetting write watch is done while the runtime is
        // suspended above. The post-reset call to revisit_written_pages is only necessary for concurrent
        // reset_write_watch, to discard dirtied pages during the concurrent reset.
#ifdef WRITE_WATCH
        concurrent_print_time_delta ("CRWW begin");

#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            g_heaps[i]->reset_write_watch (TRUE);
        }
#else
        reset_write_watch (TRUE);
#endif //MULTIPLE_HEAPS

        concurrent_print_time_delta ("CRWW");
#endif //WRITE_WATCH

#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            g_heaps[i]->revisit_written_pages (TRUE, TRUE);
        }
#else
        revisit_written_pages (TRUE, TRUE);
#endif //MULTIPLE_HEAPS

        concurrent_print_time_delta ("CRW");
#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

        // Move every heap to the handle-marking state before releasing the join.
#ifdef MULTIPLE_HEAPS
        for (int i = 0; i < n_heaps; i++)
        {
            g_heaps[i]->current_bgc_state = bgc_mark_handles;
        }
#else
        current_bgc_state = bgc_mark_handles;
#endif //MULTIPLE_HEAPS

        current_c_gc_state = c_gc_state_marking;

        enable_preemptive ();

#ifdef MULTIPLE_HEAPS
        dprintf(3, ("Joining BGC threads after resetting writewatch"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    disable_preemptive (true);

#ifdef FEATURE_SIZED_REF_HANDLES
    // Sized ref handles are scanned (and, with MULTIPLE_HEAPS, joined on)
    // before the rest of the handle table is scanned below.
    if (num_sizedrefs > 0)
    {
        GCScan::GcScanSizedRefs(background_promote, max_generation, max_generation, &sc);

        enable_preemptive ();

#ifdef MULTIPLE_HEAPS
        bgc_t_join.join(this, gc_join_scan_sizedref_done);
        if (bgc_t_join.joined())
        {
            dprintf(3, ("Done with marking all sized refs. Starting all bgc thread for marking other strong roots"));
            bgc_t_join.restart();
        }
#endif //MULTIPLE_HEAPS

        disable_preemptive (true);
    }
#endif // FEATURE_SIZED_REF_HANDLES

    dprintf (3,("BGC: handle table marking"));
    GCScan::GcScanHandles(background_promote,
                                max_generation, max_generation,
                                &sc);
    //concurrent_print_time_delta ("concurrent marking handle table");
    concurrent_print_time_delta ("CRH");

    current_bgc_state = bgc_mark_stack;
    dprintf (2,("concurrent draining mark list"));
    background_drain_mark_list (thread);
    //concurrent_print_time_delta ("concurrent marking stack roots");
    concurrent_print_time_delta ("CRS");

    dprintf (2,("concurrent revisiting dirtied pages"));

    // tuning has shown that there are advantages in doing this 2 times
    revisit_written_pages (TRUE);
    revisit_written_pages (TRUE);

    //concurrent_print_time_delta ("concurrent marking dirtied pages on LOH");
    concurrent_print_time_delta ("CRre");

    enable_preemptive ();

#if defined(MULTIPLE_HEAPS)
    // Combine the per-heap overflow information (OR of the flags for regions,
    // min/max of the address ranges otherwise) and publish the combined value
    // to every heap before overflow processing starts.
    bgc_t_join.join(this, gc_join_concurrent_overflow);
    if (bgc_t_join.joined())
    {
#ifdef USE_REGIONS
        BOOL all_heaps_background_overflow_p = FALSE;
#else //USE_REGIONS
        uint8_t* all_heaps_max = 0;
        uint8_t* all_heaps_min = MAX_PTR;
#endif //USE_REGIONS
        int i;
        for (i = 0; i < n_heaps; i++)
        {
#ifdef USE_REGIONS
            // in the regions case, compute the OR of all the per-heap flags
            if (g_heaps[i]->background_overflow_p)
                all_heaps_background_overflow_p = TRUE;
#else //USE_REGIONS
            dprintf (3, ("heap %d overflow max is %p, min is %p",
                i,
                g_heaps[i]->background_max_overflow_address,
                g_heaps[i]->background_min_overflow_address));
            if (all_heaps_max < g_heaps[i]->background_max_overflow_address)
                all_heaps_max = g_heaps[i]->background_max_overflow_address;
            if (all_heaps_min > g_heaps[i]->background_min_overflow_address)
                all_heaps_min = g_heaps[i]->background_min_overflow_address;
#endif //USE_REGIONS
        }
        for (i = 0; i < n_heaps; i++)
        {
#ifdef USE_REGIONS
            g_heaps[i]->background_overflow_p = all_heaps_background_overflow_p;
#else //USE_REGIONS
            g_heaps[i]->background_max_overflow_address = all_heaps_max;
            g_heaps[i]->background_min_overflow_address = all_heaps_min;
#endif //USE_REGIONS
        }
        dprintf(3, ("Starting all bgc threads after updating the overflow info"));
        bgc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    disable_preemptive (true);

    // bgc_overflow_count is only reported through dprintf here; it is zeroed
    // before and after each logged stage.
    dprintf (2, ("before CRov count: %zu", bgc_overflow_count));
    bgc_overflow_count = 0;
    background_process_mark_overflow (TRUE);
    dprintf (2, ("after CRov count: %zu", bgc_overflow_count));
    bgc_overflow_count = 0;
    //concurrent_print_time_delta ("concurrent processing mark overflow");
    concurrent_print_time_delta ("CRov");

    // Stop all threads, crawl all stacks and revisit changed pages.
    FIRE_EVENT(BGC1stConEnd);

    dprintf (2, ("Stopping the EE"));

    enable_preemptive ();

#ifdef MULTIPLE_HEAPS
    // Reset the sync event under the join so that the Wait below blocks until
    // heap 0 has finished bgc_suspend_EE and calls Set.
    bgc_t_join.join(this, gc_join_suspend_ee);
    if (bgc_t_join.joined())
    {
        bgc_threads_sync_event.Reset();

        dprintf(3, ("Joining BGC threads for non concurrent final marking"));
        bgc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    // Heap 0 takes gc_lock and suspends the EE; every other heap waits on
    // bgc_threads_sync_event until that is done.
    if (heap_number == 0)
    {
        enter_spin_lock (&gc_lock);

        suspended_start_time = GetHighPrecisionTimeStamp();
        bgc_suspend_EE ();
        //suspend_EE ();
        bgc_threads_sync_event.Set();
    }
    else
    {
        bgc_threads_sync_event.Wait(INFINITE, FALSE);
        dprintf (2, ("bgc_threads_sync_event is signalled"));
    }

    assert (settings.concurrent);
    assert (settings.condemned_generation == max_generation);

    dprintf (2, ("clearing cm_in_progress"));
    c_write (cm_in_progress, FALSE);

    bgc_alloc_lock->check();

    current_bgc_state = bgc_final_marking;

    //concurrent_print_time_delta ("concurrent marking ended");
    concurrent_print_time_delta ("CR");

    FIRE_EVENT(BGC2ndNonConBegin);

    mark_absorb_new_alloc();

#ifdef FEATURE_EVENT_TRACE
    // Function-local statics: they are written inside the joined blocks below
    // and passed to record_mark_time in later joined blocks.
    static uint64_t current_mark_time = 0;
    static uint64_t last_mark_time = 0;
#endif //FEATURE_EVENT_TRACE

    // We need a join here 'cause find_object would complain if the gen0
    // bricks of another heap haven't been fixed up. So we need to make sure
    // that every heap's gen0 bricks are fixed up before we proceed.
#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_after_absorb);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef BGC_SERVO_TUNING
        bgc_tuning::record_bgc_sweep_start();
#endif //BGC_SERVO_TUNING

        GCToEEInterface::BeforeGcScanRoots(max_generation, /* is_bgc */ true, /* is_concurrent */ false);

#ifdef FEATURE_EVENT_TRACE
        informational_event_enabled_p = EVENT_ENABLED (GCMarkWithType);
        if (informational_event_enabled_p)
            last_mark_time = GetHighPrecisionTimeStamp();
#endif //FEATURE_EVENT_TRACE

#ifdef MULTIPLE_HEAPS
        dprintf(3, ("Joining BGC threads after absorb"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    //reset the flag, indicating that the EE no longer expect concurrent
    //marking
    sc.concurrent = FALSE;

    dprintf (GTC_LOG, ("FM: h%d: soh: %zd, loh: %zd, poh: %zd", heap_number,
        generation_sizes (generation_of (max_generation)),
        bgc_uoh_current_size[loh_generation - uoh_start_generation],
        bgc_uoh_current_size[poh_generation - uoh_start_generation]));

#if defined(FEATURE_BASICFREEZE) && !defined(USE_REGIONS)
    if (ro_segments_in_range)
    {
        dprintf (2, ("nonconcurrent marking in range ro segments"));
        mark_ro_segments();
        //concurrent_print_time_delta ("nonconcurrent marking in range ro segments");
        concurrent_print_time_delta ("NRRO");
    }
#endif //FEATURE_BASICFREEZE && !USE_REGIONS

    // Final (non-concurrent) marking: roots, finalize queue, handle table and
    // written pages are scanned again, this time with background_promote.
    dprintf (2, ("nonconcurrent marking stack roots"));
    GCScan::GcScanRoots(background_promote,
                            max_generation, max_generation,
                            &sc);
    //concurrent_print_time_delta ("nonconcurrent marking stack roots");
    concurrent_print_time_delta ("NRS");

    finalize_queue->GcScanRoots(background_promote, heap_number, 0);

    dprintf (2, ("nonconcurrent marking handle table"));
    GCScan::GcScanHandles(background_promote,
                                max_generation, max_generation,
                                &sc);
    //concurrent_print_time_delta ("nonconcurrent marking handle table");
    concurrent_print_time_delta ("NRH");

    dprintf (2,("---- (GC%zu)final going through written pages ----", VolatileLoad(&settings.gc_index)));
    revisit_written_pages (FALSE);
    //concurrent_print_time_delta ("nonconcurrent revisit dirtied pages on LOH");
    concurrent_print_time_delta ("NRre LOH");

    dprintf (2, ("before NR 1st Hov count: %zu", bgc_overflow_count));
    bgc_overflow_count = 0;

    // Dependent handles need to be scanned with a special algorithm (see the header comment on
    // scan_dependent_handles for more detail). We perform an initial scan without processing any mark
    // stack overflow. This is not guaranteed to complete the operation but in a common case (where there
    // are no dependent handles that are due to be collected) it allows us to optimize away further scans.
    // The call to background_scan_dependent_handles is what will cycle through more iterations if
    // required and will also perform processing of any mark stack overflow once the dependent handle
    // table has been fully promoted.
    dprintf (2, ("1st dependent handle scan and process mark overflow"));
    GCScan::GcDhInitialScan(background_promote, max_generation, max_generation, &sc);
    background_scan_dependent_handles (&sc);
    //concurrent_print_time_delta ("1st nonconcurrent dependent handle scan and process mark overflow");
    concurrent_print_time_delta ("NR 1st Hov");

    dprintf (2, ("after NR 1st Hov count: %zu", bgc_overflow_count));
    bgc_overflow_count = 0;

#ifdef FEATURE_JAVAMARSHAL

    // FIXME Any reason this code should be different for BGC ? Otherwise extract it to some common method ?

    // NOTE(review): this join uses gc_t_join, whereas every other join in this
    // function uses bgc_t_join - confirm that this is intended.
#ifdef MULTIPLE_HEAPS
    dprintf(3, ("Joining for short weak handle scan"));
    gc_t_join.join(this, gc_join_bridge_processing);
    if (gc_t_join.joined())
    {
#endif //MULTIPLE_HEAPS
        global_bridge_list = GCScan::GcProcessBridgeObjects (max_generation, max_generation, &sc, &num_global_bridge_objs);

#ifdef MULTIPLE_HEAPS
        dprintf (3, ("Starting all gc thread after bridge processing"));
        gc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    {
        int thread = heap_number;
        // Each thread will receive an equal chunk of bridge objects, with the last thread
        // handling a few more objects from the remainder.
        size_t count_per_heap = num_global_bridge_objs / n_heaps;
        size_t start_index = thread * count_per_heap;
        size_t end_index = (thread == n_heaps - 1) ? num_global_bridge_objs : (thread + 1) * count_per_heap;

        for (size_t obj_idx = start_index; obj_idx < end_index; obj_idx++)
        {
            background_mark_simple (global_bridge_list[obj_idx] THREAD_NUMBER_ARG);
        }

        drain_mark_queue();
    }
#endif //FEATURE_JAVAMARSHAL

#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_null_dead_short_weak);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef FEATURE_EVENT_TRACE
        bgc_time_info[time_mark_sizedref] = 0;
        record_mark_time (bgc_time_info[time_mark_roots], current_mark_time, last_mark_time);
#endif //FEATURE_EVENT_TRACE

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        // The runtime is suspended, take this opportunity to pause tracking written pages to
        // avoid further perf penalty after the runtime is restarted
        SoftwareWriteWatch::DisableForGCHeap();
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

        GCToEEInterface::AfterGcScanRoots (max_generation, max_generation, &sc);

#ifdef MULTIPLE_HEAPS
        dprintf(3, ("Joining BGC threads for short weak handle scan"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    // null out the target of short weakref that were not promoted.
    GCScan::GcShortWeakPtrScan(max_generation, max_generation, &sc);

    //concurrent_print_time_delta ("bgc GcShortWeakPtrScan");
    concurrent_print_time_delta ("NR GcShortWeakPtrScan");

    {
#ifdef MULTIPLE_HEAPS
        bgc_t_join.join(this, gc_join_scan_finalization);
        if (bgc_t_join.joined())
        {
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_EVENT_TRACE
            record_mark_time (bgc_time_info[time_mark_short_weak], current_mark_time, last_mark_time);
#endif //FEATURE_EVENT_TRACE

#ifdef MULTIPLE_HEAPS
            dprintf(3, ("Joining BGC threads for finalization"));
            bgc_t_join.restart();
        }
#endif //MULTIPLE_HEAPS

        dprintf(3,("Marking finalization data"));
        //concurrent_print_time_delta ("bgc joined to mark finalization");
        concurrent_print_time_delta ("NRj");
        finalize_queue->ScanForFinalization (background_promote, max_generation, __this);
        concurrent_print_time_delta ("NRF");
    }

    dprintf (2, ("before NR 2nd Hov count: %zu", bgc_overflow_count));
    bgc_overflow_count = 0;

    // Scan dependent handles again to promote any secondaries associated with primaries that were promoted
    // for finalization. As before background_scan_dependent_handles will also process any mark stack
    // overflow.
    dprintf (2, ("2nd dependent handle scan and process mark overflow"));
    background_scan_dependent_handles (&sc);
    //concurrent_print_time_delta ("2nd nonconcurrent dependent handle scan and process mark overflow");
    concurrent_print_time_delta ("NR 2nd Hov");

#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_null_dead_long_weak);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {

#ifdef FEATURE_EVENT_TRACE
        record_mark_time (bgc_time_info[time_mark_scan_finalization], current_mark_time, last_mark_time);
#endif //FEATURE_EVENT_TRACE

#ifdef MULTIPLE_HEAPS
        dprintf(2, ("Joining BGC threads for weak pointer deletion"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    // null out the target of long weakref that were not promoted.
    GCScan::GcWeakPtrScan (max_generation, max_generation, &sc);
    concurrent_print_time_delta ("NR GcWeakPtrScan");

#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_null_dead_syncblk);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
        dprintf (2, ("calling GcWeakPtrScanBySingleThread"));
        // scan for deleted entries in the syncblk cache
        GCScan::GcWeakPtrScanBySingleThread (max_generation, max_generation, &sc);

#ifdef FEATURE_EVENT_TRACE
        record_mark_time (bgc_time_info[time_mark_long_weak], current_mark_time, last_mark_time);
#endif //FEATURE_EVENT_TRACE

        concurrent_print_time_delta ("NR GcWeakPtrScanBySingleThread");
#ifdef MULTIPLE_HEAPS
        dprintf(2, ("Starting BGC threads for end of background mark phase"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    dprintf (2, ("end of bgc mark: loh: %zu, poh: %zu, soh: %zu",
                 generation_size (loh_generation),
                 generation_size (poh_generation),
                 generation_sizes (generation_of (max_generation))));

    // For max_generation and every UOH generation: record the begin data size
    // (generation size minus free list space, free object space and the
    // generation start size) and zero the survival counters.
    for (int gen_idx = max_generation; gen_idx < total_generation_count; gen_idx++)
    {
        generation* gen = generation_of (gen_idx);
        dynamic_data* dd = dynamic_data_of (gen_idx);
        dd_begin_data_size (dd) = generation_size (gen_idx) -
                                  (generation_free_list_space (gen) + generation_free_obj_space (gen)) -
                                   get_generation_start_size (gen_idx);
        dd_survived_size (dd) = 0;
        dd_pinned_survived_size (dd) = 0;
        dd_artificial_pinned_survived_size (dd) = 0;
        dd_added_pinned_size (dd) = 0;
    }

    // For every SOH segment: clear the swept flag, record
    // heap_segment_background_allocated, and accumulate the size up to that
    // point into background_soh_size_end_mark.
    for (int i = get_start_generation_index(); i < uoh_start_generation; i++)
    {
        heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i)));
        _ASSERTE(seg != NULL);

        while (seg)
        {
            seg->flags &= ~heap_segment_flags_swept;

#ifndef USE_REGIONS
            if (heap_segment_allocated (seg) == heap_segment_mem (seg))
            {
                FATAL_GC_ERROR();
            }

            // On the ephemeral segment background_allocated stops at the
            // allocation start of generation (max_generation - 1).
            if (seg == ephemeral_heap_segment)
            {
                heap_segment_background_allocated (seg) = generation_allocation_start (generation_of (max_generation - 1));
            }
            else
#endif //!USE_REGIONS
            {
                heap_segment_background_allocated (seg) = heap_segment_allocated (seg);
            }

            background_soh_size_end_mark += heap_segment_background_allocated (seg) - heap_segment_mem (seg);

            dprintf (3333, ("h%d gen%d seg %zx (%p) background allocated is %p",
                            heap_number, i, (size_t)(seg), heap_segment_mem (seg),
                            heap_segment_background_allocated (seg)));
            seg = heap_segment_next_rw (seg);
        }
    }

    // We need to void alloc contexts here 'cause while background_ephemeral_sweep is running
    // we can't let the user code consume the left over parts in these alloc contexts.
    repair_allocation_contexts (FALSE);

    dprintf (2, ("end of bgc mark: gen2 free list space: %zu, free obj space: %zu",
        generation_free_list_space (generation_of (max_generation)),
        generation_free_obj_space (generation_of (max_generation))));

    dprintf(2,("---- (GC%zu)End of background mark phase ----", VolatileLoad(&settings.gc_index)));
}

// Suspends the EE on behalf of a background GC. The gc-done state is reset
// (for every heap when MULTIPLE_HEAPS is defined, otherwise for this heap)
// and gc_started is raised around the SuspendEE (SUSPEND_FOR_GC_PREP) call;
// once the call returns gc_started is lowered and gc-done is set again.
void
gc_heap::bgc_suspend_EE ()
{
#ifdef MULTIPLE_HEAPS
    for (int hn = 0; hn < n_heaps; hn++)
    {
        g_heaps[hn]->reset_gc_done();
    }
#else //MULTIPLE_HEAPS
    reset_gc_done();
#endif //MULTIPLE_HEAPS

    gc_started = TRUE;
    dprintf (2, ("bgc_suspend_EE"));
    GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP);
    gc_started = FALSE;

#ifdef MULTIPLE_HEAPS
    for (int hn = 0; hn < n_heaps; hn++)
    {
        g_heaps[hn]->set_gc_done();
    }
#else //MULTIPLE_HEAPS
    set_gc_done();
#endif //MULTIPLE_HEAPS
}

// Returns the upper address limit to use for `seg`.
//
// Without USE_REGIONS and with concurrent_p set, the limit is rounded down
// with align_lower_page; for the ephemeral segment it is taken from the
// allocation start of generation (max_generation - 1) instead of
// heap_segment_allocated. In every other case heap_segment_allocated (seg)
// is returned unchanged. With USE_REGIONS a concurrent caller is only
// asserted to pass a segment whose generation number is >= max_generation.
inline uint8_t* gc_heap::high_page (heap_segment* seg, BOOL concurrent_p)
{
#ifdef USE_REGIONS
    assert (!concurrent_p || (heap_segment_gen_num (seg) >= max_generation));
#else //USE_REGIONS
    if (concurrent_p)
    {
        uint8_t* limit;
        if (seg == ephemeral_heap_segment)
        {
            limit = generation_allocation_start (generation_of (max_generation - 1));
        }
        else
        {
            limit = heap_segment_allocated (seg);
        }

        return align_lower_page (limit);
    }
#endif //USE_REGIONS

    return heap_segment_allocated (seg);
}

// Walks the objects that overlap one written page and passes the references
// found in them to background_mark_object.
//
// The walk covers objects starting below min (end, page + WRITE_WATCH_UNIT_SIZE).
// An object's references are visited only if it contains pointers and is
// either outside [background_saved_lowest_address,
// background_saved_highest_address) or already background marked.
//
// Parameters:
//   page               - start address of the written page.
//   end                - upper address limit for the walk.
//   concurrent_p       - when TRUE, allow_fgc() is called before returning
//                        and, together with large_objects_p, each object is
//                        bracketed by bgc_alloc_lock->bgc_mark_set /
//                        bgc_mark_done.
//   last_page          - in/out; compared with `page` to decide whether the
//                        walk can resume from last_object, and updated on
//                        exit to align_write_watch_lower_page of the object
//                        the walk stopped at.
//   last_object        - in/out; the object the walk starts or resumes from
//                        (see the MULTIPLE_HEAPS comment in the loop for how
//                        it is updated in the concurrent case).
//   large_objects_p    - when TRUE the walk always starts at last_object; its
//                        negation is passed to get_alignment_constant.
//   num_marked_objects - incremented once for every reference handed to
//                        background_mark_object.
void gc_heap::revisit_written_page (uint8_t* page,
                                    uint8_t* end,
                                    BOOL concurrent_p,
                                    uint8_t*& last_page,
                                    uint8_t*& last_object,
                                    BOOL large_objects_p,
                                    size_t& num_marked_objects)
{
    uint8_t*   start_address = page;
    uint8_t*   o             = 0;
    int align_const = get_alignment_constant (!large_objects_p);
    uint8_t* high_address = end;
    uint8_t* current_lowest_address = background_saved_lowest_address;
    uint8_t* current_highest_address = background_saved_highest_address;
    // Set when the walk has to stop before reaching the end of the page.
    BOOL no_more_loop_p = FALSE;

    THREAD_FROM_HEAP;
#ifndef MULTIPLE_HEAPS
    const int thread = heap_number;
#endif //!MULTIPLE_HEAPS

    // Choose the object to start walking from.
    if (large_objects_p)
    {
        o = last_object;
    }
    else
    {
        // Resume from last_object when this page directly follows the previous
        // one or when last_object is at or beyond the start of this page;
        // otherwise look up the first object for this page.
        if (((last_page + WRITE_WATCH_UNIT_SIZE) == page)
            || (start_address <= last_object))
        {
            o = last_object;
        }
        else
        {
            o = find_first_object (start_address, last_object);
            // We can visit the same object again, but on a different page.
            assert (o >= last_object);
        }
    }

    dprintf (3,("page %zx start: %zx, %zx[ ",
               (size_t)page, (size_t)o,
               (size_t)(min (high_address, page + WRITE_WATCH_UNIT_SIZE))));

    while (o < (min (high_address, page + WRITE_WATCH_UNIT_SIZE)))
    {
        size_t s;

        if (concurrent_p && large_objects_p)
        {
            // Paired with the bgc_mark_done call at end_limit below.
            bgc_alloc_lock->bgc_mark_set (o);

            if (((CObjectHeader*)o)->IsFree())
            {
                s = unused_array_size (o);
            }
            else
            {
                s = size (o);
            }
        }
        else
        {
            s = size (o);
        }

        dprintf (3,("Considering object %zx(%s)", (size_t)o, (background_object_marked (o, FALSE) ? "bm" : "nbm")));

        assert (Align (s) >= Align (min_obj_size));

        uint8_t* next_o =  o + Align (s, align_const);

        // Objects that end before the start of this page are skipped.
        if (next_o >= start_address)
        {
#ifdef MULTIPLE_HEAPS
            if (concurrent_p)
            {
                // We set last_object here for SVR BGC here because SVR BGC has more than
                // one GC thread. When we have more than one GC thread we would run into this
                // situation if we skipped unmarked objects:
                // bgc thread 1 calls GWW, and detect object X not marked so it would skip it
                // for revisit.
                // bgc thread 2 marks X and all its current children.
                // user thread comes along and dirties more (and later) pages in X.
                // bgc thread 1 calls GWW again and gets those later pages but it will not mark anything
                // on them because it had already skipped X. We need to detect that this object is now
                // marked and mark the children on the dirtied pages.
                // In the future if we have less BGC threads than we have heaps we should add
                // the check to the number of BGC threads.
                last_object = o;
            }
#endif //MULTIPLE_HEAPS

            if (contain_pointers (o) &&
                (!((o >= current_lowest_address) && (o < current_highest_address)) ||
                background_marked (o)))
            {
                dprintf (3, ("going through %zx", (size_t)o));
                // The macro body below runs per pointer slot (poo); it leaves
                // the whole walk via end_limit once a slot is at or beyond the
                // end of this page.
                // NOTE(review): presumably start_address/use_start make the
                // macro begin at this page rather than at the start of the
                // object - confirm against go_through_object.
                go_through_object (method_table(o), o, s, poo, start_address, use_start, (o + s),
                                    if ((uint8_t*)poo >= min (high_address, page + WRITE_WATCH_UNIT_SIZE))
                                    {
                                        no_more_loop_p = TRUE;
                                        goto end_limit;
                                    }
                                    uint8_t* oo = VolatileLoadWithoutBarrier(poo);

                                    num_marked_objects++;
                                    background_mark_object (oo THREAD_NUMBER_ARG);
                                );
            }
            else if (concurrent_p &&
                     ((CObjectHeader*)o)->IsFree() &&
                     (next_o > min (high_address, page + WRITE_WATCH_UNIT_SIZE)))
            {
                // We need to not skip the object here because of this corner scenario:
                // A large object was being allocated during BGC mark so we first made it
                // into a free object, then cleared its memory. In this loop we would detect
                // that it's a free object which normally we would skip. But by the next time
                // we call GetWriteWatch we could still be on this object and the object had
                // been made into a valid object and some of its memory was changed. We need
                // to be sure to process those written pages so we can't skip the object just
                // yet.
                //
                // Similarly, when using software write watch, don't advance last_object when
                // the current object is a free object that spans beyond the current page or
                // high_address. Software write watch acquires gc_lock before the concurrent
                // GetWriteWatch() call during revisit_written_pages(). A foreground GC may
                // happen at that point and allocate from this free region, so when
                // revisit_written_pages() continues, it cannot skip now-valid objects in this
                // region.
                no_more_loop_p = TRUE;
                goto end_limit;
            }
        }
end_limit:
        // Reached both by falling through and by the gotos above, so
        // bgc_mark_done always runs after the matching bgc_mark_set.
        if (concurrent_p && large_objects_p)
        {
            bgc_alloc_lock->bgc_mark_done ();
        }
        if (no_more_loop_p)
        {
            // Stop without advancing, so o stays on the current object.
            break;
        }
        o = next_o;
    }

#ifdef MULTIPLE_HEAPS
    if (concurrent_p)
    {
        // last_object was already updated inside the loop in this case.
        assert (last_object < (min (high_address, page + WRITE_WATCH_UNIT_SIZE)));
    }
    else
#endif //MULTIPLE_HEAPS
    {
        last_object = o;
    }

    dprintf (3,("Last object: %zx", (size_t)last_object));
    last_page = align_write_watch_lower_page (o);

    if (concurrent_p)
    {
        allow_fgc();
    }
}

// Walks the write watch state of every segment in the generations starting at
// start_gen_idx and, unless reset_only_p is set, revisits each dirty page via
// revisit_written_page.
//
// concurrent_p - TRUE when called while the runtime is not suspended. It makes
//                the write watch query reset the state it reads and, with
//                software write watch, take gc_lock around the query.
// reset_only_p - TRUE to only query (and reset) write watch state without
//                revisiting objects. In that mode the range queried is clamped
//                to [background_saved_lowest_address, background_saved_highest_address)
//                because segments, or parts of segments, outside that range
//                have to be treated as all live.
void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p)
{
    if (concurrent_p && !reset_only_p)
    {
        current_bgc_state = bgc_revisit_soh;
    }

    // Running totals, reported through dprintf/fire_revisit_event below.
    size_t total_dirtied_pages = 0;
    size_t total_marked_objects = 0;

    bool reset_watch_state = !!concurrent_p;
    bool is_runtime_suspended = !concurrent_p;
    // Flipped to FALSE once the soh_gen2 iteration is finished; its negation is
    // passed to revisit_written_page as the "large objects" flag.
    BOOL small_object_segments = TRUE;
    int start_gen_idx = get_start_generation_index();
#ifdef USE_REGIONS
    if (concurrent_p && !reset_only_p)
    {
        // We don't go into ephemeral regions during concurrent revisit.
        start_gen_idx = max_generation;
    }
#endif //USE_REGIONS

    for (int i = start_gen_idx; i < total_generation_count; i++)
    {
        heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i)));
        _ASSERTE(seg != NULL);

        while (seg)
        {
            uint8_t* base_address = (uint8_t*)heap_segment_mem (seg);
            //we need to truncate to the base of the page because
            //some newly allocated could exist beyond heap_segment_allocated
            //and if we reset the last page write watch status,
            // they wouldn't be guaranteed to be visited -> gc hole.
            // bcount starts at array_size so the first pass through the loop
            // below satisfies the (bcount >= array_size) condition.
            uintptr_t bcount = array_size;
            // last_page/last_object are handed to revisit_written_page on every
            // call for this segment, so they carry state from one dirty page to
            // the next.
            uint8_t* last_page = 0;
            uint8_t* last_object = heap_segment_mem (seg);
            uint8_t* high_address = 0;

            BOOL skip_seg_p = FALSE;

            if (reset_only_p)
            {
                // NOTE(review): as written this skips a segment when its start is at or
                // above background_saved_lowest_address OR its reserved end is at or
                // below background_saved_highest_address. Confirm that this matches the
                // "only reset pages that are in range" intent described above.
                if ((heap_segment_mem (seg) >= background_saved_lowest_address) ||
                    (heap_segment_reserved (seg) <= background_saved_highest_address))
                {
                    dprintf (3, ("h%d: sseg: %p(-%p)", heap_number,
                        heap_segment_mem (seg), heap_segment_reserved (seg)));
                    skip_seg_p = TRUE;
                }
            }

            if (!skip_seg_p)
            {
                dprintf (3, ("looking at seg %zx", (size_t)last_object));

                if (reset_only_p)
                {
                    // Clamp the start of the query to the saved BGC range.
                    base_address = max (base_address, background_saved_lowest_address);
                    dprintf (3, ("h%d: reset only starting %p", heap_number, base_address));
                }

                dprintf (3, ("h%d: starting: %p, seg %p-%p", heap_number, base_address,
                    heap_segment_mem (seg), heap_segment_reserved (seg)));


                // Each iteration queries one batch of dirty pages into
                // background_written_addresses. The loop continues only while the
                // previous batch filled the buffer (bcount >= array_size) and there
                // is still address range left to query.
                while (1)
                {
                    if (reset_only_p)
                    {
                        high_address = ((seg == ephemeral_heap_segment) ? alloc_allocated : heap_segment_allocated (seg));
                        high_address = min (high_address, background_saved_highest_address);
                    }
                    else
                    {
                        high_address = high_page (seg, concurrent_p);
                    }

                    if ((base_address < high_address) &&
                        (bcount >= array_size))
                    {
                        ptrdiff_t region_size = high_address - base_address;
                        dprintf (3, ("h%d: gw: [%zx(%zd)", heap_number, (size_t)base_address, (size_t)region_size));

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
                        // When the runtime is not suspended, it's possible for the table to be resized concurrently with the scan
                        // for dirty pages below. Prevent that by synchronizing with grow_brick_card_tables(). When the runtime is
                        // suspended, it's ok to scan for dirty pages concurrently from multiple background GC threads for disjoint
                        // memory regions.
                        if (!is_runtime_suspended)
                        {
                            enter_spin_lock(&gc_lock);
                        }
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

                        // bcount is passed by address; the code below reads it back as the
                        // number of entries written to background_written_addresses.
                        get_write_watch_for_gc_heap (reset_watch_state, base_address, region_size,
                                                     (void**)background_written_addresses,
                                                     &bcount, is_runtime_suspended);

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
                        if (!is_runtime_suspended)
                        {
                            leave_spin_lock(&gc_lock);
                        }
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

                        if (bcount != 0)
                        {
                            total_dirtied_pages += bcount;

                            dprintf (3, ("Found %zu pages [%zx, %zx[",
                                            bcount, (size_t)base_address, (size_t)high_address));
                        }

                        if (!reset_only_p)
                        {
                            // refetch the high address in case it has changed while we fetched dirty pages
                            // this is only an issue for the page high_address is on - we may have new
                            // objects after high_address.
                            high_address = high_page (seg, concurrent_p);

                            // Note: this index shadows the generation index 'i' of the outer for loop.
                            for (unsigned i = 0; i < bcount; i++)
                            {
                                uint8_t* page = (uint8_t*)background_written_addresses[i];
                                dprintf (3, ("looking at page %d at %zx(h: %zx)", i,
                                    (size_t)page, (size_t)high_address));
                                if (page < high_address)
                                {
                                    //search for marked objects in the page
                                    revisit_written_page (page, high_address, concurrent_p,
                                                          last_page, last_object,
                                                          !small_object_segments,
                                                          total_marked_objects);
                                }
                                else
                                {
                                    dprintf (3, ("page %d at %zx is >= %zx!", i, (size_t)page, (size_t)high_address));
                                    assert (!"page shouldn't have exceeded limit");
                                }
                            }
                        }

                        // The buffer was filled, so there may be more dirty pages: resume the
                        // query right after the last page that was reported.
                        if (bcount >= array_size){
                            base_address = background_written_addresses [array_size-1] + WRITE_WATCH_UNIT_SIZE;
                            bcount = array_size;
                        }
                    }
                    else
                    {
                        break;
                    }
                }
            }

            seg = heap_segment_next_rw (seg);
        }

        if (i == soh_gen2)
        {
            // Finished the soh_gen2 iteration: report its totals, restart the counters and
            // switch to the large-object mode for the remaining generations.
            if (!reset_only_p)
            {
                dprintf (GTC_LOG, ("h%d: SOH: dp:%zd; mo: %zd", heap_number, total_dirtied_pages, total_marked_objects));
                fire_revisit_event (total_dirtied_pages, total_marked_objects, FALSE);
                concurrent_print_time_delta (concurrent_p ? "CR SOH" : "NR SOH");
                total_dirtied_pages = 0;
                total_marked_objects = 0;
            }

            if (concurrent_p && !reset_only_p)
            {
                current_bgc_state = bgc_revisit_uoh;
            }

            small_object_segments = FALSE;
            dprintf (3, ("now revisiting large object segments"));
        }
        else
        {
            if (reset_only_p)
            {
                dprintf (GTC_LOG, ("h%d: tdp: %zd", heap_number, total_dirtied_pages));
            }
            else
            {
                dprintf (GTC_LOG, ("h%d: LOH: dp:%zd; mo: %zd", heap_number, total_dirtied_pages, total_marked_objects));
                fire_revisit_event (total_dirtied_pages, total_marked_objects, TRUE);
            }
        }
    }
}

void gc_heap::background_grow_c_mark_list()
{
    assert (c_mark_list_index >= c_mark_list_length);
    BOOL should_drain_p = FALSE;
    THREAD_FROM_HEAP;
#ifndef MULTIPLE_HEAPS
    const int thread = heap_number;
#endif //!MULTIPLE_HEAPS

    dprintf (2, ("stack copy buffer overflow"));
    uint8_t** new_c_mark_list = 0;
    {
        FAULT_NOT_FATAL();
        if (c_mark_list_length >= (SIZE_T_MAX / (2 * sizeof (uint8_t*))))
        {
            should_drain_p = TRUE;
        }
        else
        {
            new_c_mark_list = new (nothrow) uint8_t*[c_mark_list_length*2];
            if (new_c_mark_list == 0)
            {
                should_drain_p = TRUE;
            }
        }
    }
    if (should_drain_p)

    {
        dprintf (2, ("No more memory for the stacks copy, draining.."));
        //drain the list by marking its elements
        background_drain_mark_list (thread);
    }
    else
    {
        assert (new_c_mark_list);
        memcpy (new_c_mark_list, c_mark_list, c_mark_list_length*sizeof(uint8_t*));
        c_mark_list_length = c_mark_list_length*2;
        dprintf (5555, ("h%d replacing mark list at %Ix with %Ix", heap_number, (size_t)c_mark_list, (size_t)new_c_mark_list));
        delete[] c_mark_list;
        c_mark_list = new_c_mark_list;
    }
}

// Root-scanning callback used while a concurrent GC is in progress (asserted
// below). Instead of marking the referenced object right away, it appends the
// object to the scanning heap's c_mark_list, growing (or draining) that list
// first when it is full.
//
// ppObject - location holding the reference being reported.
// sc       - scan context; not referenced directly in this function body.
// flags    - GC_CALL_INTERIOR means *ppObject may point into the middle of an
//            object and has to be resolved with find_object first.
void gc_heap::background_promote_callback (Object** ppObject, ScanContext* sc,
                                  uint32_t flags)
{
    UNREFERENCED_PARAMETER(sc);
    assert (settings.concurrent);

    THREAD_NUMBER_FROM_CONTEXT;
#ifndef MULTIPLE_HEAPS
    const int thread = 0;
#endif //!MULTIPLE_HEAPS

    uint8_t* obj = (uint8_t*)*ppObject;
    if (!is_in_find_object_range (obj))
        return;

    HEAP_FROM_THREAD;

    // Heap that owns the address, as opposed to hpt which is the heap doing the scan.
    gc_heap* owner_heap = gc_heap::heap_of (obj);

    // Ignore anything outside the address range saved for this background GC.
    const bool in_saved_range = (obj >= owner_heap->background_saved_lowest_address) &&
                                (obj < owner_heap->background_saved_highest_address);
    if (!in_saved_range)
        return;

    if (flags & GC_CALL_INTERIOR)
    {
        obj = owner_heap->find_object (obj);
        if (obj == 0)
        {
            return;
        }
    }

#ifdef FEATURE_CONSERVATIVE_GC
    // For conservative GC, a value on stack may point to middle of a free object.
    // In this case, we don't need to promote the pointer.
    if (GCConfig::GetConservativeGC() && ((CObjectHeader*)obj)->IsFree())
        return;
#endif //FEATURE_CONSERVATIVE_GC

#ifdef _DEBUG
    ((CObjectHeader*)obj)->Validate();
#endif //_DEBUG

    dprintf (3, ("Concurrent Background Promote %zx", (size_t)obj));
    if (obj && (size (obj) > loh_size_threshold))
    {
        dprintf (3, ("Brc %zx", (size_t)obj));
    }

    // Make room before pushing if the list is already full.
    if (hpt->c_mark_list_index >= hpt->c_mark_list_length)
        hpt->background_grow_c_mark_list();

    dprintf (3, ("pushing %zx into mark_list", (size_t)obj));
    hpt->c_mark_list [hpt->c_mark_list_index++] = obj;

    STRESS_LOG3(LF_GC|LF_GCROOTS, LL_INFO1000000, "    GCHeap::Background Promote: Promote GC Root *%p = %p MT = %pT", ppObject, obj, obj ? ((Object*) obj)->GetGCSafeMethodTable() : NULL);
}

// Fixes up the allocation contexts and then clears the gen0 bricks.
void gc_heap::mark_absorb_new_alloc()
{
    fix_allocation_contexts (FALSE);

    // Reset the flag before calling clear_gen0_bricks.
    // NOTE(review): presumably clear_gen0_bricks is a no-op when the flag is already
    // set, which is why it is reset first - confirm against clear_gen0_bricks.
    gen0_bricks_cleared = FALSE;

    clear_gen0_bricks();
}

#ifdef DYNAMIC_HEAP_COUNT
// Records one entry in the circular bgc_th_creation_hist buffer describing BGC
// thread creation for the given GC. Nothing is recorded when all three counts
// are zero.
//
// gc_index                 - index of the GC the counts belong to.
// count_created            - stored as the entry's count_created.
// count_created_th_existed - stored as the entry's count_created_th_existed.
// count_creation_failed    - stored as the entry's count_creation_failed.
//
// The counts and n_heaps are narrowed to short when stored.
void gc_heap::add_to_bgc_th_creation_history (size_t gc_index, size_t count_created,
                                              size_t count_created_th_existed, size_t count_creation_failed)
{
    if ((count_created == 0) && (count_created_th_existed == 0) && (count_creation_failed == 0))
    {
        return;
    }

    // All four size_t arguments are printed with %zd; %d (and the MSVC-only %Id) do not
    // match size_t.
    dprintf (6666, ("ADDING to BGC th hist entry%d gc index %zd, created %zd, %zd th existed, %zd failed",
        bgc_th_creation_hist_index, gc_index, count_created, count_created_th_existed, count_creation_failed));

    bgc_thread_creation_history* current_hist = &bgc_th_creation_hist[bgc_th_creation_hist_index];
    current_hist->gc_index = gc_index;
    current_hist->n_heaps = (short)n_heaps;
    current_hist->count_created = (short)count_created;
    current_hist->count_created_th_existed = (short)count_created_th_existed;
    current_hist->count_creation_failed = (short)count_creation_failed;

    // Advance the write position, wrapping around at the end of the buffer.
    bgc_th_creation_hist_index = (bgc_th_creation_hist_index + 1) % max_bgc_thread_creation_count;
}
#endif //DYNAMIC_HEAP_COUNT

// Makes sure heap gh has a BGC thread, creating one if it is not running.
//
// If this returns TRUE, we are saying we expect that thread to be there. However, when that
// thread becomes available to do work is nondeterministic. When we actually start a BGC,
// naturally we'll need to wait till it gets to the point where it can work.
//
// The running check and the creation are done while holding gh->bgc_threads_timeout_cs.
// GCCreateConcurrentThread_V1 is fired after the lock is released, and only when a thread
// was actually created by this call.
BOOL gc_heap::prepare_bgc_thread(gc_heap* gh)
{
    BOOL success = FALSE;
    BOOL thread_created = FALSE;
    dprintf (2, ("Preparing gc thread"));
    gh->bgc_threads_timeout_cs.Enter();
    if (!(gh->bgc_thread_running))
    {
        dprintf (2, ("GC thread not running"));
        if (gh->bgc_thread == 0)
        {
#ifdef STRESS_DYNAMIC_HEAP_COUNT
            // to stress, we just don't actually try to create the thread to simulate a failure
            int r = (int)gc_rand::get_rand (100);
            bool try_to_create_p = (r > 10);
            BOOL thread_created_p = (try_to_create_p ? create_bgc_thread (gh) : FALSE);
            if (!thread_created_p)
            {
                dprintf (6666, ("h%d we failed to create the thread, %s", gh->heap_number, (try_to_create_p ? "tried" : "didn't try")));
            }
            if (thread_created_p)
#else //STRESS_DYNAMIC_HEAP_COUNT
            if (create_bgc_thread(gh))
#endif //STRESS_DYNAMIC_HEAP_COUNT
            {
                // Both preprocessor branches above end in an 'if' whose body is this block.
                success = TRUE;
                thread_created = TRUE;
#ifdef DYNAMIC_HEAP_COUNT
                bgc_th_count_created++;
#endif //DYNAMIC_HEAP_COUNT
            }
            else
            {
#ifdef DYNAMIC_HEAP_COUNT
                bgc_th_count_creation_failed++;
#endif //DYNAMIC_HEAP_COUNT
            }
        }
        else
        {
#ifdef DYNAMIC_HEAP_COUNT
            // This would be a very unusual scenario where GCToEEInterface::CreateThread told us it failed yet the thread was created.
            bgc_th_count_created_th_existed++;
            dprintf (6666, ("h%d we cannot have a thread that runs yet CreateThread reported it failed to create it", gh->heap_number));
#endif //DYNAMIC_HEAP_COUNT
            assert (!"GCToEEInterface::CreateThread returned FALSE yet the thread was created!");
        }
    }
    else
    {
        dprintf (3, ("GC thread already running"));
        success = TRUE;
    }
    gh->bgc_threads_timeout_cs.Leave();

    if(thread_created)
        FIRE_EVENT(GCCreateConcurrentThread_V1);

    return success;
}

// Asks the EE to create the BGC thread for heap gh, running gh->bgc_thread_stub with gh
// as its argument and named ".NET BGC". The result of the request is stored in
// gh->bgc_thread_running and returned.
BOOL gc_heap::create_bgc_thread(gc_heap* gh)
{
    assert (background_gc_done_event.IsValid());

    //dprintf (2, ("Creating BGC thread"));

    gh->bgc_thread_running = GCToEEInterface::CreateThread(gh->bgc_thread_stub, gh, true, ".NET BGC");
    return gh->bgc_thread_running;
}

// Creates the events shared by the BGC threads and, with MULTIPLE_HEAPS,
// initializes bgc_t_join for number_of_heaps heaps.
//
// Returns TRUE when everything was created. On failure every event that is
// valid at that point is closed again and FALSE is returned.
BOOL gc_heap::create_bgc_threads_support (int number_of_heaps)
{
    dprintf (3, ("Creating concurrent GC thread for the first time"));

    // The events are created in this order; && stops at the first failure so
    // later events are not attempted.
    const bool all_events_created =
        background_gc_done_event.CreateManualEventNoThrow(TRUE) &&
        bgc_threads_sync_event.CreateManualEventNoThrow(FALSE) &&
        ee_proceed_event.CreateAutoEventNoThrow(FALSE) &&
        bgc_start_event.CreateManualEventNoThrow(FALSE);

    if (!all_events_created)
    {
        // Undo the partial work.
        if (background_gc_done_event.IsValid())
            background_gc_done_event.CloseEvent();

        if (bgc_threads_sync_event.IsValid())
            bgc_threads_sync_event.CloseEvent();

        if (ee_proceed_event.IsValid())
            ee_proceed_event.CloseEvent();

        if (bgc_start_event.IsValid())
            bgc_start_event.CloseEvent();

        return FALSE;
    }

#ifdef MULTIPLE_HEAPS
    bgc_t_join.init (number_of_heaps, join_flavor_bgc);
#else
    UNREFERENCED_PARAMETER(number_of_heaps);
#endif //MULTIPLE_HEAPS

    return TRUE;
}

// Allocates the initial storage for this heap's c_mark_list and hands it to
// make_c_mark_list. Returns FALSE if the allocation fails, TRUE otherwise.
BOOL gc_heap::create_bgc_thread_support()
{
    // The list needs room for as many smallest-size objects as fit on a page.
    uint8_t** mark_list_storage = new (nothrow) uint8_t*[1 + OS_PAGE_SIZE / MIN_OBJECT_SIZE];

    if (mark_list_storage)
    {
        make_c_mark_list (mark_list_storage);
        return TRUE;
    }

    return FALSE;
}

// Returns the highest ephemeral generation to collect, or -1 if there is none.
//
// When the GC was triggered for reason_oos_soh the answer is always
// (max_generation - 1). Otherwise each heap is walked from gen0 upwards for as
// long as the generation's new allocation is <= 0, and the highest generation
// reached on any heap is returned.
int gc_heap::check_for_ephemeral_alloc()
{
    if (settings.reason == reason_oos_soh)
    {
        return (max_generation - 1);
    }

    int gen = -1;

#ifdef MULTIPLE_HEAPS
    for (int heap_index = 0; heap_index < n_heaps; heap_index++)
#endif //MULTIPLE_HEAPS
    {
        for (int gen_idx = 0; gen_idx < max_generation; gen_idx++)
        {
            // Stop at the first generation on this heap that still has allocation left.
#ifdef MULTIPLE_HEAPS
            if (g_heaps[heap_index]->get_new_allocation (gen_idx) > 0)
#else
            if (get_new_allocation (gen_idx) > 0)
#endif //MULTIPLE_HEAPS
            {
                break;
            }

            gen = max (gen, gen_idx);
        }
    }

    return gen;
}

// Wait for gc to finish sequential part.
// Blocks the calling thread in user_thread_wait on ee_proceed_event.
void gc_heap::wait_to_proceed()
{
    assert (background_gc_done_event.IsValid());
    assert (bgc_start_event.IsValid());

    user_thread_wait(&ee_proceed_event, FALSE);
}

// Start a new concurrent gc.
// Waits for background_gc_done_event, resets it and then signals
// bgc_start_event, which is the event bgc_thread_function waits on.
void gc_heap::start_c_gc()
{
    assert (background_gc_done_event.IsValid());
    assert (bgc_start_event.IsValid());

//Need to make sure that the gc thread is in the right place.
    background_gc_done_event.Wait(INFINITE, FALSE);
    background_gc_done_event.Reset();
    bgc_start_event.Set();
}

// Starts a background GC from the calling thread: runs init_background_gc on
// every heap, signals the BGC thread(s) through start_c_gc and then blocks in
// wait_to_proceed until the BGC lets this thread continue.
void gc_heap::do_background_gc()
{
    dprintf (2, ("starting a BGC"));

    // Per-heap BGC initialization.
#ifndef MULTIPLE_HEAPS
    init_background_gc();
#else //!MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* hp = g_heaps[heap_idx];
        hp->init_background_gc();
    }
#endif //!MULTIPLE_HEAPS

#ifdef BGC_SERVO_TUNING
    bgc_tuning::record_bgc_start();
#endif //BGC_SERVO_TUNING

    // Hand off to the background GC...
    start_c_gc ();

    // ...and stay here until the BGC restarts us.
    wait_to_proceed();
}

// Releases the BGC thread bookkeeping: closes background_gc_done_event and
// bgc_start_event, destroys bgc_threads_timeout_cs and clears bgc_thread.
// It does not terminate the thread itself (see the comment below).
void gc_heap::kill_gc_thread()
{
    //assert (settings.concurrent == FALSE);

    // We are doing a two-stage shutdown now.
    // In the first stage, we do minimum work, and call ExitProcess at the end.
    // In the second stage, we have the Loader lock and only one thread is
    // alive.  Hence we do not need to kill gc thread.
    background_gc_done_event.CloseEvent();
    bgc_start_event.CloseEvent();
    bgc_threads_timeout_cs.Destroy();
    bgc_thread = 0;
}

// Main loop of a background GC thread for this heap.
//
// Each iteration:
//   1. switches to preemptive mode and waits on bgc_start_event;
//   2. on a wait timeout, exits the thread unless keep_bgc_threads_p is set;
//   3. exits if it was signaled while settings.concurrent is not set;
//   4. (DYNAMIC_HEAP_COUNT) parks on bgc_idle_thread_event if this heap's number is not
//      below n_heaps;
//   5. runs gc1() to do the background GC work;
//   6. does the end-of-BGC bookkeeping (under gc_lock) on one thread - the joined thread
//      with MULTIPLE_HEAPS - and signals background_gc_done_event.
//
// GCTerminateConcurrentThread_V1 is fired once the loop is left.
void gc_heap::bgc_thread_function()
{
    assert (background_gc_done_event.IsValid());
    assert (bgc_start_event.IsValid());

    dprintf (3, ("gc_thread thread starting..."));

    BOOL do_exit = FALSE;

    // NOTE(review): cooperative_mode is assigned below but not read anywhere in this function.
    bool cooperative_mode = true;
    bgc_thread_id.SetToCurrentThread();
    dprintf (1, ("bgc_thread_id is set to %x", (uint32_t)GCToOSInterface::GetCurrentThreadIdForLogging()));
    while (1)
    {
        // Wait for work to do...
        dprintf (6666, ("h%d bgc thread: waiting...", heap_number));

        cooperative_mode = enable_preemptive ();
        //current_thread->m_fPreemptiveGCDisabled = 0;

        // With MULTIPLE_HEAPS the wait never times out. Otherwise the timeout is 2000 in
        // _DEBUG builds and 20000 in other builds (presumably milliseconds - confirm
        // against the event's Wait contract).
        uint32_t result = bgc_start_event.Wait(
#ifdef _DEBUG
#ifdef MULTIPLE_HEAPS
                                             INFINITE,
#else
                                             2000,
#endif //MULTIPLE_HEAPS
#else //_DEBUG
#ifdef MULTIPLE_HEAPS
                                             INFINITE,
#else
                                             20000,
#endif //MULTIPLE_HEAPS
#endif //_DEBUG
            FALSE);
        dprintf (2, ("gc thread: finished waiting"));

        // not calling disable_preemptive here 'cause we
        // can't wait for GC complete here - RestartEE will be called
        // when we've done the init work.

        if (result == WAIT_TIMEOUT)
        {
            // Should join the bgc threads and terminate all of them
            // at once.
            dprintf (1, ("GC thread timeout"));
            // The same lock is taken by prepare_bgc_thread around its check of
            // bgc_thread_running / bgc_thread.
            bgc_threads_timeout_cs.Enter();
            if (!keep_bgc_threads_p)
            {
                dprintf (2, ("GC thread exiting"));
                bgc_thread_running = FALSE;
                bgc_thread = 0;
                bgc_thread_id.Clear();
                do_exit = TRUE;
            }
            bgc_threads_timeout_cs.Leave();
            if (do_exit)
                break;
            else
            {
                dprintf (3, ("GC thread needed, not exiting"));
                continue;
            }
        }

#ifdef STRESS_DYNAMIC_HEAP_COUNT
        // Stress only: delay the threads that are about to go idle by a random amount.
        if (n_heaps <= heap_number)
        {
            uint32_t delay_ms = (uint32_t)gc_rand::get_rand (200);
            GCToOSInterface::Sleep (delay_ms);
        }
#endif //STRESS_DYNAMIC_HEAP_COUNT

        // if we signal the thread with no concurrent work to do -> exit
        if (!settings.concurrent)
        {
            dprintf (6666, ("h%d no concurrent GC needed, exiting", heap_number));

#if defined(TRACE_GC) && defined(SIMPLE_DPRINTF) && defined(STRESS_DYNAMIC_HEAP_COUNT)
            flush_gc_log (true);
            GCToOSInterface::DebugBreak();
#endif
            break;
        }

#ifdef DYNAMIC_HEAP_COUNT
        if (n_heaps <= heap_number)
        {
            Interlocked::Increment (&dynamic_heap_count_data.idle_bgc_thread_count);
            add_to_bgc_hc_history (hc_record_bgc_inactive);

            // this is the case where we have more background GC threads than heaps
            // - wait until we're told to continue...
            dprintf (6666, ("BGC%Id h%d going idle (%d heaps), idle count is now %d",
                VolatileLoadWithoutBarrier (&settings.gc_index), heap_number, n_heaps, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count)));
            bgc_idle_thread_event.Wait(INFINITE, FALSE);
            dprintf (6666, ("BGC%Id h%d woke from idle (%d heaps), idle count is now %d",
                VolatileLoadWithoutBarrier (&settings.gc_index), heap_number, n_heaps, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count)));
            continue;
        }
        else
        {
            if (heap_number == 0)
            {
                // Heap 0 waits (spin_and_wait) until the idle thread count matches the
                // number of BGC threads that have no heap to work on.
                const int spin_count = 1024;
                int idle_bgc_thread_count = total_bgc_threads - n_heaps;
                dprintf (6666, ("n_heaps %d, total %d bgc threads, bgc idle should be %d and is %d",
                    n_heaps, total_bgc_threads, idle_bgc_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count)));
                if (idle_bgc_thread_count != dynamic_heap_count_data.idle_bgc_thread_count)
                {
                    dprintf (6666, ("current idle is %d, trying to get to %d",
                        VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count), idle_bgc_thread_count));
                    spin_and_wait (spin_count, (idle_bgc_thread_count == dynamic_heap_count_data.idle_bgc_thread_count));
                }
            }

            add_to_bgc_hc_history (hc_record_bgc_active);
        }
#endif //DYNAMIC_HEAP_COUNT

        if (heap_number == 0)
        {
            gc_background_running = TRUE;
            dprintf (6666, (ThreadStressLog::gcStartBgcThread(), heap_number,
                generation_free_list_space (generation_of (max_generation)),
                generation_free_obj_space (generation_of (max_generation)),
                dd_fragmentation (dynamic_data_of (max_generation))));
        }

        // Do the actual GC work for this heap.
        gc1();

#ifndef DOUBLY_LINKED_FL
        current_bgc_state = bgc_not_in_process;
#endif //!DOUBLY_LINKED_FL

        enable_preemptive ();
#ifdef MULTIPLE_HEAPS
        // Only the thread for which joined() is true runs the block below; it releases the
        // other threads with bgc_t_join.restart() at the end of that block.
        bgc_t_join.join(this, gc_join_done);
        if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
        {
            enter_spin_lock (&gc_lock);
            dprintf (SPINLOCK_LOG, ("bgc Egc"));

            bgc_start_event.Reset();
            do_post_gc();
#ifdef MULTIPLE_HEAPS
            // For max_generation and above, even out the desired allocation: sum it over
            // all heaps, divide by n_heaps and assign the (aligned) result to every heap.
            for (int gen = max_generation; gen < total_generation_count; gen++)
            {
                size_t desired_per_heap = 0;
                size_t total_desired = 0;
                gc_heap* hp = 0;
                dynamic_data* dd;
                for (int i = 0; i < n_heaps; i++)
                {
                    hp = g_heaps[i];
                    dd = hp->dynamic_data_of (gen);
                    size_t temp_total_desired = total_desired + dd_desired_allocation (dd);
                    if (temp_total_desired < total_desired)
                    {
                        // we overflowed.
                        total_desired = (size_t)MAX_PTR;
                        break;
                    }
                    total_desired = temp_total_desired;
                }

                desired_per_heap = Align ((total_desired/n_heaps), get_alignment_constant (FALSE));

                if (gen >= loh_generation)
                {
                    desired_per_heap = exponential_smoothing (gen, dd_collection_count (dynamic_data_of (max_generation)), desired_per_heap);
                }

                for (int i = 0; i < n_heaps; i++)
                {
                    hp = gc_heap::g_heaps[i];
                    dd = hp->dynamic_data_of (gen);
                    dd_desired_allocation (dd) = desired_per_heap;
                    dd_gc_new_allocation (dd) = desired_per_heap;
                    dd_new_allocation (dd) = desired_per_heap;
                }
            }

            fire_pevents();
#endif //MULTIPLE_HEAPS

#ifdef DYNAMIC_HEAP_COUNT
            if (trigger_bgc_for_rethreading_p)
            {
                trigger_bgc_for_rethreading_p = false;
            }
#endif //DYNAMIC_HEAP_COUNT

            // Mark the BGC as finished and signal background_gc_done_event, which
            // start_c_gc waits on before starting the next one.
            c_write (settings.concurrent, FALSE);
            gc_background_running = FALSE;
            keep_bgc_threads_p = FALSE;
            background_gc_done_event.Set();

            dprintf (SPINLOCK_LOG, ("bgc Lgc"));
            leave_spin_lock (&gc_lock);
#ifdef MULTIPLE_HEAPS
            dprintf(1, ("End of BGC"));
            bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
        }
        // We can't disable preempt here because there might've been a GC already
        // started and decided to do a BGC and waiting for a BGC thread to restart
        // vm. That GC will be waiting in wait_to_proceed and we are waiting for it
        // to restart the VM so we deadlock.
        //gc_heap::disable_preemptive (true);
    }

    FIRE_EVENT(GCTerminateConcurrentThread_V1);

    dprintf (3, ("bgc_thread thread exiting"));
    return;
}

#ifdef BGC_SERVO_TUNING
// Decides whether a BGC should be triggered because the memory load has gone
// up by at least stepping_interval since the last recorded step.
//
// current_memory_load - memory load to compare against the last recorded step.
// current_gen2_count  - compared with last_stepping_bgc_count; a trigger is
//                       only reported when the two are equal.
//
// Returns true when the step triggers a BGC. Always false when FL tuning is
// disabled or the stepping trigger has been switched off. Once the memory load
// gets close enough to memory_load_goal the stepping trigger switches itself
// off (use_stepping_trigger_p = false).
bool gc_heap::bgc_tuning::stepping_trigger (uint32_t current_memory_load, size_t current_gen2_count)
{
    if (!bgc_tuning::enable_fl_tuning)
        return false;

    if (!use_stepping_trigger_p)
        return false;

    dprintf (BGC_TUNING_LOG, ("current ml: %d, goal: %d",
        current_memory_load, memory_load_goal));

    // We don't go all the way up to mem goal because if we do we could end up with every
    // BGC being triggered by stepping all the way up to goal, and when we actually reach
    // goal we have no time to react 'cause the next BGC could already be over goal.
    const bool at_most_two_thirds_of_goal = (current_memory_load <= (memory_load_goal * 2 / 3));
    const bool several_steps_below_goal =
        ((memory_load_goal > current_memory_load) &&
         ((memory_load_goal - current_memory_load) > (stepping_interval * 3)));

    if (!at_most_two_thirds_of_goal && !several_steps_below_goal)
    {
        use_stepping_trigger_p = false;
        return false;
    }

    int memory_load_delta = (int)current_memory_load - (int)last_stepping_mem_load;
    if (memory_load_delta < (int)stepping_interval)
    {
        // Not a full step yet; nothing is recorded.
        return false;
    }

    bool stepping_trigger_p = (current_gen2_count == last_stepping_bgc_count);
    if (stepping_trigger_p)
    {
        current_gen2_count++;
    }

    dprintf (BGC_TUNING_LOG, ("current ml: %u - %u = %d (>= %u), gen2 count: %zu->%zu, stepping trigger: %s ",
        current_memory_load, last_stepping_mem_load, memory_load_delta, stepping_interval,
        last_stepping_bgc_count, current_gen2_count,
        (stepping_trigger_p ? "yes" : "no")));

    // Remember this step for the next call.
    last_stepping_mem_load = current_memory_load;
    last_stepping_bgc_count = current_gen2_count;

    return stepping_trigger_p;
}

// Returns true when FL tuning has been triggered, no BGC is currently running
// and the LOH servo allocation since the end of the last BGC has reached
// alloc_to_trigger for the LOH tuning calculation.
//
// Note that I am doing this per heap but as we are in this calculation other
// heaps could increase their fl alloc. We are okay with that inaccuracy.
bool gc_heap::bgc_tuning::should_trigger_bgc_loh()
{
    if (!fl_tuning_triggered)
    {
        return false;
    }

    if (gc_heap::background_running_p())
    {
        return false;
    }

    size_t current_alloc = get_total_servo_alloc (loh_generation);
    tuning_calculation* current_gen_calc = &gen_calc[loh_generation - max_generation];

    // Not expected; it is only logged. The unsigned subtraction below still
    // happens in this case.
    if (current_alloc < current_gen_calc->last_bgc_end_alloc)
    {
        dprintf (BGC_TUNING_LOG, ("BTL: current alloc: %zd, last alloc: %zd?",
            current_alloc, current_gen_calc->last_bgc_end_alloc));
    }

    bool trigger_p = ((current_alloc - current_gen_calc->last_bgc_end_alloc) >= current_gen_calc->alloc_to_trigger);
    dprintf (2, ("BTL3: LOH a %zd, la: %zd(%zd), %zd",
            current_alloc, current_gen_calc->last_bgc_end_alloc,
            (current_alloc - current_gen_calc->last_bgc_end_alloc),
            current_gen_calc->alloc_to_trigger));

    if (trigger_p)
    {
        dprintf (BGC_TUNING_LOG, ("BTL3: LOH detected (%zd - %zd) >= %zd, TRIGGER",
                current_alloc, current_gen_calc->last_bgc_end_alloc, current_gen_calc->alloc_to_trigger));
    }

    return trigger_p;
}

// Decides whether a BGC should be triggered by FL tuning.
//
// Returns false right away when FL tuning is disabled or a BGC is already running.
// Otherwise returns true when:
//   - the GC reason is reason_bgc_tuning_loh (also sets next_bgc_p), or
//   - next_bgc_p is set, either already or by this call because memory load and the
//     number of BGCs done are high enough to start FL tuning, or
//   - FL tuning has been triggered and the gen2 servo allocation since the end of the
//     last BGC has reached alloc_to_trigger (also sets settings.reason to
//     reason_bgc_tuning_soh).
bool gc_heap::bgc_tuning::should_trigger_bgc()
{
    if (!bgc_tuning::enable_fl_tuning || gc_heap::background_running_p())
    {
        return false;
    }

    if (settings.reason == reason_bgc_tuning_loh)
    {
        // TODO: this should be an assert because if the reason was reason_bgc_tuning_loh,
        // we should have already set to condemn max_generation but I'm keeping it
        // for now in case we are reverting it for other reasons.
        bgc_tuning::next_bgc_p = true;
        dprintf (BGC_TUNING_LOG, ("BTL LOH triggered"));
        return true;
    }

    // FL tuning hasn't started yet: start it once the entry memory load reaches 2/3 of
    // the goal and at least 2 background GCs have been counted. The current gen2 and LOH
    // servo allocation are recorded as the first_alloc_to_trigger values.
    if (!bgc_tuning::next_bgc_p &&
        !fl_tuning_triggered &&
        (gc_heap::settings.entry_memory_load >= (memory_load_goal * 2 / 3)) &&
        (gc_heap::full_gc_counts[gc_type_background] >= 2))
    {
        next_bgc_p = true;

        gen_calc[0].first_alloc_to_trigger = gc_heap::get_total_servo_alloc (max_generation);
        gen_calc[1].first_alloc_to_trigger = gc_heap::get_total_servo_alloc (loh_generation);
        dprintf (BGC_TUNING_LOG, ("BTL[GTC] mem high enough: %d(goal: %d), %zd BGCs done, g2a=%zd, g3a=%zd, trigger FL tuning!",
            gc_heap::settings.entry_memory_load, memory_load_goal,
            gc_heap::full_gc_counts[gc_type_background],
            gen_calc[0].first_alloc_to_trigger,
            gen_calc[1].first_alloc_to_trigger));
    }

    if (bgc_tuning::next_bgc_p)
    {
        dprintf (BGC_TUNING_LOG, ("BTL started FL tuning"));
        return true;
    }

    if (!fl_tuning_triggered)
    {
        return false;
    }

    // If the tuning started, we need to check if we've exceeded the alloc.
    // Index 0 of gen_calc is the gen2 (max_generation) entry.
    // NOTE(review): the second 'index = 0' below is redundant with the initializer.
    int index = 0;
    bgc_tuning::tuning_calculation* current_gen_calc = 0;

    index = 0;
    current_gen_calc = &bgc_tuning::gen_calc[index];

#ifdef MULTIPLE_HEAPS
    gc_heap* hp = g_heaps[0];
#else
    gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

    // NOTE(review): gen1_so_far is computed but not read anywhere in this function.
    size_t current_gen1_index = dd_collection_count (hp->dynamic_data_of (max_generation - 1));
    size_t gen1_so_far = current_gen1_index - gen1_index_last_bgc_end;

    if (current_gen_calc->alloc_to_trigger > 0)
    {
        // We are specifically checking for gen2 here. LOH is covered by should_trigger_bgc_loh.
        size_t current_alloc = get_total_servo_alloc (max_generation);
        if ((current_alloc - current_gen_calc->last_bgc_end_alloc) >= current_gen_calc->alloc_to_trigger)
        {
            dprintf (BGC_TUNING_LOG, ("BTL2: SOH detected (%zd - %zd) >= %zd, TRIGGER",
                    current_alloc, current_gen_calc->last_bgc_end_alloc, current_gen_calc->alloc_to_trigger));
            settings.reason = reason_bgc_tuning_soh;
            return true;
        }
    }

    return false;
}

// Tells the caller whether an allocation in gen_number should be delayed.
//
// Only answers true for max_generation, with FL tuning enabled, while
// current_c_gc_state is c_gc_state_planning, and when some heap's gen2 free
// list space has dropped below 40% of that heap's bgc_maxgen_end_fl_size.
// Heaps whose bgc_maxgen_end_fl_size is 0 are ignored.
bool gc_heap::bgc_tuning::should_delay_alloc (int gen_number)
{
    const bool candidate_p = (gen_number == max_generation) && bgc_tuning::enable_fl_tuning;
    if (!candidate_p)
        return false;

    if (current_c_gc_state != c_gc_state_planning)
        return false;

#ifdef MULTIPLE_HEAPS
    for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
    {
        gc_heap* hp = gc_heap::g_heaps[heap_index];
        size_t fl_size_now = generation_free_list_space (hp->generation_of (max_generation));
        size_t fl_size_baseline = hp->bgc_maxgen_end_fl_size;
#else
    {
        // Single heap: the index only exists so the log line below is shared.
        int heap_index = 0;
        size_t fl_size_now = generation_free_list_space (generation_of (max_generation));
        size_t fl_size_baseline = bgc_maxgen_end_fl_size;
#endif //MULTIPLE_HEAPS

        // Without a baseline there is nothing to compare against.
        if (fl_size_baseline != 0)
        {
            float remaining_flr = (float)fl_size_now / (float)fl_size_baseline;
            if (remaining_flr < 0.4)
            {
                dprintf (BGC_TUNING_LOG, ("BTL%d h%d last fl %zd, curr fl %zd (%.3f) d1",
                        gen_number, heap_index, fl_size_baseline, fl_size_now, remaining_flr));
                return true;
            }
        }
    }

    return false;
}

void gc_heap::bgc_tuning::update_bgc_start (int gen_number, size_t num_gen1s_since_end)
{
    int tuning_data_index = gen_number - max_generation;
    tuning_calculation* current_gen_calc = &gen_calc[tuning_data_index];
    tuning_stats* current_gen_stats = &gen_stats[tuning_data_index];

    size_t total_generation_size = get_total_generation_size (gen_number);
    ptrdiff_t current_bgc_fl_size = get_total_generation_fl_size (gen_number);

    double physical_gen_flr = (double)current_bgc_fl_size * 100.0 / (double)total_generation_size;

    ptrdiff_t artificial_additional_fl = 0;

    if (fl_tuning_triggered)
    {
        artificial_additional_fl = ((current_gen_calc->end_gen_size_goal > total_generation_size) ? (current_gen_calc->end_gen_size_goal - total_generation_size) : 0);
        total_generation_size += artificial_additional_fl;
        current_bgc_fl_size += artificial_additional_fl;
    }

    current_gen_calc->current_bgc_start_flr = (double)current_bgc_fl_size * 100.0 / (double)total_generation_size;

    size_t current_alloc = get_total_servo_alloc (gen_number);
    dprintf (BGC_TUNING_LOG, ("BTL%d: st a: %zd, la: %zd",
        gen_number, current_alloc, current_gen_stats->last_alloc));
    current_gen_stats->last_alloc_end_to_start = current_alloc - current_gen_stats->last_alloc;
    current_gen_stats->last_alloc = current_alloc;

    current_gen_calc->actual_alloc_to_trigger = current_alloc - current_gen_calc->last_bgc_end_alloc;

    dprintf (BGC_TUNING_LOG, ("BTL%d: st: %zd g1s (%zd->%zd/gen1) since end, flr: %.3f(afl: %zd, %.3f)",
             gen_number, actual_num_gen1s_to_trigger,
             current_gen_stats->last_alloc_end_to_start,
             (num_gen1s_since_end ? (current_gen_stats->last_alloc_end_to_start / num_gen1s_since_end) : 0),
             current_gen_calc->current_bgc_start_flr, artificial_additional_fl, physical_gen_flr));
}

void gc_heap::bgc_tuning::record_bgc_start()
{
    if (!bgc_tuning::enable_fl_tuning)
        return;

    uint64_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - process_start_time;

    // Note that younger gen's collection count is always updated with older gen's collections.
    // So to calcuate the actual # of gen1 occurred we really should take the # of gen2s into
    // account (and deduct from gen1's collection count). But right now I am using it for stats.
    size_t current_gen1_index = get_current_gc_index (max_generation - 1);

    dprintf (BGC_TUNING_LOG, ("BTL: g2t[st][g1 %zd]: %0.3f minutes",
        current_gen1_index,
        (double)elapsed_time_so_far / (double)1000000 / (double)60));

    actual_num_gen1s_to_trigger = current_gen1_index - gen1_index_last_bgc_end;
    gen1_index_last_bgc_start = current_gen1_index;

    update_bgc_start (max_generation, actual_num_gen1s_to_trigger);
    update_bgc_start (loh_generation, actual_num_gen1s_to_trigger);
}

// Measures how far num reaches into the window [lower, upper] and scales it.
//
// Returns 0.0 when num is below lower. Otherwise returns the distance of num
// above lower, capped at (upper - lower), multiplied by percentage.
double convert_range (double lower, double upper, double num, double percentage)
{
    const double distance = num - lower;
    if (distance < 0.0)
    {
        return 0.0;
    }

    const double capped_distance = min ((upper - lower), distance);
    return (capped_distance * percentage);
}

// Dampens delta_double by splitting its magnitude into consecutive bands of
// width step and giving each successive band less weight.
//
// The weights are 1 -> 0.6 -> 0.36; the next one would be 0.216, which fails
// the > 0.22 test, so exactly 3 bands contribute. The magnitude of the result
// is therefore at most 1.96 * step (0.098 for a step of 0.05). The sign of
// delta_double is carried over to the result.
double calculate_gradual_d (double delta_double, double step)
{
    const bool negative_p = (delta_double < 0.0);
    const double magnitude = (negative_p ? -delta_double : delta_double);

    double weighted_sum = 0;
    double band_start = 0;
    for (double band_weight = 1.0; band_weight > 0.22; band_weight *= 0.6)
    {
        weighted_sum += convert_range (band_start, (band_start + step), magnitude, band_weight);
        band_start += step;
    }

    return (negative_p ? -weighted_sum : weighted_sum);
}

void gc_heap::bgc_tuning::update_bgc_sweep_start (int gen_number, size_t num_gen1s_since_start)
{
    int tuning_data_index = gen_number - max_generation;
    tuning_calculation* current_gen_calc = &gen_calc[tuning_data_index];
    tuning_stats* current_gen_stats = &gen_stats[tuning_data_index];

    size_t total_generation_size = 0;
    ptrdiff_t current_bgc_fl_size = 0;

    total_generation_size = get_total_generation_size (gen_number);
    current_bgc_fl_size = get_total_generation_fl_size (gen_number);

    double physical_gen_flr = (double)current_bgc_fl_size * 100.0 / (double)total_generation_size;

    ptrdiff_t artificial_additional_fl = 0;
    if (fl_tuning_triggered)
    {
        artificial_additional_fl = ((current_gen_calc->end_gen_size_goal > total_generation_size) ? (current_gen_calc->end_gen_size_goal - total_generation_size) : 0);
        total_generation_size += artificial_additional_fl;
        current_bgc_fl_size += artificial_additional_fl;
    }

    current_gen_calc->current_bgc_sweep_flr = (double)current_bgc_fl_size * 100.0 / (double)total_generation_size;

    size_t current_alloc = get_total_servo_alloc (gen_number);
    dprintf (BGC_TUNING_LOG, ("BTL%d: sw a: %zd, la: %zd",
        gen_number, current_alloc, current_gen_stats->last_alloc));
    current_gen_stats->last_alloc_start_to_sweep = current_alloc - current_gen_stats->last_alloc;
    // We are resetting gen2 alloc at sweep start.
    current_gen_stats->last_alloc = 0;

#ifdef SIMPLE_DPRINTF
    dprintf (BGC_TUNING_LOG, ("BTL%d: sflr: %.3f%%->%.3f%% (%zd->%zd, %zd->%zd) (%zd:%zd-%zd/gen1) since start (afl: %zd, %.3f)",
             gen_number,
             current_gen_calc->last_bgc_flr, current_gen_calc->current_bgc_sweep_flr,
             current_gen_calc->last_bgc_size, total_generation_size,
             current_gen_stats->last_bgc_fl_size, current_bgc_fl_size,
             num_gen1s_since_start, current_gen_stats->last_alloc_start_to_sweep,
             (num_gen1s_since_start? (current_gen_stats->last_alloc_start_to_sweep / num_gen1s_since_start) : 0),
             artificial_additional_fl, physical_gen_flr));
#endif //SIMPLE_DPRINTF
}

void gc_heap::bgc_tuning::record_bgc_sweep_start()
{
    if (!bgc_tuning::enable_fl_tuning)
        return;

    size_t current_gen1_index = get_current_gc_index (max_generation - 1);
    size_t num_gen1s_since_start = current_gen1_index - gen1_index_last_bgc_start;
    gen1_index_last_bgc_sweep = current_gen1_index;

    uint64_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - process_start_time;
    dprintf (BGC_TUNING_LOG, ("BTL: g2t[sw][g1 %zd]: %0.3f minutes",
        current_gen1_index,
        (double)elapsed_time_so_far / (double)1000000 / (double)60));

    update_bgc_sweep_start (max_generation, num_gen1s_since_start);
    update_bgc_sweep_start (loh_generation, num_gen1s_since_start);
}

// Computes the next alloc_to_trigger for one tuned generation from the data
// gathered over the BGC that is ending, then resets the per-BGC bookkeeping.
//
// gen_number       - generation to tune; its tuning slot is
//                    (gen_number - max_generation).
// use_this_loop_p  - when true, the capped output of the PI loop becomes the
//                    new alloc_to_trigger. When false, alloc_to_trigger is
//                    reverted to the value it had on entry.
//
// Steps, in order:
//   1. Before FL tuning is triggered (and tbh is enabled), remember the actual
//      allocation as alloc_to_trigger_0.
//   2. Once FL tuning is triggered: integral term with anti-windup, PI output,
//      then either tbh or the kd/smoothing steps, then feed forward, and
//      finally the low/high caps (or the revert).
//   3. If next_bgc_p is set, seed alloc_to_trigger from first_alloc_to_trigger.
//   4. Save this BGC's numbers as the "last" values for the next calculation.
//
// NOTE: when use_this_loop_p is true, the capped PI output assigned in the cap
// step overwrites whatever the tbh/kd/smoothing/ff steps stored in
// alloc_to_trigger.
void gc_heap::bgc_tuning::calculate_tuning (int gen_number, bool use_this_loop_p)
{
    BOOL use_kd_p = enable_kd;
    BOOL use_ki_p = enable_ki;
    BOOL use_smooth_p = enable_smooth;
    BOOL use_tbh_p = enable_tbh;
    BOOL use_ff_p = enable_ff;

    int tuning_data_index = gen_number - max_generation;
    tuning_calculation* current_gen_calc = &gen_calc[tuning_data_index];
    tuning_stats* current_gen_stats = &gen_stats[tuning_data_index];
    bgc_size_data* data = &current_bgc_end_data[tuning_data_index];

    size_t total_generation_size = data->gen_size;
    size_t current_bgc_fl = data->gen_fl_size;

    size_t current_bgc_surv_size = get_total_surv_size (gen_number);
    size_t current_bgc_begin_data_size = get_total_begin_data_size (gen_number);

    // This is usually 0 unless a GC happened where we joined at the end of sweep
    size_t current_alloc = get_total_servo_alloc (gen_number);
    //dprintf (BGC_TUNING_LOG, ("BTL%d: current fl alloc: %zd, last recorded alloc: %zd, last_bgc_end_alloc: %zd",
    dprintf (BGC_TUNING_LOG, ("BTL%d: en a: %zd, la: %zd, lbgca: %zd",
        gen_number, current_alloc, current_gen_stats->last_alloc, current_gen_calc->last_bgc_end_alloc));

    // Survival rate as a percentage; 0 when there was no data at the beginning.
    double current_bgc_surv_rate = (current_bgc_begin_data_size == 0) ?
                                    0 : ((double)current_bgc_surv_size * 100.0 / (double)current_bgc_begin_data_size);

    current_gen_stats->last_alloc_sweep_to_end = current_alloc - current_gen_stats->last_alloc;

    size_t gen1_index = get_current_gc_index (max_generation - 1);
    size_t gen2_index = get_current_gc_index (max_generation);

    size_t num_gen1s_since_sweep = gen1_index - gen1_index_last_bgc_sweep;
    size_t num_gen1s_bgc_end = gen1_index - gen1_index_last_bgc_end;

    // Snapshot the inputs before any of the steps below modify current_gen_calc.
    size_t gen_end_size_goal = current_gen_calc->end_gen_size_goal;
    double gen_sweep_flr_goal = current_gen_calc->sweep_flr_goal;
    size_t last_gen_alloc_to_trigger = current_gen_calc->alloc_to_trigger;
    size_t gen_actual_alloc_to_trigger = current_gen_calc->actual_alloc_to_trigger;
    size_t last_gen_alloc_to_trigger_0 = current_gen_calc->alloc_to_trigger_0;

    double current_end_to_sweep_flr = current_gen_calc->last_bgc_flr - current_gen_calc->current_bgc_sweep_flr;
    bool current_sweep_above_p = (current_gen_calc->current_bgc_sweep_flr > gen_sweep_flr_goal);

#ifdef SIMPLE_DPRINTF
    dprintf (BGC_TUNING_LOG, ("BTL%d: sflr: c %.3f (%s), p %s, palloc: %zd, aalloc %zd(%s)",
        gen_number,
        current_gen_calc->current_bgc_sweep_flr,
        (current_sweep_above_p ? "above" : "below"),
        (current_gen_calc->last_sweep_above_p ? "above" : "below"),
        last_gen_alloc_to_trigger,
        current_gen_calc->actual_alloc_to_trigger,
        (use_this_loop_p ? "this" : "last")));

    dprintf (BGC_TUNING_LOG, ("BTL%d-en[g1: %zd, g2: %zd]: end fl: %zd (%zd: S-%zd, %.3f%%->%.3f%%)",
            gen_number,
            gen1_index, gen2_index, current_bgc_fl,
            total_generation_size, current_bgc_surv_size,
            current_gen_stats->last_bgc_surv_rate, current_bgc_surv_rate));

    dprintf (BGC_TUNING_LOG, ("BTLS%d sflr: %.3f, end-start: %zd(%zd), start-sweep: %zd(%zd), sweep-end: %zd(%zd)",
            gen_number,
            current_gen_calc->current_bgc_sweep_flr,
            (gen1_index_last_bgc_start - gen1_index_last_bgc_end), current_gen_stats->last_alloc_end_to_start,
            (gen1_index_last_bgc_sweep - gen1_index_last_bgc_start), current_gen_stats->last_alloc_start_to_sweep,
            num_gen1s_since_sweep, current_gen_stats->last_alloc_sweep_to_end));
#endif //SIMPLE_DPRINTF

    size_t saved_alloc_to_trigger = 0;

    // during our calculation alloc can be negative so use double here.
    double current_alloc_to_trigger = 0.0;

    if (!fl_tuning_triggered && use_tbh_p)
    {
        current_gen_calc->alloc_to_trigger_0 = current_gen_calc->actual_alloc_to_trigger;
        dprintf (BGC_TUNING_LOG, ("BTL%d[g1: %zd]: not in FL tuning yet, setting alloc_to_trigger_0 to %zd",
                 gen_number,
                 gen1_index, current_gen_calc->alloc_to_trigger_0));
    }

    if (fl_tuning_triggered)
    {
        BOOL tuning_kd_finished_p = FALSE;

        // We shouldn't have an alloc_to_trigger that's > what's consumed before sweep happens.
        double max_alloc_to_trigger = ((double)current_bgc_fl * (100 - gen_sweep_flr_goal) / 100.0);
        // The floor is 5% of the free list at the end of this BGC.
        double min_alloc_to_trigger = (double)current_bgc_fl * 0.05;

        {
            if (current_gen_calc->current_bgc_sweep_flr < 0.0)
            {
                dprintf (BGC_TUNING_LOG, ("BTL%d: sflr is %.3f!!! < 0, make it 0", gen_number, current_gen_calc->current_bgc_sweep_flr));
                current_gen_calc->current_bgc_sweep_flr = 0.0;
            }

            double adjusted_above_goal_kp = above_goal_kp;
            double above_goal_distance = current_gen_calc->current_bgc_sweep_flr - gen_sweep_flr_goal;
            if (use_ki_p)
            {
                if (current_gen_calc->above_goal_accu_error > max_alloc_to_trigger)
                {
                    dprintf (BGC_TUNING_LOG, ("g%d: ae TB! %.1f->%.1f", gen_number, current_gen_calc->above_goal_accu_error, max_alloc_to_trigger));
                }
                else if (current_gen_calc->above_goal_accu_error < min_alloc_to_trigger)
                {
                    dprintf (BGC_TUNING_LOG, ("g%d: ae TS! %.1f->%.1f", gen_number, current_gen_calc->above_goal_accu_error, min_alloc_to_trigger));
                }

                // Bring the accumulated error into [min, max] before adding this round's gain.
                current_gen_calc->above_goal_accu_error = min (max_alloc_to_trigger, current_gen_calc->above_goal_accu_error);
                current_gen_calc->above_goal_accu_error = max (min_alloc_to_trigger, current_gen_calc->above_goal_accu_error);

                double above_goal_ki_gain = above_goal_ki * above_goal_distance * current_bgc_fl;
                double temp_accu_error = current_gen_calc->above_goal_accu_error + above_goal_ki_gain;
                // anti-windup
                if ((temp_accu_error > min_alloc_to_trigger) &&
                    (temp_accu_error < max_alloc_to_trigger))
                {
                    current_gen_calc->above_goal_accu_error = temp_accu_error;
                }
                else
                {
                    //dprintf (BGC_TUNING_LOG, ("alloc accu err + %.1f=%.1f, exc",
                    dprintf (BGC_TUNING_LOG, ("g%d: aae + %.1f=%.1f, exc", gen_number,
                            above_goal_ki_gain,
                            temp_accu_error));
                }
            }

            // First we do the PI loop.
            {
                saved_alloc_to_trigger = current_gen_calc->alloc_to_trigger;
                current_alloc_to_trigger = adjusted_above_goal_kp * above_goal_distance * current_bgc_fl;
                // la is last alloc_to_trigger, +%zd is the diff between la and the new alloc.
                // laa is the last actual alloc (gen_actual_alloc_to_trigger), +%zd is the diff between la and laa.
                dprintf (BGC_TUNING_LOG, ("BTL%d: sflr %.3f above * %.4f * %zd = %zd bytes in alloc, la: %zd(+%zd), laa: %zd(+%zd)",
                        gen_number,
                        (current_gen_calc->current_bgc_sweep_flr - (double)gen_sweep_flr_goal),
                        adjusted_above_goal_kp,
                        current_bgc_fl,
                        (size_t)current_alloc_to_trigger,
                        saved_alloc_to_trigger,
                        (size_t)(current_alloc_to_trigger - (double)saved_alloc_to_trigger),
                        gen_actual_alloc_to_trigger,
                        (gen_actual_alloc_to_trigger - saved_alloc_to_trigger)));

                if (use_ki_p)
                {
                    current_alloc_to_trigger += current_gen_calc->above_goal_accu_error;
                    dprintf (BGC_TUNING_LOG, ("BTL%d: +accu err %zd=%zd",
                            gen_number,
                            (size_t)(current_gen_calc->above_goal_accu_error),
                            (size_t)current_alloc_to_trigger));
                }
            }

            if (use_tbh_p)
            {
                // When the sweep flr moved to the other side of the goal since the
                // last BGC, take the midpoint of the two previous trigger values.
                if (current_gen_calc->last_sweep_above_p != current_sweep_above_p)
                {
                    size_t new_alloc_to_trigger_0 = (last_gen_alloc_to_trigger + last_gen_alloc_to_trigger_0) / 2;
                    dprintf (BGC_TUNING_LOG, ("BTL%d: tbh crossed SP, setting both to %zd", gen_number, new_alloc_to_trigger_0));
                    current_gen_calc->alloc_to_trigger_0 = new_alloc_to_trigger_0;
                    current_gen_calc->alloc_to_trigger = new_alloc_to_trigger_0;
                }

                // With tbh enabled the kd and smoothing steps below are skipped.
                tuning_kd_finished_p = TRUE;
            }
        }

        if (!tuning_kd_finished_p)
        {
            if (use_kd_p)
            {
                saved_alloc_to_trigger = last_gen_alloc_to_trigger;
                // The delta must be signed: when the actual allocation exceeded the
                // planned one, an unsigned subtraction would wrap to a huge positive
                // value and the ratio would always clamp to +kd instead of going
                // negative.
                ptrdiff_t alloc_delta = (ptrdiff_t)saved_alloc_to_trigger - (ptrdiff_t)gen_actual_alloc_to_trigger;
                // With no actual allocation there is no ratio to compute; using 0
                // keeps the result at 0 without producing inf/NaN.
                double adjust_ratio = (gen_actual_alloc_to_trigger == 0) ?
                                      0.0 : ((double)alloc_delta / (double)gen_actual_alloc_to_trigger);
                double saved_adjust_ratio = adjust_ratio;
                if (enable_gradual_d)
                {
                    adjust_ratio = calculate_gradual_d (adjust_ratio, above_goal_kd);
                    dprintf (BGC_TUNING_LOG, ("BTL%d: gradual kd - reduced from %.3f to %.3f",
                            gen_number, saved_adjust_ratio, adjust_ratio));
                }
                else
                {
                    // Plain clamp of the ratio to [-kd, kd].
                    double kd = above_goal_kd;
                    double neg_kd = 0 - kd;
                    if (adjust_ratio > kd) adjust_ratio = kd;
                    if (adjust_ratio < neg_kd) adjust_ratio = neg_kd;
                    dprintf (BGC_TUNING_LOG, ("BTL%d: kd - reduced from %.3f to %.3f",
                            gen_number, saved_adjust_ratio, adjust_ratio));
                }

                current_gen_calc->alloc_to_trigger = (size_t)((double)gen_actual_alloc_to_trigger * (1 + adjust_ratio));

                dprintf (BGC_TUNING_LOG, ("BTL%d: kd %.3f, reduced it to %.3f * %zd, adjust %zd->%zd",
                        gen_number, saved_adjust_ratio,
                        adjust_ratio, gen_actual_alloc_to_trigger,
                        saved_alloc_to_trigger, current_gen_calc->alloc_to_trigger));
            }

            if (use_smooth_p && use_this_loop_p)
            {
                saved_alloc_to_trigger = current_gen_calc->alloc_to_trigger;
                size_t gen_smoothed_alloc_to_trigger = current_gen_calc->smoothed_alloc_to_trigger;
                // The smoothing factor is the smaller of num_gen1s_smooth_factor and
                // the number of BGCs since tuning was triggered.
                double current_num_gen1s_smooth_factor = (num_gen1s_smooth_factor > (double)num_bgcs_since_tuning_trigger) ?
                                                        (double)num_bgcs_since_tuning_trigger : num_gen1s_smooth_factor;
                current_gen_calc->smoothed_alloc_to_trigger = (size_t)((double)saved_alloc_to_trigger / current_num_gen1s_smooth_factor +
                    ((double)gen_smoothed_alloc_to_trigger / current_num_gen1s_smooth_factor) * (current_num_gen1s_smooth_factor - 1.0));

                dprintf (BGC_TUNING_LOG, ("BTL%d: smoothed %zd / %.3f + %zd / %.3f * %.3f adjust %zd->%zd",
                    gen_number, saved_alloc_to_trigger, current_num_gen1s_smooth_factor,
                    gen_smoothed_alloc_to_trigger, current_num_gen1s_smooth_factor,
                    (current_num_gen1s_smooth_factor - 1.0),
                    saved_alloc_to_trigger, current_gen_calc->smoothed_alloc_to_trigger));
                current_gen_calc->alloc_to_trigger = current_gen_calc->smoothed_alloc_to_trigger;
            }
        }

        if (use_ff_p)
        {
            double next_end_to_sweep_flr = data->gen_flr - gen_sweep_flr_goal;

            if (next_end_to_sweep_flr > 0.0)
            {
                saved_alloc_to_trigger = current_gen_calc->alloc_to_trigger;
                double ff_ratio = next_end_to_sweep_flr / current_end_to_sweep_flr - 1;

                if (use_this_loop_p)
                {
                    // if we adjust down we want ff to be bigger, so the alloc will be even smaller;
                    // if we adjust up want ff to be smaller, so the alloc will also be smaller;
                    // the idea is we want to be slower at increase than decrease
                    double ff_step = above_goal_ff * 0.5;
                    double adjusted_above_goal_ff = above_goal_ff;
                    if (ff_ratio > 0)
                        adjusted_above_goal_ff -= ff_step;
                    else
                        adjusted_above_goal_ff += ff_step;

                    double adjusted_ff_ratio = ff_ratio * adjusted_above_goal_ff;
                    current_gen_calc->alloc_to_trigger = saved_alloc_to_trigger + (size_t)((double)saved_alloc_to_trigger * adjusted_ff_ratio);
                    dprintf (BGC_TUNING_LOG, ("BTL%d: ff (%.3f / %.3f - 1) * %.3f = %.3f adjust %zd->%zd",
                        gen_number, next_end_to_sweep_flr, current_end_to_sweep_flr, adjusted_above_goal_ff, adjusted_ff_ratio,
                        saved_alloc_to_trigger, current_gen_calc->alloc_to_trigger));
                }
            }
        }

        if (use_this_loop_p)
        {
            // apply low/high caps.
            if (current_alloc_to_trigger > max_alloc_to_trigger)
            {
                dprintf (BGC_TUNING_LOG, ("BTL%d: TB! %.1f -> %.1f",
                    gen_number, current_alloc_to_trigger, max_alloc_to_trigger));
                current_alloc_to_trigger = max_alloc_to_trigger;
            }

            if (current_alloc_to_trigger < min_alloc_to_trigger)
            {
                dprintf (BGC_TUNING_LOG, ("BTL%d: TS! %zd -> %zd",
                        gen_number, (ptrdiff_t)current_alloc_to_trigger, (size_t)min_alloc_to_trigger));
                current_alloc_to_trigger = min_alloc_to_trigger;
            }

            current_gen_calc->alloc_to_trigger = (size_t)current_alloc_to_trigger;
        }
        else
        {
            // we can't do the above comparison - we could be in the situation where
            // we haven't done any alloc.
            dprintf (BGC_TUNING_LOG, ("BTL%d: ag, revert %zd->%zd",
                gen_number, current_gen_calc->alloc_to_trigger, last_gen_alloc_to_trigger));
            current_gen_calc->alloc_to_trigger = last_gen_alloc_to_trigger;
        }
    }

    // This is only executed once to get the tuning started.
    if (next_bgc_p)
    {
        size_t first_alloc = (size_t)((double)current_gen_calc->first_alloc_to_trigger * 0.75);
        // The initial conditions can be quite erratic so check to see if the first alloc we set was reasonable - take 5% of the FL
        size_t min_first_alloc = current_bgc_fl / 20;

        current_gen_calc->alloc_to_trigger = max (first_alloc, min_first_alloc);

        dprintf (BGC_TUNING_LOG, ("BTL%d[g1: %zd]: BGC end, trigger FL, set gen%d alloc to max (0.75 of first: %zd, 5%% fl: %zd), actual alloc: %zd",
            gen_number, gen1_index, gen_number,
            first_alloc, min_first_alloc,
            current_gen_calc->actual_alloc_to_trigger));
    }

    dprintf (BGC_TUNING_LOG, ("BTL%d* %zd, %.3f, %.3f, %.3f, %.3f, %.3f, %zd, %zd, %zd, %zd",
                              gen_number,
                              total_generation_size,
                              current_gen_calc->current_bgc_start_flr,
                              current_gen_calc->current_bgc_sweep_flr,
                              current_bgc_end_data[tuning_data_index].gen_flr,
                              current_gen_stats->last_gen_increase_flr,
                              current_bgc_surv_rate,
                              actual_num_gen1s_to_trigger,
                              num_gen1s_bgc_end,
                              gen_actual_alloc_to_trigger,
                              current_gen_calc->alloc_to_trigger));

    // Everything below saves this BGC's values as the "last" values and clears
    // the per-BGC allocation counters.
    gen1_index_last_bgc_end = gen1_index;

    current_gen_calc->last_bgc_size = total_generation_size;
    current_gen_calc->last_bgc_flr = current_bgc_end_data[tuning_data_index].gen_flr;
    current_gen_calc->last_sweep_above_p = current_sweep_above_p;
    current_gen_calc->last_bgc_end_alloc = current_alloc;

    current_gen_stats->last_bgc_physical_size = data->gen_physical_size;
    current_gen_stats->last_alloc_end_to_start = 0;
    current_gen_stats->last_alloc_start_to_sweep = 0;
    current_gen_stats->last_alloc_sweep_to_end = 0;
    current_gen_stats->last_alloc = current_alloc;
    current_gen_stats->last_bgc_fl_size = current_bgc_end_data[tuning_data_index].gen_fl_size;
    current_gen_stats->last_bgc_surv_rate = current_bgc_surv_rate;
    current_gen_stats->last_gen_increase_flr = 0;
}

// Captures the physical size and free list size of one tuned generation into
// current_bgc_end_data for this generation's tuning slot.
//
// gen_number       - generation to capture; its tuning slot is
//                    (gen_number - max_generation).
// use_this_loop_p  - false when this generation's own loop is not being used
//                    for this BGC.
//
// Note that in this method for the !use_this_loop_p generation we will adjust
// its sweep_flr accordingly. And the inner loop will not need to know about this.
void gc_heap::bgc_tuning::init_bgc_end_data (int gen_number, bool use_this_loop_p)
{
    int index = gen_number - max_generation;
    bgc_size_data* data = &current_bgc_end_data[index];

    size_t physical_size = get_total_generation_size (gen_number);
    ptrdiff_t physical_fl_size = get_total_generation_fl_size (gen_number);
    // Recorded before the adjustment below can reduce physical_fl_size.
    data->gen_actual_phys_fl_size = physical_fl_size;

    if (fl_tuning_triggered && !use_this_loop_p)
    {
        tuning_calculation* current_gen_calc = &gen_calc[gen_number - max_generation];

        if (current_gen_calc->actual_alloc_to_trigger > current_gen_calc->alloc_to_trigger)
        {
            dprintf (BGC_TUNING_LOG, ("BTL%d: gen alloc also exceeded %zd (la: %zd), no action",
                gen_number, current_gen_calc->actual_alloc_to_trigger, current_gen_calc->alloc_to_trigger));
        }
        else
        {
            // We will deduct the missing portion from alloc to fl, simulating that we consumed it.
            size_t remaining_alloc = current_gen_calc->alloc_to_trigger -
                                     current_gen_calc->actual_alloc_to_trigger;

            // now re-calc current_bgc_sweep_flr
            // TODO: note that I am assuming the physical size at sweep was <= end_gen_size_goal which
            // may not have been the case.
            size_t gen_size = current_gen_calc->end_gen_size_goal;
            double sweep_flr = current_gen_calc->current_bgc_sweep_flr;
            size_t sweep_fl_size = (size_t)((double)gen_size * sweep_flr / 100.0);

            if (sweep_fl_size < remaining_alloc)
            {
                dprintf (BGC_TUNING_LOG, ("BTL%d: sweep fl %zd < remain alloc %zd", gen_number, sweep_fl_size, remaining_alloc));
                // TODO: this is saying that we didn't have enough fl to accommodate the
                // remaining alloc which is suspicious. To set remaining_alloc to
                // something slightly smaller is only so that we could continue with
                // our calculation but this is something we should look into.
                //
                // Clamp at 0: if sweep_fl_size is below the margin, the unsigned
                // subtraction would wrap to a huge value that then feeds
                // remaining_alloc_surv and physical_fl_size below.
                const size_t remaining_alloc_margin = 10 * 1024;
                remaining_alloc = (sweep_fl_size > remaining_alloc_margin) ?
                                  (sweep_fl_size - remaining_alloc_margin) : 0;
            }

            // The signed copies exist only for the log line below.
            size_t new_sweep_fl_size = sweep_fl_size - remaining_alloc;
            ptrdiff_t signed_new_sweep_fl_size = sweep_fl_size - remaining_alloc;

            double new_current_bgc_sweep_flr = (double)new_sweep_fl_size * 100.0 / (double)gen_size;
            double signed_new_current_bgc_sweep_flr = (double)signed_new_sweep_fl_size * 100.0 / (double)gen_size;

            dprintf (BGC_TUNING_LOG, ("BTL%d: sg: %zd(%zd), sfl: %zd->%zd(%zd)(%.3f->%.3f(%.3f)), la: %zd, aa: %zd",
                gen_number, gen_size, physical_size, sweep_fl_size,
                new_sweep_fl_size, signed_new_sweep_fl_size,
                sweep_flr, new_current_bgc_sweep_flr, signed_new_current_bgc_sweep_flr,
                current_gen_calc->alloc_to_trigger, current_gen_calc->actual_alloc_to_trigger));

            // Pretend the planned allocation was fully consumed.
            current_gen_calc->actual_alloc_to_trigger = current_gen_calc->alloc_to_trigger;
            current_gen_calc->current_bgc_sweep_flr = new_current_bgc_sweep_flr;

            // TODO: NOTE this is duplicated in calculate_tuning except I am not * 100.0 here.
            size_t current_bgc_surv_size = get_total_surv_size (gen_number);
            size_t current_bgc_begin_data_size = get_total_begin_data_size (gen_number);
            double current_bgc_surv_rate = (current_bgc_begin_data_size == 0) ?
                                            0 : ((double)current_bgc_surv_size / (double)current_bgc_begin_data_size);

            // Only the surviving share of the simulated allocation is taken out
            // of the physical free list.
            size_t remaining_alloc_surv = (size_t)((double)remaining_alloc * current_bgc_surv_rate);
            physical_fl_size -= remaining_alloc_surv;
            dprintf (BGC_TUNING_LOG, ("BTL%d: asfl %zd-%zd=%zd, flr %.3f->%.3f, %.3f%% s, fl %zd-%zd->%zd",
                gen_number, sweep_fl_size, remaining_alloc, new_sweep_fl_size,
                sweep_flr, current_gen_calc->current_bgc_sweep_flr,
                (current_bgc_surv_rate * 100.0),
                (physical_fl_size + remaining_alloc_surv),
                remaining_alloc_surv, physical_fl_size));
        }
    }

    double physical_gen_flr = (double)physical_fl_size * 100.0 / (double)physical_size;
    data->gen_physical_size = physical_size;
    data->gen_physical_fl_size = physical_fl_size;
    data->gen_physical_flr = physical_gen_flr;
}

void gc_heap::bgc_tuning::calc_end_bgc_fl (int gen_number)
{
    int index = gen_number - max_generation;
    bgc_size_data* data = &current_bgc_end_data[index];

    tuning_calculation* current_gen_calc = &gen_calc[gen_number - max_generation];

    size_t virtual_size = current_gen_calc->end_gen_size_goal;
    size_t physical_size = data->gen_physical_size;
    ptrdiff_t physical_fl_size = data->gen_physical_fl_size;
    ptrdiff_t virtual_fl_size = (ptrdiff_t)virtual_size - (ptrdiff_t)physical_size;
    ptrdiff_t end_gen_fl_size = physical_fl_size + virtual_fl_size;

    if (end_gen_fl_size < 0)
    {
        end_gen_fl_size = 0;
    }

    data->gen_size = virtual_size;
    data->gen_fl_size = end_gen_fl_size;
    data->gen_flr = (double)(data->gen_fl_size) * 100.0 / (double)(data->gen_size);

    dprintf (BGC_TUNING_LOG, ("BTL%d: vfl: %zd, size %zd->%zd, fl %zd->%zd, flr %.3f->%.3f",
        gen_number, virtual_fl_size,
        data->gen_physical_size, data->gen_size,
        data->gen_physical_fl_size, data->gen_fl_size,
        data->gen_physical_flr, data->gen_flr));
}

// Memory load (ml) servo step: turns the distance between the currently available
// physical memory and available_memory_goal into a total virtual free list size,
// using a proportional term (ml_kp) and an accumulated integral term (accu_error).
//
// Parameters
//     current_available_physical : the available physical memory to compare to the goal.
//     reduce_p     : is for NGC2s. we want to reduce the ki so we don't overshoot - the
//                    accumulated term is scaled to 2/3 and the panic state is reset.
//     _vfl_from_kp : [out] the proportional contribution (error * ml_kp).
//     _vfl_from_ki : [out] the integral contribution (accu_error after this step).
// Returns the sum of the two contributions, limited to [0, max_output].
double gc_heap::bgc_tuning::calculate_ml_tuning (uint64_t current_available_physical, bool reduce_p,
                                                 ptrdiff_t* _vfl_from_kp, ptrdiff_t* _vfl_from_ki)
{
    // Positive when more memory is available than the goal asks for.
    ptrdiff_t error = (ptrdiff_t)(current_available_physical - available_memory_goal);

    // This is questionable as gen0/1 and other processes are consuming memory
    // too
    size_t gen2_physical_size = current_bgc_end_data[0].gen_physical_size;
    size_t gen3_physical_size = current_bgc_end_data[1].gen_physical_size;

    // NOTE(review): the subtraction is done in the operands' own integer type before the
    // conversion to double; if these are unsigned and gen2 + gen3 exceed
    // (total - goal) this would wrap to a very large value - confirm that can't happen.
    double max_output = (double)(total_physical_mem - available_memory_goal -
                                 gen2_physical_size - gen3_physical_size);

    double error_ratio = (double)error / (double)total_physical_mem;

    // do we want this to contribute to the integral term?
    // Errors within +/-0.5% of total physical memory are left out.
    bool include_in_i_p = ((error_ratio > 0.005) || (error_ratio < -0.005));

    dprintf (BGC_TUNING_LOG, ("total phy %zd, mem goal: %zd, curr phy: %zd, g2 phy: %zd, g3 phy: %zd",
            (size_t)total_physical_mem, (size_t)available_memory_goal,
            (size_t)current_available_physical,
            gen2_physical_size, gen3_physical_size));
    dprintf (BGC_TUNING_LOG, ("BTL: Max output: %zd, ER %zd / %zd = %.3f, %s",
            (size_t)max_output,
            error, (size_t)available_memory_goal, error_ratio,
            (include_in_i_p ? "inc" : "exc")));

    if (include_in_i_p)
    {
        double error_ki = ml_ki * (double)error;
        double temp_accu_error = accu_error + error_ki;
        // anti-windup: only accept the new accumulated value while it stays strictly
        // inside (0, max_output).
        if ((temp_accu_error > 0) && (temp_accu_error < max_output))
            accu_error = temp_accu_error;
        else
        {
            // Both values can be negative here, so they are logged as signed; converting
            // a negative double to an unsigned type is undefined.
            //dprintf (BGC_TUNING_LOG, ("ml accu err + %zd=%zd, exc",
            dprintf (BGC_TUNING_LOG, ("mae + %zd=%zd, exc",
                    (ptrdiff_t)error_ki, (ptrdiff_t)temp_accu_error));
        }
    }

    if (reduce_p)
    {
        double saved_accu_error = accu_error;
        accu_error = accu_error * 2.0 / 3.0;
        panic_activated_p = false;
        accu_error_panic = 0;
        dprintf (BGC_TUNING_LOG, ("BTL reduced accu ki %zd->%zd", (ptrdiff_t)saved_accu_error, (ptrdiff_t)accu_error));
    }

    // The panic accumulator only grows while panic is active; otherwise it is reset.
    if (panic_activated_p)
        accu_error_panic += (double)error;
    else
        accu_error_panic = 0.0;

    double vfl_from_kp = (double)error * ml_kp;
    double total_virtual_fl_size = vfl_from_kp + accu_error;
    // limit output
    if (total_virtual_fl_size < 0)
    {
        // Negative by definition in this branch - log it as a signed value.
        dprintf (BGC_TUNING_LOG, ("BTL vfl %zd < 0", (ptrdiff_t)total_virtual_fl_size));
        total_virtual_fl_size = 0;
    }
    else if (total_virtual_fl_size > max_output)
    {
        dprintf (BGC_TUNING_LOG, ("BTL vfl %zd > max", (size_t)total_virtual_fl_size));
        total_virtual_fl_size = max_output;
    }

    *_vfl_from_kp = (ptrdiff_t)vfl_from_kp;
    *_vfl_from_ki = (ptrdiff_t)accu_error;
    return total_virtual_fl_size;
}

void gc_heap::bgc_tuning::set_total_gen_sizes (bool use_gen2_loop_p, bool use_gen3_loop_p)
{
    size_t gen2_physical_size = current_bgc_end_data[0].gen_physical_size;
    size_t gen3_physical_size = 0;
    ptrdiff_t gen3_virtual_fl_size = 0;
    gen3_physical_size = current_bgc_end_data[1].gen_physical_size;
    double gen2_size_ratio = (double)gen2_physical_size / ((double)gen2_physical_size + (double)gen3_physical_size);

    // We know how far we are from the memory load goal, assuming that the memory is only
    // used by gen2/3 (which is obviously not the case, but that's why we are not setting the
    // memory goal at 90+%. Assign the memory proportionally to them.
    //
    // We use entry memory load info because that seems to be more closedly correlated to what the VMM decides
    // in memory load.
    uint32_t current_memory_load = settings.entry_memory_load;
    uint64_t current_available_physical = settings.entry_available_physical_mem;

    panic_activated_p = (current_memory_load >= (memory_load_goal + memory_load_goal_slack));

    if (panic_activated_p)
    {
        dprintf (BGC_TUNING_LOG, ("BTL: exceeded slack %zd >= (%zd + %zd)",
            (size_t)current_memory_load, (size_t)memory_load_goal,
            (size_t)memory_load_goal_slack));
    }

    ptrdiff_t vfl_from_kp = 0;
    ptrdiff_t vfl_from_ki = 0;
    double total_virtual_fl_size = calculate_ml_tuning (current_available_physical, false, &vfl_from_kp, &vfl_from_ki);

    if (use_gen2_loop_p || use_gen3_loop_p)
    {
        if (use_gen2_loop_p)
        {
            gen2_ratio_correction += ratio_correction_step;
        }
        else
        {
            gen2_ratio_correction -= ratio_correction_step;
        }

        dprintf (BGC_TUNING_LOG, ("BTL: rc: g2 ratio %.3f%% + %d%% = %.3f%%",
            (gen2_size_ratio * 100.0), (int)(gen2_ratio_correction * 100.0), ((gen2_size_ratio + gen2_ratio_correction) * 100.0)));

        gen2_ratio_correction = min (0.99, gen2_ratio_correction);
        gen2_ratio_correction = max (-0.99, gen2_ratio_correction);

        dprintf (BGC_TUNING_LOG, ("BTL: rc again: g2 ratio %.3f%% + %d%% = %.3f%%",
            (gen2_size_ratio * 100.0), (int)(gen2_ratio_correction * 100.0), ((gen2_size_ratio + gen2_ratio_correction) * 100.0)));

        gen2_size_ratio += gen2_ratio_correction;

        if (gen2_size_ratio <= 0.0)
        {
            gen2_size_ratio = 0.01;
            dprintf (BGC_TUNING_LOG, ("BTL: rc: g2 ratio->0.01"));
        }

        if (gen2_size_ratio >= 1.0)
        {
            gen2_size_ratio = 0.99;
            dprintf (BGC_TUNING_LOG, ("BTL: rc: g2 ratio->0.99"));
        }
    }

    ptrdiff_t gen2_virtual_fl_size = (ptrdiff_t)(total_virtual_fl_size * gen2_size_ratio);
    gen3_virtual_fl_size = (ptrdiff_t)(total_virtual_fl_size * (1.0 - gen2_size_ratio));
    if (gen2_virtual_fl_size < 0)
    {
        ptrdiff_t saved_gen2_virtual_fl_size = gen2_virtual_fl_size;
        ptrdiff_t half_gen2_physical_size = (ptrdiff_t)((double)gen2_physical_size * 0.5);
        if (-gen2_virtual_fl_size > half_gen2_physical_size)
        {
            gen2_virtual_fl_size = -half_gen2_physical_size;
        }

        dprintf (BGC_TUNING_LOG, ("BTL2: n_vfl %zd(%zd)->%zd", saved_gen2_virtual_fl_size, half_gen2_physical_size, gen2_virtual_fl_size));
        gen2_virtual_fl_size = 0;
    }

    if (gen3_virtual_fl_size < 0)
    {
        ptrdiff_t saved_gen3_virtual_fl_size = gen3_virtual_fl_size;
        ptrdiff_t half_gen3_physical_size = (ptrdiff_t)((double)gen3_physical_size * 0.5);
        if (-gen3_virtual_fl_size > half_gen3_physical_size)
        {
            gen3_virtual_fl_size = -half_gen3_physical_size;
        }

        dprintf (BGC_TUNING_LOG, ("BTL3: n_vfl %zd(%zd)->%zd", saved_gen3_virtual_fl_size, half_gen3_physical_size, gen3_virtual_fl_size));
        gen3_virtual_fl_size = 0;
    }

    gen_calc[0].end_gen_size_goal = gen2_physical_size + gen2_virtual_fl_size;
    gen_calc[1].end_gen_size_goal = gen3_physical_size + gen3_virtual_fl_size;

    // We calculate the end info here because the ff in fl servo loop is using this.
    calc_end_bgc_fl (max_generation);
    calc_end_bgc_fl (loh_generation);

#ifdef SIMPLE_DPRINTF
    dprintf (BGC_TUNING_LOG, ("BTL: ml: %d (g: %d)(%s), a: %zd (g: %zd, elg: %zd+%zd=%zd, %zd+%zd=%zd, pi=%zd), vfl: %zd=%zd+%zd",
        current_memory_load, memory_load_goal,
        ((current_available_physical > available_memory_goal) ? "above" : "below"),
        current_available_physical, available_memory_goal,
        gen2_physical_size, gen2_virtual_fl_size, gen_calc[0].end_gen_size_goal,
        gen3_physical_size, gen3_virtual_fl_size, gen_calc[1].end_gen_size_goal,
        (ptrdiff_t)accu_error_panic,
        (ptrdiff_t)total_virtual_fl_size, vfl_from_kp, vfl_from_ki));
#endif //SIMPLE_DPRINTF
}

// Returns the current value of panic_activated_p. set_total_gen_sizes sets that flag
// when the entry memory load reaches memory_load_goal + memory_load_goal_slack, and
// calculate_ml_tuning clears it when called with reduce_p.
bool gc_heap::bgc_tuning::should_trigger_ngc2()
{
    return panic_activated_p;
}

// This is our outer ml servo loop where we calculate the control for the inner fl servo loop.
void gc_heap::bgc_tuning::convert_to_fl (bool use_gen2_loop_p, bool use_gen3_loop_p)
{
    size_t current_bgc_count = full_gc_counts[gc_type_background];

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
        hp->bgc_maxgen_end_fl_size = generation_free_list_space (hp->generation_of (max_generation));
    }
#else
    bgc_maxgen_end_fl_size = generation_free_list_space (generation_of (max_generation));
#endif //MULTIPLE_HEAPS

    init_bgc_end_data (max_generation, use_gen2_loop_p);
    init_bgc_end_data (loh_generation, use_gen3_loop_p);
    set_total_gen_sizes (use_gen2_loop_p, use_gen3_loop_p);

    dprintf (BGC_TUNING_LOG, ("BTL: gen2 %zd, fl %zd(%.3f)->%zd; gen3 %zd, fl %zd(%.3f)->%zd, %zd BGCs",
        current_bgc_end_data[0].gen_size, current_bgc_end_data[0].gen_fl_size,
        current_bgc_end_data[0].gen_flr, gen_calc[0].end_gen_size_goal,
        current_bgc_end_data[1].gen_size, current_bgc_end_data[1].gen_fl_size,
        current_bgc_end_data[1].gen_flr, gen_calc[1].end_gen_size_goal,
        current_bgc_count));
}

void gc_heap::bgc_tuning::record_and_adjust_bgc_end()
{
    if (!bgc_tuning::enable_fl_tuning)
        return;

    uint64_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - process_start_time;
    size_t current_gen1_index = get_current_gc_index (max_generation - 1);
    dprintf (BGC_TUNING_LOG, ("BTL: g2t[en][g1 %zd]: %0.3f minutes",
        current_gen1_index,
        (double)elapsed_time_so_far / (double)1000000 / (double)60));

    if (fl_tuning_triggered)
    {
        num_bgcs_since_tuning_trigger++;
    }

    bool use_gen2_loop_p = (settings.reason == reason_bgc_tuning_soh);
    bool use_gen3_loop_p = (settings.reason == reason_bgc_tuning_loh);
    dprintf (BGC_TUNING_LOG, ("BTL: reason: %d, gen2 loop: %s; gen3 loop: %s, promoted %zd bytes",
        (((settings.reason != reason_bgc_tuning_soh) && (settings.reason != reason_bgc_tuning_loh)) ?
            saved_bgc_tuning_reason : settings.reason),
        (use_gen2_loop_p ? "yes" : "no"),
        (use_gen3_loop_p ? "yes" : "no"),
        get_total_bgc_promoted()));

    convert_to_fl (use_gen2_loop_p, use_gen3_loop_p);

    calculate_tuning (max_generation, true);

    if (total_uoh_a_last_bgc > 0)
    {
        calculate_tuning (loh_generation, true);
    }
    else
    {
        dprintf (BGC_TUNING_LOG, ("BTL: gen3 not allocated"));
    }

    if (next_bgc_p)
    {
        next_bgc_p = false;
        fl_tuning_triggered = true;
        dprintf (BGC_TUNING_LOG, ("BTL: FL tuning ENABLED!!!"));
    }

    saved_bgc_tuning_reason = -1;
}
#endif //BGC_SERVO_TUNING
#endif //BACKGROUND_GC

// Clears the cards in the half-open range [start_card, end_card[.
// Nothing happens (and nothing is logged) when the range is empty.
void gc_heap::clear_cards (size_t start_card, size_t end_card)
{
    if (!(start_card < end_card))
        return;

    const size_t first_word = card_word (start_card);
    const size_t last_word = card_word (end_card);

    if (!(first_word < last_word))
    {
        // Both ends live in the same card word: keep the bits below start_card and the
        // bits from end_card up, clear what is in between.
        card_table [first_word] &= (lowbits (~0, card_bit (start_card)) |
                                    highbits (~0, card_bit (end_card)));
    }
    else
    {
        // First word: keep only the bits below start_card.
        unsigned first_bit = card_bit (start_card);
        card_table [first_word] &= lowbits (~0, first_bit);

        // Words strictly in between are cleared completely.
        for (size_t word = first_word + 1; word < last_word; word++)
            card_table [word] = 0;

        // Last word: keep only the bits from end_card up.
        // Don't write beyond end_card (and possibly uncommitted card table space).
        unsigned last_bit = card_bit (end_card);
        if (last_bit != 0)
        {
            card_table [last_word] &= highbits (~0, last_bit);
        }
    }

#if defined(_DEBUG) && defined(VERIFY_HEAP)
    if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)
    {
        // Verify that every card in the range really is clear now.
        for (size_t card = start_card; card < end_card; card++)
        {
            assert (!(card_set_p (card)));
        }
    }
#endif //_DEBUG && VERIFY_HEAP

    dprintf (3,("Cleared cards [%zx:%zx, %zx:%zx[",
              start_card, (size_t)card_address (start_card),
              end_card, (size_t)card_address (end_card)));
}

// Clears the cards for an address range. start_address goes through align_on_card
// and end_address through align_lower_card before being converted to cards, so only
// cards between those two aligned addresses are cleared.
void gc_heap::clear_card_for_addresses (uint8_t* start_address, uint8_t* end_address)
{
    clear_cards (card_of (align_on_card (start_address)),
                 card_of (align_lower_card (end_address)));
}

// copy [src_card, ...[ to [dst_card, end_card[
// Parameters
//     dst_card : first destination card to write.
//     src_card : source card that corresponds to dst_card.
//     end_card : destination card at which to stop (exclusive).
//     nextp    : when set, a destination card is also set if the card following its
//                source card is set, i.e. each destination card receives the OR of
//                two consecutive source cards.
// A destination card is cleared when the source card(s) it is derived from are clear.
// This will set the same bit twice. Can be optimized.
inline
void gc_heap::copy_cards (size_t dst_card,
                          size_t src_card,
                          size_t end_card,
                          BOOL nextp)
{
    // If the range is empty, this function is a no-op - with the subtlety that
    // either of the accesses card_table[srcwrd] or card_table[dstwrd] could be
    // outside the committed region.  To avoid the access, leave early.
    if (!(dst_card < end_card))
        return;

    // Bit/word cursors into the card table, plus cached copies of the current source
    // and destination words. The destination word is only written back when the
    // cursor leaves it (and once more after the loop).
    unsigned int srcbit = card_bit (src_card);
    unsigned int dstbit = card_bit (dst_card);
    size_t srcwrd = card_word (src_card);
    size_t dstwrd = card_word (dst_card);
    unsigned int srctmp = card_table[srcwrd];
    unsigned int dsttmp = card_table[dstwrd];

    for (size_t card = dst_card; card < end_card; card++)
    {
        // Mirror the current source card into the current destination card.
        if (srctmp & (1 << srcbit))
            dsttmp |= 1 << dstbit;
        else
            dsttmp &= ~(1 << dstbit);
        // Advance the source cursor; on wrapping past bit 31 load the next source word.
        // NOTE(review): 32 is presumably card_word_width - confirm.
        if (!(++srcbit % 32))
        {
            srctmp = card_table[++srcwrd];
            srcbit = 0;
        }

        // srcbit now designates the following source card; with nextp it contributes
        // to the same destination card.
        if (nextp)
        {
            if (srctmp & (1 << srcbit))
                dsttmp |= 1 << dstbit;
        }

        // Advance the destination cursor; on wrapping, flush the finished word and
        // load the next destination word.
        if (!(++dstbit % 32))
        {
            card_table[dstwrd] = dsttmp;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
            // A word with any card set needs its card bundle bit set too.
            if (dsttmp != 0)
            {
                card_bundle_set(cardw_card_bundle(dstwrd));
            }
#endif

            dstwrd++;
            dsttmp = card_table[dstwrd];
            dstbit = 0;
        }
    }

    // Flush the (possibly partially updated) last destination word.
    card_table[dstwrd] = dsttmp;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    if (dsttmp != 0)
    {
        card_bundle_set(cardw_card_bundle(dstwrd));
    }
#endif
}

// Transfers card state for a range of len bytes from src to dest.
// The destination cards strictly inside the range are copied with copy_cards. The
// first and last destination cards may only be partially covered by the range, so
// they are never cleared here - they are only set when a corresponding source card
// is set.
void gc_heap::copy_cards_for_addresses (uint8_t* dest, uint8_t* src, size_t len)
{
    // Adding relocation_distance to a destination address yields the matching source
    // address.
    ptrdiff_t relocation_distance = src - dest;
    // First destination card at or after dest when dest is rounded up to a card
    // boundary, and the card holding the last byte of the destination range.
    size_t start_dest_card = card_of (align_on_card (dest));
    size_t end_dest_card = card_of (dest + len - 1);
    size_t dest_card = start_dest_card;
    size_t src_card = card_of (card_address (dest_card)+relocation_distance);
    dprintf (3,("Copying cards [%zx:%zx->%zx:%zx, ",
                 src_card, (size_t)src, dest_card, (size_t)dest));
    dprintf (3,(" %zx->%zx:%zx[",
              (size_t)src+len, end_dest_card, (size_t)dest+len));

    dprintf (3, ("dest: %p, src: %p, len: %zx, reloc: %zx, align_on_card(dest) is %p",
        dest, src, len, relocation_distance, (align_on_card (dest))));

    dprintf (3, ("start_dest_card: %zx (address: %p), end_dest_card: %zx(addr: %p), card_of (dest): %zx",
        start_dest_card, card_address (start_dest_card), end_dest_card, card_address (end_dest_card), card_of (dest)));

    //First card has two boundaries
    // dest is not card aligned, so the card containing dest is not covered by
    // copy_cards below. Set it if the source card matching the start of
    // start_dest_card is still within the source range and is set.
    if (start_dest_card != card_of (dest))
    {
        if ((card_of (card_address (start_dest_card) + relocation_distance) <= card_of (src + len - 1))&&
            card_set_p (card_of (card_address (start_dest_card) + relocation_distance)))
        {
            dprintf (3, ("card_address (start_dest_card) + reloc is %p, card: %zx(set), src+len-1: %p, card: %zx",
                    (card_address (start_dest_card) + relocation_distance),
                    card_of (card_address (start_dest_card) + relocation_distance),
                    (src + len - 1),
                    card_of (src + len - 1)));

            dprintf (3, ("setting card: %zx", card_of (dest)));
            set_card (card_of (dest));
        }
    }

    // The card holding the first source byte maps onto the card holding dest.
    if (card_set_p (card_of (src)))
        set_card (card_of (dest));


    // Copy the cards in [dest_card, end_dest_card[. When src and dest sit at different
    // offsets within their cards, one destination card overlaps two source cards, so
    // copy_cards is asked to OR in the following source card as well.
    copy_cards (dest_card, src_card, end_dest_card,
                ((dest - align_lower_card (dest)) != (src - align_lower_card (src))));

    //Last card has two boundaries.
    // Set end_dest_card if the source card matching its start is within the source
    // range and is set.
    if ((card_of (card_address (end_dest_card) + relocation_distance) >= card_of (src)) &&
        card_set_p (card_of (card_address (end_dest_card) + relocation_distance)))
    {
        dprintf (3, ("card_address (end_dest_card) + reloc is %p, card: %zx(set), src: %p, card: %zx",
                (card_address (end_dest_card) + relocation_distance),
                card_of (card_address (end_dest_card) + relocation_distance),
                src,
                card_of (src)));

        dprintf (3, ("setting card: %zx", end_dest_card));
        set_card (end_dest_card);
    }

    // The card holding the last source byte maps onto end_dest_card.
    if (card_set_p (card_of (src + len - 1)))
        set_card (end_dest_card);

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    // Set the card bundles for the card words spanning the destination range.
    card_bundles_set(cardw_card_bundle(card_word(card_of(dest))), cardw_card_bundle(align_cardw_on_bundle(card_word(end_dest_card))));
#endif
}

#ifdef BACKGROUND_GC
// Walks the objects in [src, src + len[ and, for each one that
// background_object_marked reports as marked, background-marks the object at the
// same offset from dest.
// this does not need the Interlocked version of mark_array_set_marked.
void gc_heap::copy_mark_bits_for_addresses (uint8_t* dest, uint8_t* src, size_t len)
{
    dprintf (3, ("Copying mark_bits for addresses [%zx->%zx, %zx->%zx[",
                 (size_t)src, (size_t)dest,
                 (size_t)src+len, (size_t)dest+len));

    uint8_t* const range_end = src + len;
    const int align_const = get_alignment_constant (TRUE);
    // Distance to add to a source object to get its destination address.
    const ptrdiff_t delta = dest - src;

    for (uint8_t* obj = src; obj < range_end; )
    {
        // Work out where the next object starts before looking at the mark state.
        uint8_t* const following_obj = obj + Align (size (obj), align_const);

        // NOTE(review): the TRUE argument presumably also clears the source mark bit -
        // confirm against background_object_marked.
        if (background_object_marked (obj, TRUE))
        {
            uint8_t* moved_obj = obj + delta;
            background_mark (moved_obj,
                             background_saved_lowest_address,
                             background_saved_highest_address);
            dprintf (3, ("bc*%zx*bc, b*%zx*b", (size_t)obj, (size_t)(moved_obj)));
        }

        obj = following_obj;
    }
}
#endif //BACKGROUND_GC

// Points the brick containing o at o, then makes every following brick up to (but not
// including) the brick of next_o refer back to o's brick through a relative entry.
void gc_heap::fix_brick_to_highest (uint8_t* o, uint8_t* next_o)
{
    const size_t obj_brick = brick_of (o);
    set_brick (obj_brick, (o - brick_address (obj_brick)));

    const size_t end_brick = brick_of (next_o);
    //dprintf(3,(" fixing brick %zx to point to object %zx, till %zx(%zx)",
    dprintf(3,("b:%zx->%zx-%zx",
               obj_brick, (size_t)o, (size_t)next_o));

    // Each later brick stores its distance back to obj_brick.
    for (size_t b = obj_brick + 1; b < end_brick; b++)
    {
        set_brick (b,(obj_brick - b));
    }
}

// Returns the last object that begins at or before start, found by walking objects
// forward from a known object start. The walk begins at first_object or, when the
// brick table allows it, at an object located through the bricks preceding start's
// brick. Brick entries are fixed up along the way.
// start can not be >= heap_segment_allocated for the segment.
uint8_t* gc_heap::find_first_object (uint8_t* start, uint8_t* first_object)
{
    size_t brick = brick_of (start);
    uint8_t* o = 0;
    // start is in the same brick as first_object, or not past it: there is nothing
    // to gain from the brick table, walk from first_object.
    if ((brick == brick_of (first_object) || (start <= first_object)))
    {
        o = first_object;
    }
    else
    {
        // Search the bricks before start's brick, never going below first_object's
        // brick.
        ptrdiff_t  min_brick = (ptrdiff_t)brick_of (first_object);
        ptrdiff_t  prev_brick = (ptrdiff_t)brick - 1;
        int         brick_entry = 0;
        while (1)
        {
            if (prev_brick < min_brick)
            {
                break;
            }
            // A non-negative entry ends the search at this brick.
            if ((brick_entry = get_brick_entry(prev_brick)) >= 0)
            {
                break;
            }
            // brick_entry is negative here: it is a relative jump back to an earlier
            // brick.
            assert (! ((brick_entry == 0)));
            prev_brick = (brick_entry + prev_brick);

        }
        // Ran below first_object's brick -> fall back to first_object; otherwise the
        // entry (minus 1) is the object's offset from the start of prev_brick.
        o = ((prev_brick < min_brick) ? first_object :
                      brick_address (prev_brick) + brick_entry - 1);
        assert (o <= start);
    }

    assert (Align (size (o)) >= Align (min_obj_size));
    uint8_t*  next_o = o + Align (size (o));
    // curr_cl / min_cl are brick-sized "clumps" (address / brick_size) used to detect
    // when the walk crosses into another brick.
    size_t curr_cl = (size_t)next_o / brick_size;
    size_t min_cl = (size_t)first_object / brick_size;

#ifdef TRACE_GC
    // Number of objects looked at; only reported through the dprintf below.
    unsigned int n_o = 1;
#endif //TRACE_GC

    // End of the current inner walk: the end of next_o's brick, or just past start,
    // whichever comes first.
    uint8_t* next_b = min (align_lower_brick (next_o) + brick_size, start+1);

    while (next_o <= start)
    {
        // Walk object by object until the brick boundary (or start) is passed.
        do
        {
#ifdef TRACE_GC
            n_o++;
#endif //TRACE_GC
            o = next_o;
            assert (Align (size (o)) >= Align (min_obj_size));
            next_o = o + Align (size (o));
            Prefetch (next_o);
        }while (next_o < next_b);

        // The walk moved into a different brick: record o for the bricks it spans.
        if (((size_t)next_o / brick_size) != curr_cl)
        {
            if (curr_cl >= min_cl)
            {
                fix_brick_to_highest (o, next_o);
            }
            curr_cl = (size_t) next_o / brick_size;
        }
        next_b = min (align_lower_brick (next_o) + brick_size, start+1);
    }

    size_t bo = brick_of (o);
    // NOTE(review): n_o only exists under TRACE_GC; this relies on dprintf compiling
    // away otherwise - confirm.
    //dprintf (3, ("Looked at %u objects, fixing brick [%zx-[%zx",
    dprintf (3, ("%u o, [%zx-[%zx",
        n_o, bo, brick));
    // o begins in an earlier brick than start: point o's brick at o and give the
    // bricks up to (not including) start's brick entries -1, -2, ... leading back to it.
    if (bo < brick)
    {
        set_brick (bo, (o - brick_address(bo)));
        size_t b = 1 + bo;
        int x = -1;
        while (b < brick)
        {
            set_brick (b,x--);
            b++;
        }
    }

    return o;
}

#ifdef CARD_BUNDLE
// Find the first non-zero card word between cardw and cardw_end.
// Parameters
//     cardw     : [in/out] As input, the card word index to start searching from.
//                          When TRUE is returned, the index of the non-zero card
//                          word that was found.
//     cardw_end : The card word index at which to stop looking (exclusive).
// Returns TRUE if a non-zero card word was found, FALSE otherwise.
// When card bundles are enabled they are used to skip over runs of card words, and
// a bundle that turns out to contain only zero card words gets cleared.
BOOL gc_heap::find_card_dword (size_t& cardw, size_t cardw_end)
{
    dprintf (3, ("gc: %zd, find_card_dword cardw: %zx, cardw_end: %zx",
                 dd_collection_count (dynamic_data_of (0)), cardw, cardw_end));

    if (card_bundles_enabled())
    {
        size_t cardb = cardw_card_bundle (cardw);
        size_t end_cardb = cardw_card_bundle (align_cardw_on_bundle (cardw_end));
        while (1)
        {
            // Find a non-zero bundle
            while (cardb < end_cardb)
            {
                // Shift the bundle word so that bit 0 corresponds to cardb, then look
                // for the lowest set bit.
                uint32_t cbw = card_bundle_table[card_bundle_word(cardb)] >> card_bundle_bit (cardb);
                DWORD bit_index;
                if (BitScanForward (&bit_index, cbw))
                {
                    cardb += bit_index;
                    break;
                }
                else
                {
                    // Nothing set in the rest of this bundle word: skip to the first
                    // bundle of the next word.
                    cardb += sizeof(cbw)*8 - card_bundle_bit (cardb);
                }
            }
            if (cardb >= end_cardb)
                return FALSE;

            // Scan the card words of this bundle, restricted to [cardw, cardw_end[.
            uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb),cardw)];
            uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1),cardw_end)];
            while ((card_word < card_word_end) && !(*card_word))
            {
                card_word++;
            }

            if (card_word != card_word_end)
            {
                cardw = (card_word - &card_table[0]);
                return TRUE;
            }
            // explore the beginning of the card bundle so we can possibly clear it
            // (only handles the case where the search started one word into the bundle)
            if (cardw == (card_bundle_cardw (cardb) + 1) && !card_table[cardw-1])
            {
                cardw--;
            }
            // explore the end of the card bundle so we can possibly clear it
            card_word_end = &card_table[card_bundle_cardw (cardb+1)];
            while ((card_word < card_word_end) && !(*card_word))
            {
                card_word++;
            }
            // The bundle can be cleared only if the scan covered it from its first
            // card word all the way to its last.
            if ((cardw <= card_bundle_cardw (cardb)) &&
                (card_word == card_word_end))
            {
                // a whole bundle was explored and is empty
                dprintf  (3, ("gc: %zd, find_card_dword clear bundle: %zx cardw:[%zx,%zx[",
                        dd_collection_count (dynamic_data_of (0)),
                        cardb, card_bundle_cardw (cardb),
                        card_bundle_cardw (cardb+1)));
                card_bundle_clear (cardb);
            }

            // Nothing found in this bundle; go on with the next one.
            cardb++;
        }
    }
    else
    {
        // No card bundles: plain linear scan of the card words.
        uint32_t* card_word = &card_table[cardw];
        uint32_t* card_word_end = &card_table [cardw_end];

        while (card_word < card_word_end)
        {
            if ((*card_word) != 0)
            {
                cardw = (card_word - &card_table [0]);
                return TRUE;
            }

            card_word++;
        }
        return FALSE;

    }
}
#endif //CARD_BUNDLE

// Find cards that are set between two points in a card table.
// Parameters
//     card_table    : The card table.
//     card          : [in/out] As input, the card to start searching from.
//                              As output, the first card that's set.
//     card_word_end : The card word at which to stop looking.
//     end_card      : [out] The card that ends the run of set cards starting at card
//                     (exclusive - see the "[card, end_card[" trace below).
// Returns TRUE if a set card was found (card and end_card are then valid), FALSE if
// there is no set card before card_word_end.
BOOL gc_heap::find_card(uint32_t* card_table,
                        size_t&   card,
                        size_t    card_word_end,
                        size_t&   end_card)
{
    uint32_t* last_card_word;
    uint32_t card_word_value;
    uint32_t bit_position;

    if (card_word (card) >= card_word_end)
        return FALSE;

    // Find the first card which is set
    last_card_word = &card_table [card_word (card)];
    bit_position = card_bit (card);
#ifdef CARD_BUNDLE
    // if we have card bundles, consult them before fetching a new card word
    if (bit_position == 0)
    {
        card_word_value = 0;
    }
    else
#endif
    {
        // Drop the cards below the starting card; bit 0 now corresponds to card.
        card_word_value = (*last_card_word) >> bit_position;
    }
    if (!card_word_value)
    {
#ifdef CARD_BUNDLE
        // Using the card bundle, go through the remaining card words between here and
        // card_word_end until we find one that is non-zero.
        // If part of the current word was already examined, start at the next word.
        size_t lcw = card_word(card) + (bit_position != 0);
        if (gc_heap::find_card_dword (lcw, card_word_end) == FALSE)
        {
            return FALSE;
        }
        else
        {
            last_card_word = &card_table [lcw];
            card_word_value = *last_card_word;
        }
        bit_position = 0;
#else //CARD_BUNDLE
        // Go through the remaining card words between here and card_word_end until we find
        // one that is non-zero.
        do
        {
            ++last_card_word;
        }

        while ((last_card_word < &card_table [card_word_end]) && !(*last_card_word));
        if (last_card_word < &card_table [card_word_end])
        {
            card_word_value = *last_card_word;
            // The value is the full, unshifted word we moved to, so the bit position
            // has to restart at 0 - otherwise the starting card's bit offset would be
            // added to the cards computed below.
            bit_position = 0;
        }
        else
        {
            // We failed to find any non-zero card words before we got to card_word_end
            return FALSE;
        }
#endif //CARD_BUNDLE
    }

    // Look for the lowest bit set
    if (card_word_value)
    {
        DWORD bit_index;
        uint8_t res = BitScanForward (&bit_index, card_word_value);
        assert (res != 0);
        card_word_value >>= bit_index;
        bit_position += bit_index;
    }

    // card is the card word index * card size + the bit index within the card
    card = (last_card_word - &card_table[0]) * card_word_width + bit_position;

    do
    {
        // Keep going until we get to an un-set card.
        bit_position++;
        card_word_value = card_word_value / 2;

        // If we reach the end of the card word and haven't hit a 0 yet, start going
        // card word by card word until we get to one that's not fully set (0xFFFF...)
        // or we reach card_word_end.
        if ((bit_position == card_word_width) && (last_card_word < &card_table [card_word_end-1]))
        {
            do
            {
                card_word_value = *(++last_card_word);
            } while ((last_card_word < &card_table [card_word_end-1]) &&
                     (card_word_value == ~0u /* (1 << card_word_width)-1 */));
            bit_position = 0;
        }
    } while (card_word_value & 1);

    end_card = (last_card_word - &card_table [0])* card_word_width + bit_position;

    //dprintf (3, ("find_card: [%zx, %zx[ set", card, end_card));
    dprintf (3, ("fc: [%zx, %zx[", card, end_card));
    return TRUE;
}


//because of heap expansion, computing end is complicated.
// Returns low when it points inside the segment's [mem, allocated[ range, and the
// segment's allocated end otherwise.
uint8_t* compute_next_end (heap_segment* seg, uint8_t* low)
{
    const bool low_in_seg_p = ((heap_segment_mem (seg) <= low) &&
                               (low < heap_segment_allocated (seg)));

    return (low_in_seg_p ? low : heap_segment_allocated (seg));
}


#ifndef USE_REGIONS
// Returns the start of generation (gen_number - 1): its allocation start, or - when
// relocating and gen_number is the generation right above the condemned one - its
// plan allocation start.
uint8_t*
gc_heap::compute_next_boundary (int gen_number,
                                BOOL relocating)
{
    const bool use_plan_start_p = (relocating && (gen_number == (settings.condemned_generation + 1)));

    if (!use_plan_start_p)
    {
        assert (gen_number > settings.condemned_generation);
        return generation_allocation_start (generation_of (gen_number - 1 ));
    }

    //when relocating, the fault line is the plan start of the younger
    //generation because the generation is promoted.
    generation* younger_gen = generation_of (gen_number - 1);
    uint8_t* plan_start = generation_plan_allocation_start (younger_gen);
    assert (plan_start);
    return plan_start;
}
#endif //!USE_REGIONS

// Processes one reference slot (poo) while going through set cards: invokes fn on
// the slot when the referenced object is in the range being collected, and counts
// references that justify keeping the card set.
// Parameters
//     poo               : address of the reference slot being examined.
//     n_gen             : [in/out] incremented each time fn is invoked on the slot.
//     cg_pointers_found : [in/out] incremented for references that point into a
//                         lower generation (see below for the exact conditions).
//     fn                : the card function to call on the slot.
//     nhigh, next_boundary : segments only - a reference in [next_boundary, nhigh[
//                         counts as a cg pointer. Must be 0 with regions.
//     condemned_gen     : regions only - must be -1 with segments.
//     current_gen       : generation of the parent object (regions only).
//
// For regions -
// n_gen means it's pointing into the condemned regions so it's incremented
// if the child object's region is <= condemned_gen.
// cg_pointers_found means it's pointing into a lower generation so it's incremented
// if the child object's region is < current_gen.
inline void
gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen,
                                    size_t& cg_pointers_found,
                                    card_fn fn, uint8_t* nhigh,
                                    uint8_t* next_boundary,
                                    int condemned_gen,
                                    // generation of the parent object
                                    int current_gen
                                    CARD_MARKING_STEALING_ARG(gc_heap* hpt))
{
    // With card marking stealing the heap to operate on (hpt) is passed in; otherwise
    // it is this heap.
#if defined(FEATURE_CARD_MARKING_STEALING) && defined(MULTIPLE_HEAPS)
    int thread = hpt->heap_number;
#else
    THREAD_FROM_HEAP;
#ifdef MULTIPLE_HEAPS
    gc_heap* hpt = this;
#endif //MULTIPLE_HEAPS
#endif //FEATURE_CARD_MARKING_STEALING && MULTIPLE_HEAPS

#ifdef USE_REGIONS
    assert (nhigh == 0);
    assert (next_boundary == 0);
    uint8_t* child_object = *poo;
    // References outside [ephemeral_low, ephemeral_high[ are ignored.
    if ((child_object < ephemeral_low) || (ephemeral_high <= child_object))
        return;

    int child_object_gen = get_region_gen_num (child_object);
    // NOTE(review): the two saved_* locals are not read in this function - presumably
    // kept for debugging.
    int saved_child_object_gen = child_object_gen;
    uint8_t* saved_child_object = child_object;

    if (child_object_gen <= condemned_gen)
    {
        n_gen++;
        call_fn(hpt,fn) (poo THREAD_NUMBER_ARG);
    }

    // When relocating, judge the child by the planned generation of the region that
    // *poo refers to now (fn may have updated the slot above).
    if (fn == &gc_heap::relocate_address)
    {
        child_object_gen = get_region_plan_gen_num (*poo);
    }

    if (child_object_gen < current_gen)
    {
        cg_pointers_found++;
        dprintf (4, ("cg pointer %zx found, %zd so far",
                        (size_t)*poo, cg_pointers_found ));
    }
#else //USE_REGIONS
    assert (condemned_gen == -1);
    // The reference points into this heap's [gc_low, gc_high[ range.
    if ((gc_low <= *poo) && (gc_high > *poo))
    {
        n_gen++;
        call_fn(hpt,fn) (poo THREAD_NUMBER_ARG);
    }
#ifdef MULTIPLE_HEAPS
    else if (*poo)
    {
        // The reference may point into another heap; apply that heap's ranges.
        gc_heap* hp = heap_of_gc (*poo);
        if (hp != this)
        {
            if ((hp->gc_low <= *poo) &&
                (hp->gc_high > *poo))
            {
                n_gen++;
                call_fn(hpt,fn) (poo THREAD_NUMBER_ARG);
            }
            // While relocating, any reference into another heap counts; otherwise only
            // those into that heap's ephemeral range.
            if ((fn == &gc_heap::relocate_address) ||
                ((hp->ephemeral_low <= *poo) &&
                 (hp->ephemeral_high > *poo)))
            {
                cg_pointers_found++;
            }
        }
    }
#endif //MULTIPLE_HEAPS
    // Evaluated on the slot's current value, i.e. after fn may have updated it.
    if ((next_boundary <= *poo) && (nhigh > *poo))
    {
        cg_pointers_found ++;
        dprintf (4, ("cg pointer %zx found, %zd so far",
                     (size_t)*poo, cg_pointers_found ));
    }
#endif //USE_REGIONS
}

// Called when the heap walk reaches an address (po) that lies past the card currently
// being processed.
//
// - If no cross-generation pointers were recorded since the last transition, the cards
//   from 'card' up to the card covering po (capped at 'limit' when card marking stealing
//   is enabled) are cleared and the set/cleared counters are adjusted accordingly.
// - cg_pointers_found is folded into n_eph and reset, and 'card' becomes card_of (po).
// - If the new card is at or beyond end_card, the next run of set cards is searched for
//   and foundp / start_address / limit / end_card are updated with the result.
//
// Returns TRUE when the new card was at or beyond end_card (i.e. a new search was done),
// FALSE otherwise. foundp is always reset to FALSE on entry.
BOOL gc_heap::card_transition (uint8_t* po, uint8_t* end, size_t card_word_end,
                               size_t& cg_pointers_found,
                               size_t& n_eph, size_t& n_card_set,
                               size_t& card, size_t& end_card,
                               BOOL& foundp, uint8_t*& start_address,
                               uint8_t*& limit, size_t& n_cards_cleared
                               CARD_MARKING_STEALING_ARGS(card_marking_enumerator& card_mark_enumerator, heap_segment* seg, size_t &card_word_end_out))
{
    dprintf (3, ("pointer %zx past card %zx, cg %zd", (size_t)po, (size_t)card, cg_pointers_found));
    foundp = FALSE;

    if (cg_pointers_found == 0)
    {
        // nothing interesting was found under these cards - they can be cleared
        dprintf(3,(" CC [%zx, %zx[ ",
                (size_t)card_address(card), (size_t)po));
#ifdef FEATURE_CARD_MARKING_STEALING
        uint8_t* clear_up_to = min (limit, po);
#else // FEATURE_CARD_MARKING_STEALING
        uint8_t* clear_up_to = po;
#endif // FEATURE_CARD_MARKING_STEALING
        size_t clear_end_card = card_of (clear_up_to);
        clear_cards (card, clear_end_card);

        size_t cleared_count = clear_end_card - card;
        n_card_set -= cleared_count;
        n_cards_cleared += cleared_count;
    }

    n_eph += cg_pointers_found;
    cg_pointers_found = 0;
    card = card_of (po);

    if (card < end_card)
    {
        // still inside the current run of set cards
        return FALSE;
    }

    dprintf (3, ("card %zx exceeding end_card %zx",
                (size_t)card, (size_t)end_card));

    foundp = find_card (card_table, card, card_word_end, end_card);
    if (foundp)
    {
        n_card_set += end_card - card;
        start_address = card_address (card);
        dprintf (3, ("NewC: %zx, start: %zx, end: %zx",
                    (size_t)card, (size_t)start_address,
                    (size_t)card_address (end_card)));
    }
    limit = min (end, card_address (end_card));

#ifdef FEATURE_CARD_MARKING_STEALING
    // the card bit @ end_card should not be set
    // if end_card is still shy of the limit set by card_word_end
    assert(!((card_word(end_card) < card_word_end) &&
        card_set_p(end_card)));
    if (!foundp)
    {
        // nothing left in the current chunk - ask the enumerator for another one
        card_word_end_out = 0;
        foundp = find_next_chunk(card_mark_enumerator, seg, n_card_set, start_address, limit, card, end_card, card_word_end_out);
    }
#else
    // the card bit @ end_card should not be set -
    // find_card is supposed to terminate only when it finds a 0 bit
    // or the end of the segment
    assert (!((limit < end) &&
            card_set_p (end_card)));
#endif

    return TRUE;
}

#ifdef FEATURE_CARD_MARKING_STEALING
// Obtains the next chunk of the heap for card marking.
//
// A chunk index is taken either from old_chunk_index (an index kept from a previous call
// that could not be handed out) or by incrementing the shared chunk_index_counter. The
// enumerator then advances its own 'segment' cursor until it reaches the segment that
// contains that chunk index.
//
// Parameters:
//   seg  - the segment the caller is currently working on
//   low  - receives the start address of the chunk (only when true is returned)
//   high - receives the end address of the chunk (only when true is returned)
//
// Returns true when the chunk lies in 'seg'. Returns false when the enumerator has no
// segment left, or when the chunk belongs to a different segment than 'seg'; in the
// latter cases the chunk index is remembered in old_chunk_index for a later call.
bool card_marking_enumerator::move_next(heap_segment* seg, uint8_t*& low, uint8_t*& high)
{
    if (segment == nullptr)
        return false;

    // reuse an index kept from an earlier call before taking a fresh one
    uint32_t chunk_index = old_chunk_index;
    old_chunk_index = INVALID_CHUNK_INDEX;
    if (chunk_index == INVALID_CHUNK_INDEX)
        chunk_index = Interlocked::Increment((volatile int32_t *)chunk_index_counter);

    while (true)
    {
        uint32_t chunk_index_within_seg = chunk_index - segment_start_chunk_index;

        uint8_t* start = heap_segment_mem(segment);
        uint8_t* end = compute_next_end(segment, gc_low);

        // chunks are laid out on CARD_MARKING_STEALING_GRANULARITY boundaries, so the first
        // chunk of a segment starts at the aligned-down address but is reported from 'start'
        uint8_t* aligned_start = (uint8_t*)((size_t)start & ~(CARD_MARKING_STEALING_GRANULARITY - 1));
        size_t seg_size = end - aligned_start;
        uint32_t chunk_count_within_seg = (uint32_t)((seg_size + (CARD_MARKING_STEALING_GRANULARITY - 1)) / CARD_MARKING_STEALING_GRANULARITY);
        if (chunk_index_within_seg < chunk_count_within_seg)
        {
            if (seg == segment)
            {
                // first chunk begins at the segment start, last chunk ends at the segment end
                low = (chunk_index_within_seg == 0) ? start : (aligned_start + (size_t)chunk_index_within_seg * CARD_MARKING_STEALING_GRANULARITY);
                high = (chunk_index_within_seg + 1 == chunk_count_within_seg) ? end : (aligned_start + (size_t)(chunk_index_within_seg + 1) * CARD_MARKING_STEALING_GRANULARITY);
                chunk_high = high;

                dprintf (3, ("cme:mn ci: %u, low: %p, high: %p", chunk_index, low, high));

                return true;
            }
            else
            {
                // we found the correct segment, but it's not the segment our caller is in

                // our caller should still be in one of the previous segments
#ifdef _DEBUG
                for (heap_segment* cur_seg = seg; cur_seg != segment; cur_seg = heap_segment_next_in_range(cur_seg))
                {
                    assert(cur_seg);
                }
#endif //_DEBUG

                // keep the chunk index for later
                old_chunk_index = chunk_index;

                // "seg" is the caller's segment, "segment" is the enumerator's current segment
                dprintf (3, ("cme:mn oci: %u, seg mismatch seg: %p, segment: %p", old_chunk_index, heap_segment_mem (seg), heap_segment_mem (segment)));

                return false;
            }
        }

        // the chunk index is past this segment - move on and rebase the index
        segment = heap_segment_next_in_range(segment);
        segment_start_chunk_index += chunk_count_within_seg;
        if (segment == nullptr)
        {
            // keep the chunk index for later
            old_chunk_index = chunk_index;

            dprintf (3, ("cme:mn oci: %u no more segments", old_chunk_index));

            return false;
        }
    }
}

// Looks for the next run of set cards, pulling new chunks from card_mark_enumerator as
// each one is exhausted.
//
// A card_word_end of 0 means "no chunk is current", in which case the card search is
// skipped and a chunk is requested right away.
//
// On success returns true with card / end_card describing the run, start_address set to
// the address of the first card, limit capped at the end of the current chunk, and
// n_card_set increased by the length of the run. Returns false once the enumerator has no
// more chunks to hand out for 'seg'.
bool gc_heap::find_next_chunk(card_marking_enumerator& card_mark_enumerator, heap_segment* seg, size_t& n_card_set,
    uint8_t*& start_address, uint8_t*& limit,
    size_t& card, size_t& end_card, size_t& card_word_end)
{
    for (;;)
    {
        const bool have_current_chunk = (card_word_end != 0);
        if (have_current_chunk && find_card(card_table, card, card_word_end, end_card))
        {
            assert(end_card <= card_word_end * card_word_width);
            n_card_set += end_card - card;
            start_address = card_address(card);
            dprintf(3, ("NewC: %zx, start: %zx, end: %zx",
                (size_t)card, (size_t)start_address,
                (size_t)card_address(end_card)));
            limit = min(card_mark_enumerator.get_chunk_high(), card_address(end_card));
            dprintf (3, ("New run of cards on heap %d: [%zx,%zx[", heap_number, (size_t)start_address, (size_t)limit));
            return true;
        }

        // the current chunk (if any) has no more set cards - fetch another one
        uint8_t* next_low = nullptr;
        uint8_t* next_high = nullptr;
        const bool got_chunk = card_mark_enumerator.move_next(seg, next_low, next_high);
        if (!got_chunk)
        {
            dprintf (3, ("No more chunks on heap %d\n", heap_number));
            return false;
        }

        // never move the card cursor backwards when entering the new chunk
        card = max(card, card_of(next_low));
        card_word_end = (card_of(align_on_card_word(next_high)) / card_word_width);
        dprintf (3, ("Moved to next chunk on heap %d: [%zx,%zx[", heap_number, (size_t)next_low, (size_t)next_high));
    }
}
#endif // FEATURE_CARD_MARKING_STEALING

// Walks the cards of the segments starting at generation_start_segment of max_generation
// and applies 'fn' (through mark_through_cards_helper) to the pointer fields of objects
// covered by set cards.
//
// Parameters:
//   fn         - the per-pointer callback; compared against &gc_heap::relocate_address
//                to special-case collectible class objects
//   relocating - selects how the non-regions boundaries are computed and which summary
//                is produced at the end
//   hpt        - (card marking stealing only) the heap on whose behalf the work is done
//
// While walking, cards under which no cross-generation pointer was found are cleared.
// Counters kept along the way:
//   n_eph      - accumulated cg_pointers_found ("cross" in the logs)
//   n_gen      - incremented by mark_through_cards_helper ("useful" in the logs)
//   n_card_set - cards found set minus cards cleared
// When not relocating, n_eph / n_gen are either added to n_eph_soh / n_gen_soh (card
// marking stealing) or used to compute generation_skip_ratio.
void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating CARD_MARKING_STEALING_ARG(gc_heap* hpt))
{
#ifdef BACKGROUND_GC
#ifdef USE_REGIONS
    dprintf (3, ("current_sweep_pos is %p", current_sweep_pos));
#else
    dprintf (3, ("current_sweep_pos is %p, saved_sweep_ephemeral_seg is %p(%p)",
                 current_sweep_pos, saved_sweep_ephemeral_seg, saved_sweep_ephemeral_start));
#endif //USE_REGIONS
    // diagnostics only: log the segments of the generations below max_generation
    for (int i = get_start_generation_index(); i < max_generation; i++)
    {
        heap_segment* soh_seg = heap_segment_rw (generation_start_segment (generation_of (i)));
        _ASSERTE(soh_seg != NULL);

        while (soh_seg)
        {
            dprintf (3, ("seg %p, bgc_alloc: %p, alloc: %p",
                soh_seg,
                heap_segment_background_allocated (soh_seg),
                heap_segment_allocated (soh_seg)));

            soh_seg = heap_segment_next_rw (soh_seg);
        }
    }
#endif //BACKGROUND_GC

    size_t end_card = 0;

    generation*   oldest_gen        = generation_of (max_generation);
    int           curr_gen_number   = max_generation;
    // Note - condemned_gen is only needed for regions and the other 2 are
    // only for if USE_REGIONS is not defined, but I need to pass them to a
    // function inside the macro below so just assert they are the unused values.
#ifdef USE_REGIONS
    uint8_t* low = 0;
    uint8_t*      gen_boundary      = 0;
    uint8_t*      next_boundary     = 0;
    int condemned_gen               = settings.condemned_generation;
    uint8_t*      nhigh             = 0;
#else
    uint8_t* low = gc_low;
    uint8_t* high = gc_high;
    uint8_t*      gen_boundary      = generation_allocation_start(generation_of(curr_gen_number - 1));
    uint8_t*      next_boundary     = compute_next_boundary(curr_gen_number, relocating);
    int condemned_gen = -1;
    uint8_t*      nhigh             = (relocating ?
                                       heap_segment_plan_allocated (ephemeral_heap_segment) : high);
#endif //USE_REGIONS
    heap_segment* seg               = heap_segment_rw (generation_start_segment (oldest_gen));
    _ASSERTE(seg != NULL);

    uint8_t*      beg               = get_soh_start_object (seg, oldest_gen);
    uint8_t*      end               = compute_next_end (seg, low);
    uint8_t*      last_object       = beg;

    size_t  cg_pointers_found = 0;

    size_t  card_word_end = (card_of (align_on_card_word (end)) / card_word_width);

    size_t        n_eph             = 0;
    size_t        n_gen             = 0;
    size_t        n_card_set        = 0;

    BOOL          foundp            = FALSE;
    uint8_t*      start_address     = 0;
    uint8_t*      limit             = 0;
    size_t        card              = card_of (beg);
#ifdef BACKGROUND_GC
    BOOL consider_bgc_mark_p        = FALSE;
    BOOL check_current_sweep_p      = FALSE;
    BOOL check_saved_sweep_p        = FALSE;
    should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC

    dprintf(3, ("CMs: %zx->%zx", (size_t)beg, (size_t)end));
    size_t total_cards_cleared = 0;

#ifdef FEATURE_CARD_MARKING_STEALING
    card_marking_enumerator card_mark_enumerator (seg, low, (VOLATILE(uint32_t)*)&card_mark_chunk_index_soh);
    // 0 tells find_next_chunk that there is no current chunk yet
    card_word_end = 0;
#endif // FEATURE_CARD_MARKING_STEALING

    while (1)
    {
        // the object walk moved past the current card
        if (card_of(last_object) > card)
        {
            dprintf (3, ("Found %zd cg pointers", cg_pointers_found));
            if (cg_pointers_found == 0)
            {
                uint8_t* last_object_processed = last_object;
#ifdef FEATURE_CARD_MARKING_STEALING
                last_object_processed = min(limit, last_object);
#endif // FEATURE_CARD_MARKING_STEALING
                dprintf (3, (" Clearing cards [%zx, %zx[ ", (size_t)card_address(card), (size_t)last_object_processed));

                size_t card_last_obj = card_of (last_object_processed);
                clear_cards(card, card_last_obj);

                // We need to be careful of the accounting here because we could be in the situation where there are more set cards between end of
                // last set card batch and last_object_processed. We will be clearing all of them. But we can't count the set cards we haven't
                // discovered yet or we can get a negative number for n_card_set. However, if last_object_processed lands before what end_card
                // corresponds to, we can't count the whole batch because it will be handled by a later clear_cards.
                size_t cards_to_deduct = (card_last_obj < end_card) ? (card_last_obj - card) : (end_card - card);
                n_card_set -= cards_to_deduct;
                total_cards_cleared += cards_to_deduct;
            }

            n_eph += cg_pointers_found;
            cg_pointers_found = 0;
            card = card_of (last_object);
        }

        // the current run of set cards is exhausted - look for the next one
        if (card >= end_card)
        {
#ifdef FEATURE_CARD_MARKING_STEALING
            // find another chunk with some cards set
            foundp = find_next_chunk(card_mark_enumerator, seg, n_card_set, start_address, limit, card, end_card, card_word_end);
#else // FEATURE_CARD_MARKING_STEALING
            foundp = find_card(card_table, card, card_word_end, end_card);
            if (foundp)
            {
                n_card_set += end_card - card;
                start_address = max (beg, card_address (card));
            }
            limit = min (end, card_address (end_card));
#endif // FEATURE_CARD_MARKING_STEALING
        }
        // done with this segment: finish the accounting and move to the next one, if any
        if (!foundp || (last_object >= end) || (card_address (card) >= end))
        {
            if (foundp && (cg_pointers_found == 0))
            {
#ifndef USE_REGIONS
                // in the segment case, need to recompute end_card so we don't clear cards
                // for the next generation
                end_card = card_of (end);
#endif
                dprintf(3,(" Clearing cards [%zx, %zx[ ", (size_t)card_address(card),
                            (size_t)card_address(end_card)));
                clear_cards (card, end_card);
                n_card_set -= (end_card - card);
                total_cards_cleared += (end_card - card);
            }
            n_eph += cg_pointers_found;
            cg_pointers_found = 0;
#ifdef FEATURE_CARD_MARKING_STEALING
            // we have decided to move to the next segment - make sure we exhaust the chunk enumerator for this segment
            card_mark_enumerator.exhaust_segment(seg);
#endif // FEATURE_CARD_MARKING_STEALING

            seg = heap_segment_next_in_range (seg);
#ifdef USE_REGIONS
            if (!seg)
            {
                curr_gen_number--;
                if (curr_gen_number > condemned_gen)
                {
                    // Switch to regions for this generation.
                    seg = generation_start_segment (generation_of (curr_gen_number));
#ifdef FEATURE_CARD_MARKING_STEALING
                    card_mark_enumerator.switch_to_segment(seg);
#endif // FEATURE_CARD_MARKING_STEALING
                    dprintf (REGIONS_LOG, ("h%d switching to gen%d start seg %zx",
                        heap_number, curr_gen_number, (size_t)seg));
                }
            }
#endif //USE_REGIONS

            if (seg)
            {
#ifdef BACKGROUND_GC
                should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC
                beg = heap_segment_mem (seg);
#ifdef USE_REGIONS
                end = heap_segment_allocated (seg);
#else
                end = compute_next_end (seg, low);
#endif //USE_REGIONS
#ifdef FEATURE_CARD_MARKING_STEALING
                card_word_end = 0;
#else // FEATURE_CARD_MARKING_STEALING
                card_word_end = card_of (align_on_card_word (end)) / card_word_width;
#endif // FEATURE_CARD_MARKING_STEALING
                // restart the walk state for the new segment; end_card = 0 forces a new card search
                card = card_of (beg);
                last_object = beg;
                end_card = 0;
                continue;
            }
            else
            {
                break;
            }
        }

        // we have a run of set cards [card, end_card[ covering [start_address, limit[
        assert (card_set_p (card));
        {
            uint8_t* o = last_object;

            o = find_first_object (start_address, last_object);
            // Never visit an object twice.
            assert (o >= last_object);

#ifndef USE_REGIONS
            //dprintf(3,("Considering card %zx start object: %zx, %zx[ boundary: %zx",
            dprintf(3, ("c: %zx, o: %zx, l: %zx[ boundary: %zx",
                   card, (size_t)o, (size_t)limit, (size_t)gen_boundary));
#endif //USE_REGIONS

            while (o < limit)
            {
                assert (Align (size (o)) >= Align (min_obj_size));
                size_t s = size (o);

                // next_o is the next object in the heap walk
                uint8_t* next_o =  o + Align (s);

                // while cont_o is the object we should continue with at the end_object label
                uint8_t* cont_o = next_o;

                Prefetch (next_o);

#ifndef USE_REGIONS
                if ((o >= gen_boundary) &&
                    (seg == ephemeral_heap_segment))
                {
                    dprintf (3, ("switching gen boundary %zx", (size_t)gen_boundary));
                    curr_gen_number--;
                    assert ((curr_gen_number > 0));
                    gen_boundary = generation_allocation_start
                        (generation_of (curr_gen_number - 1));
                    next_boundary = (compute_next_boundary
                                     (curr_gen_number, relocating));
                }
#endif //!USE_REGIONS

                dprintf (4, ("|%zx|", (size_t)o));

                // the object ends before the first address covered by the set cards
                if (next_o < start_address)
                {
                    goto end_object;
                }

#ifdef BACKGROUND_GC
                if (!fgc_should_consider_object (o, seg, consider_bgc_mark_p, check_current_sweep_p, check_saved_sweep_p))
                {
                    goto end_object;
                }
#endif //BACKGROUND_GC

#ifdef COLLECTIBLE_CLASS
                if (is_collectible(o))
                {
                    BOOL passed_end_card_p = FALSE;

                    if (card_of (o) > card)
                    {
                        passed_end_card_p = card_transition (o, end, card_word_end,
                            cg_pointers_found,
                            n_eph, n_card_set,
                            card, end_card,
                            foundp, start_address,
                            limit, total_cards_cleared
                            CARD_MARKING_STEALING_ARGS(card_mark_enumerator, seg, card_word_end));
                    }

                    if ((!passed_end_card_p || foundp) && (card_of (o) == card))
                    {
                        // card is valid and it covers the head of the object
                        if (fn == &gc_heap::relocate_address)
                        {
                            cg_pointers_found++;
                        }
                        else
                        {
                            uint8_t* class_obj = get_class_object (o);
                            mark_through_cards_helper (&class_obj, n_gen,
                                                       cg_pointers_found, fn,
                                                       nhigh, next_boundary,
                                                       condemned_gen, curr_gen_number CARD_MARKING_STEALING_ARG(hpt));
                        }
                    }

                    if (passed_end_card_p)
                    {
                        if (foundp && (card_address (card) < next_o))
                        {
                            goto go_through_refs;
                        }
                        else if (foundp && (start_address < limit))
                        {
                            cont_o = find_first_object (start_address, o);
                            goto end_object;
                        }
                        else
                            goto end_limit;
                    }
                }

go_through_refs:
#endif //COLLECTIBLE_CLASS

                if (contain_pointers (o))
                {
                    dprintf(3,("Going through %zx start_address: %zx", (size_t)o, (size_t)start_address));

                    {
                        dprintf (4, ("normal object path"));
                        go_through_object
                            (method_table(o), o, s, poo,
                             start_address, use_start, (o + s),
                             {
                                 dprintf (4, ("<%zx>:%zx", (size_t)poo, (size_t)*poo));
                                 if (card_of ((uint8_t*)poo) > card)
                                 {
                                     BOOL passed_end_card_p  = card_transition ((uint8_t*)poo, end,
                                            card_word_end,
                                            cg_pointers_found,
                                            n_eph, n_card_set,
                                            card, end_card,
                                            foundp, start_address,
                                            limit, total_cards_cleared
                                            CARD_MARKING_STEALING_ARGS(card_mark_enumerator, seg, card_word_end));

                                     if (passed_end_card_p)
                                     {
                                         if (foundp && (card_address (card) < next_o))
                                         {
                                             //new_start();
                                             {
                                                 if (ppstop <= (uint8_t**)start_address)
                                                     {break;}
                                                 else if (poo < (uint8_t**)start_address)
                                                     {poo = (uint8_t**)start_address;}
                                             }
                                         }
                                         else if (foundp && (start_address < limit))
                                         {
                                             cont_o = find_first_object (start_address, o);
                                             goto end_object;
                                         }
                                         else
                                             goto end_limit;
                                     }
                                 }

                                 mark_through_cards_helper (poo, n_gen,
                                                            cg_pointers_found, fn,
                                                            nhigh, next_boundary,
                                                            condemned_gen, curr_gen_number CARD_MARKING_STEALING_ARG(hpt));
                             }
                            );
                    }
                }

            end_object:
                // when the walk crosses a brick boundary and the brick of o has a negative entry, fix it up
                if (((size_t)next_o / brick_size) != ((size_t) o / brick_size))
                {
                    if (brick_table [brick_of (o)] <0)
                        fix_brick_to_highest (o, next_o);
                }
                o = cont_o;
            }
        end_limit:
            last_object = o;
        }
    }
    // compute the efficiency ratio of the card table
    if (!relocating)
    {
#ifdef FEATURE_CARD_MARKING_STEALING
        Interlocked::ExchangeAddPtr(&n_eph_soh, n_eph);
        Interlocked::ExchangeAddPtr(&n_gen_soh, n_gen);
        dprintf (3, ("h%d marking h%d Msoh: cross: %zd, useful: %zd, cards set: %zd, cards cleared: %zd, ratio: %d",
            hpt->heap_number, heap_number, n_eph, n_gen, n_card_set, total_cards_cleared,
            (n_eph ? (int)(((float)n_gen / (float)n_eph) * 100) : 0)));
        dprintf (3, ("h%d marking h%d Msoh: total cross %zd, useful: %zd, running ratio: %d",
            hpt->heap_number, heap_number, (size_t)n_eph_soh, (size_t)n_gen_soh,
            (n_eph_soh ? (int)(((float)n_gen_soh / (float)n_eph_soh) * 100) : 0)));
#else
        generation_skip_ratio = ((n_eph > MIN_SOH_CROSS_GEN_REFS) ? (int)(((float)n_gen / (float)n_eph) * 100) : 100);
        dprintf (3, ("marking h%d Msoh: cross: %zd, useful: %zd, cards set: %zd, cards cleared: %zd, ratio: %d",
            heap_number, n_eph, n_gen, n_card_set, total_cards_cleared, generation_skip_ratio));
#endif //FEATURE_CARD_MARKING_STEALING
    }
    else
    {
        // "cross" is n_eph and "useful" is n_gen, same as in the marking case above
        dprintf (3, ("R: Msoh: cross: %zd, useful: %zd, cards set: %zd, cards cleared: %zd, ratio: %d",
            n_eph, n_gen, n_card_set, total_cards_cleared, generation_skip_ratio));
    }
}

#ifndef USE_REGIONS
#ifdef SEG_REUSE_STATS
// Logs the contents of a bucket array and totals it up.
//
// Parameters:
//   ordered_indices - array of 'count' item counts; entry i holds the number of items
//                     of size 2^(MIN_INDEX_POWER2 + i)
//   count           - number of entries in ordered_indices
//   total_size      - receives the sum of (item count << (MIN_INDEX_POWER2 + i))
//
// Returns the total number of items across all buckets.
size_t gc_heap::dump_buckets (size_t* ordered_indices, int count, size_t* total_size)
{
    size_t total_items = 0;
    *total_size = 0;
    for (int i = 0; i < count; i++)
    {
        total_items += ordered_indices[i];
        *total_size += ordered_indices[i] << (MIN_INDEX_POWER2 + i);
        // the counts are size_t, so they need a size_t format specifier
        dprintf (SEG_REUSE_LOG_0, ("[%d]%4zu 2^%2d", heap_number, ordered_indices[i], (MIN_INDEX_POWER2 + i)));
    }
    dprintf (SEG_REUSE_LOG_0, ("[%d]Total %zu items, total size is 0x%zx", heap_number, total_items, *total_size));
    return total_items;
}
#endif // SEG_REUSE_STATS

// Accounts for one plug while building the ordered plug array.
//
// If last_plug is the plug of the oldest pin in the pinned plug queue, that pin is
// dequeued and nothing is counted. Otherwise the plug size (plus the padding selected by
// the build options) is added to total_ephemeral_plugs and the bucket matching its size
// rounded up to a power of 2 is incremented in ordered_plug_indices.
void gc_heap::count_plug (size_t last_plug_size, uint8_t*& last_plug)
{
    // detect pinned plugs
    const bool is_oldest_pin = !pinned_plug_que_empty_p() && (last_plug == pinned_plug (oldest_pin()));
    if (is_oldest_pin)
    {
        deque_pinned_plug();
        update_oldest_pinned_plug();
        dprintf (3, ("deque pin,now oldest pin is %p", pinned_plug (oldest_pin())));
        return;
    }

    BOOL is_padded = FALSE;
    size_t plug_size = last_plug_size;
    plug_size += Align(min_obj_size);

#ifdef SHORT_PLUGS
    is_padded = TRUE;
    plug_size += Align (min_obj_size);
#endif //SHORT_PLUGS

#ifdef RESPECT_LARGE_ALIGNMENT
    plug_size += switch_alignment_size (is_padded);
#endif //RESPECT_LARGE_ALIGNMENT

    total_ephemeral_plugs += plug_size;

    size_t rounded_plug_size = round_up_power2 (plug_size);
    ordered_plug_indices[relative_index_power2_plug (rounded_plug_size)]++;
    dprintf (SEG_REUSE_LOG_1, ("[%d]count_plug: adding 0x%p - %zd (2^%d) to ordered plug array",
        heap_number,
        last_plug,
        plug_size,
        (relative_index_power2_plug (rounded_plug_size) + MIN_INDEX_POWER2)));
}

// Visits the plug tree rooted at 'tree' in order (left child, node, right child).
//
// For every node visited while last_plug is non-null, the size of last_plug is computed
// as the distance from last_plug to the start of the gap in front of the current node and
// passed to count_plug. last_plug is then advanced to the current node, so the final plug
// visited is left for the caller to count.
void gc_heap::count_plugs_in_brick (uint8_t* tree, uint8_t*& last_plug)
{
    assert ((tree != NULL));

    if (node_left_child (tree))
        count_plugs_in_brick (tree + node_left_child (tree), last_plug);

    if (last_plug != 0)
    {
        size_t gap_size = node_gap_size (tree);
        uint8_t* gap = tree - gap_size;
        // the previous plug ends where the gap in front of this node begins
        size_t last_plug_size = (gap - last_plug);
        dprintf (3, ("tree: %p, last plug: %p, gap size: %zx, gap: %p, last plug size: %zx",
            tree, last_plug, gap_size, gap, last_plug_size));

        if (tree == oldest_pinned_plug)
        {
            dprintf (3, ("tree %p is pinned, last plug is %p, size is %zx",
                tree, last_plug, last_plug_size));
            mark* pin_entry = oldest_pin();
            if (pin_entry->has_pre_plug_info())
            {
                last_plug_size += sizeof (gap_reloc_pair);
                dprintf (3, ("pin %p has pre plug, adjusting plug size to %zx", tree, last_plug_size));
            }
        }

        // No size assert here - for a pinned plug the size can be less than
        // Align (min_obj_size).
        count_plug (last_plug_size, last_plug);
    }

    last_plug = tree;

    if (node_right_child (tree))
        count_plugs_in_brick (tree + node_right_child (tree), last_plug);
}

// Fills ordered_plug_indices (and its saved copy) with the number of plugs per
// power-of-2 size bucket.
//
// The walk covers the bricks from generation_limit (max_generation) up to the allocated
// end of the ephemeral segment. An extra entry of END_SPACE_AFTER_GC_FL is added so that
// space for the next allocation is accounted for as well.
void gc_heap::build_ordered_plug_indices ()
{
    memset (ordered_plug_indices, 0, sizeof(ordered_plug_indices));
    memset (saved_ordered_plug_indices, 0, sizeof(saved_ordered_plug_indices));

    uint8_t*  start_address = generation_limit (max_generation);
    uint8_t* end_address = heap_segment_allocated (ephemeral_heap_segment);
    size_t  current_brick = brick_of (start_address);
    size_t  end_brick = brick_of (end_address - 1);
    uint8_t* last_plug = 0;

    //Look for the right pinned plug to start from.
    reset_pinned_queue_bos();
    while (!pinned_plug_que_empty_p())
    {
        mark* m = oldest_pin();
        if ((m->first >= start_address) && (m->first < end_address))
        {
            dprintf (3, ("found a pin %p between %p and %p", m->first, start_address, end_address));

            break;
        }
        else
            deque_pinned_plug();
    }

    update_oldest_pinned_plug();

    // only bricks with a non-negative entry have a plug tree to walk
    while (current_brick <= end_brick)
    {
        int brick_entry =  brick_table [ current_brick ];
        if (brick_entry >= 0)
        {
            count_plugs_in_brick (brick_address (current_brick) + brick_entry -1, last_plug);
        }

        current_brick++;
    }

    // count_plugs_in_brick leaves the last plug it visited uncounted
    if (last_plug !=0)
    {
        count_plug (end_address - last_plug, last_plug);
    }

    // we need to make sure that after fitting all the existing plugs, we
    // have big enough free space left to guarantee that the next allocation
    // will succeed.
    size_t extra_size = END_SPACE_AFTER_GC_FL;
    total_ephemeral_plugs += extra_size;
    dprintf (SEG_REUSE_LOG_0, ("Making sure we can fit a large object after fitting all plugs"));
    ordered_plug_indices[relative_index_power2_plug (round_up_power2 (extra_size))]++;

    memcpy (saved_ordered_plug_indices, ordered_plug_indices, sizeof(ordered_plug_indices));

#ifdef SEG_REUSE_STATS
    dprintf (SEG_REUSE_LOG_0, ("Plugs:"));
    size_t total_plug_power2 = 0;
    dump_buckets (ordered_plug_indices, MAX_NUM_BUCKETS, &total_plug_power2);
    // the percentage expression is a size_t, so it is printed with %zu
    dprintf (SEG_REUSE_LOG_0, ("plugs: 0x%zx (rounded up to 0x%zx (%zu%%))",
                total_ephemeral_plugs,
                total_plug_power2,
                (total_ephemeral_plugs ?
                    (total_plug_power2 * 100 / total_ephemeral_plugs) :
                    0)));
    dprintf (SEG_REUSE_LOG_0, ("-------------------"));
#endif // SEG_REUSE_STATS
}

// Zeroes both the working free space bucket array and its saved copy.
void gc_heap::init_ordered_free_space_indices ()
{
    memset (saved_ordered_free_space_indices, 0, sizeof(saved_ordered_free_space_indices));
    memset (ordered_free_space_indices, 0, sizeof(ordered_free_space_indices));
}

void gc_heap::trim_free_spaces_indices ()
{
    trimmed_free_space_index = -1;
    size_t max_count = max_free_space_items - 1;
    size_t count = 0;
    int i = 0;
    for (i = (MAX_NUM_BUCKETS - 1); i >= 0; i--)
    {
        count += ordered_free_space_indices[i];

        if (count >= max_count)
        {
            break;
        }
    }

    ptrdiff_t extra_free_space_items = count - max_count;

    if (extra_free_space_items > 0)
    {
        ordered_free_space_indices[i] -= extra_free_space_items;
        free_space_items = max_count;
        trimmed_free_space_index = i;
    }
    else
    {
        free_space_items = count;
    }

    if (i == -1)
    {
        i = 0;
    }

    free_space_buckets = MAX_NUM_BUCKETS - i;

    for (--i; i >= 0; i--)
    {
        ordered_free_space_indices[i] = 0;
    }

    memcpy (saved_ordered_free_space_indices,
            ordered_free_space_indices,
            sizeof(ordered_free_space_indices));
}

// Tries to fit the blocks of bucket small_index into the free spaces of bucket big_index.
// We fit as many plugs as we can and update the number of plugs left and the number
// of free spaces left.
//
// Returns TRUE right away if there are no blocks in small_index, and FALSE right away if
// there are no spaces in big_index. Otherwise all spaces in big_index are consumed:
// - if they hold more than the blocks need, the blocks count drops to 0 and the leftover
//   capacity is redistributed to the space buckets from small_index up to big_index;
// - otherwise the blocks count is reduced by what the spaces could hold.
// The return value is TRUE when all blocks of small_index were fitted.
BOOL gc_heap::can_fit_in_spaces_p (size_t* ordered_blocks, int small_index, size_t* ordered_spaces, int big_index)
{
    assert (small_index <= big_index);
    assert (big_index < MAX_NUM_BUCKETS);

    const size_t small_blocks = ordered_blocks[small_index];
    if (small_blocks == 0)
        return TRUE;

    const size_t big_spaces = ordered_spaces[big_index];
    if (big_spaces == 0)
        return FALSE;

    dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting %zu 2^%d plugs into %zu 2^%d free spaces",
        heap_number,
        small_blocks, (small_index + MIN_INDEX_POWER2),
        big_spaces, (big_index + MIN_INDEX_POWER2)));

    // how many small-sized blocks the big spaces can hold in total
    const size_t big_to_small = big_spaces << (big_index - small_index);

    ptrdiff_t leftover_small_spaces = big_to_small - small_blocks;
    dprintf (SEG_REUSE_LOG_1, ("[%d]%zu 2^%d spaces can fit %zu 2^%d blocks",
        heap_number,
        big_spaces, (big_index + MIN_INDEX_POWER2), big_to_small, (small_index + MIN_INDEX_POWER2)));

    const BOOL can_fit = (leftover_small_spaces >= 0);
    if (can_fit)
    {
        dprintf (SEG_REUSE_LOG_1, ("[%d]Can fit with %zd 2^%d extras blocks",
            heap_number,
            leftover_small_spaces, (small_index + MIN_INDEX_POWER2)));
    }

    dprintf (SEG_REUSE_LOG_1, ("[%d]Setting # of 2^%d spaces to 0", heap_number, (big_index + MIN_INDEX_POWER2)));
    ordered_spaces[big_index] = 0;

    if (leftover_small_spaces <= 0)
    {
        dprintf (SEG_REUSE_LOG_1, ("[%d]Decreasing # of 2^%d blocks from %zu to %zu",
            heap_number,
            (small_index + MIN_INDEX_POWER2),
            ordered_blocks[small_index],
            (ordered_blocks[small_index] - big_to_small)));
        ordered_blocks[small_index] -= big_to_small;
    }
    else
    {
        dprintf (SEG_REUSE_LOG_1, ("[%d]Setting # of 2^%d blocks to 0", heap_number, (small_index + MIN_INDEX_POWER2)));
        ordered_blocks[small_index] = 0;

        // hand the leftover back bit by bit: each set low bit becomes one space in the
        // current bucket, and the remainder is carried to the next bigger bucket
        int bucket = small_index;
        while (bucket < big_index)
        {
            if (leftover_small_spaces & 1)
            {
                dprintf (SEG_REUSE_LOG_1, ("[%d]Increasing # of 2^%d spaces from %zu to %zu",
                    heap_number,
                    (bucket + MIN_INDEX_POWER2), ordered_spaces[bucket], (ordered_spaces[bucket] + 1)));
                ordered_spaces[bucket] += 1;
            }
            leftover_small_spaces >>= 1;
            bucket++;
        }

        dprintf (SEG_REUSE_LOG_1, ("[%d]Finally increasing # of 2^%d spaces from %zu to %zu",
            heap_number,
            (bucket + MIN_INDEX_POWER2), ordered_spaces[bucket], (ordered_spaces[bucket] + leftover_small_spaces)));
        ordered_spaces[bucket] += leftover_small_spaces;
    }

#ifdef SEG_REUSE_STATS
    size_t temp;
    dprintf (SEG_REUSE_LOG_1, ("[%d]Plugs became:", heap_number));
    dump_buckets (ordered_blocks, MAX_NUM_BUCKETS, &temp);

    dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces became:", heap_number));
    dump_buckets (ordered_spaces, MAX_NUM_BUCKETS, &temp);
#endif //SEG_REUSE_STATS

    return can_fit;
}

// Tries to fit the blocks in bucket block_index into the free space buckets, starting
// with bucket *space_index and stepping down to smaller space buckets on failure.
//
// On return *space_index holds the space bucket index the search stopped at. Returns
// TRUE as soon as can_fit_in_spaces_p succeeds, and FALSE once *space_index has dropped
// below block_index.
BOOL gc_heap::can_fit_blocks_p (size_t* ordered_blocks, int block_index, size_t* ordered_spaces, int* space_index)
{
    assert (*space_index >= block_index);

    for (;;)
    {
        if (can_fit_in_spaces_p (ordered_blocks, block_index, ordered_spaces, *space_index))
        {
            return TRUE;
        }

        // This space bucket wasn't enough; move on to the next smaller one, if any.
        --(*space_index);
        if (*space_index < block_index)
        {
            return FALSE;
        }
    }
}

// Checks whether every bucket of blocks can be fit into the free space buckets.
// Block buckets are visited from the largest index (count - 1) down to 0, and the same
// space bucket cursor is carried from one call to can_fit_blocks_p to the next.
//
// Returns TRUE only if every bucket could be fit.
BOOL gc_heap::can_fit_all_blocks_p (size_t* ordered_blocks, size_t* ordered_spaces, int count)
{
#ifdef FEATURE_STRUCTALIGN
    // BARTOKTODO (4841): reenable when can_fit_in_spaces_p takes alignment requirements into account
    return FALSE;
#endif // FEATURE_STRUCTALIGN
    int current_space_index = count - 1;
    int block_index = count - 1;

    while (block_index >= 0)
    {
        if (!can_fit_blocks_p (ordered_blocks, block_index, ordered_spaces, &current_space_index))
        {
            return FALSE;
        }

        block_index--;
    }

    return TRUE;
}

// Fills bestfit_seg with the free spaces of seg:
//  - the bucket counts from ordered_free_space_indices,
//  - the first pinned plug in seg whose preceding free space is at least
//    eph_gen_starts_size + Align (min_obj_size) (added with the last argument TRUE),
//  - every following pinned plug that still lies inside seg,
//  - and, if commit_end_of_seg is set, the segment itself for the end of segment space.
// Only expected to run when the condemned generation is max_generation.
void gc_heap::build_ordered_free_spaces (heap_segment* seg)
{
    assert (bestfit_seg);

    //bestfit_seg->add_buckets (MAX_NUM_BUCKETS - free_space_buckets + MIN_INDEX_POWER2,
    //                    ordered_free_space_indices + (MAX_NUM_BUCKETS - free_space_buckets),
    //                    free_space_buckets,
    //                    free_space_items);

    bestfit_seg->add_buckets (MIN_INDEX_POWER2, ordered_free_space_indices, MAX_NUM_BUCKETS, free_space_items);

    assert (settings.condemned_generation == max_generation);

    uint8_t* const seg_start = heap_segment_mem (seg);
    uint8_t* const seg_limit = heap_segment_reserved (seg);

    // Restart the pinned plug queue from its bottom and search for the pin to start from.
    reset_pinned_queue_bos();
    mark* m = 0;

    // See comment in can_expand_into_p why we need this size.
    const size_t min_first_gap = eph_gen_starts_size + Align (min_obj_size);
    BOOL has_fit_gen_starts = FALSE;

    // Dequeue pins until we reach one inside the segment with a big enough gap in front.
    for (; !pinned_plug_que_empty_p(); deque_pinned_plug())
    {
        m = oldest_pin();

        const bool inside_seg = ((pinned_plug (m) >= seg_start) && (pinned_plug (m) < seg_limit));
        if (inside_seg && (pinned_len (m) >= min_first_gap))
        {
            assert ((pinned_plug (m) - pinned_len (m)) == bestfit_first_pin);
            break;
        }
    }

    // A non-empty queue here means the search above stopped on a suitable pin.
    if (!pinned_plug_que_empty_p())
    {
        bestfit_seg->add ((void*)m, TRUE, TRUE);
        deque_pinned_plug();
        m = oldest_pin();
        has_fit_gen_starts = TRUE;
    }

    // Add the remaining pins for as long as they stay inside the segment.
    for (;;)
    {
        if (pinned_plug_que_empty_p())
        {
            break;
        }

        if ((pinned_plug (m) < seg_start) || (pinned_plug (m) >= seg_limit))
        {
            break;
        }

        bestfit_seg->add ((void*)m, TRUE, FALSE);
        deque_pinned_plug();
        m = oldest_pin();
    }

    if (commit_end_of_seg)
    {
        // If no pin gap was picked above, the end of segment space is the first space.
        const BOOL end_is_first_space_p = (!has_fit_gen_starts);
        if (end_is_first_space_p)
        {
            assert (bestfit_first_pin == heap_segment_plan_allocated (seg));
        }
        bestfit_seg->add ((void*)seg, FALSE, end_is_first_space_p);
    }

#ifdef _DEBUG
    bestfit_seg->check();
#endif //_DEBUG
}

// Runs one best fit attempt of ordered_plug_indices against ordered_free_space_indices
// over all MAX_NUM_BUCKETS buckets. Unless end_of_segment_p is set, the free space
// indices are first trimmed with trim_free_spaces_indices.
//
// Returns TRUE if all plugs can be fit.
BOOL gc_heap::try_best_fit (BOOL end_of_segment_p)
{
    if (!end_of_segment_p)
    {
        trim_free_spaces_indices ();
    }

    return can_fit_all_blocks_p (ordered_plug_indices, ordered_free_space_indices, MAX_NUM_BUCKETS);
}

// Decides whether the ephemeral plugs can be fit into the free spaces tallied so far
// (ordered_free_space_indices), optionally with the help of extra end of segment space.
//
// Parameters:
//   free_space           - total size of the free spaces found so far.
//   largest_free_space   - size of the largest single free space found so far.
//   additional_space     - end of segment space that may be used on top of free_space; 0 if none.
//   use_additional_space - out; reset to FALSE on entry and set to TRUE when the fit relies on
//                          additional_space. Must be non-null whenever additional_space is non-zero.
//
// Returns TRUE if the plugs fit. Except for the "no ephemeral plugs" shortcut, the result is
// also stored in use_bestfit. When the fit fails, max_free_space_items is adjusted before
// returning.
BOOL gc_heap::best_fit (size_t free_space,
                        size_t largest_free_space,
                        size_t additional_space,
                        BOOL* use_additional_space)
{
    dprintf (SEG_REUSE_LOG_0, ("gen%d: trying best fit mechanism", settings.condemned_generation));

    assert (!additional_space || (additional_space && use_additional_space));
    if (use_additional_space)
    {
        *use_additional_space = FALSE;
    }

    // The plug buckets are built once and saved; later calls start from the saved copy
    // because a fit attempt modifies ordered_plug_indices.
    if (ordered_plug_indices_init == FALSE)
    {
        total_ephemeral_plugs = 0;
        build_ordered_plug_indices();
        ordered_plug_indices_init = TRUE;
    }
    else
    {
        memcpy (ordered_plug_indices, saved_ordered_plug_indices, sizeof(ordered_plug_indices));
    }

    if (total_ephemeral_plugs == END_SPACE_AFTER_GC_FL)
    {
        // Nothing to relocate: we only need one space big enough for an empty ephemeral
        // range, either among the free spaces or at the end of the segment.
        dprintf (SEG_REUSE_LOG_0, ("No ephemeral plugs to realloc, done"));
        size_t empty_eph = (END_SPACE_AFTER_GC_FL + (Align (min_obj_size)) * (max_generation + 1));
        BOOL can_fit_empty_eph = (largest_free_space >= empty_eph);
        if (!can_fit_empty_eph)
        {
            can_fit_empty_eph = (additional_space >= empty_eph);

            if (can_fit_empty_eph)
            {
                // additional_space is non-zero here, so use_additional_space is non-null
                // (see the assert at the top).
                *use_additional_space = TRUE;
            }
        }

        return can_fit_empty_eph;
    }

    if ((total_ephemeral_plugs + approximate_new_allocation()) >= (free_space + additional_space))
    {
        dprintf (SEG_REUSE_LOG_0, ("We won't have enough free space left in this segment after fitting, done"));
        return FALSE;
    }

    if ((free_space + additional_space) == 0)
    {
        dprintf (SEG_REUSE_LOG_0, ("No free space in this segment, done"));
        return FALSE;
    }

#ifdef SEG_REUSE_STATS
    dprintf (SEG_REUSE_LOG_0, ("Free spaces:"));
    size_t total_free_space_power2 = 0;
    size_t total_free_space_items =
        dump_buckets (ordered_free_space_indices,
                      MAX_NUM_BUCKETS,
                      &total_free_space_power2);
    dprintf (SEG_REUSE_LOG_0, ("currently max free spaces is %zd", max_free_space_items));

    dprintf (SEG_REUSE_LOG_0, ("Ephemeral plugs: 0x%zx, free space: 0x%zx (rounded down to 0x%zx (%zd%%)), additional free_space: 0x%zx",
                total_ephemeral_plugs,
                free_space,
                total_free_space_power2,
                (free_space ? (total_free_space_power2 * 100 / free_space) : 0),
                additional_space));

    // Keep an untouched copy of the free space buckets for the experiment at the end.
    size_t saved_all_free_space_indices[MAX_NUM_BUCKETS];
    memcpy (saved_all_free_space_indices,
            ordered_free_space_indices,
            sizeof(saved_all_free_space_indices));

#endif // SEG_REUSE_STATS

    if (total_ephemeral_plugs > (free_space + additional_space))
    {
        return FALSE;
    }

    // First attempt: the free spaces only (trimmed), without the end of segment space.
    use_bestfit = try_best_fit(FALSE);

    if (!use_bestfit && additional_space)
    {
        // Second attempt: treat the end of segment space as one extra free space in the
        // bucket of its rounded down power of 2 size.
        int relative_free_space_index = relative_index_power2_free_space (round_down_power2 (additional_space));

        if (relative_free_space_index != -1)
        {
            int relative_plug_index = 0;
            size_t plugs_to_fit = 0;

            // Find the largest plug bucket that still has plugs left after the first attempt.
            for (relative_plug_index = (MAX_NUM_BUCKETS - 1); relative_plug_index >= 0; relative_plug_index--)
            {
                plugs_to_fit = ordered_plug_indices[relative_plug_index];
                if (plugs_to_fit != 0)
                {
                    break;
                }
            }

            // One extra space can't help if the remaining plugs are in a bigger bucket, or
            // if more than one plug of that very bucket size is left.
            if ((relative_plug_index > relative_free_space_index) ||
                ((relative_plug_index == relative_free_space_index) &&
                (plugs_to_fit > 1)))
            {
#ifdef SEG_REUSE_STATS
                // plugs_to_fit is a size_t, so it has to be printed with %zu.
                dprintf (SEG_REUSE_LOG_0, ("additional space is 2^%d but we stopped at %zu 2^%d plug(s)",
                            (relative_free_space_index + MIN_INDEX_POWER2),
                            plugs_to_fit,
                            (relative_plug_index + MIN_INDEX_POWER2)));
#endif // SEG_REUSE_STATS
                goto adjust;
            }

            dprintf (SEG_REUSE_LOG_0, ("Adding end of segment (2^%d)", (relative_free_space_index + MIN_INDEX_POWER2)));
            ordered_free_space_indices[relative_free_space_index]++;
            use_bestfit = try_best_fit(TRUE);
            if (use_bestfit)
            {
                free_space_items++;
                // Since we might've trimmed away some of the free spaces we had, we should see
                // if we really need to use end of seg space - if it's the same or smaller than
                // the largest space we trimmed we can just add that one back instead of
                // using end of seg.
                if (relative_free_space_index > trimmed_free_space_index)
                {
                    *use_additional_space = TRUE;
                }
                else
                {
                    // If the additional space is <= the last trimmed space, we
                    // should just use that last trimmed space instead.
                    saved_ordered_free_space_indices[trimmed_free_space_index]++;
                }
            }
        }
    }

adjust:

    if (!use_bestfit)
    {
        dprintf (SEG_REUSE_LOG_0, ("couldn't fit..."));

#ifdef SEG_REUSE_STATS
        // Stats only: find out how many free spaces we would have had to keep in order to
        // fit. max_free_space_items is restored afterwards.
        size_t saved_max = max_free_space_items;
        BOOL temp_bestfit = FALSE;

        dprintf (SEG_REUSE_LOG_0, ("----Starting experiment process----"));
        dprintf (SEG_REUSE_LOG_0, ("----Couldn't fit with max free items %zd", max_free_space_items));

        // TODO: need to take the end of segment into consideration.
        while (max_free_space_items <= total_free_space_items)
        {
            max_free_space_items += max_free_space_items / 2;
            dprintf (SEG_REUSE_LOG_0, ("----Temporarily increasing max free spaces to %zd", max_free_space_items));
            memcpy (ordered_free_space_indices,
                    saved_all_free_space_indices,
                    sizeof(ordered_free_space_indices));
            if (try_best_fit(FALSE))
            {
                temp_bestfit = TRUE;
                break;
            }
        }

        if (temp_bestfit)
        {
            dprintf (SEG_REUSE_LOG_0, ("----With %zd max free spaces we could fit", max_free_space_items));
        }
        else
        {
            dprintf (SEG_REUSE_LOG_0, ("----Tried all free spaces and still couldn't fit, lost too much space"));
        }

        dprintf (SEG_REUSE_LOG_0, ("----Restoring max free spaces to %zd", saved_max));
        max_free_space_items = saved_max;
#endif // SEG_REUSE_STATS
        // Allow more free spaces to be kept from now on: twice the number we had, clamped
        // to [MIN_NUM_FREE_SPACES, MAX_NUM_FREE_SPACES].
        if (free_space_items)
        {
            max_free_space_items = min ((size_t)MAX_NUM_FREE_SPACES, free_space_items * 2);
            max_free_space_items = max (max_free_space_items, (size_t)MIN_NUM_FREE_SPACES);
        }
        else
        {
            max_free_space_items = MAX_NUM_FREE_SPACES;
        }
    }

    dprintf (SEG_REUSE_LOG_0, ("Adjusted number of max free spaces to %zd", max_free_space_items));
    dprintf (SEG_REUSE_LOG_0, ("------End of best fitting process------\n"));

    return use_bestfit;
}

// Accounts for one free space of size free_space found while examining seg.
//
// *total_free_space and *largest_free_space are updated first. If the running totals
// now satisfy both min_free_size and min_cont_size, TRUE is returned and the space is
// not recorded in a bucket. Otherwise the space is counted in the
// ordered_free_space_indices bucket of its rounded down power of 2 size (if it has one)
// and FALSE is returned.
BOOL gc_heap::process_free_space (heap_segment* seg,
                                  size_t free_space,
                                  size_t min_free_size,
                                  size_t min_cont_size,
                                  size_t* total_free_space,
                                  size_t* largest_free_space)
{
    *total_free_space += free_space;
    if (free_space > *largest_free_space)
    {
        *largest_free_space = free_space;
    }

#ifdef SIMPLE_DPRINTF
    dprintf (SEG_REUSE_LOG_1, ("free space len: %zx, total free space: %zx, largest free space: %zx",
                free_space, *total_free_space, *largest_free_space));
#endif //SIMPLE_DPRINTF

    const bool enough_total_p = (*total_free_space >= min_free_size);
    const bool enough_contiguous_p = (*largest_free_space >= min_cont_size);

    if (enough_total_p && enough_contiguous_p)
    {
#ifdef SIMPLE_DPRINTF
        dprintf (SEG_REUSE_LOG_0, ("(gen%d)total free: %zx(min: %zx), largest free: %zx(min: %zx). Found segment %zx to reuse without bestfit",
            settings.condemned_generation,
            *total_free_space, min_free_size, *largest_free_space, min_cont_size,
            (size_t)seg));
#else
        UNREFERENCED_PARAMETER(seg);
#endif //SIMPLE_DPRINTF
        return TRUE;
    }

    // Not enough yet - remember this space for a possible best fit later.
    const int bucket_index = relative_index_power2_free_space (round_down_power2 (free_space));
    if (bucket_index == -1)
    {
        // No bucket for a space of this size.
        return FALSE;
    }

    ordered_free_space_indices[bucket_index]++;
    return FALSE;
}

// Decides whether the existing segment seg has enough free space to be reused.
//
// Parameters:
//   seg            - candidate segment.
//   min_free_size  - minimum total free space the segment has to provide.
//   min_cont_size  - minimum size of the largest single free space; END_SPACE_AFTER_GC is
//                    added to it on entry.
//   gen_allocator  - free list allocator; only consulted when the condemned generation is
//                    not max_generation.
//
// Returns TRUE if seg can be used.
//
// Side effects: use_bestfit, commit_end_of_seg and bestfit_first_pin are reset on entry and
// may be set again below; ordered_free_space_indices is rebuilt while tallying free spaces;
// the pinned plug queue is walked (and dequeued from) in the max_generation case; the end of
// seg may be committed through grow_heap_segment.
BOOL gc_heap::can_expand_into_p (heap_segment* seg, size_t min_free_size, size_t min_cont_size,
                                 allocator* gen_allocator)
{
    min_cont_size += END_SPACE_AFTER_GC;
    use_bestfit = FALSE;
    commit_end_of_seg = FALSE;
    bestfit_first_pin = 0;
    uint8_t* first_address = heap_segment_mem (seg);
    uint8_t* end_address   = heap_segment_reserved (seg);
    size_t end_extra_space = end_space_after_gc();

    // The segment must keep more than end_extra_space bytes between its planned allocated
    // end and its reserved end, otherwise it can't be used at all.
    if ((heap_segment_reserved (seg) - end_extra_space) <= heap_segment_plan_allocated (seg))
    {
        dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p: can't use segment [%p %p, has less than %zu bytes at the end",
                                   first_address, end_address, end_extra_space));
        return FALSE;
    }

    // From here on the usable range of the segment excludes that extra space at the end.
    end_address -= end_extra_space;

    dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p(gen%d): min free: %zx, min continuous: %zx",
        settings.condemned_generation, min_free_size, min_cont_size));
    size_t eph_gen_starts = eph_gen_starts_size;

    if (settings.condemned_generation == max_generation)
    {
        // Free spaces are the gaps in front of pinned plugs (pinned_len), plus the space at
        // the end of the segment.
        size_t free_space = 0;
        size_t largest_free_space = free_space;
        dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p: gen2: testing segment [%p %p", first_address, end_address));
        //Look through the pinned plugs for relevant ones and Look for the right pinned plug to start from.
        //We are going to allocate the generation starts in the 1st free space,
        //so start from the first free space that's big enough for gen starts and a min object size.
        // If we see a free space that is >= gen starts but < gen starts + min obj size we just don't use it -
        // we could use it by allocating the last generation start a bit bigger but
        // the complexity isn't worth the effort (those plugs are from gen2
        // already anyway).
        reset_pinned_queue_bos();
        mark* m = 0;
        BOOL has_fit_gen_starts = FALSE;

        init_ordered_free_space_indices ();
        // Dequeue pins until one is found inside the segment whose gap can hold the
        // generation starts plus a min object.
        while (!pinned_plug_que_empty_p())
        {
            m = oldest_pin();
            if ((pinned_plug (m) >= first_address) &&
                (pinned_plug (m) < end_address) &&
                (pinned_len (m) >= (eph_gen_starts + Align (min_obj_size))))
            {
                break;
            }
            else
            {
                deque_pinned_plug();
            }
        }

        // A non-empty queue means the loop above stopped on a suitable pin.
        if (!pinned_plug_que_empty_p())
        {
            // Start of the gap in front of that pin.
            bestfit_first_pin = pinned_plug (m) - pinned_len (m);

            // The generation starts are taken out of the first gap before it is tallied.
            if (process_free_space (seg,
                                    pinned_len (m) - eph_gen_starts,
                                    min_free_size, min_cont_size,
                                    &free_space, &largest_free_space))
            {
                return TRUE;
            }

            deque_pinned_plug();
            m = oldest_pin();
            has_fit_gen_starts = TRUE;
        }

        // NOTE(review): m is still 0 here if the pin queue was empty on entry - confirm that
        // evaluating pinned_plug (m) is safe when this log level is enabled.
        dprintf (3, ("first pin is %p", pinned_plug (m)));

        //tally up free space
        while (!pinned_plug_que_empty_p() &&
               ((pinned_plug (m) >= first_address) && (pinned_plug (m) < end_address)))
        {
            dprintf (3, ("looking at pin %p", pinned_plug (m)));
            if (process_free_space (seg,
                                    pinned_len (m),
                                    min_free_size, min_cont_size,
                                    &free_space, &largest_free_space))
            {
                return TRUE;
            }

            deque_pinned_plug();
            m = oldest_pin();
        }

        //try to find space at the end of the segment.
        size_t end_space = (end_address - heap_segment_plan_allocated (seg));
        // How much we are still short of min_free_size after the pin gaps.
        size_t additional_space = ((min_free_size > free_space) ? (min_free_size - free_space) : 0);
        dprintf (SEG_REUSE_LOG_0, ("end space: %zx; additional: %zx", end_space, additional_space));
        if (end_space >= additional_space)
        {
            BOOL can_fit = TRUE;
            commit_end_of_seg = TRUE;

            if (largest_free_space < min_cont_size)
            {
                if (end_space >= min_cont_size)
                {
                    // The end of segment space alone satisfies the contiguous requirement.
                    additional_space = max (min_cont_size, additional_space);
                    dprintf (SEG_REUSE_LOG_0, ("(gen2)Found segment %p to reuse without bestfit, with committing end of seg for eph",
                        seg));
                }
                else
                {
                    // No single space is big enough; best fit is the only option left and
                    // it is not attempted when settings.concurrent is set.
                    if (settings.concurrent)
                    {
                        can_fit = FALSE;
                        commit_end_of_seg = FALSE;
                    }
                    else
                    {
                        size_t additional_space_bestfit = additional_space;
                        if (!has_fit_gen_starts)
                        {
                            // No pin gap was picked for the generation starts, so they have
                            // to come out of the end of segment space.
                            if (additional_space_bestfit < (eph_gen_starts + Align (min_obj_size)))
                            {
                                dprintf (SEG_REUSE_LOG_0, ("(gen2)Couldn't fit, gen starts not allocated yet and end space is too small: %zd",
                                        additional_space_bestfit));
                                return FALSE;
                            }

                            bestfit_first_pin = heap_segment_plan_allocated (seg);
                            additional_space_bestfit -= eph_gen_starts;
                        }

                        // best_fit writes commit_end_of_seg: it is TRUE afterwards only if
                        // the fit relies on the end of segment space.
                        can_fit = best_fit (free_space,
                                            largest_free_space,
                                            additional_space_bestfit,
                                            &commit_end_of_seg);

                        if (can_fit)
                        {
                            dprintf (SEG_REUSE_LOG_0, ("(gen2)Found segment %p to reuse with bestfit, %s committing end of seg",
                                seg, (commit_end_of_seg ? "with" : "without")));
                        }
                        else
                        {
                            dprintf (SEG_REUSE_LOG_0, ("(gen2)Couldn't fit, total free space is %zx", (free_space + end_space)));
                        }
                    }
                }
            }
            else
            {
                dprintf (SEG_REUSE_LOG_0, ("(gen2)Found segment %p to reuse without bestfit, with committing end of seg", seg));
            }

            assert (additional_space <= end_space);
            if (commit_end_of_seg)
            {
                if (!grow_heap_segment (seg, heap_segment_plan_allocated (seg) + additional_space))
                {
                    dprintf (2, ("Couldn't commit end of segment?!"));
                    use_bestfit = FALSE;

                    return FALSE;
                }

                if (use_bestfit)
                {
                    // We increase the index here because growing heap segment could create a discrepancy with
                    // the additional space we used (could be bigger).
                    size_t free_space_end_of_seg =
                        heap_segment_committed (seg) - heap_segment_plan_allocated (seg);
                    int relative_free_space_index = relative_index_power2_free_space (round_down_power2 (free_space_end_of_seg));
                    saved_ordered_free_space_indices[relative_free_space_index]++;
                }
            }

            if (use_bestfit)
            {
                // Restore the saved free space buckets and recompute the limit on free
                // space items: 1.5x the current count, clamped to
                // [MIN_NUM_FREE_SPACES, MAX_NUM_FREE_SPACES].
                memcpy (ordered_free_space_indices,
                        saved_ordered_free_space_indices,
                        sizeof(ordered_free_space_indices));
                max_free_space_items = max ((size_t)MIN_NUM_FREE_SPACES, free_space_items * 3 / 2);
                max_free_space_items = min ((size_t)MAX_NUM_FREE_SPACES, max_free_space_items);
                dprintf (SEG_REUSE_LOG_0, ("could fit! %zd free spaces, %zd max", free_space_items, max_free_space_items));
            }

            return can_fit;
        }

        dprintf (SEG_REUSE_LOG_0, ("(gen2)Couldn't fit, total free space is %zx", (free_space + end_space)));
        return FALSE;
    }
    else
    {
        // Free spaces are the end of segment space (counted up front) plus the free list
        // items of gen_allocator that lie inside the segment.
        assert (settings.condemned_generation == (max_generation-1));
        size_t free_space = (end_address - heap_segment_plan_allocated (seg));
        size_t largest_free_space = free_space;
        dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p: gen1: testing segment [%p %p", first_address, end_address));
        //find the first free list in range of the current segment
        uint8_t* free_list = 0;
        unsigned int a_l_idx = gen_allocator->first_suitable_bucket(eph_gen_starts);
        for (; a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++)
        {
            free_list = gen_allocator->alloc_list_head_of (a_l_idx);
            while (free_list)
            {
                if ((free_list >= first_address) &&
                    (free_list < end_address) &&
                    (unused_array_size (free_list) >= eph_gen_starts))
                {
                    // Found the first usable item; a_l_idx and free_list are kept for the
                    // tally below.
                    goto next;
                }
                else
                {
                    free_list = free_list_slot (free_list);
                }
            }
        }
next:
        if (free_list)
        {
            init_ordered_free_space_indices ();
            if (process_free_space (seg,
                                    unused_array_size (free_list) - eph_gen_starts + Align (min_obj_size),
                                    min_free_size, min_cont_size,
                                    &free_space, &largest_free_space))
            {
                return TRUE;
            }

            free_list = free_list_slot (free_list);
        }
        else
        {
            dprintf (SEG_REUSE_LOG_0, ("(gen1)Couldn't fit, no free list"));
            return FALSE;
        }

       //tally up free space
        // Continue with the rest of the current bucket, then with every following bucket.
        while (1)
        {
            while (free_list)
            {
                if ((free_list >= first_address) && (free_list < end_address) &&
                    process_free_space (seg,
                                        unused_array_size (free_list),
                                        min_free_size, min_cont_size,
                                        &free_space, &largest_free_space))
                {
                    return TRUE;
                }

                free_list = free_list_slot (free_list);
            }
            a_l_idx++;
            if (a_l_idx < gen_allocator->number_of_buckets())
            {
                free_list = gen_allocator->alloc_list_head_of (a_l_idx);
            }
            else
                break;
        }

        dprintf (SEG_REUSE_LOG_0, ("(gen1)Couldn't fit, total free space is %zx", free_space));
        return FALSE;

        // Best fit is currently not attempted in this case; the disabled code is kept below.
        /*
        BOOL can_fit = best_fit (free_space, 0, NULL);
        if (can_fit)
        {
            dprintf (SEG_REUSE_LOG_0, ("(gen1)Found segment %zx to reuse with bestfit", seg));
        }
        else
        {
            dprintf (SEG_REUSE_LOG_0, ("(gen1)Couldn't fit, total free space is %zx", free_space));
        }

        return can_fit;
        */
    }
}

// Processes one plug (last_plug, last_plug_size bytes) while re-planning plugs:
//  - when best fit is not in use, first checks whether the plug crosses into the next
//    younger generation and, if so, plans that generation's start;
//  - if the plug is the oldest pinned plug in the queue, it stays in place: the pin is
//    dequeued, its recorded gap length is recomputed against last_pinned_gap, and the cards
//    covering the plug are set;
//  - otherwise, if the plug is at or above start_address, a new address is obtained from
//    allocate_in_expanded_heap and stored as the plug's relocation distance.
// Non-pinned plugs below start_address are left untouched.
//
// Parameters:
//   last_plug_size        - size of the plug; increased locally by sizeof (gap_reloc_pair)
//                           when the plug was shortened.
//   last_plug             - address of the plug.
//   gen                   - generation passed on to the allocation routines.
//   start_address         - plugs below this address are not reallocated.
//   active_new_gen_number - in/out; generation currently being planned.
//   last_pinned_gap       - in/out; set to the end of the plug when it is pinned.
//   leftp                 - in/out; cleared on a generation boundary or a pin, otherwise set
//                           to the adjacency result of the allocation.
//   shortened_p           - TRUE if the plug was shortened (see realloc_in_brick, which
//                           passes its pre plug info flag here).
//   pinned_plug_entry     - (SHORT_PLUGS only) pinned plug entry handed to the padding and
//                           allocation helpers.
void gc_heap::realloc_plug (size_t last_plug_size, uint8_t*& last_plug,
                            generation* gen, uint8_t* start_address,
                            unsigned int& active_new_gen_number,
                            uint8_t*& last_pinned_gap, BOOL& leftp,
                            BOOL shortened_p
#ifdef SHORT_PLUGS
                            , mark* pinned_plug_entry
#endif //SHORT_PLUGS
                            )
{
    // detect generation boundaries
    // make sure that active_new_gen_number is not the youngest generation.
    // because the generation_limit wouldn't return the right thing in this case.
    if (!use_bestfit)
    {
        if ((active_new_gen_number > 1) &&
            (last_plug >= generation_limit (active_new_gen_number)))
        {
            assert (last_plug >= start_address);
            active_new_gen_number--;
            realloc_plan_generation_start (generation_of (active_new_gen_number), gen);
            assert (generation_plan_allocation_start (generation_of (active_new_gen_number)));
            leftp = FALSE;
        }
    }

    // detect pinned plugs
    if (!pinned_plug_que_empty_p() && (last_plug == pinned_plug (oldest_pin())))
    {
        size_t  entry = deque_pinned_plug();
        mark*  m = pinned_plug_of (entry);

        // The gap in front of this pin now starts at the end of the previous pin.
        size_t saved_pinned_len = pinned_len(m);
        pinned_len(m) = last_plug - last_pinned_gap;
        //dprintf (3,("Adjusting pinned gap: [%zx, %zx[", (size_t)last_pinned_gap, (size_t)last_plug));

        if (m->has_post_plug_info())
        {
            last_plug_size += sizeof (gap_reloc_pair);
            dprintf (3, ("ra pinned %p was shortened, adjusting plug size to %zx", last_plug, last_plug_size))
        }

        last_pinned_gap = last_plug + last_plug_size;
        dprintf (3, ("ra found pin %p, len: %zx->%zx, last_p: %p, last_p_size: %zx",
            pinned_plug (m), saved_pinned_len, pinned_len (m), last_plug, last_plug_size));
        leftp = FALSE;

        //we are creating a generation fault. set the cards.
        {
            // Every card from the one containing last_plug up to the card-aligned end of
            // the plug is set.
            size_t end_card = card_of (align_on_card (last_plug + last_plug_size));
            size_t card = card_of (last_plug);
            while (card != end_card)
            {
                set_card (card);
                card++;
            }
        }
    }
    else if (last_plug >= start_address)
    {
#ifdef FEATURE_STRUCTALIGN
        int requiredAlignment;
        ptrdiff_t pad;
        node_aligninfo (last_plug, requiredAlignment, pad);

        // from how we previously aligned the plug's destination address,
        // compute the actual alignment offset.
        uint8_t* reloc_plug = last_plug + node_relocation_distance (last_plug);
        ptrdiff_t alignmentOffset = ComputeStructAlignPad(reloc_plug, requiredAlignment, 0);
        if (!alignmentOffset)
        {
            // allocate_in_expanded_heap doesn't expect alignmentOffset to be zero.
            alignmentOffset = requiredAlignment;
        }

        //clear the alignment info because we are reallocating
        clear_node_aligninfo (last_plug);
#else // FEATURE_STRUCTALIGN
        //clear the realignment flag because we are reallocating
        clear_node_realigned (last_plug);
#endif // FEATURE_STRUCTALIGN
        BOOL adjacentp = FALSE;
        BOOL set_padding_on_saved_p = FALSE;

        if (shortened_p)
        {
            // Add back the gap_reloc_pair that was cut off this plug.
            last_plug_size += sizeof (gap_reloc_pair);

#ifdef SHORT_PLUGS
            assert (pinned_plug_entry != NULL);
            if (last_plug_size <= sizeof (plug_and_gap))
            {
                set_padding_on_saved_p = TRUE;
            }
#endif //SHORT_PLUGS

            dprintf (3, ("ra plug %p was shortened, adjusting plug size to %zx", last_plug, last_plug_size))
        }

#ifdef SHORT_PLUGS
        clear_padding_in_expand (last_plug, set_padding_on_saved_p, pinned_plug_entry);
#endif //SHORT_PLUGS

        // adjacentp is passed to the allocation and copied into leftp at the end.
        uint8_t* new_address = allocate_in_expanded_heap(gen, last_plug_size, adjacentp, last_plug,
#ifdef SHORT_PLUGS
                                     set_padding_on_saved_p,
                                     pinned_plug_entry,
#endif //SHORT_PLUGS
                                     TRUE, active_new_gen_number REQD_ALIGN_AND_OFFSET_ARG);

        dprintf (3, ("ra NA: [%p, %p[: %zx", new_address, (new_address + last_plug_size), last_plug_size));
        assert (new_address);
        set_node_relocation_distance (last_plug, new_address - last_plug);
#ifdef FEATURE_STRUCTALIGN
        if (leftp && node_alignpad (last_plug) == 0)
#else // FEATURE_STRUCTALIGN
        if (leftp && !node_realigned (last_plug))
#endif // FEATURE_STRUCTALIGN
        {
            // TODO - temporarily disable L optimization because of a bug in it.
            //set_node_left (last_plug);
        }
        dprintf (3,(" Re-allocating %zx->%zx len %zd", (size_t)last_plug, (size_t)new_address, last_plug_size));
        leftp = adjacentp;
    }
}

void gc_heap::realloc_in_brick (uint8_t* tree, uint8_t*& last_plug,
                                uint8_t* start_address,
                                generation* gen,
                                unsigned int& active_new_gen_number,
                                uint8_t*& last_pinned_gap, BOOL& leftp)
{
    assert (tree != NULL);
    int   left_node = node_left_child (tree);
    int   right_node = node_right_child (tree);

    dprintf (3, ("ra: tree: %p, last_pin_gap: %p, last_p: %p, L: %d, R: %d",
        tree, last_pinned_gap, last_plug, left_node, right_node));

    if (left_node)
    {
        dprintf (3, ("LN: realloc %p(%p)", (tree + left_node), last_plug));
        realloc_in_brick ((tree + left_node), last_plug, start_address,
                          gen, active_new_gen_number, last_pinned_gap,
                          leftp);
    }

    if (last_plug != 0)
    {
        uint8_t*  plug = tree;

        BOOL has_pre_plug_info_p = FALSE;
        BOOL has_post_plug_info_p = FALSE;
        mark* pinned_plug_entry = get_next_pinned_entry (tree,
                                                         &has_pre_plug_info_p,
                                                         &has_post_plug_info_p,
                                                         FALSE);

        // We only care about the pre plug info 'cause that's what decides if the last plug is shortened.
        // The pinned plugs are handled in realloc_plug.
        size_t gap_size = node_gap_size (plug);
        uint8_t*   gap = (plug - gap_size);
        uint8_t*  last_plug_end = gap;
        size_t  last_plug_size = (last_plug_end - last_plug);
        // Cannot assert this - a plug could be less than that due to the shortened ones.
        //assert (last_plug_size >= Align (min_obj_size));
        dprintf (3, ("ra: plug %p, gap size: %zd, last_pin_gap: %p, last_p: %p, last_p_end: %p, shortened: %d",
            plug, gap_size, last_pinned_gap, last_plug, last_plug_end, (has_pre_plug_info_p ? 1 : 0)));
        realloc_plug (last_plug_size, last_plug, gen, start_address,
                      active_new_gen_number, last_pinned_gap,
                      leftp, has_pre_plug_info_p
#ifdef SHORT_PLUGS
                      , pinned_plug_entry
#endif //SHORT_PLUGS
                      );
    }

    last_plug = tree;

    if (right_node)
    {
        dprintf (3, ("RN: realloc %p(%p)", (tree + right_node), last_plug));
        realloc_in_brick ((tree + right_node), last_plug, start_address,
                          gen, active_new_gen_number, last_pinned_gap,
                          leftp);
    }
}

// Re-plans the plugs of segment `seg` that live in bricks covering
// [first_address, end_address) by walking the brick table and handing each
// plug tree to realloc_in_brick / realloc_plug with `consing_gen` as the
// generation being allocated into.
//
// consing_gen           - generation passed through to realloc_in_brick / realloc_plug.
// seg                   - segment whose plan_allocated is read at the start and
//                         rewritten with the final last_pinned_gap at the end.
// start_address         - passed through to the realloc helpers; also the initial
//                         value of the first address to walk from.
// end_address           - exclusive upper bound of the walk; also used to size the
//                         trailing plug.
// active_new_gen_number - passed through to the realloc helpers.
void
gc_heap::realloc_plugs (generation* consing_gen, heap_segment* seg,
                        uint8_t* start_address, uint8_t* end_address,
                        unsigned active_new_gen_number)
{
    dprintf (3, ("--- Reallocing ---"));

    if (use_bestfit)
    {
        //make sure that every generation has a planned allocation start
        // Generations without one are laid out back to back from bestfit_first_pin,
        // Align (min_obj_size) apart, with gen (max_generation - 1) at offset 0.
        int  gen_number = max_generation - 1;
        while (gen_number >= 0)
        {
            generation* gen = generation_of (gen_number);
            if (0 == generation_plan_allocation_start (gen))
            {
                generation_plan_allocation_start (gen) =
                    bestfit_first_pin + (max_generation - gen_number - 1) * Align (min_obj_size);
                generation_plan_allocation_start_size (gen) = Align (min_obj_size);
                assert (generation_plan_allocation_start (gen));
            }
            gen_number--;
        }
    }

    uint8_t* first_address = start_address;
    //Look for the right pinned plug to start from.
    // Pins are dequeued until the oldest one falls inside
    // [planned_ephemeral_seg_end, end_address); if that pin is below
    // start_address the walk begins at the pin instead.
    reset_pinned_queue_bos();
    uint8_t* planned_ephemeral_seg_end = heap_segment_plan_allocated (seg);
    while (!pinned_plug_que_empty_p())
    {
        mark* m = oldest_pin();
        if ((pinned_plug (m) >= planned_ephemeral_seg_end) && (pinned_plug (m) < end_address))
        {
            if (pinned_plug (m) < first_address)
            {
                first_address = pinned_plug (m);
            }
            break;
        }
        else
            deque_pinned_plug();
    }

    size_t  current_brick = brick_of (first_address);
    size_t  end_brick = brick_of (end_address-1);
    // last_plug is updated by realloc_in_brick (passed by reference); 0 means no plug seen yet.
    uint8_t*  last_plug = 0;

    uint8_t* last_pinned_gap = heap_segment_plan_allocated (seg);
    BOOL leftp = FALSE;

    // NOTE(review): oldest_pin() is evaluated here even if the loop above drained
    // the pinned queue - confirm this is safe when level 3 logging is enabled.
    dprintf (3, ("start addr: %p, first addr: %p, current oldest pin: %p",
        start_address, first_address, pinned_plug (oldest_pin())));

    while (current_brick <= end_brick)
    {
        int   brick_entry =  brick_table [ current_brick ];
        // Only bricks with a non-negative entry are walked; the tree root is at
        // brick_address + entry - 1.
        if (brick_entry >= 0)
        {
            realloc_in_brick ((brick_address (current_brick) + brick_entry - 1),
                              last_plug, start_address, consing_gen,
                              active_new_gen_number, last_pinned_gap,
                              leftp);
        }
        current_brick++;
    }

    // The final plug has no following plug to delimit it, so it is sized
    // against end_address.
    if (last_plug != 0)
    {
        realloc_plug (end_address - last_plug, last_plug, consing_gen,
                      start_address,
                      active_new_gen_number, last_pinned_gap,
                      leftp, FALSE
#ifdef SHORT_PLUGS
                      , NULL
#endif //SHORT_PLUGS
                      );
    }

    //Fix the old segment allocated size
    assert (last_pinned_gap >= heap_segment_mem (seg));
    assert (last_pinned_gap <= heap_segment_committed (seg));
    heap_segment_plan_allocated (seg) = last_pinned_gap;
}

// Latches should_expand_in_full_gc to TRUE when the condemned generation is
// not max_generation and the pause mode is neither pause_low_latency nor
// pause_sustained_low_latency. Once the flag is set this is a no-op.
void gc_heap::set_expand_in_full_gc (int condemned_gen_number)
{
    if (should_expand_in_full_gc)
        return;

    if (condemned_gen_number == max_generation)
        return;

    const bool low_latency_mode_p =
        (settings.pause_mode == pause_low_latency) ||
        (settings.pause_mode == pause_sustained_low_latency);

    if (!low_latency_mode_p)
    {
        should_expand_in_full_gc = TRUE;
    }
}

void gc_heap::save_ephemeral_generation_starts()
{
    for (int ephemeral_generation = 0; ephemeral_generation < max_generation; ephemeral_generation++)
    {
        saved_ephemeral_plan_start[ephemeral_generation] =
            generation_plan_allocation_start (generation_of (ephemeral_generation));
        saved_ephemeral_plan_start_size[ephemeral_generation] =
            generation_plan_allocation_start_size (generation_of (ephemeral_generation));
    }
}

// Switches the ephemeral segment to new_heap_segment during a GC.
//
// condemned_generation - only used by an assert (must be >= max_generation - 1).
// consing_gen          - the generation currently being allocated into.
// new_heap_segment     - the segment that becomes ephemeral_heap_segment; when it is
//                        null nothing beyond the settings updates below happens.
//
// Returns the consing generation to continue with: the result of
// ensure_ephemeral_heap_segment on success, or the incoming consing_gen when
// new_heap_segment is null or committing the new segment fails.
//
// Two paths are taken depending on should_promote_ephemeral:
//  - promoting: ephemeral_promotion is set, the ephemeral plan starts are saved and
//    the free_obj_space of generations 1 .. max_generation-1 is added to max_generation.
//  - not promoting: the new segment is committed up front (if it is a new one), the old
//    ephemeral segment's plan_allocated is cut back to the plan start of
//    generation max_generation-1, and the plugs are re-planned via realloc_plugs.
generation* gc_heap::expand_heap (int condemned_generation,
                                  generation* consing_gen,
                                  heap_segment* new_heap_segment)
{
#ifndef _DEBUG
    UNREFERENCED_PARAMETER(condemned_generation);
#endif //!_DEBUG
    assert (condemned_generation >= (max_generation -1));
    unsigned int active_new_gen_number = max_generation; //Set one too high to get generation gap
    uint8_t*  start_address = generation_limit (max_generation);
    uint8_t*  end_address = heap_segment_allocated (ephemeral_heap_segment);
    BOOL should_promote_ephemeral = FALSE;
    // Snapshot of total_ephemeral_size; may be padded below before committing.
    ptrdiff_t eph_size = total_ephemeral_size;
#ifdef BACKGROUND_GC
    dprintf(2,("%s: ---- Heap Expansion ----", get_str_gc_type()));
#endif //BACKGROUND_GC
    settings.heap_expansion = TRUE;

    //reset the elevation state for next time.
    dprintf (2, ("Elevation: elevation = el_none"));
    if (settings.should_lock_elevation && !expand_reused_seg_p())
        settings.should_lock_elevation = FALSE;

    heap_segment* new_seg = new_heap_segment;

    if (!new_seg)
        return consing_gen;

    //copy the card and brick tables
    if (g_gc_card_table!= card_table)
        copy_brick_card_table();

    // A segment with no next link is treated as a brand new one (as opposed to a reused one).
    BOOL new_segment_p = (heap_segment_next (new_seg) == 0);
    dprintf (2, ("new_segment_p %zx", (size_t)new_segment_p));

    assert (generation_plan_allocation_start (generation_of (max_generation-1)));
    assert (generation_plan_allocation_start (generation_of (max_generation-1)) >=
            heap_segment_mem (ephemeral_heap_segment));
    assert (generation_plan_allocation_start (generation_of (max_generation-1)) <=
            heap_segment_committed (ephemeral_heap_segment));

    assert (generation_plan_allocation_start (youngest_generation));
    assert (generation_plan_allocation_start (youngest_generation) <
            heap_segment_plan_allocated (ephemeral_heap_segment));

    // Decide whether the ephemeral generations are promoted instead of being moved.
    if (settings.pause_mode == pause_no_gc)
    {
        // We don't reuse for no gc, so the size used on the new eph seg is eph_size.
        if ((size_t)(heap_segment_reserved (new_seg) - heap_segment_mem (new_seg)) < (eph_size + soh_allocation_no_gc))
            should_promote_ephemeral = TRUE;
    }
    else
    {
        if (!use_bestfit)
        {
            should_promote_ephemeral = dt_low_ephemeral_space_p (tuning_deciding_promote_ephemeral);
        }
    }

    if (should_promote_ephemeral)
    {
        ephemeral_promotion = TRUE;
        get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_new_seg_ep);
        dprintf (2, ("promoting ephemeral"));
        save_ephemeral_generation_starts();

        // We also need to adjust free_obj_space (due to padding) here because now young gens' free_obj_space will
        // belong to gen2.
        // Note the loop starts at generation 1.
        generation* max_gen = generation_of (max_generation);
        for (int i = 1; i < max_generation; i++)
        {
            generation_free_obj_space (max_gen) +=
                generation_free_obj_space (generation_of (i));
            dprintf (2, ("[h%d] maxgen freeobj + %zd=%zd",
                heap_number, generation_free_obj_space (generation_of (i)),
                generation_free_obj_space (max_gen)));
        }

        // TODO: This is actually insufficient - if BACKGROUND_GC is not defined we'd need to commit more
        // in order to accommodate eph gen starts. Also in the no_gc we should make sure used
        // is updated correctly.
        heap_segment_used (new_seg) = heap_segment_committed (new_seg);
    }
    else
    {
        // commit the new ephemeral segment all at once if it is a new one.
        if ((eph_size > 0) && new_segment_p)
        {
#ifdef FEATURE_STRUCTALIGN
            // The destination may require a larger alignment padding than the source.
            // Assume the worst possible alignment padding.
            eph_size += ComputeStructAlignPad(heap_segment_mem (new_seg), MAX_STRUCTALIGN, OBJECT_ALIGNMENT_OFFSET);
#endif // FEATURE_STRUCTALIGN
#ifdef RESPECT_LARGE_ALIGNMENT
            //Since the generation start can be larger than min_obj_size
            //The alignment could be switched.
            eph_size += switch_alignment_size(FALSE);
#endif //RESPECT_LARGE_ALIGNMENT
            //Since the generation start can be larger than min_obj_size
            //Compare the alignment of the first object in gen1
            // On commit failure the reason is recorded in fgm_result and the
            // expansion is abandoned.
            if (grow_heap_segment (new_seg, heap_segment_mem (new_seg) + eph_size) == 0)
            {
                fgm_result.set_fgm (fgm_commit_eph_segment, eph_size, FALSE);
                return consing_gen;
            }
            heap_segment_used (new_seg) = heap_segment_committed (new_seg);
        }

        //Fix the end of the old ephemeral heap segment
        heap_segment_plan_allocated (ephemeral_heap_segment) =
            generation_plan_allocation_start (generation_of (max_generation-1));

        dprintf (3, ("Old ephemeral allocated set to %zx",
                    (size_t)heap_segment_plan_allocated (ephemeral_heap_segment)));
    }

    if (new_segment_p)
    {
        // TODO - Is this really necessary? We should think about it.
        //initialize the first brick
        size_t first_brick = brick_of (heap_segment_mem (new_seg));
        set_brick (first_brick,
                heap_segment_mem (new_seg) - brick_address (first_brick));
    }

    //From this point on, we cannot run out of memory

    //reset the allocation of the consing generation back to the end of the
    //old ephemeral segment
    generation_allocation_limit (consing_gen) =
        heap_segment_plan_allocated (ephemeral_heap_segment);
    generation_allocation_pointer (consing_gen) = generation_allocation_limit (consing_gen);
    generation_allocation_segment (consing_gen) = ephemeral_heap_segment;

    //clear the generation gap for all of the ephemeral generations
    {
        int generation_num = max_generation-1;
        while (generation_num >= 0)
        {
            generation* gen = generation_of (generation_num);
            generation_plan_allocation_start (gen) = 0;
            generation_num--;
        }
    }

    // From here on ephemeral_heap_segment refers to the new segment; old_seg keeps
    // the previous one for realloc_plugs and the verification at the end.
    heap_segment* old_seg = ephemeral_heap_segment;
    ephemeral_heap_segment = new_seg;

    //Note: the ephemeral segment shouldn't be threaded onto the segment chain
    //because the relocation and compact phases shouldn't see it

    // set the generation members used by allocate_in_expanded_heap
    // and switch to ephemeral generation
    consing_gen = ensure_ephemeral_heap_segment (consing_gen);

    if (!should_promote_ephemeral)
    {
        realloc_plugs (consing_gen, old_seg, start_address, end_address,
                    active_new_gen_number);
    }

    if (!use_bestfit)
    {
        repair_allocation_in_expanded_heap (consing_gen);
    }

    // assert that the generation gap for all of the ephemeral generations were allocated.
#ifdef _DEBUG
    {
        int generation_num = max_generation-1;
        while (generation_num >= 0)
        {
            generation* gen = generation_of (generation_num);
            assert (generation_plan_allocation_start (gen));
            generation_num--;
        }
    }
#endif // _DEBUG

    if (!new_segment_p)
    {
        dprintf (2, ("Demoting ephemeral segment"));
        //demote the entire segment.
        settings.demotion = TRUE;
        get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit);
        demotion_low = heap_segment_mem (ephemeral_heap_segment);
        demotion_high = heap_segment_reserved (ephemeral_heap_segment);
    }
    else
    {
        // Empty demotion range (low > high).
        demotion_low = MAX_PTR;
        demotion_high = 0;
#ifndef MULTIPLE_HEAPS
        settings.demotion = FALSE;
        get_gc_data_per_heap()->clear_mechanism_bit (gc_demotion_bit);
#endif //!MULTIPLE_HEAPS
    }

    if (!should_promote_ephemeral && new_segment_p)
    {
        assert ((ptrdiff_t)total_ephemeral_size <= eph_size);
    }

    if (heap_segment_mem (old_seg) == heap_segment_plan_allocated (old_seg))
    {
        // This is to catch when we accidentally delete a segment that has pins.
        verify_no_pins (heap_segment_mem (old_seg), heap_segment_reserved (old_seg));
    }

    verify_no_pins (heap_segment_plan_allocated (old_seg), heap_segment_reserved(old_seg));

    dprintf(2,("---- End of Heap Expansion ----"));
    return consing_gen;
}
#endif //!USE_REGIONS

// Returns TRUE when the heap expansion mechanism recorded in gc_data_per_heap
// is one of the "reuse" kinds (expand_reuse_normal or expand_reuse_bestfit).
// Always FALSE when USE_REGIONS is defined.
BOOL gc_heap::expand_reused_seg_p()
{
#ifdef USE_REGIONS
    return FALSE;
#else
    const int recorded_mechanism = gc_data_per_heap.get_mechanism (gc_heap_expand);
    const bool reuse_p = (recorded_mechanism == expand_reuse_normal) ||
                         (recorded_mechanism == expand_reuse_bestfit);
    return (reuse_p ? TRUE : FALSE);
#endif //USE_REGIONS
}

// Heap-verification helper: when the HEAPVERIFY_GC bit is set in the configured
// verify level, scans the mark stack entries [0, mark_stack_tos) and raises
// FATAL_GC_ERROR if any pinned plug lies in [start, end).
// Compiles to an empty body unless VERIFY_HEAP is defined.
void gc_heap::verify_no_pins (uint8_t* start, uint8_t* end)
{
#ifdef VERIFY_HEAP
    if (!(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC))
        return;

    for (size_t entry_index = 0; entry_index != mark_stack_tos; entry_index++)
    {
        mark* entry = pinned_plug_of (entry_index);
        if ((pinned_plug (entry) >= start) && (pinned_plug (entry) < end))
        {
            // The first pin found in range is enough; report it and stop scanning.
            FATAL_GC_ERROR();
            return;
        }
    }
#endif //VERIFY_HEAP
}

void gc_heap::set_static_data()
{
    static_data* pause_mode_sdata = static_data_table[latency_level];
    for (int i = 0; i < total_generation_count; i++)
    {
        dynamic_data* dd = dynamic_data_of (i);
        static_data* sdata = &pause_mode_sdata[i];

        dd->sdata = sdata;
        dd->min_size = sdata->min_size;

        dprintf (GTC_LOG, ("PM: %d, gen%d:  min: %zd, max: %zd, fr_l: %zd, fr_b: %d%%",
            settings.pause_mode,i,
            dd->min_size, dd_max_size (dd),
            sdata->fragmentation_limit, (int)(sdata->fragmentation_burden_limit * 100)));
    }
}

// Initialize the values that are not const.
//
// Computes the gen0 min/max budget and the gen1 max budget and stores them in
// static_data_table for every latency level (rows [i][0] and [i][1]).
// The final gen0 max is also written back through GCConfig::SetGCGen0MaxBudget.
// With DYNAMIC_HEAP_COUNT and dynamic_adaptation_to_application_sizes, the gen0
// bounds are additionally copied into dynamic_heap_count_data.
void gc_heap::init_static_data()
{
    size_t gen0_min_size = get_gen0_min_size();

    size_t gen0_max_size = 0;

    size_t gen0_max_size_config = (size_t)GCConfig::GetGCGen0MaxBudget();

    // A non-zero configured gen0 max budget is taken as is (only aligned below).
    if (gen0_max_size_config)
    {
        gen0_max_size = gen0_max_size_config;

#ifdef FEATURE_EVENT_TRACE
        gen0_max_budget_from_config = gen0_max_size;
#endif //FEATURE_EVENT_TRACE
    }
    else
    {
        // Default: half the SOH segment size clamped to [6MB, 200MB]. Without
        // MULTIPLE_HEAPS, a build with BACKGROUND_GC uses a flat 6MB when
        // gc_can_use_concurrent is set.
        gen0_max_size =
#ifdef MULTIPLE_HEAPS
            max ((size_t)6 * 1024 * 1024, min (Align(soh_segment_size / 2), (size_t)200 * 1024 * 1024));
#else //MULTIPLE_HEAPS
            (
#ifdef BACKGROUND_GC
                gc_can_use_concurrent ?
                6 * 1024 * 1024 :
#endif //BACKGROUND_GC
                max ((size_t)6 * 1024 * 1024, min (Align(soh_segment_size / 2), (size_t)200 * 1024 * 1024))
                );
#endif //MULTIPLE_HEAPS

        // The max is never allowed to fall below the min.
        gen0_max_size = max (gen0_min_size, gen0_max_size);

        // With a hard limit, gen0 max is further capped at a quarter of the SOH segment size.
        if (heap_hard_limit)
        {
            size_t gen0_max_size_seg = soh_segment_size / 4;
            dprintf (GTC_LOG, ("limit gen0 max %zd->%zd", gen0_max_size, gen0_max_size_seg));
            gen0_max_size = min (gen0_max_size, gen0_max_size_seg);
        }
    }

    gen0_max_size = Align (gen0_max_size);
    // Keep min <= max (relevant when the configured or capped max is below the min).
    gen0_min_size = min (gen0_min_size, gen0_max_size);

    GCConfig::SetGCGen0MaxBudget (gen0_max_size);

    // TODO: gen0_max_size has a 200mb cap; gen1_max_size should also have a cap.
    // Default gen1 max: max (6MB, half the SOH segment size), with the same
    // concurrent-GC special case as gen0 when MULTIPLE_HEAPS is not defined.
    size_t gen1_max_size = (size_t)
#ifdef MULTIPLE_HEAPS
        max ((size_t)6*1024*1024, Align(soh_segment_size/2));
#else //MULTIPLE_HEAPS
        (
#ifdef BACKGROUND_GC
            gc_can_use_concurrent ?
            6*1024*1024 :
#endif //BACKGROUND_GC
            max ((size_t)6*1024*1024, Align(soh_segment_size/2))
        );
#endif //MULTIPLE_HEAPS

#ifndef HOST_64BIT
    // Only on non-64-bit hosts: with a hard limit, cap gen1 max at half the SOH segment size.
    if (heap_hard_limit)
    {
        size_t gen1_max_size_seg = soh_segment_size / 2;
        dprintf (GTC_LOG, ("limit gen1 max %zd->%zd", gen1_max_size, gen1_max_size_seg));
        gen1_max_size = min (gen1_max_size, gen1_max_size_seg);
    }
#endif //!HOST_64BIT

    size_t gen1_max_size_config = (size_t)GCConfig::GetGCGen1MaxBudget();

    // Unlike gen0, a configured gen1 max can only lower the computed value.
    if (gen1_max_size_config)
    {
        gen1_max_size = min (gen1_max_size, gen1_max_size_config);
    }

    gen1_max_size = Align (gen1_max_size);

    dprintf (GTC_LOG, ("gen0 min: %zd, max: %zd, gen1 max: %zd",
        gen0_min_size, gen0_max_size, gen1_max_size));

    // Publish the computed sizes to every latency level's table.
    for (int i = latency_level_first; i <= latency_level_last; i++)
    {
        static_data_table[i][0].min_size = gen0_min_size;
        static_data_table[i][0].max_size = gen0_max_size;
        static_data_table[i][1].max_size = gen1_max_size;
    }

#ifdef DYNAMIC_HEAP_COUNT
    if (gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
    {
        gc_heap::dynamic_heap_count_data.min_gen0_new_allocation = gen0_min_size;
        // The max is only overridden when it came from configuration.
        if (gen0_max_size_config)
        {
            gc_heap::dynamic_heap_count_data.max_gen0_new_allocation = gen0_max_size;
        }
    }
#endif //DYNAMIC_HEAP_COUNT
}

bool gc_heap::init_dynamic_data()
{
    uint64_t now_raw_ts = RawGetHighPrecisionTimeStamp ();
#ifdef HEAP_BALANCE_INSTRUMENTATION
    start_raw_ts = now_raw_ts;
#endif //HEAP_BALANCE_INSTRUMENTATION
    uint64_t now = (uint64_t)((double)now_raw_ts * qpf_us);

    set_static_data();

    if (heap_number == 0)
    {
        process_start_time = now;
        smoothed_desired_total[0] = dynamic_data_of (0)->min_size * n_heaps;
#ifdef DYNAMIC_HEAP_COUNT
        last_suspended_end_time = now;
#endif //DYNAMIC_HEAP_COUNT
#ifdef HEAP_BALANCE_INSTRUMENTATION
        last_gc_end_time_us = now;
        dprintf (HEAP_BALANCE_LOG, ("qpf=%zd, start: %zd(%d)", qpf, start_raw_ts, now));
#endif //HEAP_BALANCE_INSTRUMENTATION
    }

    for (int i = 0; i < total_generation_count; i++)
    {
        dynamic_data* dd = dynamic_data_of (i);
        dd->gc_clock = 0;
        dd->time_clock = now;
        dd->previous_time_clock = now;
        dd->current_size = 0;
        dd->promoted_size = 0;
        dd->collection_count = 0;
        dd->new_allocation = dd->min_size;
        dd->gc_new_allocation = dd->new_allocation;
        dd->desired_allocation = dd->new_allocation;
        dd->fragmentation = 0;
    }

    return true;
}

// Maps a survival rate `cst` to a growth factor.
// Below the threshold (max_limit - limit) / (limit * (max_limit - 1)) the result is
// (limit - limit*cst) / (1 - cst*limit); otherwise the result is max_limit.
float gc_heap::surv_to_growth (float cst, float limit, float max_limit)
{
    const float cst_threshold = ((max_limit - limit ) / (limit * (max_limit-1.0f)));

    // Written as a negated '<' so that a NaN comparison still selects max_limit.
    if (!(cst < cst_threshold))
    {
        return max_limit;
    }

    return ((limit - limit*cst) / (1.0f - (cst * limit)));
}


// If the allocation budget wasn't exhausted, the freshly computed budget may be
// off because the survival rate may not be representative (the collection
// happened too soon). In that case blend it linearly with the previous desired
// allocation; the weight of the previous value is (1 - allocation_fraction),
// decayed to zero over 5 minutes since the previous collection.
// Returns new_allocation unchanged unless 0 < allocation_fraction < 0.95.
static size_t linear_allocation_model (float allocation_fraction, size_t new_allocation,
                                       size_t previous_desired_allocation, float time_since_previous_collection_secs)
{
    const bool budget_partially_used_p = (allocation_fraction < 0.95) && (allocation_fraction > 0.0);
    if (!budget_partially_used_p)
    {
        return new_allocation;
    }

    const float decay_time = 5*60.0f; // previous desired allocation expires over 5 minutes

    float decay_factor;
    if (decay_time <= time_since_previous_collection_secs)
    {
        decay_factor = 0;
    }
    else
    {
        decay_factor = ((decay_time - time_since_previous_collection_secs) / decay_time);
    }

    float previous_allocation_factor = (1.0f - allocation_fraction) * decay_factor;
    dprintf (2, ("allocation fraction: %d, decay factor: %d, previous allocation factor: %d",
        (int)(allocation_fraction*100.0), (int)(decay_factor*100.0), (int)(previous_allocation_factor*100.0)));

    return (size_t)((1.0 - previous_allocation_factor)*new_allocation + previous_allocation_factor * previous_desired_allocation);
}

// Computes the new allocation budget for a generation.
//
// dd         - dynamic data of the generation.
// out        - size that survived; divided by dd_begin_data_size (dd) to get the
//              survival rate (cst).
// gen_number - generation the budget is computed for.
// pass       - only consulted for gen0: settings.gen0_reduction_count is updated
//              only when pass == 0.
//
// Returns the budget aligned with the generation's alignment constant. The same
// value is recorded in the per-heap GC history (gen_data[gen_number].new_allocation),
// and in the non-empty case the survival rate is stored in dd_surv (dd).
size_t gc_heap::desired_new_allocation (dynamic_data* dd,
                                        size_t out, int gen_number,
                                        int pass)
{
    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();

    // Nothing was in the generation at the start: just hand out the minimum budget.
    if (dd_begin_data_size (dd) == 0)
    {
        size_t new_allocation = dd_min_size (dd);
        current_gc_data_per_heap->gen_data[gen_number].new_allocation = new_allocation;
        return new_allocation;
    }
    else
    {
        float     cst;
        size_t    previous_desired_allocation = dd_desired_allocation (dd);
        size_t    current_size = dd_current_size (dd);
        float     max_limit = dd_max_limit (dd);
        float     limit = dd_limit (dd);
        size_t    min_gc_size = dd_min_size (dd);
        float     f = 0;
        size_t    max_size = dd_max_size (dd);
        size_t    new_allocation = 0;
        // The clock difference is scaled by 1e-6 to obtain seconds.
        float     time_since_previous_collection_secs = (dd_time_clock (dd) - dd_previous_time_clock (dd))*1e-6f;
        // Fraction of the previous budget that was consumed before this GC.
        float allocation_fraction = (float) (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)) / (float) (dd_desired_allocation (dd));

        // max_generation and the generations numbered above it: the budget is
        // derived from the growth of the generation's size.
        if (gen_number >= max_generation)
        {
            size_t    new_size = 0;

            // Survival rate, capped at 1.
            cst = min (1.0f, float (out) / float (dd_begin_data_size (dd)));

            f = surv_to_growth (cst, limit, max_limit);
            if (conserve_mem_setting != 0)
            {
                // if this is set, compute a growth factor based on it.
                // example: a setting of 6 means we have a goal of 60% live data
                // this means we allow 40% fragmentation
                // to keep heap size stable, we only use half of that (20%) for new allocation
                // f is (live data + new allocation)/(live data), so would be (60% + 20%) / 60% or 1.33
                float f_conserve = ((10.0f / conserve_mem_setting) - 1) * 0.5f + 1.0f;

                // use the smaller one
                f = min (f, f_conserve);
            }

            // Target size: f * current_size clamped to [min_gc_size, max_size]; the
            // first branch avoids the multiplication when the result would reach max_size anyway.
            size_t max_growth_size = (size_t)(max_size / f);
            if (current_size >= max_growth_size)
            {
                new_size = max_size;
            }
            else
            {
                new_size = (size_t) min (max ( (size_t)(f * current_size), min_gc_size), max_size);
            }

            assert ((new_size >= current_size) || (new_size == max_size));

            if (gen_number == max_generation)
            {
                new_allocation  =  max((new_size - current_size), min_gc_size);

                new_allocation = linear_allocation_model (allocation_fraction, new_allocation,
                                                          dd_desired_allocation (dd), time_since_previous_collection_secs);

                // Scale the budget down when fragmentation exceeds the growth allowance
                // (f-1)*current_size; skipped when conserve_mem_setting is in effect
                // (and, with BGC_SERVO_TUNING, when fl tuning has been triggered).
                if (
#ifdef BGC_SERVO_TUNING
                    !bgc_tuning::fl_tuning_triggered &&
#endif //BGC_SERVO_TUNING
                    (conserve_mem_setting == 0) &&
                    (dd_fragmentation (dd) > ((size_t)((f-1)*current_size))))
                {
                    //reducing allocation in case of fragmentation
                    size_t new_allocation1 = max (min_gc_size,
                                                  // CAN OVERFLOW
                                                  (size_t)((float)new_allocation * current_size /
                                                           ((float)current_size + 2*dd_fragmentation (dd))));
                    dprintf (2, ("Reducing max_gen allocation due to fragmentation from %zd to %zd",
                                 new_allocation, new_allocation1));
                    new_allocation = new_allocation1;
                }
            }
            else // not a SOH generation
            {
                uint32_t memory_load = 0;
                uint64_t available_physical = 0;
                get_memory_info (&memory_load, &available_physical);
#ifdef TRACE_GC
                if (heap_hard_limit)
                {
                    size_t allocated = 0;
                    size_t committed = uoh_committed_size (gen_number, &allocated);
                    dprintf (2, ("GC#%zd h%d, GMI: UOH budget, UOH commit %zd (obj %zd, frag %zd), total commit: %zd (recorded: %zd)",
                        (size_t)settings.gc_index, heap_number,
                        committed, allocated,
                        dd_fragmentation (dynamic_data_of (gen_number)),
                        get_total_committed_size(), (current_total_committed - current_total_committed_bookkeeping)));
                }
#endif //TRACE_GC
                if (heap_number == 0)
                    settings.exit_memory_load = memory_load;
                // Leave 1MB of the available physical memory out of the estimate.
                if (available_physical > 1024*1024)
                    available_physical -= 1024*1024;

                // Free list space of this generation counts as available too; the sum is
                // clamped so that it fits in a size_t.
                uint64_t available_free = available_physical + (uint64_t)generation_free_list_space (generation_of (gen_number));
                if (available_free > (uint64_t)MAX_PTR)
                {
                    available_free = (uint64_t)MAX_PTR;
                }

                //try to avoid OOM during large object allocation
                new_allocation = max (min(max((new_size - current_size), dd_desired_allocation (dynamic_data_of (max_generation))),
                                          (size_t)available_free),
                                      max ((current_size/4), min_gc_size));

                new_allocation = linear_allocation_model (allocation_fraction, new_allocation,
                                                          dd_desired_allocation (dd), time_since_previous_collection_secs);

            }
        }
        else
        {
            // Generations below max_generation: the budget is f times the survivors,
            // clamped to [min_gc_size, max_size].
            size_t survivors = out;
            cst = float (survivors) / float (dd_begin_data_size (dd));
            f = surv_to_growth (cst, limit, max_limit);
            new_allocation = (size_t) min (max ((size_t)(f * (survivors)), min_gc_size), max_size);

            new_allocation = linear_allocation_model (allocation_fraction, new_allocation,
                                                      dd_desired_allocation (dd), time_since_previous_collection_secs);

#ifdef DYNAMIC_HEAP_COUNT
            if (dynamic_adaptation_mode != dynamic_adaptation_to_application_sizes)
#endif //DYNAMIC_HEAP_COUNT
            {
                if (gen_number == 0)
                {
                    // gen0_reduction_count is re-armed to 2 whenever gen0's free list space
                    // exceeds min_gc_size and otherwise counts down; it is only touched on pass 0.
                    if (pass == 0)
                    {
                        size_t free_space = generation_free_list_space (generation_of (gen_number));
                        // DTREVIEW - is min_gc_size really a good choice?
                        // on 64-bit this will almost always be true.
                        dprintf (GTC_LOG, ("frag: %zd, min: %zd", free_space, min_gc_size));
                        if (free_space > min_gc_size)
                        {
                            settings.gen0_reduction_count = 2;
                        }
                        else
                        {
                            if (settings.gen0_reduction_count > 0)
                                settings.gen0_reduction_count--;
                        }
                    }
                    // While the count is positive, cap the gen0 budget at max (min_gc_size, max_size/3).
                    if (settings.gen0_reduction_count > 0)
                    {
                        dprintf (2, ("Reducing new allocation based on fragmentation"));
                        new_allocation = min (new_allocation,
                                              max (min_gc_size, (max_size/3)));
                    }
                }
            }
        }

        // The alignment constant is selected by whether gen_number <= max_generation.
        size_t new_allocation_ret = Align (new_allocation, get_alignment_constant (gen_number <= max_generation));
        int gen_data_index = gen_number;
        gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_data_index]);
        gen_data->new_allocation = new_allocation_ret;

        dd_surv (dd) = cst;

        dprintf (2, (ThreadStressLog::gcDesiredNewAllocationMsg(),
                    heap_number, gen_number, out, current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)),
                    (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));

        return new_allocation_ret;
    }
}

// REGIONS TODO: this can be merged with generation_size.
// Returns the planned size of generation gen_number (including free list
// elements), computed from the plan_allocated / plan_allocation_start values.
size_t gc_heap::generation_plan_size (int gen_number)
{
#ifdef USE_REGIONS
    // Walk the segment list from the generation's start segment and add up the
    // planned extent of each entry.
    size_t total = 0;
    for (heap_segment* region = heap_segment_rw (generation_start_segment (generation_of (gen_number)));
         region;
         region = heap_segment_next (region))
    {
        uint8_t* planned_end = heap_segment_plan_allocated (region);
        total += planned_end - heap_segment_mem (region);
        dprintf (REGIONS_LOG, ("h%d size + %zd (%p - %p) -> %zd",
            heap_number, (planned_end - heap_segment_mem (region)),
            heap_segment_mem (region), planned_end, total));
    }
    return total;
#else //USE_REGIONS
    if (gen_number == 0)
    {
        // gen0 extends from its plan start to the planned end of the ephemeral
        // segment, but is never reported smaller than Align (min_obj_size).
        ptrdiff_t planned_span = heap_segment_plan_allocated (ephemeral_heap_segment) -
                                 generation_plan_allocation_start (generation_of (gen_number));
        return (size_t)max (planned_span, (ptrdiff_t)Align (min_obj_size));
    }

    heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (gen_number)));
    if (seg == ephemeral_heap_segment)
    {
        // The generation starts on the ephemeral segment: it ends where the next
        // younger generation is planned to start.
        return (generation_plan_allocation_start (generation_of (gen_number - 1)) -
                generation_plan_allocation_start (generation_of (gen_number)));
    }

    _ASSERTE(seg != NULL);

    size_t planned_bytes = 0;
    for (; seg; seg = heap_segment_next_rw (seg))
    {
        if (seg == ephemeral_heap_segment)
        {
            // On the ephemeral segment only the part below the next younger
            // generation's plan start belongs to this generation.
            planned_bytes += (generation_plan_allocation_start (generation_of (gen_number - 1)) -
                              heap_segment_mem (ephemeral_heap_segment));
            break;
        }

        planned_bytes += heap_segment_plan_allocated (seg) -
                         heap_segment_mem (seg);
    }
    return planned_bytes;
#endif //USE_REGIONS
}

// Returns the current size of generation gen_number (including free list
// elements), computed from the allocated / allocation_start values.
size_t gc_heap::generation_size (int gen_number)
{
#ifdef USE_REGIONS
    // Walk the segment list from the generation's start segment and add up the
    // allocated extent of each entry.
    size_t total = 0;
    heap_segment* region = heap_segment_rw (generation_start_segment (generation_of (gen_number)));
    for (; region; region = heap_segment_next (region))
    {
        uint8_t* allocated_end = heap_segment_allocated (region);
        total += allocated_end - heap_segment_mem (region);
        dprintf (2, ("h%d size + %zd (%p - %p) -> %zd",
            heap_number, (allocated_end - heap_segment_mem (region)),
            heap_segment_mem (region), allocated_end, total));
    }
    return total;
#else //USE_REGIONS
    if (gen_number == 0)
    {
        // gen0 extends from its allocation start to the allocated end of the
        // ephemeral segment, but is never reported smaller than Align (min_obj_size).
        ptrdiff_t allocated_span = heap_segment_allocated (ephemeral_heap_segment) -
                                   generation_allocation_start (generation_of (gen_number));
        return (size_t)max (allocated_span, (ptrdiff_t)Align (min_obj_size));
    }

    heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (gen_number)));
    if (seg == ephemeral_heap_segment)
    {
        // The generation starts on the ephemeral segment: it ends where the next
        // younger generation starts.
        return (generation_allocation_start (generation_of (gen_number - 1)) -
                generation_allocation_start (generation_of (gen_number)));
    }

    _ASSERTE(seg != NULL);

    size_t allocated_bytes = 0;
    for (; seg; seg = heap_segment_next_rw (seg))
    {
        if (seg == ephemeral_heap_segment)
        {
            // On the ephemeral segment only the part below the next younger
            // generation's start belongs to this generation.
            allocated_bytes += (generation_allocation_start (generation_of (gen_number - 1)) -
                                heap_segment_mem (ephemeral_heap_segment));
            break;
        }

        allocated_bytes += heap_segment_allocated (seg) -
                           heap_segment_mem (seg);
    }

    return allocated_bytes;
#endif //USE_REGIONS
}

// Charges the bytes that came into generation gen_number (must not be gen0) against
// that generation's budget, records the amount in the per-heap GC history and clears
// the generation's allocation_size counter.
// Returns the number of bytes charged.
size_t  gc_heap::compute_in (int gen_number)
{
    assert (gen_number != 0);

    generation* target_gen = generation_of (gen_number);
    dynamic_data* target_dd = dynamic_data_of (gen_number);

    size_t incoming = generation_allocation_size (target_gen);

#ifndef USE_REGIONS
    if ((gen_number == max_generation) && ephemeral_promotion)
    {
        // With ephemeral promotion the incoming amount is the survived size of every
        // generation from 0 through max_generation; the survivors of the generations
        // below max_generation are additionally added to condemned_allocated.
        incoming = 0;
        for (int younger = 0; younger < max_generation; younger++)
        {
            size_t survived = dd_survived_size (dynamic_data_of (younger));
            incoming += survived;
            generation_condemned_allocated (target_gen) += survived;
        }
        incoming += dd_survived_size (dynamic_data_of (max_generation));
    }
#endif //!USE_REGIONS

    // consume the budget and publish the reduced value as the live budget
    dd_gc_new_allocation (target_dd) -= incoming;
    dd_new_allocation (target_dd) = dd_gc_new_allocation (target_dd);

    gc_generation_data* history = &(get_gc_data_per_heap()->gen_data[gen_number]);
    history->in = incoming;

    generation_allocation_size (target_gen) = 0;
    return incoming;
}

#ifdef HOST_64BIT
// Caps the combined gen0 budget (total_new_allocation) according to the memory load.
//
// memory_load          - current memory load
// total_new_allocation - the gen0 budget to cap
// total_min_allocation - lower bound for the cap once the load limit has been reached
//
// Returns total_new_allocation, reduced to the cap when it exceeds it.
inline
size_t gc_heap::trim_youngest_desired (uint32_t memory_load,
                                       size_t total_new_allocation,
                                       size_t total_min_allocation)
{
    size_t allocation_cap;

    if (memory_load >= MAX_ALLOWED_MEM_LOAD)
    {
        // At or above the load limit: allow one percent of memory, but never less
        // than the minimum budget.
        allocation_cap = max ((size_t)mem_one_percent, total_min_allocation);
    }
    else
    {
        // Below the load limit: the budget may use whatever remains until the sum of
        // memory load and gen0 budget reaches the max memory load limit.
        allocation_cap = (MAX_ALLOWED_MEM_LOAD - memory_load) * mem_one_percent;
    }

    return min (total_new_allocation, allocation_cap);
}

// Given the per-heap gen0 budget new_allocation, decides whether it has to be trimmed
// because of memory load and returns the (possibly reduced) per-heap budget.
// Whenever the result is smaller than the input, settings.gen0_reduction_count is set to 2.
size_t gc_heap::joined_youngest_desired (size_t new_allocation)
{
    dprintf (2, ("Entry memory load: %d; gen0 new_alloc: %zd", settings.entry_memory_load, new_allocation));

    size_t trimmed_allocation = new_allocation;

    // budgets at or below the minimum are never trimmed
    if (new_allocation > MIN_YOUNGEST_GEN_DESIRED)
    {
#ifdef MULTIPLE_HEAPS
        uint32_t heap_count = gc_heap::n_heaps;
#else //MULTIPLE_HEAPS
        uint32_t heap_count = 1;
#endif //MULTIPLE_HEAPS

        size_t all_heaps_new = new_allocation * heap_count;
        size_t all_heaps_min = MIN_YOUNGEST_GEN_DESIRED * heap_count;

        bool entry_load_high_p = (settings.entry_memory_load >= MAX_ALLOWED_MEM_LOAD);
        bool over_threshold_p = (all_heaps_new > max (youngest_gen_desired_th, all_heaps_min));

        if (entry_load_high_p || over_threshold_p)
        {
            // re-read the memory load and remember it as the exit load of this GC
            uint32_t memory_load = 0;
            get_memory_info (&memory_load);
            settings.exit_memory_load = memory_load;
            dprintf (2, ("Current memory load: %d", memory_load));

            size_t all_heaps_trimmed =
                trim_youngest_desired (memory_load, all_heaps_new, all_heaps_min);

#ifdef MULTIPLE_HEAPS
            dynamic_data* gen0_dd = g_heaps[0]->dynamic_data_of (0);
#else //MULTIPLE_HEAPS
            dynamic_data* gen0_dd = dynamic_data_of (0);
#endif //MULTIPLE_HEAPS
            size_t per_heap_cap = dd_max_size (gen0_dd);

            // split the total evenly between the heaps, align it, and keep it within
            // gen0's max size
            size_t per_heap_share = Align ((all_heaps_trimmed / heap_count), get_alignment_constant (TRUE));
            trimmed_allocation = min (per_heap_share, per_heap_cap);
        }
    }

    if (trimmed_allocation < new_allocation)
    {
        settings.gen0_reduction_count = 2;
    }

    return trimmed_allocation;
}
#endif // HOST_64BIT

// Returns the global GC history record for the GC in progress: the background GC
// record when settings.concurrent is set (BACKGROUND_GC builds only), otherwise the
// regular one.
inline
gc_history_global* gc_heap::get_gc_data_global()
{
#ifdef BACKGROUND_GC
    if (settings.concurrent)
    {
        return &bgc_data_global;
    }
#endif //BACKGROUND_GC
    return &gc_data_global;
}

// Returns this heap's GC history record for the GC in progress: the background GC
// record when settings.concurrent is set (BACKGROUND_GC builds only), otherwise the
// regular one.
inline
gc_history_per_heap* gc_heap::get_gc_data_per_heap()
{
#ifdef BACKGROUND_GC
    if (settings.concurrent)
    {
        return &bgc_data_per_heap;
    }
#endif //BACKGROUND_GC
    return &gc_data_per_heap;
}

// Recomputes the dynamic data of generation gen_number (0..max_generation): current size,
// fragmentation, promoted size and the allocation budget (desired / gc_new / new allocation),
// and records the matching "after" numbers in the per-heap GC history.
// When gen_number is max_generation, the generations above it (up to
// total_generation_count) are refreshed as well.
void gc_heap::compute_new_dynamic_data (int gen_number)
{
    _ASSERTE(gen_number >= 0);
    _ASSERTE(gen_number <= max_generation);

    dynamic_data* dd = dynamic_data_of (gen_number);
    generation*   gen = generation_of (gen_number);
    // gen0 has no incoming bytes; for the other generations compute_in also charges
    // them against the budget and records them in the GC history.
    size_t        in = (gen_number==0) ? 0 : compute_in (gen_number);

    size_t total_gen_size = generation_size (gen_number);
    //keep track of fragmentation
    dd_fragmentation (dd) = generation_free_list_space (gen) + generation_free_obj_space (gen);

    // We need to reset the condemned alloc for the condemned generation because it will participate in the free list efficiency
    // calculation. And if a generation is condemned, it means all the allocations into this generation during that GC will be
    // condemned and it wouldn't make sense to use this value to calculate the FL efficiency since at this point the FL hasn't
    // been built.
    generation_condemned_allocated (gen) = 0;

    if (settings.concurrent)
    {
        // For BGC we could have non zero values due to gen1 FGCs. We reset all 3 allocs to start anew.
        generation_free_list_allocated (gen) = 0;
        generation_end_seg_allocated (gen) = 0;
    }
    else
    {
        assert (generation_free_list_allocated (gen) == 0);
        assert (generation_end_seg_allocated (gen) == 0);
    }

    // make sure the subtraction below doesn't overflow
    if (dd_fragmentation (dd) <= total_gen_size)
        dd_current_size (dd) = total_gen_size - dd_fragmentation (dd);
    else
        dd_current_size (dd) = 0;

    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();

    // what survived this GC in this generation is what gets promoted out of it
    size_t out = dd_survived_size (dd);

    gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_number]);
    gen_data->size_after = total_gen_size;
    gen_data->free_list_space_after = generation_free_list_space (gen);
    gen_data->free_obj_space_after = generation_free_obj_space (gen);

    if ((settings.pause_mode == pause_low_latency) && (gen_number <= 1))
    {
        // When we are in the low latency mode, we can still be
        // condemning more than gen1's 'cause of induced GCs.
        dd_desired_allocation (dd) = low_latency_alloc;
        dd_gc_new_allocation (dd) = dd_desired_allocation (dd);
        dd_new_allocation (dd) = dd_gc_new_allocation (dd);
    }
    else
    {
        if (gen_number == 0)
        {
            //compensate for dead finalizable objects promotion.
            //they shouldn't be counted for growth.
            size_t final_promoted = 0;
            final_promoted = min (finalization_promoted_bytes, out);
            // Prefast: this is clear from above but prefast needs to be told explicitly
            _ASSERTE(final_promoted <= out);

            dprintf (2, ("gen: %d final promoted: %zd", gen_number, final_promoted));
            dd_freach_previous_promotion (dd) = final_promoted;
            // lower bound: budget computed without the bytes promoted due to finalization
            size_t lower_bound = desired_new_allocation  (dd, out-final_promoted, gen_number, 0);

            if (settings.condemned_generation == 0)
            {
                //there is no noise.
                dd_desired_allocation (dd) = lower_bound;
            }
            else
            {
                // higher bound: budget computed from everything that survived
                size_t higher_bound = desired_new_allocation (dd, out, gen_number, 1);

                // <TODO>This assert was causing AppDomains\unload\test1n\test1nrun.bat to fail</TODO>
                //assert ( lower_bound <= higher_bound);

                //discount the noise. Change the desired allocation
                //only if the previous value is outside of the range.
                if (dd_desired_allocation (dd) < lower_bound)
                {
                    dd_desired_allocation (dd) = lower_bound;
                }
                else if (dd_desired_allocation (dd) > higher_bound)
                {
                    dd_desired_allocation (dd) = higher_bound;
                }
#if defined (HOST_64BIT) && !defined (MULTIPLE_HEAPS)
                // single heap, 64-bit: additionally trim the budget based on memory load
                dd_desired_allocation (dd) = joined_youngest_desired (dd_desired_allocation (dd));
#endif // HOST_64BIT && !MULTIPLE_HEAPS
                // applies a further cap when g_low_memory_status is set
                trim_youngest_desired_low_memory();
                dprintf (2, ("final gen0 new_alloc: %zd", dd_desired_allocation (dd)));
            }
        }
        else
        {
            dd_desired_allocation (dd) = desired_new_allocation (dd, out, gen_number, 0);
        }
        dd_gc_new_allocation (dd) = dd_desired_allocation (dd);

#ifdef USE_REGIONS
        // we may have had some incoming objects during this GC -
        // adjust the consumed budget for these
        dd_new_allocation (dd) = dd_gc_new_allocation (dd) - in;
#else //USE_REGIONS
        // for segments, we want to keep the .NET 6.0 behavior where we did not adjust
        dd_new_allocation (dd) = dd_gc_new_allocation (dd);
#endif //USE_REGIONS
    }

    gen_data->pinned_surv = dd_pinned_survived_size (dd);
    gen_data->npinned_surv = dd_survived_size (dd) - dd_pinned_survived_size (dd);

    dd_promoted_size (dd) = out;
    if (gen_number == max_generation)
    {
        // Also refresh the generations above max_generation. Here everything that is
        // not fragmentation is treated as survived, and the budget is derived from that.
        for (int i = (gen_number + 1); i < total_generation_count; i++)
        {
            dd = dynamic_data_of (i);
            total_gen_size = generation_size (i);
            // NOTE(review): this declaration shadows the function-level 'gen'.
            generation* gen = generation_of (i);
            dd_fragmentation (dd) = generation_free_list_space (gen) +
                generation_free_obj_space (gen);
            // NOTE(review): unlike the subtraction for gen_number above, this one is not
            // guarded against fragmentation exceeding total_gen_size - confirm that
            // cannot happen for these generations.
            dd_current_size (dd) = total_gen_size - dd_fragmentation (dd);
            dd_survived_size (dd) = dd_current_size (dd);
            // 'in' is not read again after this assignment within this function
            in = 0;
            out = dd_current_size (dd);
            dd_desired_allocation (dd) = desired_new_allocation (dd, out, i, 0);
            dd_gc_new_allocation (dd) = Align (dd_desired_allocation (dd),
                get_alignment_constant (FALSE));
            dd_new_allocation (dd) = dd_gc_new_allocation (dd);

            gen_data = &(current_gc_data_per_heap->gen_data[i]);
            gen_data->size_after = total_gen_size;
            gen_data->free_list_space_after = generation_free_list_space (gen);
            gen_data->free_obj_space_after = generation_free_obj_space (gen);
            gen_data->npinned_surv = out;
#ifdef BACKGROUND_GC
            end_uoh_size[i - uoh_start_generation] = total_gen_size;
#endif //BACKGROUND_GC
            dd_promoted_size (dd) = out;
        }
    }
}

void gc_heap::trim_youngest_desired_low_memory()
{
    if (g_low_memory_status)
    {
        size_t committed_mem = committed_size();
        dynamic_data* dd = dynamic_data_of (0);
        size_t current = dd_desired_allocation (dd);
        size_t candidate = max (Align ((committed_mem / 10), get_alignment_constant(FALSE)), dd_min_size (dd));

        dd_desired_allocation (dd) = min (current, candidate);
    }
}

// Estimates by how many bytes generation gen_number needs to grow: its remaining
// budget minus the space it is assumed to be able to reuse. A negative result means
// the generation has more space than its budget calls for.
ptrdiff_t gc_heap::estimate_gen_growth (int gen_number)
{
    generation *gen = generation_of (gen_number);
    ptrdiff_t remaining_budget = dd_new_allocation (dynamic_data_of (gen_number));
    ptrdiff_t free_list_bytes = generation_free_list_space (gen);

#ifdef USE_REGIONS
    // in the case of regions, we assume all the space up to reserved gets used before
    // we get a new region for this gen
    ptrdiff_t allocated_bytes = 0;
    ptrdiff_t reserved_unused_bytes = 0;

    heap_segment* region = generation_start_segment_rw (gen);
    while (region != nullptr)
    {
        allocated_bytes += heap_segment_allocated (region) - heap_segment_mem (region);
        reserved_unused_bytes += heap_segment_reserved (region) - heap_segment_allocated (region);
        region = heap_segment_next (region);
    }

    // fraction of the allocated space that sits on the free list
    double free_fraction = 0.0;
    if (allocated_bytes != 0)
    {
        free_fraction = (double)(free_list_bytes) / (double)allocated_bytes;
    }

    // The usable part of the free list is assumed to shrink with that fraction:
    // if 90% of the allocated space is free, 90% of those 90% count as usable;
    // if 10% is free, only 10% of those 10% do.
    ptrdiff_t usable_free_bytes = (ptrdiff_t)(free_fraction * free_list_bytes);

    ptrdiff_t growth = remaining_budget - usable_free_bytes - reserved_unused_bytes;

    dprintf (REGIONS_LOG, ("h%2d gen %d budget %zd allocated: %zd, FL: %zd, reserved_not_in_use %zd budget_gen %zd",
        heap_number, gen_number, remaining_budget, allocated_bytes, free_list_bytes, reserved_unused_bytes, growth));

#else  //USE_REGIONS
    // without regions, simply assume half of the free list space gets used
    ptrdiff_t growth = remaining_budget - (free_list_bytes / 2);
    dprintf (REGIONS_LOG, ("budget for gen %d on heap %d is %zd (new %zd, free %zd)",
        gen_number, heap_number, growth, remaining_budget, free_list_bytes));
#endif //USE_REGIONS

    return growth;
}

#if !defined(USE_REGIONS) || defined(MULTIPLE_HEAPS)
// Smooths the decommit target of a segment/region between GCs so that the target is
// lowered gradually rather than in one jump.
//
// previous_decommit_target - the target that was in effect before this computation
// new_decommit_target      - the freshly computed target
// seg                      - the segment/region both targets refer to
//
// Returns new_decommit_target if it is not below the previous target; otherwise a point
// 2/3 of the way from the new target back up towards the previous one. Under
// STRESS_DECOMMIT the result is replaced by a random address derived from the segment's
// mem and reserved bounds.
// Under MULTIPLE_HEAPS, gradual_decommit_in_progress_p is set when the returned target is
// below what is currently committed in seg.
uint8_t* gc_heap::get_smoothed_decommit_target (uint8_t* previous_decommit_target, uint8_t* new_decommit_target, heap_segment* seg)
{
    uint8_t* decommit_target = new_decommit_target;
    if (decommit_target < previous_decommit_target)
    {
        // we used to have a higher target - do exponential smoothing by computing
        // essentially decommit_target = 1/3*decommit_target + 2/3*previous_decommit_target
        // computation below is slightly different to avoid overflow
        ptrdiff_t target_decrease = previous_decommit_target - decommit_target;
        decommit_target += target_decrease * 2 / 3;
    }

#ifdef STRESS_DECOMMIT
    // our decommit logic should work for a random decommit target within tail_region - make sure it does
    decommit_target = heap_segment_mem (seg) + gc_rand::get_rand (heap_segment_reserved (seg) - heap_segment_mem (seg));
#endif //STRESS_DECOMMIT

#ifdef MULTIPLE_HEAPS
    if (decommit_target < heap_segment_committed (seg))
    {
        gradual_decommit_in_progress_p = TRUE;
    }
#endif //MULTIPLE_HEAPS

    int gen_num =
#ifdef USE_REGIONS
        seg->gen_num;
#else
        0;
#endif
    // Log the target computed here. The segment's stored decommit target is only updated
    // by the caller after this function returns, so reading it back at this point would
    // report a stale value.
    dprintf (3, ("h%2d gen %d allocated: %zdkb committed: %zdkb target: %zdkb",
        heap_number,
        gen_num,
        ((heap_segment_allocated (seg) - heap_segment_mem (seg)) / 1024),
        ((heap_segment_committed (seg) - heap_segment_mem (seg)) / 1024),
        (decommit_target - heap_segment_mem (seg)) / 1024));

    return decommit_target;
}

// Decides how much committed-but-unused ephemeral space to keep and sets the decommit
// target accordingly. Does nothing for concurrent GCs, when large pages are in use, or
// while in a no-GC region.
//
// For regions this really just sets the decommit target for ephemeral tail regions so this should really be done in
// distribute_free_regions where we are calling estimate_gen_growth.
void gc_heap::decommit_ephemeral_segment_pages()
{
    if (settings.concurrent || use_large_pages_p || (settings.pause_mode == pause_no_gc))
    {
        return;
    }

#if defined(MULTIPLE_HEAPS) && defined(USE_REGIONS)
    // Regions + multiple heaps: only compute a (smoothed) decommit target for the tail
    // region of gen0 and gen1; nothing is decommitted in this function.
    for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++)
    {
        generation *gen = generation_of (gen_number);
        heap_segment* tail_region = generation_tail_region (gen);
        // remember the old target before it gets reset below - it is needed for smoothing
        uint8_t* previous_decommit_target = heap_segment_decommit_target (tail_region);

        // reset the decommit targets to make sure we don't decommit inadvertently
        for (heap_segment* region = generation_start_segment_rw (gen); region != nullptr; region = heap_segment_next (region))
        {
            heap_segment_decommit_target (region) = heap_segment_reserved (region);
        }

        // estimated growth plus loh_size_threshold of headroom
        ptrdiff_t budget_gen = estimate_gen_growth (gen_number) + loh_size_threshold;

        if (budget_gen >= 0)
        {
            // we need more than the regions we have - nothing to decommit
            continue;
        }

        // we may have too much committed - let's see if we can decommit in the tail region
        ptrdiff_t tail_region_size = heap_segment_reserved (tail_region) - heap_segment_mem (tail_region);
        // the surplus (-budget_gen) is limited to the size of the tail region
        ptrdiff_t unneeded_tail_size = min (-budget_gen, tail_region_size);
        uint8_t *decommit_target = heap_segment_reserved (tail_region) - unneeded_tail_size;
        // never set the target below what is allocated in the tail region
        decommit_target = max (decommit_target, heap_segment_allocated (tail_region));

        heap_segment_decommit_target (tail_region) = get_smoothed_decommit_target (previous_decommit_target, decommit_target, tail_region);
    }
#elif !defined(USE_REGIONS)
    // Segments: work on the ephemeral segment.
    dynamic_data* dd0 = dynamic_data_of (0);

    // space we expect to need: the gen0 budget, gen1's estimated growth (if positive)
    // and loh_size_threshold of headroom
    ptrdiff_t desired_allocation = dd_new_allocation (dd0) +
                                   max (estimate_gen_growth (soh_gen1), (ptrdiff_t)0) +
                                   loh_size_threshold;

    size_t slack_space =
#ifdef HOST_64BIT
                max(min(min(soh_segment_size/32, dd_max_size (dd0)), (generation_size (max_generation) / 10)), (size_t)desired_allocation);
#else
                desired_allocation;
#endif // HOST_64BIT

    // keep slack_space committed beyond what is currently allocated
    uint8_t* decommit_target = heap_segment_allocated (ephemeral_heap_segment) + slack_space;
    uint8_t* previous_decommit_target = heap_segment_decommit_target (ephemeral_heap_segment);
    heap_segment_decommit_target (ephemeral_heap_segment) = get_smoothed_decommit_target (previous_decommit_target, decommit_target, ephemeral_heap_segment);

#if defined(MULTIPLE_HEAPS) && defined(_DEBUG)
    // these are only for checking against logic errors
    ephemeral_heap_segment->saved_committed = heap_segment_committed (ephemeral_heap_segment);
    ephemeral_heap_segment->saved_desired_allocation = dd_desired_allocation (dd0);
#endif //MULTIPLE_HEAPS && _DEBUG

#ifndef MULTIPLE_HEAPS
    // Single heap: decommit right here, rate limited by the time elapsed since the
    // previous call.
    // we want to limit the amount of decommit we do per time to indirectly
    // limit the amount of time spent in recommit and page faults
    size_t ephemeral_elapsed = (size_t)((dd_time_clock (dd0) - gc_last_ephemeral_decommit_time) / 1000);
    gc_last_ephemeral_decommit_time = dd_time_clock (dd0);

    // this is the amount we were planning to decommit
    // NOTE(review): this uses the unsmoothed local decommit_target, not the smoothed value
    // stored in the segment above - confirm that is intended.
    ptrdiff_t decommit_size = heap_segment_committed (ephemeral_heap_segment) - decommit_target;

    // we do a max of DECOMMIT_SIZE_PER_MILLISECOND per millisecond of elapsed time since the last GC
    // we limit the elapsed time to 10 seconds to avoid spending too much time decommitting
    ptrdiff_t max_decommit_size = min (ephemeral_elapsed, (size_t)(10*1000)) * DECOMMIT_SIZE_PER_MILLISECOND;
    decommit_size = min (decommit_size, max_decommit_size);

    // translate the amount to decommit back into the slack to leave committed
    slack_space = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment) - decommit_size;
    decommit_heap_segment_pages (ephemeral_heap_segment, slack_space);
#endif // !MULTIPLE_HEAPS

    // record how much is committed beyond allocated on the ephemeral segment
    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
    current_gc_data_per_heap->extra_gen0_committed = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment);
#endif //MULTIPLE_HEAPS && USE_REGIONS
}
#endif //!USE_REGIONS || MULTIPLE_HEAPS

#if defined(MULTIPLE_HEAPS) || defined(USE_REGIONS)
// Performs one step of gradual decommit.
// step_milliseconds scales the amount that may be decommitted from the global
// regions-to-decommit lists during this step (USE_REGIONS only).
// Returns true if anything was actually decommitted.
bool gc_heap::decommit_step (uint64_t step_milliseconds)
{
    // don't decommit at all if we have entered a no gc region
    if (settings.pause_mode == pause_no_gc)
    {
        return false;
    }

    size_t total_decommitted = 0;

#ifdef USE_REGIONS
    const size_t step_limit = DECOMMIT_SIZE_PER_MILLISECOND * step_milliseconds;
    for (int kind = basic_free_region; kind < count_free_region_kinds; kind++)
    {
        dprintf (REGIONS_LOG, ("decommit_step %d, regions_to_decommit = %zd",
            kind, global_regions_to_decommit[kind].get_num_free_regions()));

        // drain this list region by region; stop the whole step as soon as the
        // limit for this step has been reached
        while (global_regions_to_decommit[kind].get_num_free_regions() > 0)
        {
            heap_segment* region = global_regions_to_decommit[kind].unlink_region_front();
            total_decommitted += decommit_region (region, recorded_committed_free_bucket, -1);
            if (total_decommitted >= step_limit)
            {
                return true;
            }
        }
    }

    // with large pages there is nothing further to do beyond the region lists
    if (use_large_pages_p)
    {
        return (total_decommitted != 0);
    }
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
    // should never get here for large pages because decommit_ephemeral_segment_pages
    // will not do anything if use_large_pages_p is true
    assert(!use_large_pages_p);

    // let every heap take one decommit step on its ephemeral space
    for (int heap_index = 0; heap_index < n_heaps; heap_index++)
    {
        total_decommitted += gc_heap::g_heaps[heap_index]->decommit_ephemeral_segment_pages_step ();
    }
#endif //MULTIPLE_HEAPS

    return (total_decommitted != 0);
}
#endif //MULTIPLE_HEAPS || USE_REGIONS

#ifdef USE_REGIONS
// Decommits the memory of a region and deletes the region from the global region
// allocator.
//
// region   - the region to decommit
// bucket   - passed through to virtual_decommit
// h_number - passed through to virtual_decommit
//
// Returns the size of the range handed to virtual_decommit (page-aligned region start up
// to the region's committed end), whether or not the decommit succeeded.
size_t gc_heap::decommit_region (heap_segment* region, int bucket, int h_number)
{
    FIRE_EVENT(GCFreeSegment_V1, heap_segment_mem (region));

    uint8_t* range_start = align_lower_page (get_region_start (region));
    uint8_t* range_end = heap_segment_committed (region);
    size_t range_size = range_end - range_start;

    bool decommitted_p = virtual_decommit (range_start, range_size, bucket, h_number);
    dprintf (REGIONS_LOG, ("decommitted region %p(%p-%p) (%zu bytes) - success: %d",
        region,
        range_start,
        range_end,
        range_size,
        decommitted_p));

    if (decommitted_p && !use_large_pages_p)
    {
        // the memory is gone - nothing is committed in this region anymore
        heap_segment_committed (region) = heap_segment_mem (region);
    }
    else
    {
        // The memory stays around (decommit failed, or large pages are in use), so
        // zero it instead: up to 'used' with large pages, up to 'committed' otherwise.
        uint8_t* zero_end;
        if (use_large_pages_p)
        {
            zero_end = heap_segment_used (region);
        }
        else
        {
            zero_end = heap_segment_committed (region);
        }
        size_t zero_size = zero_end - range_start;
        memclr (range_start, zero_size);
        heap_segment_used (region) = heap_segment_mem (region);
        dprintf(REGIONS_LOG, ("cleared region %p(%p-%p) (%zu bytes)",
            region,
            range_start,
            zero_end,
            zero_size));
    }

#ifdef BACKGROUND_GC
    // Under USE_REGIONS, mark array is never partially committed. So we are only checking for this
    // flag here.
    if ((region->flags & heap_segment_flags_ma_committed) != 0)
    {
#ifdef MULTIPLE_HEAPS
        // In return_free_region, we set heap_segment_heap (region) to nullptr so we cannot use it here.
        // but since all heaps share the same mark array we simply pick the 0th heap to use.
        gc_heap* mark_array_heap = g_heaps [0];
#else
        gc_heap* mark_array_heap = pGenGCHeap;
#endif
        mark_array_heap->decommit_mark_array_by_seg (region);
        region->flags &= ~(heap_segment_flags_ma_committed);
    }
#endif //BACKGROUND_GC

    // NOTE(review): if virtual_decommit fails while large pages are not in use, 'committed'
    // is left untouched above and the second assert below would fire - confirm whether
    // that is the intended way to surface a failed decommit.
    if (use_large_pages_p)
    {
        assert (heap_segment_used (region) == heap_segment_mem (region));
    }
    else
    {
        assert (heap_segment_committed (region) == heap_segment_mem (region));
    }
#ifdef BACKGROUND_GC
    assert ((region->flags & heap_segment_flags_ma_committed) == 0);
#endif //BACKGROUND_GC

    global_region_allocator.delete_region (get_region_start (region));

    return range_size;
}
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
// Performs one gradual-decommit step on this heap's ephemeral space, moving 'committed'
// towards the stored decommit target by at most max_decommit_step_size.
// With regions this is done for the tail region of gen0 and gen1; with segments for the
// ephemeral segment.
// return the decommitted size
size_t gc_heap::decommit_ephemeral_segment_pages_step ()
{
    size_t size = 0;
    // NOTE: the two preprocessor branches below open the same brace - a for loop body
    // with regions, a plain scope with segments - which is closed once further down.
#ifdef USE_REGIONS
    for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++)
    {
        generation* gen = generation_of (gen_number);
        heap_segment* seg = generation_tail_region (gen);
#else // USE_REGIONS
    {
        heap_segment* seg = ephemeral_heap_segment;
        // we rely on desired allocation not being changed outside of GC
        assert (seg->saved_desired_allocation == dd_desired_allocation (dynamic_data_of (0)));
#endif // USE_REGIONS

        uint8_t* decommit_target = heap_segment_decommit_target (seg);
        // keep two extra OS pages committed above the target
        size_t EXTRA_SPACE = 2 * OS_PAGE_SIZE;
        decommit_target += EXTRA_SPACE;
        uint8_t* committed = heap_segment_committed (seg);
        // for the ephemeral segment alloc_allocated is used instead of the segment's allocated
        uint8_t* allocated = (seg == ephemeral_heap_segment) ? alloc_allocated : heap_segment_allocated (seg);
        // only proceed if the target lies in [allocated, committed)
        if ((allocated <= decommit_target) && (decommit_target < committed))
        {
#ifdef USE_REGIONS
            if (gen_number == soh_gen0)
            {
                // for gen 0, sync with the allocator by taking the more space lock
                // and re-read the variables
                //
                // we call try_enter_spin_lock here instead of enter_spin_lock because
                // calling enter_spin_lock from this thread can deadlock at the start
                // of a GC - if gc_started is already true, we call wait_for_gc_done(),
                // but we are on GC thread 0, so GC cannot make progress
                if (!try_enter_spin_lock (&more_space_lock_soh))
                {
                    // lock not available - skip gen0 for this step
                    continue;
                }
                add_saved_spinlock_info (false, me_acquire, mt_decommit_step, msl_entered);
                seg = generation_tail_region (gen);
#ifndef STRESS_DECOMMIT
                decommit_target = heap_segment_decommit_target (seg);
                decommit_target += EXTRA_SPACE;
#endif
                committed = heap_segment_committed (seg);
                allocated = (seg == ephemeral_heap_segment) ? alloc_allocated : heap_segment_allocated (seg);
            }
            // re-check with the values (re-read under the lock for gen0); this 'if' guards
            // the brace block that follows the #endif below
            if ((allocated <= decommit_target) && (decommit_target < committed))
#else // USE_REGIONS
            // we rely on other threads not messing with committed if we are about to trim it down
            assert (seg->saved_committed == heap_segment_committed (seg));
#endif // USE_REGIONS
            {
                // how much would we need to decommit to get to decommit_target in one step?
                size_t full_decommit_size = (committed - decommit_target);

                // don't do more than max_decommit_step_size per step
                size_t decommit_size = min (max_decommit_step_size, full_decommit_size);

                // figure out where the new committed should be
                uint8_t* new_committed = (committed - decommit_size);
                size += decommit_heap_segment_pages_worker (seg, new_committed);

#if defined(_DEBUG) && !defined(USE_REGIONS)
                seg->saved_committed = committed - size;
#endif //_DEBUG && !USE_REGIONS
            }
#ifdef USE_REGIONS
            if (gen_number == soh_gen0)
            {
                // for gen 0, we took the more space lock - leave it again
                add_saved_spinlock_info (false, me_release, mt_decommit_step, msl_entered);
                leave_spin_lock (&more_space_lock_soh);
            }
#endif // USE_REGIONS
        }
    }
    return size;
}
#endif //MULTIPLE_HEAPS

// Computes the fragmentation that would remain for generation gen and the generations
// younger than it, based on the plan. Meant to be called by decide_on_compacting.
//
// gen         - the generation to compute the fragmentation for
// consing_gen - generation whose allocation pointer is examined (segments only)
// end         - end address used for the ephemeral segment part (segments only)
//
// The free space lengths of the already dequeued pinned plugs are added to the result.
size_t gc_heap::generation_fragmentation (generation* gen,
                                          generation* consing_gen,
                                          uint8_t* end)
{
    ptrdiff_t frag = 0;

#ifdef USE_REGIONS
    // For every generation from 0 up to gen: whatever lies between the planned
    // allocated and the saved allocated of each region counts as fragmentation.
    const int oldest_gen_num = gen->gen_num;
    for (int gen_num = 0; gen_num <= oldest_gen_num; gen_num++)
    {
        generation* current_gen = generation_of (gen_num);
        for (heap_segment* region = heap_segment_rw (generation_start_segment (current_gen));
             region != nullptr;
             region = heap_segment_next_rw (region))
        {
            frag += (heap_segment_saved_allocated (region) -
                     heap_segment_plan_allocated (region));

            dprintf (3, ("h%d g%d adding seg plan frag: %p-%p=%zd -> %zd",
                heap_number, gen_num,
                heap_segment_saved_allocated (region),
                heap_segment_plan_allocated (region),
                (heap_segment_saved_allocated (region) - heap_segment_plan_allocated (region)),
                frag));
        }
    }
#else //USE_REGIONS
    uint8_t* alloc = generation_allocation_pointer (consing_gen);

    if (!in_range_for_segment (alloc, ephemeral_heap_segment))
    {
        // the allocation pointer has not reached the ephemeral segment: the whole
        // ephemeral segment is considered fragmentation
        frag = (heap_segment_allocated (ephemeral_heap_segment) -
                heap_segment_mem (ephemeral_heap_segment));
    }
    else
    {
        // alloc beyond 'allocated' is the case when there are no survivors and
        // allocated was set to the beginning - no fragmentation then
        frag = (alloc <= heap_segment_allocated(ephemeral_heap_segment)) ? (end - alloc) : 0;
        dprintf (3, ("ephemeral frag: %zd", frag));
    }

    heap_segment* seg = heap_segment_rw (generation_start_segment (gen));

    _ASSERTE(seg != NULL);

    // add allocated minus planned allocated for every segment before the ephemeral one
    while (seg != ephemeral_heap_segment)
    {
        ptrdiff_t seg_frag = (heap_segment_allocated (seg) -
                              heap_segment_plan_allocated (seg));
        frag += seg_frag;
        dprintf (3, ("seg: %zx, frag: %zd", (size_t)seg, seg_frag));

        seg = heap_segment_next_rw (seg);
        assert (seg);
    }
#endif //USE_REGIONS

    dprintf (3, ("frag: %zd discounting pinned plugs", frag));
    //add the length of the dequeued plug free space
    for (size_t plug_index = 0; plug_index < mark_stack_bos; plug_index++)
    {
        frag += (pinned_len (pinned_plug_of (plug_index)));
        dprintf (3, ("adding pinned len %zd to frag ->%zd",
            pinned_len (pinned_plug_of (plug_index)), frag));
    }

    return frag;
}

// Returns the total size of a generation:
// - for SOH, the size of gen together with all of its younger generation(s);
// - for LOH, just the LOH size.
// With regions, use_saved_p selects the saved allocated end of each region instead of
// the current one; without regions the flag is not consulted.
size_t gc_heap::generation_sizes (generation* gen, bool use_saved_p)
{
    size_t total_bytes = 0;

#ifdef USE_REGIONS
    const int last_gen_index = gen->gen_num;

    // generations above max_generation are summed on their own; the others are
    // summed together with everything younger than them
    int first_gen_index = 0;
    if (last_gen_index > max_generation)
    {
        first_gen_index = last_gen_index;
    }

    for (int gen_index = first_gen_index; gen_index <= last_gen_index; gen_index++)
    {
        for (heap_segment* region = heap_segment_in_range (generation_start_segment (generation_of (gen_index)));
             region != nullptr;
             region = heap_segment_next (region))
        {
            uint8_t* region_end = heap_segment_allocated (region);
            if (use_saved_p)
            {
                region_end = heap_segment_saved_allocated (region);
            }

            total_bytes += region_end - heap_segment_mem (region);
            dprintf (3, ("h%d gen%d size + %zd (%p - %p) -> %zd",
                heap_number, gen_index, (region_end - heap_segment_mem (region)),
                heap_segment_mem (region), region_end, total_bytes));
        }
    }
#else //USE_REGIONS
    if (generation_start_segment (gen ) != ephemeral_heap_segment)
    {
        // sum the allocated span of every in-range segment starting at the
        // generation's start segment
        heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));

        _ASSERTE(seg != NULL);

        for (; seg; seg = heap_segment_next_in_range (seg))
        {
            total_bytes += (heap_segment_allocated (seg) -
                            heap_segment_mem (seg));
        }
    }
    else
    {
        // the generation starts on the ephemeral segment: everything from its
        // allocation start to the segment's allocated end
        total_bytes = (heap_segment_allocated (ephemeral_heap_segment) -
                       generation_allocation_start (gen));
    }
#endif //USE_REGIONS

    return total_bytes;
}

#ifdef USE_REGIONS
bool gc_heap::decide_on_compaction_space()
{
    size_t gen0size = approximate_new_allocation();

    dprintf (REGIONS_LOG, ("gen0size: %zd, free: %zd",
        gen0size, (num_regions_freed_in_sweep * ((size_t)1 << min_segment_size_shr))));
    // If we don't compact, would we have enough space?
    if (sufficient_space_regions ((num_regions_freed_in_sweep * ((size_t)1 << min_segment_size_shr)),
                                  gen0size))
    {
        dprintf (REGIONS_LOG, ("it is sufficient!"));
        return false;
    }

    // If we do compact, would we have enough space?
    get_gen0_end_plan_space();

    if (!gen0_large_chunk_found)
    {
        gen0_large_chunk_found = (free_regions[basic_free_region].get_num_free_regions() > 0);
    }

    dprintf (REGIONS_LOG, ("gen0_pinned_free_space: %zd, end_gen0_region_space: %zd, gen0size: %zd",
            gen0_pinned_free_space, end_gen0_region_space, gen0size));

    if (sufficient_space_regions ((gen0_pinned_free_space + end_gen0_region_space), gen0size) &&
        gen0_large_chunk_found)
    {
        sufficient_gen0_space_p = TRUE;
    }

    return true;
}
#endif //USE_REGIONS

// Estimates how much space a GC of gen_number could free.
//
// The generation's total size is taken as dd_current_size plus what was
// allocated against its budget (dd_desired_allocation - dd_new_allocation).
// The estimate is that total minus the part scaled by dd_surv, plus
// dd_fragmentation.
size_t gc_heap::estimated_reclaim (int gen_number)
{
    dynamic_data* gen_dd = dynamic_data_of (gen_number);

    const size_t budget_used = (dd_desired_allocation (gen_dd) - dd_new_allocation (gen_dd));
    const size_t total_size = budget_used + dd_current_size (gen_dd);
    const size_t projected_survivors = (size_t)((float) (total_size) * dd_surv (gen_dd));
    const size_t projected_free = total_size - projected_survivors + dd_fragmentation (gen_dd);

    dprintf (GTC_LOG, ("h%d gen%d total size: %zd, est dead space: %zd (s: %d, allocated: %zd), frag: %zd",
                heap_number, gen_number,
                total_size,
                projected_free,
                (int)(dd_surv (gen_dd) * 100),
                budget_used,
                dd_fragmentation (gen_dd)));

    return projected_free;
}

bool gc_heap::is_full_compacting_gc_productive()
{
#ifdef USE_REGIONS
    // If we needed to grow gen2 by extending either the end of its tail region
    // or having to acquire more regions for gen2, then we view this as unproductive.
    //
    // Note that when we freely choose which region to demote and promote, this calculation
    // will need to change.
    heap_segment* gen1_start_region = generation_start_segment (generation_of (max_generation - 1));
    if (heap_segment_plan_gen_num (gen1_start_region) == max_generation)
    {
        dprintf (REGIONS_LOG, ("gen1 start region %p is now part of gen2, unproductive",
            heap_segment_mem (gen1_start_region)));
        return false;
    }
    else
    {
        heap_segment* gen2_tail_region = generation_tail_region (generation_of (max_generation));
        if (heap_segment_plan_allocated (gen2_tail_region) >= heap_segment_allocated (gen2_tail_region))
        {
            dprintf (REGIONS_LOG, ("last gen2 region extended %p->%p, unproductive",
                heap_segment_allocated (gen2_tail_region), heap_segment_plan_allocated (gen2_tail_region)));

            return false;
        }
    }

    return true;
#else //USE_REGIONS
    if (generation_plan_allocation_start (generation_of (max_generation - 1)) >=
        generation_allocation_start (generation_of (max_generation - 1)))
    {
        dprintf (1, ("gen1 start %p->%p, gen2 size %zd->%zd, lock elevation",
                generation_allocation_start (generation_of (max_generation - 1)),
                generation_plan_allocation_start (generation_of (max_generation - 1)),
                    generation_size (max_generation),
                    generation_plan_size (max_generation)));
        return false;
    }
    else
        return true;
#endif //USE_REGIONS
}

// Decides whether the current GC should compact (returns TRUE) or sweep
// (returns FALSE).
//
// condemned_gen_number - the generation being condemned.
// fragmentation        - fragmentation amount supplied by the caller; compared
//                        against the condemned generation's fragmentation limits.
// should_expand        - out parameter. Reset to FALSE on entry. Set to TRUE
//                        (segments build) when compacting and
//                        dt_low_ephemeral_space_p (tuning_deciding_expansion)
//                        is true for a gen1+ GC, or in pause_no_gc mode when the
//                        reserved space left after the planned end of the
//                        ephemeral segment is below soh_allocation_no_gc.
//
// Side effects visible in this function: records the reason for compacting via
// set_mechanism (gc_heap_compact, ...), clears last_gc_before_oom in the
// segments build, and may set settings.should_lock_elevation for full GCs.
BOOL gc_heap::decide_on_compacting (int condemned_gen_number,
                                    size_t fragmentation,
                                    BOOL& should_expand)
{
    BOOL should_compact = FALSE;
    should_expand = FALSE;
    generation*   gen = generation_of (condemned_gen_number);
    dynamic_data* dd = dynamic_data_of (condemned_gen_number);
    size_t gen_sizes     = generation_sizes(gen, true);
    // Fragmentation as a fraction of gen_sizes; 0 when either input is 0 so
    // we never divide by zero.
    float  fragmentation_burden = ( ((0 == fragmentation) || (0 == gen_sizes)) ? (0.0f) :
                                    (float (fragmentation) / gen_sizes) );

    dprintf (GTC_LOG, ("h%d g%d fragmentation: %zd (%d%%), gen_sizes: %zd",
        heap_number, settings.condemned_generation,
        fragmentation, (int)(fragmentation_burden * 100.0),
        gen_sizes));

#ifdef USE_REGIONS
    // A special sweep never compacts; nothing below is evaluated.
    if (special_sweep_p)
    {
        return FALSE;
    }
#endif //USE_REGIONS

    // ---- Unconditional reasons to compact ----

#if defined(STRESS_HEAP) && !defined(FEATURE_NATIVEAOT)
    // for GC stress runs we need compaction
    if (GCStress<cfg_any>::IsEnabled() && !settings.concurrent)
        should_compact = TRUE;
#endif //defined(STRESS_HEAP) && !defined(FEATURE_NATIVEAOT)

    if (GCConfig::GetForceCompact())
        should_compact = TRUE;

    if ((condemned_gen_number == max_generation) && last_gc_before_oom)
    {
        should_compact = TRUE;
#ifndef USE_REGIONS
        last_gc_before_oom = FALSE;
#endif //!USE_REGIONS
        get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_last_gc);
    }

    if (settings.reason == reason_induced_compacting)
    {
        dprintf (2, ("induced compacting GC"));
        should_compact = TRUE;
        get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_induced_compacting);
    }

    if (settings.reason == reason_induced_aggressive)
    {
        dprintf (2, ("aggressive compacting GC"));
        should_compact = TRUE;
        get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_aggressive_compacting);
    }

    if (settings.reason == reason_pm_full_gc)
    {
        assert (condemned_gen_number == max_generation);
        if (heap_number == 0)
        {
            dprintf (GTC_LOG, ("PM doing compacting full GC after a gen1"));
        }
        should_compact = TRUE;
    }

    dprintf (2, ("Fragmentation: %zu Fragmentation burden %d%%",
                fragmentation, (int) (100*fragmentation_burden)));

    if (provisional_mode_triggered && (condemned_gen_number == (max_generation - 1)))
    {
        dprintf (GTC_LOG, ("gen1 in PM always compact"));
        should_compact = TRUE;
    }

    // ---- Space-based reasons (differ between regions and segments) ----

#ifdef USE_REGIONS
    if (!should_compact)
    {
        should_compact = !!decide_on_compaction_space();
    }
#else //USE_REGIONS
    if (!should_compact)
    {
        if (dt_low_ephemeral_space_p (tuning_deciding_compaction))
        {
            dprintf(GTC_LOG, ("compacting due to low ephemeral"));
            should_compact = TRUE;
            get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_low_ephemeral);
        }
    }

    if (should_compact)
    {
        // Only gen1 and gen2 GCs are considered for expansion.
        if ((condemned_gen_number >= (max_generation - 1)))
        {
            if (dt_low_ephemeral_space_p (tuning_deciding_expansion))
            {
                dprintf (GTC_LOG,("Not enough space for all ephemeral generations with compaction"));
                should_expand = TRUE;
            }
        }
    }
#endif //USE_REGIONS

#ifdef HOST_64BIT
    // Set when entry_memory_load is at or above high_memory_load_th; feeds the
    // elevation-lock decision further down.
    BOOL high_memory = FALSE;
#endif // HOST_64BIT

    // ---- Fragmentation-based reasons ----

    if (!should_compact)
    {
        // We are not putting this in dt_high_frag_p because it's not exactly
        // high fragmentation - it's just enough planned fragmentation for us to
        // want to compact. Also the "fragmentation" we are talking about here
        // is different from anywhere else.
        dprintf (REGIONS_LOG, ("frag: %zd, fragmentation_burden: %.3f",
            fragmentation, fragmentation_burden));
        // Both the absolute amount and the ratio must reach their limits.
        BOOL frag_exceeded = ((fragmentation >= dd_fragmentation_limit (dd)) &&
                                (fragmentation_burden >= dd_fragmentation_burden_limit (dd)));

        if (frag_exceeded)
        {
#ifdef BACKGROUND_GC
            // do not force compaction if this was a stress-induced GC
            IN_STRESS_HEAP(if (!settings.stress_induced))
            {
#endif // BACKGROUND_GC
            assert (settings.concurrent == FALSE);
            should_compact = TRUE;
            get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_high_frag);
#ifdef BACKGROUND_GC
            }
#endif // BACKGROUND_GC
        }

#ifdef HOST_64BIT
        // check for high memory situation
        if(!should_compact)
        {
            uint32_t num_heaps = 1;
#ifdef MULTIPLE_HEAPS
            num_heaps = gc_heap::n_heaps;
#endif // MULTIPLE_HEAPS

            // How much smaller gen2 is planned to be than it currently is.
            ptrdiff_t reclaim_space = generation_size(max_generation) - generation_plan_size(max_generation);

            // High (but not very high) memory load: threshold depends on the
            // available physical memory at entry.
            if((settings.entry_memory_load >= high_memory_load_th) && (settings.entry_memory_load < v_high_memory_load_th))
            {
                if(reclaim_space > (int64_t)(min_high_fragmentation_threshold (entry_available_physical_mem, num_heaps)))
                {
                    dprintf(GTC_LOG,("compacting due to fragmentation in high memory"));
                    should_compact = TRUE;
                    get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_high_mem_frag);
                }
                high_memory = TRUE;
            }
            // Very high memory load: uses min_reclaim_fragmentation_threshold.
            else if(settings.entry_memory_load >= v_high_memory_load_th)
            {
                if(reclaim_space > (ptrdiff_t)(min_reclaim_fragmentation_threshold (num_heaps)))
                {
                    dprintf(GTC_LOG,("compacting due to fragmentation in very high memory"));
                    should_compact = TRUE;
                    get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_vhigh_mem_frag);
                }
                high_memory = TRUE;
            }
        }
#endif // HOST_64BIT
    }

    // The purpose of calling ensure_gap_allocation here is to make sure
    // that we actually are able to commit the memory to allocate generation
    // starts.
    if ((should_compact == FALSE) &&
        (ensure_gap_allocation (condemned_gen_number) == FALSE))
    {
        should_compact = TRUE;
        get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_no_gaps);
    }

    // For full GCs, lock elevation when we are in high memory and still not
    // compacting, or when the full compacting GC is judged unproductive.
    if (settings.condemned_generation == max_generation)
    {
        //check the progress
        if (
#ifdef HOST_64BIT
            (high_memory && !should_compact) ||
#endif // HOST_64BIT
            !is_full_compacting_gc_productive())
        {
            //no progress -> lock
            settings.should_lock_elevation = TRUE;
        }
    }

    // pause_no_gc always compacts, and expands when the reserved space past
    // the planned end of the ephemeral segment is below soh_allocation_no_gc.
    if (settings.pause_mode == pause_no_gc)
    {
        should_compact = TRUE;
        if ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_plan_allocated (ephemeral_heap_segment))
            < soh_allocation_no_gc)
        {
            should_expand = TRUE;
        }
    }

    dprintf (2, ("will %s(%s)", (should_compact ? "compact" : "sweep"), (should_expand ? "ex" : "")));
    return should_compact;
}

// Rounds size down to the nearest multiple of 64.
size_t align_lower_good_size_allocation (size_t size)
{
    const size_t granularity = 64;
    return size - (size % granularity);
}

// Returns an approximate gen0 allocation amount: the larger of twice gen0's
// dd_min_size and two thirds of gen0's dd_desired_allocation.
size_t gc_heap::approximate_new_allocation()
{
    dynamic_data* gen0_dd = dynamic_data_of (0);
    size_t twice_min_size = 2*dd_min_size (gen0_dd);
    size_t two_thirds_of_budget = ((dd_desired_allocation (gen0_dd)*2)/3);
    return max (twice_min_size, two_thirds_of_budget);
}

// Checks space_required against what is left under the hard limit.
//
// With no hard limit configured (heap_hard_limit == 0) this always returns
// true. Otherwise the uncommitted remainder of the hard limit is divided
// evenly by the number of heaps, and the result is true only if that per-heap
// share is at least space_required.
bool gc_heap::check_against_hard_limit (size_t space_required)
{
    if (!heap_hard_limit)
    {
        return true;
    }

    // If we attributed all that's left in commit to the ephemeral seg and
    // treated that as segment end, would there be enough space?
    int num_heaps = get_num_heaps();
    size_t per_heap_left = (heap_hard_limit - current_total_committed) / num_heaps;
    bool fits = !(per_heap_left < space_required);

    dprintf (2, ("h%d end seg %zd, but only %zd left in HARD LIMIT commit, required: %zd %s on eph",
        heap_number, space_required,
        per_heap_left, space_required,
        (fits ? "ok" : "short")));

    return fits;
}

#ifdef USE_REGIONS
// Allocation-time check of whether end_space_required can be satisfied.
//
// The allocatable space is end_space plus the space in this heap's free basic
// regions plus global_region_allocator.get_free(). If that does not exceed
// end_space_required the answer is false. Otherwise, if the already committed
// space (end_gen0_region_committed_space + committed space in free basic
// regions) covers the requirement the answer is true; if not, the uncommitted
// shortfall is checked against the hard limit.
bool gc_heap::sufficient_space_regions_for_allocation (size_t end_space, size_t end_space_required)
{
    // REGIONS PERF TODO: we can repurpose large regions here too, if needed.
    const size_t basic_region_size = ((size_t)1 << min_segment_size_shr);
    const size_t free_regions_space = (free_regions[basic_free_region].get_num_free_regions() * basic_region_size) +
                                      global_region_allocator.get_free();
    const size_t total_alloc_space = end_space + free_regions_space;

    dprintf (REGIONS_LOG, ("h%d required %zd, end %zd + free %zd=%zd",
        heap_number, end_space_required, end_space, free_regions_space, total_alloc_space));

    const size_t total_commit_space = end_gen0_region_committed_space + free_regions[basic_free_region].get_size_committed_in_free();

    if (total_alloc_space <= end_space_required)
    {
        return false;
    }

    if (end_space_required <= total_commit_space)
    {
        return true;
    }

    // Only the part that still needs committing counts against the limit.
    return check_against_hard_limit (end_space_required - total_commit_space);
}

// Checks whether end_space_required can be satisfied from end_space plus the
// space in this heap's free basic regions plus
// global_region_allocator.get_free(). When the total exceeds the requirement,
// the full requirement is additionally checked against the hard limit.
bool gc_heap::sufficient_space_regions (size_t end_space, size_t end_space_required)
{
    // REGIONS PERF TODO: we can repurpose large regions here too, if needed.
    // REGIONS PERF TODO: for callsites other than allocation, we should also take commit into account
    const size_t basic_region_size = ((size_t)1 << min_segment_size_shr);
    const size_t free_regions_space = (free_regions[basic_free_region].get_num_free_regions() * basic_region_size) +
                                      global_region_allocator.get_free();
    const size_t total_alloc_space = end_space + free_regions_space;

    dprintf (REGIONS_LOG, ("h%d required %zd, end %zd + free %zd=%zd",
        heap_number, end_space_required, end_space, free_regions_space, total_alloc_space));

    if (total_alloc_space <= end_space_required)
    {
        return false;
    }

    return check_against_hard_limit (end_space_required);
}
#else //USE_REGIONS
// Checks whether end_space_required bytes are available between start and the
// end of a segment.
//
// start              - where the free space at the end of the segment begins.
// committed          - end of the segment's committed range.
// reserved           - end of the segment's reserved range.
// end_space_required - number of bytes needed.
//
// Returns TRUE if the committed space past start already exceeds the
// requirement. If only the reserved space exceeds it, the part that would
// still have to be committed is checked against the hard limit. Returns FALSE
// otherwise.
BOOL gc_heap::sufficient_space_end_seg (uint8_t* start, uint8_t* committed, uint8_t* reserved, size_t end_space_required)
{
    size_t committed_space = (size_t)(committed - start);
    if (committed_space > end_space_required)
    {
        return TRUE;
    }

    size_t end_seg_space = (size_t)(reserved - start);
    if (end_seg_space > end_space_required)
    {
        // committed_space <= end_space_required here, so this cannot underflow.
        return check_against_hard_limit (end_space_required - committed_space);
    }

    return FALSE;
}
#endif //USE_REGIONS

// The amount of space we expect to have at the end of the segment after a GC,
// so that a reasonable amount of allocation requests can be satisfied:
// the larger of half of gen0's dd_min_size and END_SPACE_AFTER_GC_FL.
size_t gc_heap::end_space_after_gc()
{
    size_t half_gen0_min_size = (dd_min_size (dynamic_data_of (0))/2);
    return max (half_gen0_min_size, (END_SPACE_AFTER_GC_FL));
}

// Answers whether the ephemeral generations have enough room, for the
// decision identified by tp.
//
// Regions build: only tuning_deciding_condemned_gen and
// tuning_deciding_full_gc are expected (asserted); the answer comes from
// sufficient_space_regions on the reserved gen0 end space.
//
// Segments build:
//  - tuning_deciding_condemned_gen / tuning_deciding_compaction /
//    tuning_deciding_full_gc: checks the space from the relevant "start"
//    (allocated, alloc_allocated, see below) to the end of the ephemeral
//    segment via sufficient_space_end_seg.
//  - tuning_deciding_expansion: checks, based on plan_allocated, whether the
//    end of the ephemeral segment - or failing that, the end of the segment
//    plus the free space in front of pinned plugs at or after gen0's planned
//    start - can hold approximate_new_allocation(). May set
//    sufficient_gen0_space_p.
//
// Returns TRUE when there is enough room, FALSE otherwise.
BOOL gc_heap::ephemeral_gen_fit_p (gc_tuning_point tp)
{
    uint8_t* start = 0;

#ifdef USE_REGIONS
    assert ((tp == tuning_deciding_condemned_gen) || (tp == tuning_deciding_full_gc));
#else//USE_REGIONS
    // Pick where the free space at the end of the ephemeral segment begins.
    if ((tp == tuning_deciding_condemned_gen) ||
        (tp == tuning_deciding_compaction))
    {
        start = (settings.concurrent ? alloc_allocated : heap_segment_allocated (ephemeral_heap_segment));
        if (settings.concurrent)
        {
            dprintf (2, ("%zd left at the end of ephemeral segment (alloc_allocated)",
                (size_t)(heap_segment_reserved (ephemeral_heap_segment) - alloc_allocated)));
        }
        else
        {
            dprintf (2, ("%zd left at the end of ephemeral segment (allocated)",
                (size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment))));
        }
    }
    else if (tp == tuning_deciding_expansion)
    {
        start = heap_segment_plan_allocated (ephemeral_heap_segment);
        dprintf (2, ("%zd left at the end of ephemeral segment based on plan",
            (size_t)(heap_segment_reserved (ephemeral_heap_segment) - start)));
    }
    else
    {
        assert (tp == tuning_deciding_full_gc);
        dprintf (2, ("FGC: %zd left at the end of ephemeral segment (alloc_allocated)",
            (size_t)(heap_segment_reserved (ephemeral_heap_segment) - alloc_allocated)));
        start = alloc_allocated;
    }

    if (start == 0) // empty ephemeral generations
    {
        assert (tp == tuning_deciding_expansion);
        // if there are no survivors in the ephemeral segment,
        // this should be the beginning of ephemeral segment.
        start = generation_allocation_pointer (generation_of (max_generation));
        assert (start == heap_segment_mem (ephemeral_heap_segment));
    }

    if (tp == tuning_deciding_expansion)
    {
        assert (settings.condemned_generation >= (max_generation-1));
        size_t gen0size = approximate_new_allocation();
        size_t eph_size = gen0size;
        size_t gen_min_sizes = 0;

        // Add twice the min size of every ephemeral generation other than gen0.
        for (int j = 1; j <= max_generation-1; j++)
        {
            gen_min_sizes += 2*dd_min_size (dynamic_data_of(j));
        }

        eph_size += gen_min_sizes;

        // Arguments follow the order of the labels in the format string:
        // total needed, gen0 part, 2*min part.
        dprintf (3, ("h%d deciding on expansion, need %zd (gen0: %zd, 2*min: %zd)",
            heap_number, eph_size, gen0size, gen_min_sizes));

        // We must find room for one large object and enough room for gen0size
        if ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - start) > eph_size)
        {
            dprintf (3, ("Enough room before end of segment"));
            return TRUE;
        }
        else
        {
            size_t room = align_lower_good_size_allocation
                (heap_segment_reserved (ephemeral_heap_segment) - start);
            size_t end_seg = room;

            //look at the plug free space
            size_t largest_alloc = END_SPACE_AFTER_GC_FL;
            bool large_chunk_found = FALSE;
            size_t bos = 0;
            uint8_t* gen0start = generation_plan_allocation_start (youngest_generation);
            dprintf (3, ("ephemeral_gen_fit_p: gen0 plan start: %zx", (size_t)gen0start));
            if (gen0start == 0)
                return FALSE;
            dprintf (3, ("ephemeral_gen_fit_p: room before free list search %zd, needed: %zd",
                         room, gen0size));
            // Walk the pinned plug queue, accumulating the free space recorded
            // for pinned plugs that are on the ephemeral segment at or after
            // gen0's planned start. Stop early once we have both enough room
            // and one chunk of at least largest_alloc.
            while ((bos < mark_stack_bos) &&
                   !((room >= gen0size) && large_chunk_found))
            {
                uint8_t* plug = pinned_plug (pinned_plug_of (bos));
                if (in_range_for_segment (plug, ephemeral_heap_segment))
                {
                    if (plug >= gen0start)
                    {
                        size_t chunk = align_lower_good_size_allocation (pinned_len (pinned_plug_of (bos)));
                        room += chunk;
                        if (!large_chunk_found)
                        {
                            large_chunk_found = (chunk >= largest_alloc);
                        }
                        dprintf (3, ("ephemeral_gen_fit_p: room now %zd, large chunk: %d",
                                     room, large_chunk_found));
                    }
                }
                bos++;
            }

            if (room >= gen0size)
            {
                if (large_chunk_found)
                {
                    sufficient_gen0_space_p = TRUE;

                    dprintf (3, ("Enough room"));
                    return TRUE;
                }
                else
                {
                    // now we need to find largest_alloc at the end of the segment.
                    if (end_seg >= end_space_after_gc())
                    {
                        dprintf (3, ("Enough room (may need end of seg)"));
                        return TRUE;
                    }
                }
            }

            dprintf (3, ("Not enough room"));
            return FALSE;
        }
    }
    else
#endif //USE_REGIONS
    {
        // How much end space we want depends on which decision is being made.
        size_t end_space = 0;
        dynamic_data* dd = dynamic_data_of (0);
        if ((tp == tuning_deciding_condemned_gen) ||
            (tp == tuning_deciding_full_gc))
        {
            end_space = max (2*dd_min_size (dd), end_space_after_gc());
        }
        else
        {
            assert (tp == tuning_deciding_compaction);
            end_space = approximate_new_allocation();
        }

#ifdef USE_REGIONS
        size_t gen0_end_space = get_gen0_end_space (memory_type_reserved);
        BOOL can_fit = sufficient_space_regions (gen0_end_space, end_space);
#else //USE_REGIONS
        BOOL can_fit = sufficient_space_end_seg (start, heap_segment_committed (ephemeral_heap_segment), heap_segment_reserved (ephemeral_heap_segment), end_space);
#endif //USE_REGIONS
        return can_fit;
    }
}

// Allocates space for one object in the UOH generation gen_number.
//
// jsize       - requested object size in bytes.
// flags       - allocation flags, passed through to allocate_more_space.
// gen_number  - the generation to allocate in; when it is loh_generation and
//               FEATURE_LOH_COMPACTION is defined, extra padding of
//               loh_padding_obj_size (aligned) is requested as well.
// alloc_bytes - in/out: incremented by the qword-aligned object size on success.
//
// Returns the address of the new object, or NULL/0 when jsize is at or above
// the maximum object size or when allocate_more_space fails.
CObjectHeader* gc_heap::allocate_uoh_object (size_t jsize, uint32_t flags, int gen_number, int64_t& alloc_bytes)
{
    // A fresh, local allocation context is used for this single allocation.
    alloc_context acontext;
    acontext.init();

#if HOST_64BIT
    size_t maxObjectSize = (INT64_MAX - 7 - Align(min_obj_size));
#else
    size_t maxObjectSize = (INT32_MAX - 7 - Align(min_obj_size));
#endif

    // Requests that are too large fail up front (optionally breaking into
    // the debugger first, if configured).
    if (jsize >= maxObjectSize)
    {
        if (GCConfig::GetBreakOnOOM())
        {
            GCToOSInterface::DebugBreak();
        }
        return NULL;
    }

    size_t size = AlignQword (jsize);
    int align_const = get_alignment_constant (FALSE);
    size_t pad = 0;
#ifdef FEATURE_LOH_COMPACTION
    if (gen_number == loh_generation)
    {
        pad = Align (loh_padding_obj_size, align_const);
    }
#endif //FEATURE_LOH_COMPACTION

    assert (size >= Align (min_obj_size, align_const));
    // Inlining is turned off (MSVC only) around the allocate_more_space call
    // and restored to a depth of 20 afterwards.
#ifdef _MSC_VER
#pragma inline_depth(0)
#endif //_MSC_VER
    if (! allocate_more_space (&acontext, (size + pad), flags, gen_number))
    {
        return 0;
    }

#ifdef _MSC_VER
#pragma inline_depth(20)
#endif //_MSC_VER

#ifdef FEATURE_LOH_COMPACTION
    // The GC allocator made a free object already in this alloc context and
    // adjusted the alloc_ptr accordingly.
#endif //FEATURE_LOH_COMPACTION

    uint8_t*  result = acontext.alloc_ptr;

    // The context is expected to hold exactly the object (the padding is not
    // part of [alloc_ptr, alloc_limit)).
    assert ((size_t)(acontext.alloc_limit - acontext.alloc_ptr) == size);
    alloc_bytes += size;

    CObjectHeader* obj = (CObjectHeader*)result;

    assert (obj != 0);
    assert ((size_t)obj == Align ((size_t)obj, align_const));

    return obj;
}

// Asks the OS to reset the whole pages inside the free object at o of size
// sizeo, via GCToOSInterface::VirtualReset.
//
// Nothing is done when large pages are in use, when sizeo is 128KB or less,
// when reset_mm_p is already false, or when dt_high_memory_load_p() is false.
// The result of VirtualReset is stored back into reset_mm_p.
void gc_heap::reset_memory (uint8_t* o, size_t sizeo)
{
    if (gc_heap::use_large_pages_p || !(sizeo > 128 * 1024))
    {
        return;
    }

    // We cannot reset the memory for the useful part of a free object.
    size_t size_to_skip = min_free_list - plug_skew;

    // Only whole pages strictly inside the resettable part are touched.
    size_t reset_start = align_on_page ((size_t)(o + size_to_skip));
    size_t reset_size = align_lower_page ((size_t)o + sizeo - size_to_skip - plug_skew) - reset_start;

    // Note we need to compensate for an OS bug here. This bug would cause the MEM_RESET to fail
    // on write watched memory.
    if (!reset_mm_p || !gc_heap::dt_high_memory_load_p())
    {
        return;
    }

#ifdef MULTIPLE_HEAPS
    bool unlock_p = true;
#else
    // We don't do unlock because there could be many processes using workstation GC and it's
    // bad perf to have many threads doing unlock at the same time.
    bool unlock_p = false;
#endif //MULTIPLE_HEAPS

    reset_mm_p = GCToOSInterface::VirtualReset((void*)reset_start, reset_size, unlock_p);
}

// Reports whether the UOH object at o is marked.
//
// Objects outside [lowest_address, highest_address) are reported as marked.
// When clearp is set and the object is marked, its mark bit is cleared, and
// its pinned bit too if it was set.
BOOL gc_heap::uoh_object_marked (uint8_t* o, BOOL clearp)
{
    // It shouldn't be necessary to do these comparisons because this is only used for blocking
    // GCs and LOH segments cannot be out of range.
    if ((o < lowest_address) || (o >= highest_address))
    {
        return TRUE;
    }

    if (!marked (o))
    {
        return FALSE;
    }

    if (clearp)
    {
        clear_marked (o);
        if (pinned (o))
        {
            clear_pinned(o);
        }
    }

    return TRUE;
}

// Reports relocated survivors to fn by calling walk_relocation and, when
// FEATURE_LOH_COMPACTION is defined and loh_compacted_p is set, also
// walk_relocation_for_loh.
//
// profiling_context - opaque context passed through to the walkers.
// fn                - callback passed through to the walkers.
void gc_heap::walk_survivors_relocation (void* profiling_context, record_surv_fn fn)
{
    // Now walk the portion of memory that is actually being relocated.
    walk_relocation (profiling_context, fn);

#ifdef FEATURE_LOH_COMPACTION
    // LOH is only walked here when it was compacted in this GC.
    if (loh_compacted_p)
    {
        walk_relocation_for_loh (profiling_context, fn);
    }
#endif //FEATURE_LOH_COMPACTION
}

void gc_heap::walk_survivors_for_uoh (void* profiling_context, record_surv_fn fn, int gen_number)
{
    generation* gen        = generation_of (gen_number);
    heap_segment* seg      = heap_segment_rw (generation_start_segment (gen));;

    _ASSERTE(seg != NULL);

    uint8_t* o                = get_uoh_start_object (seg, gen);
    uint8_t* plug_end         = o;
    uint8_t* plug_start       = o;

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            seg = heap_segment_next (seg);
            if (seg == 0)
                break;
            else
                o = heap_segment_mem (seg);
        }
        if (uoh_object_marked(o, FALSE))
        {
            plug_start = o;

            BOOL m = TRUE;
            while (m)
            {
                o = o + AlignQword (size (o));
                if (o >= heap_segment_allocated (seg))
                {
                    break;
                }
                m = uoh_object_marked (o, FALSE);
            }

            plug_end = o;

            fn (plug_start, plug_end, 0, profiling_context, false, false);
        }
        else
        {
            while (o < heap_segment_allocated (seg) && !uoh_object_marked(o, FALSE))
            {
                o = o + AlignQword (size (o));
            }
        }
    }
}

#ifdef BACKGROUND_GC

// Reports whether o is marked in the mark array.
//
// Objects outside [background_saved_lowest_address,
// background_saved_highest_address) are reported as marked. When clearp is
// set and the object is marked, its mark array bit is cleared.
BOOL gc_heap::background_object_marked (uint8_t* o, BOOL clearp)
{
    // Out-of-range objects keep this default.
    BOOL is_marked = TRUE;

    const bool in_saved_range = ((o >= background_saved_lowest_address) && (o < background_saved_highest_address));
    if (in_saved_range)
    {
        if (!mark_array_marked (o))
        {
            is_marked = FALSE;
        }
        else if (clearp)
        {
            mark_array_clear_marked (o);
            //dprintf (3, ("mark array bit for object %zx is cleared", o));
            dprintf (3, ("CM: %p", o));
        }
    }

    dprintf (3, ("o %p(%zu) %s", o, size(o), (is_marked ? "was bm" : "was NOT bm")));
    return is_marked;
}

// Deletes the UOH segments that were flagged with
// heap_segment_flags_uoh_delete, for every UOH generation.
//
// Each flagged segment is passed to delete_heap_segment and unlinked from its
// generation's segment list. In the regions build the generation's start and
// tail regions are then updated to the first and last remaining regions.
void gc_heap::background_delay_delete_uoh_segments()
{
    for (int i = uoh_start_generation; i < total_generation_count; i++)
    {
        generation* gen = generation_of (i);
        heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
        // Last segment that was kept; 0 until the first kept segment is seen.
        heap_segment* prev_seg = 0;

#ifdef USE_REGIONS
        heap_segment* first_remaining_region = 0;
#endif //USE_REGIONS

        while (seg)
        {
            // Read the link before seg is possibly deleted.
            heap_segment* next_seg = heap_segment_next (seg);
            if (seg->flags & heap_segment_flags_uoh_delete)
            {
                dprintf (3, ("deleting %zx-%p-%p", (size_t)seg, heap_segment_allocated (seg), heap_segment_reserved (seg)));
                delete_heap_segment (seg, (GCConfig::GetRetainVM() != 0));
                // NOTE(review): this writes through prev_seg, so it relies on
                // the first segment in the list never being flagged for
                // deletion - confirm against where the flag is set.
                heap_segment_next (prev_seg) = next_seg;
#ifdef USE_REGIONS
                update_start_tail_regions (gen, seg, prev_seg, next_seg);
#endif //USE_REGIONS
            }
            else
            {
#ifdef USE_REGIONS
                if (!first_remaining_region)
                    first_remaining_region = seg;
#endif //USE_REGIONS
                prev_seg = seg;
            }

            seg = next_seg;
        }

#ifdef USE_REGIONS
        assert (heap_segment_rw (generation_start_segment (gen)) == generation_start_segment (gen));
        if (generation_start_segment (gen) != first_remaining_region)
        {
            dprintf (REGIONS_LOG, ("h%d gen%d start %p -> %p",
                heap_number, gen->gen_num,
                heap_segment_mem (generation_start_segment (gen)),
                heap_segment_mem (first_remaining_region)));
            generation_start_segment (gen) = first_remaining_region;
        }
        // After the walk prev_seg is the last region that was kept.
        if (generation_tail_region (gen) != prev_seg)
        {
            dprintf (REGIONS_LOG, ("h%d gen%d tail %p -> %p",
                heap_number, gen->gen_num,
                heap_segment_mem (generation_tail_region (gen)),
                heap_segment_mem (prev_seg)));
            generation_tail_region (gen) = prev_seg;
        }
#endif //USE_REGIONS
    }
}

// Returns the end address to use for seg: heap_segment_allocated when
// uoh_objects_p is set, heap_segment_background_allocated otherwise.
uint8_t* gc_heap::background_next_end (heap_segment* seg, BOOL uoh_objects_p)
{
    if (uoh_objects_p)
    {
        return heap_segment_allocated (seg);
    }

    return heap_segment_background_allocated (seg);
}

// Fills [start, end) with the byte b for heap verification purposes.
//
// Only does anything when VERIFY_HEAP is defined, the range is non-empty, the
// heap verify level has HEAPVERIFY_GC set and does not have
// HEAPVERIFY_NO_MEM_FILL set.
void gc_heap::set_mem_verify (uint8_t* start, uint8_t* end, uint8_t b)
{
#ifdef VERIFY_HEAP
    if (!(end > start))
    {
        return;
    }

    if (!(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC))
    {
        return;
    }

    if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_NO_MEM_FILL)
    {
        return;
    }

    dprintf (3, ("setting mem to %c [%p, [%p", b, start, end));
    memset (start, b, (end - start));
#endif //VERIFY_HEAP
}

// Prepares the empty segment seg of generation gen for deletion during
// background GC sweep, then decommits it.
//
// gen      - generation that owns seg.
// seg      - the segment to delete.
// prev_seg - neighbour of seg in the walk order (may be null for logging).
// next_seg - the other neighbour of seg in the walk order (may be null for logging).
//
// UOH segments (gen_num > max_generation) are only flagged with
// heap_segment_flags_uoh_delete and left linked. SOH segments are unlinked
// and pushed onto freeable_soh_segment. In both cases the segment is
// decommitted and flagged heap_segment_flags_decommitted.
void gc_heap::generation_delete_heap_segment (generation* gen,
                                              heap_segment* seg,
                                              heap_segment* prev_seg,
                                              heap_segment* next_seg)
{
    dprintf (3, ("bgc sweep: deleting seg %zx(%p), next %zx(%p), prev %zx(%p)",
        (size_t)seg, heap_segment_mem (seg),
        (size_t)next_seg, (next_seg ? heap_segment_mem (next_seg) : 0),
        (size_t)prev_seg, (prev_seg ? heap_segment_mem (prev_seg) : 0)));
    if (gen->gen_num > max_generation)
    {
        dprintf (3, ("Preparing empty large segment %zx for deletion", (size_t)seg));

        // We cannot thread segs in here onto freeable_uoh_segment because
        // grow_brick_card_tables could be committing mark array which needs to read
        // the seg list. So we delay it till next time we suspend EE.
        seg->flags |= heap_segment_flags_uoh_delete;
        // Since we will be decommitting the seg, we need to prevent heap verification
        // to verify this segment.
        heap_segment_allocated (seg) = heap_segment_mem (seg);
    }
    else
    {
        assert (seg != ephemeral_heap_segment);

        // Unlink seg from the generation's list. Which neighbour's next link
        // gets rewritten depends on DOUBLY_LINKED_FL.
#ifdef DOUBLY_LINKED_FL
        // For doubly linked list we go forward for SOH
        heap_segment_next (prev_seg) = next_seg;
#else //DOUBLY_LINKED_FL
        heap_segment_next (next_seg) = prev_seg;
#endif //DOUBLY_LINKED_FL

        dprintf (3, ("Preparing empty small segment %zx for deletion", (size_t)seg));
        // Push seg onto the head of the freeable_soh_segment list.
        heap_segment_next (seg) = freeable_soh_segment;
        freeable_soh_segment = seg;

#ifdef USE_REGIONS
        // Map the walk-order neighbours to list-order prev/next before
        // updating the generation's start/tail regions.
#ifdef DOUBLY_LINKED_FL
        heap_segment* next_region = next_seg;
        heap_segment* prev_region = prev_seg;
#else //DOUBLY_LINKED_FL
        heap_segment* next_region = prev_seg;
        heap_segment* prev_region = next_seg;
#endif //DOUBLY_LINKED_FL

        update_start_tail_regions (gen, seg, prev_region, next_region);
#endif //USE_REGIONS
    }

    decommit_heap_segment (seg);
    seg->flags |= heap_segment_flags_decommitted;

    // For heap verification: fill from just before allocated up to used with 0xbb.
    set_mem_verify (heap_segment_allocated (seg) - plug_skew, heap_segment_used (seg), 0xbb);
}

// Finishes background sweep processing for the end of segment seg.
//
// seg                    - the segment whose end is being processed.
// gen                    - the generation seg belongs to.
// last_plug_end          - end of the last plug found on seg.
// start_seg              - segment that must not be deleted even if empty.
// delete_p               - out: set to TRUE when seg has no plugs at all
//                          (last_plug_end is at the segment's start) and is
//                          not start_seg; FALSE otherwise.
// free_obj_size_last_gap - if non-zero, subtracted from gen's free object space.
//
// For an SOH segment whose allocated differs from background_allocated, the
// gap between last_plug_end and background_allocated is threaded as free
// space into max_generation. Otherwise, unless the segment is to be deleted,
// its allocated is pulled back to last_plug_end and the pages beyond are
// decommitted. Finally verifies that seg's mark array was cleared.
void gc_heap::process_background_segment_end (heap_segment* seg,
                                              generation* gen,
                                              uint8_t* last_plug_end,
                                              heap_segment* start_seg,
                                              BOOL* delete_p,
                                              size_t free_obj_size_last_gap)
{
    *delete_p = FALSE;
    uint8_t* allocated = heap_segment_allocated (seg);
    uint8_t* background_allocated = heap_segment_background_allocated (seg);
    BOOL uoh_p = heap_segment_uoh_p (seg);

    dprintf (3, ("EoS [%zx, %p[(%p[), last: %p(%zu)",
                (size_t)heap_segment_mem (seg), background_allocated, allocated, last_plug_end, free_obj_size_last_gap));

    if (!uoh_p && (allocated != background_allocated))
    {
        assert (gen->gen_num <= max_generation);

        dprintf (3, ("Make a free object before newly promoted objects [%zx, %p[",
                    (size_t)last_plug_end, background_allocated));

        // Space between the last plug and background_allocated becomes free
        // space in max_generation.
        size_t last_gap = background_allocated - last_plug_end;
        if (last_gap > 0)
        {
            thread_gap (last_plug_end, last_gap, generation_of (max_generation));
            add_gen_free (max_generation, last_gap);

            fix_brick_to_highest (last_plug_end, background_allocated);

            // When we allowed fgc's during going through gaps, we could have erased the brick
            // that corresponds to bgc_allocated 'cause we had to update the brick there,
            // recover it here.
            fix_brick_to_highest (background_allocated, background_allocated);
        }
    }
    else
    {
        // by default, if allocated == background_allocated, it can't
        // be the ephemeral segment.
        if (seg == ephemeral_heap_segment)
        {
            FATAL_GC_ERROR();
        }

#ifndef USE_REGIONS
        if (allocated == heap_segment_mem (seg))
        {
            // this can happen with UOH segments when multiple threads
            // allocate new segments and not all of them were needed to
            // satisfy allocation requests.
            assert (gen->gen_num > max_generation);
        }
#endif //!USE_REGIONS

        // No plug was found on this segment at all.
        if (last_plug_end == heap_segment_mem (seg))
        {
            // REGIONS TODO: start_seg doesn't matter for regions. We can get rid of it too.
            // Just need to update the start segment accordingly in generation_delete_heap_segment.
            // Also this might leave us with no regions at all for gen2 and we should be prepared
            // for that. One approach is to ensure at least one region per generation at the beginning
            // of a GC.
            if (seg != start_seg)
            {
                *delete_p = TRUE;
            }

            dprintf (3, ("h%d seg %p %s be deleted", heap_number,
                        heap_segment_mem (seg), (*delete_p ? "should" : "should not")));

        }
        if (!*delete_p)
        {
            // Keep the segment: trim allocated back to the last plug, fill the
            // trimmed part for verification, and decommit the pages past it.
            dprintf (3, ("[h%d] seg %zx alloc %p->%zx",
                heap_number, (size_t)seg,
                heap_segment_allocated (seg),
                (size_t)last_plug_end));
            heap_segment_allocated (seg) = last_plug_end;
            set_mem_verify (heap_segment_allocated (seg) - plug_skew, heap_segment_used (seg), 0xbb);

            decommit_heap_segment_pages (seg, 0);
        }
    }

    if (free_obj_size_last_gap)
    {
        generation_free_obj_space (gen) -= free_obj_size_last_gap;
        dprintf (2, ("[h%d] PS: gen2FO-: %zd->%zd",
            heap_number, free_obj_size_last_gap, generation_free_obj_space (gen)));
    }

    dprintf (3, ("verifying seg %p's mark array was completely cleared", seg));
    bgc_verify_mark_array_cleared (seg);
}

// Tells a foreground GC whether object o on segment seg should be considered, given the
// state of an in-progress background GC. The three flags carry the same names as the
// outputs of should_check_bgc_mark (presumably computed there for seg).
//
// Returns TRUE when the bgc mark bit does not need to be consulted for o; otherwise
// returns the result of background_object_marked (o, FALSE).
//
// NOTE: the logic for this function must be kept in sync with the analogous function
// in ToolBox\SOS\Strike\gc.cpp
inline
BOOL gc_heap::fgc_should_consider_object (uint8_t* o,
                                          heap_segment* seg,
                                          BOOL consider_bgc_mark_p,
                                          BOOL check_current_sweep_p,
                                          BOOL check_saved_sweep_p)
{
#ifdef USE_REGIONS
    assert (!check_saved_sweep_p);
#endif //USE_REGIONS

    // TRUE  - the bgc mark bit can be ignored for o.
    // FALSE - the bgc mark bit decides the answer.
    BOOL skip_bgc_mark_p = TRUE;

    if (consider_bgc_mark_p)
    {
        skip_bgc_mark_p = FALSE;

        if (check_current_sweep_p && (o < current_sweep_pos))
        {
            dprintf (3, ("no bgc mark - o: %p < cs: %p", o, current_sweep_pos));
            skip_bgc_mark_p = TRUE;
        }
        else if (check_saved_sweep_p)
        {
#ifndef USE_REGIONS
            if (o >= saved_sweep_ephemeral_start)
            {
                dprintf (3, ("no bgc mark - o: %p >= ss: %p", o, saved_sweep_ephemeral_start));
                skip_bgc_mark_p = TRUE;
            }
#endif //!USE_REGIONS
        }
        else
        {
            uint8_t* seg_bgc_allocated = heap_segment_background_allocated (seg);

#ifndef USE_REGIONS
            // if this was the saved ephemeral segment, check_saved_sweep_p
            // would've been true.
            assert (heap_segment_background_allocated (seg) != saved_sweep_ephemeral_start);
#endif //!USE_REGIONS

            // background_allocated could be 0 for the new segments acquired during bgc
            // sweep and we still want to skip the mark bit check in that case.
            if (o >= seg_bgc_allocated)
            {
                dprintf (3, ("no bgc mark - o: %p >= ba: %p", o, seg_bgc_allocated));
                skip_bgc_mark_p = TRUE;
            }
        }
    }

    dprintf (3, ("bgc mark %p: %s (bm: %s)", o, (skip_bgc_mark_p ? "no" : "yes"), ((skip_bgc_mark_p || background_object_marked (o, FALSE)) ? "yes" : "no")));

    if (skip_bgc_mark_p)
    {
        return TRUE;
    }

    return background_object_marked (o, FALSE);
}

// Computes, for segment seg, how the bgc mark bit has to be treated:
//
//   *consider_bgc_mark_p   - TRUE if the bgc mark bit matters at all for this segment.
//   *check_current_sweep_p - only meaningful when *consider_bgc_mark_p is TRUE; TRUE if the
//                            current sweep position lies within seg and must be considered.
//   *check_saved_sweep_p   - TRUE if seg is the saved ephemeral segment (never set with
//                            USE_REGIONS).
//
// All three outputs are FALSE unless the concurrent GC state is c_gc_state_planning.
//
// NOTE: the logic for this function must be kept in sync with the analogous function
// in ToolBox\SOS\Strike\gc.cpp
void gc_heap::should_check_bgc_mark (heap_segment* seg,
                                     BOOL* consider_bgc_mark_p,
                                     BOOL* check_current_sweep_p,
                                     BOOL* check_saved_sweep_p)
{
    *consider_bgc_mark_p = FALSE;
    *check_current_sweep_p = FALSE;
    *check_saved_sweep_p = FALSE;

    if (current_c_gc_state != c_gc_state_planning)
    {
        return;
    }

    // We are doing the current_sweep_pos comparison here because we have yet to
    // turn on the swept flag for the segment but in_range_for_segment will return
    // FALSE if the address is the same as reserved.
    if ((seg->flags & heap_segment_flags_swept) || (current_sweep_pos == heap_segment_reserved (seg)))
    {
        dprintf (3, ("seg %p is already swept by bgc", seg));
        return;
    }

    if (heap_segment_background_allocated (seg) == 0)
    {
        dprintf (3, ("seg %p newly alloc during bgc", seg));
        return;
    }

    *consider_bgc_mark_p = TRUE;

    dprintf (3, ("seg %p hasn't been swept by bgc", seg));

#ifndef USE_REGIONS
    if (seg == saved_sweep_ephemeral_seg)
    {
        dprintf (3, ("seg %p is the saved ephemeral seg", seg));
        *check_saved_sweep_p = TRUE;
    }
#endif //!USE_REGIONS

    if (in_range_for_segment (current_sweep_pos, seg))
    {
        dprintf (3, ("current sweep pos is %p and within seg %p",
                      current_sweep_pos, seg));
        *check_current_sweep_p = TRUE;
    }
}

// Sweeps the generations below max_generation (from max_generation - 1 down to 0) as part
// of a background GC. For each generation it walks the objects up to `end`, treats every
// run of objects for which background_object_marked returns true as a plug, and turns the
// gaps between plugs into free space:
//   - for gen1 and older, gaps are handed to thread_gap on that generation;
//   - for gen0, gaps are made into unused arrays and, when at least min_free_list in size,
//     threaded onto a local list that is copied into gen0's allocator at the very end.
// dd_fragmentation is updated for every generation that is swept.
//
// REGIONS TODO: I'm not releasing any empty ephemeral regions here as the gen0 allocator is
// iterating over these regions. We'd want to do the same as what we do with LOH segs/regions.
void gc_heap::background_ephemeral_sweep()
{
    dprintf (3, ("bgc ephemeral sweep"));

    int align_const = get_alignment_constant (TRUE);

#ifndef USE_REGIONS
    // Remember which segment was ephemeral and where gen1 started when this sweep began;
    // should_check_bgc_mark and fgc_should_consider_object compare against these.
    saved_sweep_ephemeral_seg = ephemeral_heap_segment;
    saved_sweep_ephemeral_start = generation_allocation_start (generation_of (max_generation - 1));
#endif //!USE_REGIONS

    // Since we don't want to interfere with gen0 allocation while we are threading gen0 free list,
    // we thread onto a list first then publish it when we are done.
    allocator youngest_free_list;
    size_t youngest_free_list_space = 0;
    size_t youngest_free_obj_space = 0;

    youngest_free_list.clear();

    // Sanity check only: the loop body contains nothing but an assert.
    for (int i = 0; i <= (max_generation - 1); i++)
    {
        generation* gen_to_reset = generation_of (i);
        assert (generation_free_list_space (gen_to_reset) == 0);
        // Can only assert free_list_space is 0, not free_obj_space as the allocator could have added
        // something there.
    }

    // Sweep from the oldest ephemeral generation down to gen0.
    for (int i = (max_generation - 1); i >= 0; i--)
    {
        generation* current_gen = generation_of (i);
#ifdef USE_REGIONS
        // With regions, each generation is a list of regions and the braces below form the
        // body of this while loop; without regions the braces are a plain scope executed once.
        heap_segment* ephemeral_region = heap_segment_rw (generation_start_segment (current_gen));
        while (ephemeral_region)
#endif //USE_REGIONS
        {
#ifdef USE_REGIONS
            uint8_t* o = heap_segment_mem (ephemeral_region);
            uint8_t* end = heap_segment_background_allocated (ephemeral_region);
            dprintf (3, ("bgc eph: gen%d seg %p(%p-%p)",
                heap_segment_gen_num (ephemeral_region),
                heap_segment_mem (ephemeral_region),
                heap_segment_allocated (ephemeral_region),
                heap_segment_background_allocated (ephemeral_region)));
            // This doesn't conflict with the allocator getting a new region in gen0.
            // If the allocator just threaded a region onto the gen0 region list we will
            // read that region and detect that its background allocated is 0.
            if (!end)
            {
                // Nothing to walk in this region: just flag it as swept and move on.
                ephemeral_region->flags |= heap_segment_flags_swept;
                ephemeral_region = heap_segment_next (ephemeral_region);
                continue;
            }
#else //USE_REGIONS
            uint8_t* o = generation_allocation_start (current_gen);
            //Skip the generation gap object
            o = o + Align(size (o), align_const);
            // This generation ends where the next younger one starts; gen0 ends at the
            // ephemeral segment's allocated.
            uint8_t* end = ((i > 0) ?
                        generation_allocation_start (generation_of (i - 1)) :
                        heap_segment_allocated (ephemeral_heap_segment));
#endif //USE_REGIONS

            // plug_end is the end of the previous plug (i.e. the start of the current gap);
            // plug_start is the start of the plug currently being processed.
            uint8_t* plug_end = o;
            uint8_t* plug_start = o;
            BOOL marked_p = FALSE;

            while (o < end)
            {
                marked_p = background_object_marked (o, TRUE);
                if (marked_p)
                {
                    // o starts a new plug; [plug_end, plug_start[ is the gap in front of it.
                    plug_start = o;
                    size_t plug_size = plug_start - plug_end;

                    if (i >= 1)
                    {
                        thread_gap (plug_end, plug_size, current_gen);
                    }
                    else
                    {
                        // gen0: build the free object here and account for it locally so the
                        // live gen0 allocator is left untouched until the final copy.
                        if (plug_size > 0)
                        {
                            make_unused_array (plug_end, plug_size);
                            if (plug_size >= min_free_list)
                            {
                                youngest_free_list_space += plug_size;
                                youngest_free_list.thread_item (plug_end, plug_size);
                            }
                            else
                            {
                                youngest_free_obj_space += plug_size;
                            }
                        }
                    }

                    fix_brick_to_highest (plug_end, plug_start);
                    fix_brick_to_highest (plug_start, plug_start);

                    // Advance o over the rest of the plug: stop at the first object that is
                    // not marked, or at end.
                    BOOL m = TRUE;
                    while (m)
                    {
                        o = o + Align (size (o), align_const);
                        if (o >= end)
                        {
                            break;
                        }

                        m = background_object_marked (o, TRUE);
                    }
                    plug_end = o;
                    dprintf (3, ("bgs: plug [%zx, %zx[", (size_t)plug_start, (size_t)plug_end));
                }
                else
                {
                    // Skip over a run of unmarked objects; plug_end is left where it was so the
                    // whole run becomes part of the gap handled above or after the loop.
                    while ((o < end) && !background_object_marked (o, FALSE))
                    {
                        o = o + Align (size (o), align_const);
                    }
                }
            }

            // Handle the gap between the last plug and end, if there is one.
            if (plug_end != end)
            {
                if (i >= 1)
                {
                    thread_gap (plug_end, end - plug_end, current_gen);
                }
                else
                {
#ifndef USE_REGIONS
                    // For gen0 on segments, pull the ephemeral segment's allocated (and saved
                    // bg allocated) back to the end of the last plug.
                    heap_segment_allocated (ephemeral_heap_segment) = plug_end;
                    heap_segment_saved_bg_allocated (ephemeral_heap_segment) = plug_end;
#endif //!USE_REGIONS
                    make_unused_array (plug_end, (end - plug_end));
                }

                fix_brick_to_highest (plug_end, end);
            }
#ifdef USE_REGIONS
            ephemeral_region->flags |= heap_segment_flags_swept;
            // Setting this to 0 so background_sweep can terminate for SOH.
            heap_segment_background_allocated (ephemeral_region) = 0;
            ephemeral_region = heap_segment_next (ephemeral_region);
#endif //USE_REGIONS
        }
        dd_fragmentation (dynamic_data_of (i)) =
            generation_free_list_space (current_gen) + generation_free_obj_space (current_gen);
    }

    // Publish the gen0 results gathered on the local list: overwrite gen0's free space
    // counters and fragmentation, then copy the local list into gen0's allocator.
    generation* youngest_gen = generation_of (0);
    generation_free_list_space (youngest_gen) = youngest_free_list_space;
    generation_free_obj_space (youngest_gen) = youngest_free_obj_space;
    dd_fragmentation (dynamic_data_of (0)) = youngest_free_list_space + youngest_free_obj_space;
    generation_allocator (youngest_gen)->copy_with_no_repair (&youngest_free_list);
}

// Runs the sweep phase of a background GC for this heap. In order, it:
//   1. resets the per-generation allocation bookkeeping for gen0..max_generation (and the
//      free lists, except that max_generation's list may be kept with DOUBLY_LINKED_FL);
//   2. switches current_c_gc_state to c_gc_state_planning and snapshots background_allocated
//      for every UOH segment;
//   3. has heap 0 record the pause duration and call restart_EE;
//   4. sweeps the ephemeral generations via background_ephemeral_sweep and releases gc_lock;
//   5. sweeps max_generation and then each UOH generation segment by segment, threading the
//      gaps between marked plugs and finishing each segment with
//      process_background_segment_end (more_space_lock_uoh is taken before the UOH
//      generations are swept);
//   6. resets background_allocated on the max_generation segments, computes the new dynamic
//      data, sets current_c_gc_state to c_gc_state_free and releases more_space_lock_uoh.
// With MULTIPLE_HEAPS the heaps synchronize at several bgc_t_join points along the way.
void gc_heap::background_sweep()
{
    //concurrent_print_time_delta ("finished with mark and start with sweep");
    concurrent_print_time_delta ("Sw");
    dprintf (2, ("---- (GC%zu)Background Sweep Phase ----", VolatileLoad(&settings.gc_index)));

    // Whether max_generation's free list is cleared below. Without DOUBLY_LINKED_FL it
    // always is; with it, only when trigger_bgc_for_rethreading_p is set under
    // DYNAMIC_HEAP_COUNT, and never otherwise.
    bool rebuild_maxgen_fl_p = true;

#ifdef DOUBLY_LINKED_FL
#ifdef DYNAMIC_HEAP_COUNT
    rebuild_maxgen_fl_p = trigger_bgc_for_rethreading_p;
#else
    rebuild_maxgen_fl_p = false;
#endif //DYNAMIC_HEAP_COUNT
#endif //DOUBLY_LINKED_FL

    // Reset allocation state for gen0 through max_generation. UOH generations are reset
    // later, right before they are swept.
    for (int i = 0; i <= max_generation; i++)
    {
        generation* gen_to_reset = generation_of (i);

        bool clear_fl_p = true;

#ifdef DOUBLY_LINKED_FL
        if (i == max_generation)
        {
            clear_fl_p = rebuild_maxgen_fl_p;

            dprintf (6666, ("h%d: gen2 still has FL: %zd, FO: %zd, clear gen2 FL %s",
                heap_number,
                generation_free_list_space (gen_to_reset),
                generation_free_obj_space (gen_to_reset),
                (clear_fl_p ? "yes" : "no")));
        }
#endif //DOUBLY_LINKED_FL

        if (clear_fl_p)
        {
            if (i == max_generation)
            {
                dprintf (6666, ("clearing g2 FL for h%d!", heap_number));
            }
            generation_allocator (gen_to_reset)->clear();
            generation_free_list_space (gen_to_reset) = 0;
            generation_free_obj_space (gen_to_reset) = 0;
        }

        generation_free_list_allocated (gen_to_reset) = 0;
        generation_end_seg_allocated (gen_to_reset) = 0;
        generation_condemned_allocated (gen_to_reset) = 0;
        generation_sweep_allocated (gen_to_reset) = 0;
        //reset the allocation so foreground gc can allocate into older generation
        generation_allocation_pointer (gen_to_reset)= 0;
        generation_allocation_limit (gen_to_reset) = 0;
        generation_allocation_segment (gen_to_reset) = heap_segment_rw (generation_start_segment (gen_to_reset));
    }

    FIRE_EVENT(BGC2ndNonConEnd);

    uoh_alloc_thread_count = 0;

    init_free_and_plug();

    current_bgc_state = bgc_sweep_soh;
    verify_soh_segment_list();

#ifdef DOUBLY_LINKED_FL
    // set the initial segment and position so that foreground GC knows where BGC is with the sweep
    current_sweep_seg = heap_segment_rw (generation_start_segment (generation_of (max_generation)));
    current_sweep_pos = 0;
#endif //DOUBLY_LINKED_FL

#ifdef FEATURE_BASICFREEZE
    sweep_ro_segments();
#endif //FEATURE_BASICFREEZE

    dprintf (3, ("lh state: planning"));

    // Multiple threads may reach here.  This conditional partially avoids multiple volatile writes.
    if (current_c_gc_state != c_gc_state_planning)
    {
        current_c_gc_state = c_gc_state_planning;
    }

    concurrent_print_time_delta ("Swe");

    // For every UOH segment: clear the swept flag and record the current allocated as its
    // background_allocated.
    for (int i = uoh_start_generation; i < total_generation_count; i++)
    {
        heap_segment* uoh_seg = heap_segment_rw (generation_start_segment (generation_of (i)));
        _ASSERTE(uoh_seg  != NULL);
        while (uoh_seg)
        {
            uoh_seg->flags &= ~heap_segment_flags_swept;
            heap_segment_background_allocated (uoh_seg) = heap_segment_allocated (uoh_seg);
            uoh_seg = heap_segment_next_rw (uoh_seg);
        }
    }

#ifdef MULTIPLE_HEAPS
    // Wait for all heaps to get here before the EE is restarted below.
    bgc_t_join.join(this, gc_join_restart_ee);
    if (bgc_t_join.joined())
    {
        dprintf(2, ("Starting BGC threads for resuming EE"));
        bgc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    // Only heap 0 records the pause duration (into pause_durations[1] of the current
    // last_bgc_info entry and into total_suspended_time) and restarts the EE.
    if (heap_number == 0)
    {
        get_and_reset_uoh_alloc_info();
        uint64_t suspended_end_ts = GetHighPrecisionTimeStamp();
        last_bgc_info[last_bgc_info_index].pause_durations[1] = (size_t)(suspended_end_ts - suspended_start_time);
        total_suspended_time += last_bgc_info[last_bgc_info_index].pause_durations[1];
        restart_EE ();
    }

    FIRE_EVENT(BGC2ndConBegin);

    background_ephemeral_sweep();

    concurrent_print_time_delta ("Swe eph");

    // With MULTIPLE_HEAPS only the thread for which joined() is true runs the block below;
    // otherwise it runs unconditionally. gc_lock is released here.
#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_after_ephemeral_sweep);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef FEATURE_EVENT_TRACE
        bgc_heap_walk_for_etw_p = GCEventStatus::IsEnabled(GCEventProvider_Default,
                                                           GCEventKeyword_GCHeapSurvivalAndMovement,
                                                           GCEventLevel_Information);
#endif //FEATURE_EVENT_TRACE

        leave_spin_lock (&gc_lock);

#ifdef MULTIPLE_HEAPS
        dprintf(2, ("Starting BGC threads for BGC sweeping"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    disable_preemptive (true);

    // allow_fgc() is called each time current_num_objs reaches num_objs while walking
    // objects; the counter carries over across segments and generations.
    dynamic_data* dd     = dynamic_data_of (max_generation);
    const int num_objs   = 256;
    int current_num_objs = 0;

    // Sweep max_generation first, then each UOH generation.
    for (int i = max_generation; i < total_generation_count; i++)
    {
        generation* gen = generation_of (i);
        heap_segment* gen_start_seg = heap_segment_rw (generation_start_segment(gen));
        heap_segment* next_seg = 0;
        heap_segment* prev_seg;
        heap_segment* start_seg;
        int align_const = get_alignment_constant (i == max_generation);

#ifndef DOUBLY_LINKED_FL
        if (i == max_generation)
        {
#ifdef USE_REGIONS
            start_seg = generation_tail_region (gen);
#else
            // start with saved ephemeral segment
            // we are no longer holding gc_lock, so a new ephemeral segment could be added, we want the saved one.
            start_seg = saved_sweep_ephemeral_seg;
#endif //USE_REGIONS
            prev_seg = heap_segment_next(start_seg);
        }
        else
#endif //!DOUBLY_LINKED_FL
        {
            // If we use doubly linked FL we don't need to go backwards as we are maintaining the free list.
            start_seg = gen_start_seg;
            prev_seg = NULL;

            if (i > max_generation)
            {
                // UOH allocations are allowed while sweeping SOH, so
                // we defer clearing UOH free lists until we start sweeping them
                generation_allocator (gen)->clear();
                generation_free_list_space (gen) = 0;
                generation_free_obj_space (gen) = 0;
                generation_free_list_allocated (gen) = 0;
                generation_end_seg_allocated (gen) = 0;
                generation_condemned_allocated (gen) = 0;
                generation_sweep_allocated (gen) = 0;
                generation_allocation_pointer (gen)= 0;
                generation_allocation_limit (gen) = 0;
                generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen));
            }
            else
            {
                dprintf (3333, ("h%d: SOH sweep start on seg %zx: total FL: %zd, FO: %zd",
                    heap_number, (size_t)start_seg,
                    generation_free_list_space (gen),
                    generation_free_obj_space (gen)));
            }
        }

        _ASSERTE(start_seg != NULL);
        heap_segment* seg = start_seg;
        dprintf (2, ("bgs: sweeping gen %d seg %p->%p(%p)", gen->gen_num,
            heap_segment_mem (seg),
            heap_segment_allocated (seg),
            heap_segment_background_allocated (seg)));
        while (seg
#ifdef DOUBLY_LINKED_FL
               // We no longer go backwards in segment list for SOH so we need to bail when we see
               // segments newly allocated during bgc sweep.
               && !((heap_segment_background_allocated (seg) == 0) && (gen != large_object_generation))
#endif //DOUBLY_LINKED_FL
                )
        {
            uint8_t* o = heap_segment_mem (seg);
            if (seg == gen_start_seg)
            {
#ifndef USE_REGIONS
                // On segments the generation starts with a free object; step over it.
                assert (o == generation_allocation_start (gen));
                assert (method_table (o) == g_gc_pFreeObjectMethodTable);
                o = o + Align (size (o), align_const);
#endif //!USE_REGIONS
            }

            // plug_end tracks the end of the last plug seen (the start of the current gap).
            // current_sweep_pos / next_sweep_obj publish how far the sweep has progressed.
            uint8_t* plug_end = o;
            current_sweep_pos = o;
            next_sweep_obj = o;
#ifdef DOUBLY_LINKED_FL
            current_sweep_seg = seg;
#endif //DOUBLY_LINKED_FL

            // This records the total size of free objects (including the ones on and not on FL)
            // in the gap and it gets set to 0 when we encounter a plug. If the last gap we saw
            // on a seg is unmarked, we will process this in process_background_segment_end.
            size_t free_obj_size_last_gap = 0;

            allow_fgc();
            uint8_t* end = background_next_end (seg, (i > max_generation));
            dprintf (3333, ("bgs: seg: %zx, [%zx, %zx[%zx", (size_t)seg,
                            (size_t)heap_segment_mem (seg),
                            (size_t)heap_segment_allocated (seg),
                            (size_t)heap_segment_background_allocated (seg)));

            // Walk the objects of this segment, alternating between plugs (runs of marked
            // objects) and gaps (runs of unmarked objects).
            while (o < end)
            {
                if (background_object_marked (o, TRUE))
                {
                    // Start of a plug: the gap [plug_end, plug_start[ in front of it is
                    // threaded onto this generation.
                    uint8_t* plug_start = o;
                    if (i > max_generation)
                    {
                        dprintf (2, ("uoh fr: [%p-%p[(%zd)", plug_end, plug_start, plug_start-plug_end));
                    }

                    thread_gap (plug_end, plug_start-plug_end, gen);
                    if (i == max_generation)
                    {
                        add_gen_free (max_generation, plug_start-plug_end);

#ifdef DOUBLY_LINKED_FL
                        // The free objects counted in this gap were added to free_obj_space
                        // while walking it; take them back out now that the gap is closed.
                        if (free_obj_size_last_gap)
                        {
                            generation_free_obj_space (gen) -= free_obj_size_last_gap;
                            dprintf (3333, ("[h%d] LG: gen2FO-: %zd->%zd",
                                heap_number, free_obj_size_last_gap, generation_free_obj_space (gen)));

                            free_obj_size_last_gap = 0;
                        }
#endif //DOUBLY_LINKED_FL

                        fix_brick_to_highest (plug_end, plug_start);
                        // we need to fix the brick for the next plug here 'cause an FGC can
                        // happen and can't read a stale brick.
                        fix_brick_to_highest (plug_start, plug_start);
                    }

                    // Advance over the rest of the plug, publishing the sweep position and
                    // calling allow_fgc() every num_objs objects.
                    do
                    {
                        next_sweep_obj = o + Align (size (o), align_const);
                        current_num_objs++;
                        if (current_num_objs >= num_objs)
                        {
                            current_sweep_pos = next_sweep_obj;
                            allow_fgc();
                            current_num_objs = 0;
                        }
                        o = next_sweep_obj;
                    } while ((o < end) && background_object_marked(o, TRUE));

                    plug_end = o;
                    if (i == max_generation)
                    {
                        add_gen_plug (max_generation, plug_end-plug_start);
                        dd_survived_size (dd) += (plug_end - plug_start);
                    }
                    dprintf (3, ("bgs: plug [%zx, %zx[", (size_t)plug_start, (size_t)plug_end));
                }

                // Walk the gap: a run of unmarked objects.
                while ((o < end) && !background_object_marked (o, FALSE))
                {
                    size_t size_o = Align(size (o), align_const);
                    next_sweep_obj = o + size_o;

#ifdef DOUBLY_LINKED_FL
                    // When max_generation's free list is kept rather than rebuilt, existing
                    // free objects in the gap are taken off the free list (if they are on it)
                    // and accounted as free object space for now.
                    if ((i == max_generation) && !rebuild_maxgen_fl_p)
                    {
                        if (method_table (o) == g_gc_pFreeObjectMethodTable)
                        {
                            free_obj_size_last_gap += size_o;

                            if (is_on_free_list (o, size_o))
                            {
#ifdef MULTIPLE_HEAPS
                                assert (heap_of (o) == this);
#endif //MULTIPLE_HEAPS
                                generation_allocator (gen)->unlink_item_no_undo (o, size_o);
                                generation_free_list_space (gen) -= size_o;
                                assert ((ptrdiff_t)generation_free_list_space (gen) >= 0);
                                generation_free_obj_space (gen) += size_o;

                                dprintf (3333, ("[h%d] gen2F-: %p->%p(%zd) FL: %zd",
                                    heap_number, o, (o + size_o), size_o,
                                    generation_free_list_space (gen)));
                                dprintf (3333, ("h%d: gen2FO+: %p(%zx)->%zd (g: %zd)",
                                    heap_number, o, size_o,
                                    generation_free_obj_space (gen),
                                    free_obj_size_last_gap));
                                remove_gen_free (max_generation, size_o);
                            }
                            else
                            {
                                // this was not on the free list so it was already part of
                                // free_obj_space, so no need to subtract from it. However,
                                // we do need to keep track in this gap's FO space.
                                dprintf (3333, ("h%d: gen2FO: %p(%zd)->%zd (g: %zd)",
                                    heap_number, o, size_o,
                                    generation_free_obj_space (gen), free_obj_size_last_gap));
                            }

                            dprintf (3333, ("h%d: total FO: %p->%p FL: %zd, FO: %zd (g: %zd)",
                                heap_number, plug_end, next_sweep_obj,
                                generation_free_list_space (gen),
                                generation_free_obj_space (gen),
                                free_obj_size_last_gap));
                        }
                    }
#endif //DOUBLY_LINKED_FL

                    current_num_objs++;
                    if (current_num_objs >= num_objs)
                    {
                        // Inside a gap the published sweep position is the end of the last
                        // plug, not the current object.
                        current_sweep_pos = plug_end;
                        dprintf (1234, ("f: swept till %p", current_sweep_pos));
                        allow_fgc();
                        current_num_objs = 0;
                    }

                    o = next_sweep_obj;
                }
            }

            // Pick the next segment before this one is possibly deleted below.
#ifdef DOUBLY_LINKED_FL
            next_seg = heap_segment_next (seg);
#else //DOUBLY_LINKED_FL
            if (i > max_generation)
            {
                next_seg = heap_segment_next (seg);
            }
            else
            {
                // For SOH segments we go backwards.
                next_seg = heap_segment_prev (gen_start_seg, seg);
            }
#endif //DOUBLY_LINKED_FL

            BOOL delete_p = FALSE;
            if (!heap_segment_read_only_p (seg))
            {
                if (i > max_generation)
                {
                    // we can treat all UOH segments as in the bgc domain
                    // regardless of whether we saw in bgc mark or not
                    // because we don't allow UOH allocations during bgc
                    // sweep anyway - the UOH segments can't change.
                    process_background_segment_end (seg, gen, plug_end,
                                                    start_seg, &delete_p, 0);
                }
                else
                {
                    assert (heap_segment_background_allocated (seg) != 0);
                    process_background_segment_end (seg, gen, plug_end,
                                                    start_seg, &delete_p, free_obj_size_last_gap);

#ifndef USE_REGIONS
                    assert (next_seg || !delete_p);
#endif //!USE_REGIONS
                }
            }

            // Kept only for the logging below, since prev_seg may be updated first.
            heap_segment* saved_prev_seg = prev_seg;

            if (delete_p)
            {
                generation_delete_heap_segment (gen, seg, prev_seg, next_seg);
            }
            else
            {
                prev_seg = seg;
                dprintf (2, ("seg %p (%p) has been swept", seg, heap_segment_mem (seg)));
                seg->flags |= heap_segment_flags_swept;
                current_sweep_pos = end;
            }

            verify_soh_segment_list();

#ifdef DOUBLY_LINKED_FL
            // Skip over segments whose background_allocated is 0.
            while (next_seg && heap_segment_background_allocated (next_seg) == 0)
            {
                dprintf (2, ("[h%d] skip new %p ", heap_number, next_seg));
                next_seg = heap_segment_next (next_seg);
            }
#endif //DOUBLY_LINKED_FL

            dprintf (GTC_LOG, ("seg: %p(%p), next_seg: %p(%p), prev_seg: %p(%p), delete_p %d",
                seg, (seg ? heap_segment_mem (seg) : 0),
                next_seg, (next_seg ? heap_segment_mem (next_seg) : 0),
                saved_prev_seg, (saved_prev_seg ? heap_segment_mem (saved_prev_seg) : 0),
                (delete_p ? 1 : 0)));
            seg = next_seg;
        }

        generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen));
        _ASSERTE(generation_allocation_segment(gen) != NULL);

        // Done with SOH: take more_space_lock_uoh and wait for in-flight UOH allocating
        // threads before moving on to the UOH generations. The lock is held until the end
        // of this function.
        if (i == max_generation)
        {
            dprintf (2, ("bgs: sweeping uoh objects"));
            concurrent_print_time_delta ("Swe SOH");
            FIRE_EVENT(BGC1stSweepEnd, 0);

            //block concurrent allocation for UOH objects
            enter_spin_lock (&more_space_lock_uoh);
            add_saved_spinlock_info (true, me_acquire, mt_bgc_uoh_sweep, msl_entered);

            concurrent_print_time_delta ("Swe UOH took msl");

            // We wait till all allocating threads are completely done.
            int spin_count = yp_spin_count_unit;
            while (uoh_alloc_thread_count)
            {
                spin_and_switch (spin_count, (uoh_alloc_thread_count == 0));
            }

            current_bgc_state = bgc_sweep_uoh;
        }
    }

    // The sizes below are only used for logging.
    size_t total_soh_size = generation_sizes (generation_of (max_generation));
    size_t total_loh_size = generation_size (loh_generation);
    size_t total_poh_size = generation_size (poh_generation);

    dprintf (GTC_LOG, ("h%d: S: poh: %zd, loh: %zd, soh: %zd", heap_number, total_poh_size, total_loh_size, total_soh_size));

    dprintf (GTC_LOG, ("end of bgc sweep: gen2 FL: %zd, FO: %zd",
        generation_free_list_space (generation_of (max_generation)),
        generation_free_obj_space (generation_of (max_generation))));

    dprintf (GTC_LOG, ("h%d: end of bgc sweep: loh FL: %zd, FO: %zd",
        heap_number,
        generation_free_list_space (generation_of (loh_generation)),
        generation_free_obj_space (generation_of (loh_generation))));

    dprintf (GTC_LOG, ("h%d: end of bgc sweep: poh FL: %zd, FO: %zd",
        heap_number,
        generation_free_list_space (generation_of (poh_generation)),
        generation_free_obj_space (generation_of (poh_generation))));

    FIRE_EVENT(BGC2ndConEnd);
    concurrent_print_time_delta ("background sweep");

    // For every max_generation segment, save background_allocated into saved_bg_allocated
    // and then zero background_allocated.
    heap_segment* reset_seg = heap_segment_rw (generation_start_segment (generation_of (max_generation)));
    _ASSERTE(reset_seg != NULL);

    while (reset_seg)
    {
        heap_segment_saved_bg_allocated (reset_seg) = heap_segment_background_allocated (reset_seg);
        heap_segment_background_allocated (reset_seg) = 0;
        reset_seg = heap_segment_next_rw (reset_seg);
    }

    // We calculate dynamic data here because if we wait till we signal the lh event,
    // the allocation thread can change the fragmentation and we may read an intermediate
    // value (which can be greater than the generation size). Plus by that time it won't
    // be accurate.
    compute_new_dynamic_data (max_generation);

    // We also need to adjust size_before for UOH allocations that occurred during sweeping.
    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
    for (int i = uoh_start_generation; i < total_generation_count; i++)
    {
        assert(uoh_a_bgc_marking[i - uoh_start_generation] == 0);
        assert(uoh_a_no_bgc[i - uoh_start_generation] == 0);
        current_gc_data_per_heap->gen_data[i].size_before += uoh_a_bgc_planning[i - uoh_start_generation];
    }

#ifdef DOUBLY_LINKED_FL
    current_bgc_state = bgc_not_in_process;

    // We can have an FGC triggered before we set the global state to free
    // so we need to not have left over current_sweep_seg that point to
    // a segment that might've been deleted at the beginning of an FGC.
    current_sweep_seg = 0;
#endif //DOUBLY_LINKED_FL

    enable_preemptive ();

    // With MULTIPLE_HEAPS only the thread for which joined() is true runs the block below;
    // otherwise it runs unconditionally.
#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_set_state_free);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
        // TODO: We are using this join just to set the state. Should
        // look into eliminating it - check to make sure things that use
        // this state can live with per heap state like should_check_bgc_mark.
        current_c_gc_state = c_gc_state_free;

#ifdef DYNAMIC_HEAP_COUNT
        update_total_soh_stable_size();
#endif //DYNAMIC_HEAP_COUNT

#ifdef BGC_SERVO_TUNING
        if (bgc_tuning::enable_fl_tuning)
        {
            enter_spin_lock (&gc_lock);
            bgc_tuning::record_and_adjust_bgc_end();
            leave_spin_lock (&gc_lock);
        }
#endif //BGC_SERVO_TUNING

#ifdef MULTIPLE_HEAPS
        dprintf(2, ("Starting BGC threads after background sweep phase"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    disable_preemptive (true);

    // Release the UOH allocation lock that was taken once the SOH sweep finished.
    add_saved_spinlock_info (true, me_release, mt_bgc_uoh_sweep, msl_entered);
    leave_spin_lock (&more_space_lock_uoh);

    //dprintf (GTC_LOG, ("---- (GC%zu)End Background Sweep Phase ----", VolatileLoad(&settings.gc_index)));
    dprintf (GTC_LOG, ("---- (GC%zu)ESw ----", VolatileLoad(&settings.gc_index)));
}
#endif //BACKGROUND_GC

void gc_heap::sweep_uoh_objects (int gen_num)
{
    //this min value is for the sake of the dynamic tuning.
    //so we know that we are not starting even if we have no
    //survivors.
    generation* gen        = generation_of (gen_num);
    heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen));

    _ASSERTE(start_seg != NULL);

    heap_segment* seg      = start_seg;
    heap_segment* prev_seg = 0;
    uint8_t* o             = get_uoh_start_object (seg, gen);

    uint8_t* plug_end         = o;
    uint8_t* plug_start       = o;

    generation_allocator (gen)->clear();
    generation_free_list_space (gen) = 0;
    generation_free_obj_space (gen) = 0;
    generation_free_list_allocated (gen) = 0;

    dprintf (3, ("sweeping uoh objects"));
    dprintf (3, ("seg: %zx, [%zx, %zx[, starting from %p",
                 (size_t)seg,
                 (size_t)heap_segment_mem (seg),
                 (size_t)heap_segment_allocated (seg),
                 o));

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            heap_segment* next_seg = heap_segment_next (seg);
            //delete the empty segment if not the only one
            // REGIONS TODO: for regions we can get rid of the start_seg. Just need
            // to update start region accordingly.
            if ((plug_end == heap_segment_mem (seg)) &&
                (seg != start_seg) && !heap_segment_read_only_p (seg))
            {
                //prepare for deletion
                dprintf (3, ("Preparing empty large segment %zx", (size_t)seg));
                assert (prev_seg);
                heap_segment_next (prev_seg) = next_seg;
                heap_segment_next (seg) = freeable_uoh_segment;
                freeable_uoh_segment = seg;
#ifdef USE_REGIONS
                update_start_tail_regions (gen, seg, prev_seg, next_seg);
#endif //USE_REGIONS
            }
            else
            {
                if (!heap_segment_read_only_p (seg))
                {
                    dprintf (3, ("Trimming seg to %zx[", (size_t)plug_end));
                    heap_segment_allocated (seg) = plug_end;
                    decommit_heap_segment_pages (seg, 0);
                }
                prev_seg = seg;
            }
            seg = next_seg;
            if (seg == 0)
                break;
            else
            {
                o = heap_segment_mem (seg);
                plug_end = o;
                dprintf (3, ("seg: %zx, [%zx, %zx[", (size_t)seg,
                             (size_t)heap_segment_mem (seg),
                             (size_t)heap_segment_allocated (seg)));
#ifdef USE_REGIONS
                continue;
#endif //USE_REGIONS
            }
        }
        if (uoh_object_marked(o, TRUE))
        {
            plug_start = o;
            //everything between plug_end and plug_start is free
            thread_gap (plug_end, plug_start-plug_end, gen);

            BOOL m = TRUE;
            while (m)
            {
                o = o + AlignQword (size (o));
                if (o >= heap_segment_allocated (seg))
                {
                    break;
                }
                m = uoh_object_marked (o, TRUE);
            }
            plug_end = o;
            dprintf (3, ("plug [%zx, %zx[", (size_t)plug_start, (size_t)plug_end));
        }
        else
        {
            while (o < heap_segment_allocated (seg) && !uoh_object_marked(o, FALSE))
            {
                o = o + AlignQword (size (o));
            }
        }
    }

    generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen));

    _ASSERTE(generation_allocation_segment(gen) != NULL);
}

void gc_heap::relocate_in_uoh_objects (int gen_num)
{
    generation* gen = generation_of (gen_num);

    heap_segment* seg = heap_segment_rw (generation_start_segment (gen));

    _ASSERTE(seg != NULL);

    uint8_t* o = get_uoh_start_object (seg, gen);

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            seg = heap_segment_next_rw (seg);
            if (seg == 0)
                break;
            else
            {
                o = heap_segment_mem (seg);
            }
        }
        while (o < heap_segment_allocated (seg))
        {
            check_class_object_demotion (o);
            if (contain_pointers (o))
            {
                dprintf(3, ("Relocating through uoh object %zx", (size_t)o));
                go_through_object_nostart (method_table (o), o, size(o), pval,
                        {
                            reloc_survivor_helper (pval);
                        });
            }
            o = o + AlignQword (size (o));
        }
    }
}

// Walks the card table over every segment of the UOH generation gen_num. For each
// object that overlaps a set card, its pointer fields (and, for collectible types,
// its class object) are handed to mark_through_cards_helper together with fn.
// Cards for which no pointer was counted (cg_pointers_found == 0) are cleared.
//
// fn         - member function forwarded to mark_through_cards_helper; when it is
//              &gc_heap::relocate_address the class object of a collectible object
//              is only counted, not passed to the helper.
// gen_num    - UOH generation to scan; under FEATURE_CARD_MARKING_STEALING it also
//              selects card_mark_chunk_index_loh vs card_mark_chunk_index_poh.
// relocating - without USE_REGIONS selects planned vs current ephemeral bounds;
//              when FALSE the card efficiency statistics are updated at the end.
// hpt        - only present via CARD_MARKING_STEALING_ARG; forwarded to
//              mark_through_cards_helper and used in the trace output.
void gc_heap::mark_through_cards_for_uoh_objects (card_fn fn,
                                                  int gen_num,
                                                  BOOL relocating
                                                  CARD_MARKING_STEALING_ARG(gc_heap* hpt))
{
#ifdef USE_REGIONS
    uint8_t*      low               = 0;
#else
    uint8_t*      low               = gc_low;
#endif //USE_REGIONS
    // card/end_card delimit the run of cards currently being processed; end_card == 0
    // forces a search for the first run on the first iteration.
    size_t        end_card          = 0;
    generation*   oldest_gen        = generation_of (gen_num);
    heap_segment* seg               = heap_segment_rw (generation_start_segment (oldest_gen));

    _ASSERTE(seg != NULL);

    // [beg, end[ is the object range of the segment being scanned.
    uint8_t*      beg               = get_uoh_start_object (seg, oldest_gen);
    uint8_t*      end               = heap_segment_allocated (seg);

    // Count accumulated by mark_through_cards_helper for the current card; a card
    // that ends with a count of 0 gets cleared.
    size_t  cg_pointers_found = 0;

    size_t  card_word_end = (card_of (align_on_card_word (end)) /
                             card_word_width);

    // Statistics: n_eph is the sum of cg_pointers_found ("cross" in the traces),
    // n_gen is maintained by mark_through_cards_helper ("useful" in the traces),
    // n_card_set counts the cards found set.
    size_t      n_eph             = 0;
    size_t      n_gen             = 0;
    size_t      n_card_set        = 0;

    // next_boundary and nhigh are only forwarded to mark_through_cards_helper.
#ifdef USE_REGIONS
    uint8_t*    next_boundary = 0;
    uint8_t*    nhigh         = 0;
#else
    uint8_t*    next_boundary = (relocating ?
                              generation_plan_allocation_start (generation_of (max_generation -1)) :
                              ephemeral_low);

    uint8_t*    nhigh         = (relocating ?
                              heap_segment_plan_allocated (ephemeral_heap_segment) :
                              ephemeral_high);
#endif //USE_REGIONS
    // foundp: a run of set cards is available; [start_address, limit[ is the address
    // range that run covers within the segment.
    BOOL          foundp            = FALSE;
    uint8_t*      start_address     = 0;
    uint8_t*      limit             = 0;
    size_t        card              = card_of (beg);
    uint8_t*      o                 = beg;
#ifdef BACKGROUND_GC
    // Per-segment flags consumed by fgc_should_consider_object below.
    BOOL consider_bgc_mark_p        = FALSE;
    BOOL check_current_sweep_p      = FALSE;
    BOOL check_saved_sweep_p        = FALSE;
    should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC

    size_t total_cards_cleared = 0;

#ifdef FEATURE_CARD_MARKING_STEALING
    VOLATILE(uint32_t)* chunk_index = (VOLATILE(uint32_t)*) &(gen_num == loh_generation ?
        card_mark_chunk_index_loh :
        card_mark_chunk_index_poh);

    card_marking_enumerator card_mark_enumerator(seg, low, chunk_index);
    card_word_end = 0;
#endif // FEATURE_CARD_MARKING_STEALING

#ifdef USE_REGIONS
    int condemned_gen = settings.condemned_generation;
#else
    int condemned_gen = -1;
#endif //USE_REGIONS

    //dprintf(3,( "scanning large objects from %zx to %zx", (size_t)beg, (size_t)end));
    dprintf(3, ("CMl: %zx->%zx", (size_t)beg, (size_t)end));
    while (1)
    {
        // o has moved onto a later card than the one being tracked: clear the cards
        // left behind if nothing was counted for them, fold the count into n_eph and
        // start tracking the card o is on.
        if ((o < end) && (card_of(o) > card))
        {
            dprintf (3, ("Found %zd cg pointers", cg_pointers_found));
            if (cg_pointers_found == 0)
            {
                uint8_t* last_object_processed = o;
#ifdef FEATURE_CARD_MARKING_STEALING
                last_object_processed = min(limit, o);
#endif // FEATURE_CARD_MARKING_STEALING
                dprintf (3, (" Clearing cards [%zx, %zx[ ", (size_t)card_address(card), (size_t)last_object_processed));
                clear_cards (card, card_of((uint8_t*)last_object_processed));
                total_cards_cleared += (card_of((uint8_t*)last_object_processed) - card);
            }
            n_eph +=cg_pointers_found;
            cg_pointers_found = 0;
            card = card_of ((uint8_t*)o);
        }
        // The current run of set cards is used up: look for the next one.
        if ((o < end) &&(card >= end_card))
        {
#ifdef FEATURE_CARD_MARKING_STEALING
            // find another chunk with some cards set
            foundp = find_next_chunk(card_mark_enumerator, seg, n_card_set, start_address, limit, card, end_card, card_word_end);
#else // FEATURE_CARD_MARKING_STEALING
            foundp = find_card (card_table, card, card_word_end, end_card);
            if (foundp)
            {
                n_card_set+= end_card - card;
                start_address = max (beg, card_address (card));
            }
            limit = min (end, card_address (end_card));
#endif  // FEATURE_CARD_MARKING_STEALING
        }
        // Nothing more to scan in this segment (no set card found, or o / the card
        // is at or beyond end): settle the current card, then move to the next
        // segment or leave the loop when there is none.
        if ((!foundp) || (o >= end) || (card_address (card) >= end))
        {
            if ((foundp) && (cg_pointers_found == 0))
            {
                dprintf(3,(" Clearing cards [%zx, %zx[ ", (size_t)card_address(card),
                           (size_t)card_address(card+1)));
                clear_cards (card, card+1);
                total_cards_cleared += 1;
            }
            n_eph +=cg_pointers_found;
            cg_pointers_found = 0;
#ifdef FEATURE_CARD_MARKING_STEALING
            // we have decided to move to the next segment - make sure we exhaust the chunk enumerator for this segment
            card_mark_enumerator.exhaust_segment(seg);
#endif // FEATURE_CARD_MARKING_STEALING
            if ((seg = heap_segment_next_rw (seg)) != 0)
            {
#ifdef BACKGROUND_GC
                should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC
                // Reset the per-segment scanning state for the new segment.
                beg = heap_segment_mem (seg);
                end = compute_next_end (seg, low);
#ifdef FEATURE_CARD_MARKING_STEALING
                card_word_end = 0;
#else // FEATURE_CARD_MARKING_STEALING
                card_word_end = card_of (align_on_card_word (end)) / card_word_width;
#endif // FEATURE_CARD_MARKING_STEALING
                card = card_of (beg);
                o  = beg;
                end_card = 0;
                continue;
            }
            else
            {
                break;
            }
        }

        assert (card_set_p (card));
        {
            dprintf(3,("card %zx: o: %zx, l: %zx[ ",
                       card, (size_t)o, (size_t)limit));

            assert (Align (size (o)) >= Align (min_obj_size));
            size_t s = size (o);
            uint8_t* next_o =  o + AlignQword (s);
            Prefetch (next_o);

            // Visit every object that starts below limit.
            while (o < limit)
            {
                s = size (o);
                assert (Align (s) >= Align (min_obj_size));
                next_o =  o + AlignQword (s);
                Prefetch (next_o);

                dprintf (4, ("|%zx|", (size_t)o));
                // The object ends before start_address, so it does not reach the
                // range covered by the current run of set cards.
                if (next_o < start_address)
                {
                    goto end_object;
                }

#ifdef BACKGROUND_GC
                if (!fgc_should_consider_object (o, seg, consider_bgc_mark_p, check_current_sweep_p, check_saved_sweep_p))
                {
                    goto end_object;
                }
#endif //BACKGROUND_GC

#ifdef COLLECTIBLE_CLASS
                if (is_collectible(o))
                {
                    BOOL passed_end_card_p = FALSE;

                    if (card_of (o) > card)
                    {
                        // card_transition takes the scanning state by name here and
                        // presumably updates it (card, end_card, foundp, start_address,
                        // limit, ...) - confirm against its definition.
                        passed_end_card_p = card_transition (o, end, card_word_end,
                            cg_pointers_found,
                            n_eph, n_card_set,
                            card, end_card,
                            foundp, start_address,
                            limit, total_cards_cleared
                            CARD_MARKING_STEALING_ARGS(card_mark_enumerator, seg, card_word_end));
                    }

                    if ((!passed_end_card_p || foundp) && (card_of (o) == card))
                    {
                        // card is valid and it covers the head of the object
                        if (fn == &gc_heap::relocate_address)
                        {
                            cg_pointers_found++;
                        }
                        else
                        {
                            uint8_t* class_obj = get_class_object (o);
                            mark_through_cards_helper (&class_obj, n_gen,
                                                       cg_pointers_found, fn,
                                                       nhigh, next_boundary,
                                                       condemned_gen, max_generation CARD_MARKING_STEALING_ARG(hpt));
                        }
                    }

                    if (passed_end_card_p)
                    {
                        // Only scan the fields if a new run of set cards was found
                        // and it begins before the end of this object.
                        if (foundp && (card_address (card) < next_o))
                        {
                            goto go_through_refs;
                        }
                        else
                        {
                            goto end_object;
                        }
                    }
                }

go_through_refs:
#endif //COLLECTIBLE_CLASS

                if (contain_pointers (o))
                {
                    dprintf(3,("Going through %zx", (size_t)o));

                    // poo is the pointer-field cursor supplied by go_through_object.
                    // ppstop is not declared in this function; presumably it comes
                    // from the go_through_object macro expansion - confirm.
                    go_through_object (method_table(o), o, s, poo,
                                       start_address, use_start, (o + s),
                       {
                           if (card_of ((uint8_t*)poo) > card)
                           {
                                BOOL passed_end_card_p  = card_transition ((uint8_t*)poo, end,
                                        card_word_end,
                                        cg_pointers_found,
                                        n_eph, n_card_set,
                                        card, end_card,
                                        foundp, start_address,
                                        limit, total_cards_cleared
                                        CARD_MARKING_STEALING_ARGS(card_mark_enumerator, seg, card_word_end));

                                if (passed_end_card_p)
                                {
                                    if (foundp && (card_address (card) < next_o))
                                    {
                                        //new_start();
                                        {
                                            if (ppstop <= (uint8_t**)start_address)
                                            {break;}
                                            else if (poo < (uint8_t**)start_address)
                                            {poo = (uint8_t**)start_address;}
                                        }
                                    }
                                    else
                                    {
                                        goto end_object;
                                    }
                                }
                            }

                           mark_through_cards_helper (poo, n_gen,
                                                      cg_pointers_found, fn,
                                                      nhigh, next_boundary,
                                                      condemned_gen, max_generation CARD_MARKING_STEALING_ARG(hpt));
                       }
                        );
                }

            end_object:
                o = next_o;
            }

        }
    }

    // compute the efficiency ratio of the card table
    if (!relocating)
    {
#ifdef FEATURE_CARD_MARKING_STEALING
        // With card-marking stealing the counts are added to the n_eph_loh /
        // n_gen_loh totals with interlocked adds; generation_skip_ratio is not
        // touched here.
        Interlocked::ExchangeAddPtr(&n_eph_loh, n_eph);
        Interlocked::ExchangeAddPtr(&n_gen_loh, n_gen);
        dprintf (3, ("h%d marking h%d Mloh: cross: %zd, useful: %zd, cards set: %zd, cards cleared: %zd, ratio: %d",
            hpt->heap_number, heap_number, n_eph, n_gen, n_card_set, total_cards_cleared,
            (n_eph ? (int)(((float)n_gen / (float)n_eph) * 100) : 0)));
        dprintf (3, ("h%d marking h%d Mloh: total cross %zd, useful: %zd, running ratio: %d",
            hpt->heap_number, heap_number, (size_t)n_eph_loh, (size_t)n_gen_loh,
            (n_eph_loh ? (int)(((float)n_gen_loh / (float)n_eph_loh) * 100) : 0)));
#else
        // generation_skip_ratio can only be lowered here: the percentage n_gen/n_eph
        // is used when n_eph exceeds MIN_LOH_CROSS_GEN_REFS, otherwise 100.
        generation_skip_ratio = min (((n_eph > MIN_LOH_CROSS_GEN_REFS) ?
            (int)(((float)n_gen / (float)n_eph) * 100) : 100),
            generation_skip_ratio);
        dprintf (3, ("marking h%d Mloh: cross: %zd, useful: %zd, cards cleared: %zd, cards set: %zd, ratio: %d",
            heap_number, n_eph, n_gen, total_cards_cleared, n_card_set, generation_skip_ratio));
#endif //FEATURE_CARD_MARKING_STEALING
    }
    else
    {
        dprintf (3, ("R: Mloh: cross: %zd, useful: %zd, cards set: %zd, ratio: %d",
             n_eph, n_gen, n_card_set, generation_skip_ratio));
    }
}

// Reports the address ranges of every generation to fn. For each range, fn receives
// context, the generation number, the start of the range, the end of the allocated
// part and the end of the range being reported (reserved end or allocated end).
// With MULTIPLE_HEAPS every heap in g_heaps is walked.
void gc_heap::descr_generations_to_profiler (gen_walk_fn fn, void *context)
{
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < n_heaps; heap_idx++)
    {
        gc_heap* cur_heap = g_heaps[heap_idx];
#else //MULTIPLE_HEAPS
    {
        gc_heap* cur_heap = NULL;
#endif //MULTIPLE_HEAPS

        // Generations are reported from the highest number down to gen0.
        for (int gen_idx = total_generation_count-1; gen_idx >= 0; gen_idx--)
        {
            generation* gen = cur_heap->generation_of (gen_idx);
            heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
#ifdef USE_REGIONS
            // Each region is reported as its own range.
            for (; seg != nullptr; seg = heap_segment_next_rw (seg))
            {
                fn(context, gen_idx, heap_segment_mem (seg),
                                     heap_segment_allocated (seg),
                                     heap_segment_reserved (seg));
            }
#else
            // Segments ahead of the ephemeral segment are reported whole, for
            // generation # gen_idx.
            for (; seg && (seg != cur_heap->ephemeral_heap_segment); seg = heap_segment_next_rw (seg))
            {
                assert (gen_idx > 0);

                // Generations above max_generation report the reserved end, the
                // others report the allocated end again.
                uint8_t* reported_limit = (gen_idx > max_generation) ?
                                            heap_segment_reserved (seg) : heap_segment_allocated (seg);

                fn(context, gen_idx, heap_segment_mem (seg),
                                     heap_segment_allocated (seg),
                                     reported_limit);
            }

            if (seg == nullptr)
            {
                continue;
            }

            // What is left is the ephemeral segment, which is split between generations.
            assert (seg == cur_heap->ephemeral_heap_segment);
            assert (gen_idx <= max_generation);

            if (gen_idx == max_generation)
            {
                // max_generation owns [heap_segment_mem (seg), start of max_generation-1[,
                // reported only when that range is not empty.
                uint8_t* younger_start = generation_allocation_start (cur_heap->generation_of (max_generation-1));
                if (heap_segment_mem (seg) < younger_start)
                {
                    fn(context, gen_idx, heap_segment_mem (seg),
                                         younger_start,
                                         younger_start);
                }
            }
            else if (gen_idx == 0)
            {
                // gen0 runs from its allocation start to the allocated end of the
                // ephemeral segment; the reserved end is reported as the limit.
                fn(context, gen_idx, generation_allocation_start (cur_heap->generation_of (gen_idx)),
                                     heap_segment_allocated (cur_heap->ephemeral_heap_segment),
                                     heap_segment_reserved (cur_heap->ephemeral_heap_segment) );
            }
            else
            {
                // Any generation in between runs from its own allocation start to
                // the allocation start of the next younger generation.
                uint8_t* younger_start = generation_allocation_start (cur_heap->generation_of (gen_idx-1));
                fn(context, gen_idx, generation_allocation_start (cur_heap->generation_of (gen_idx)),
                                     younger_start,
                                     younger_start);
            }
#endif //USE_REGIONS
        }
    }
}

#ifdef TRACE_GC
// Currently a no-op: the code that dumped the free list items falling inside seg is
// compiled out below. Note that when logging is on it can take a long time to go
// through the free items.
void gc_heap::print_free_list (int gen, heap_segment* seg)
{
    // Both parameters are only used by the disabled dump.
    UNREFERENCED_PARAMETER(seg);
    UNREFERENCED_PARAMETER(gen);

#if 0
    if (settings.concurrent == FALSE)
    {
        uint8_t* seg_start = heap_segment_mem (seg);
        uint8_t* seg_end = heap_segment_allocated (seg);

        dprintf (3, ("Free list in seg %zx:", seg_start));

        size_t total_free_item = 0;

        allocator* gen_allocator = generation_allocator (generation_of (gen));
        for (unsigned int b = 0; b < gen_allocator->number_of_buckets(); b++)
        {
            uint8_t* fo = gen_allocator->alloc_list_head_of (b);
            while (fo)
            {
                if (fo >= seg_start && fo < seg_end)
                {
                    total_free_item++;

                    size_t free_item_len = size(fo);

                    dprintf (3, ("[%zx, %zx[:%zd",
                                 (size_t)fo,
                                 (size_t)(fo + free_item_len),
                                 free_item_len));
                }

                fo = free_list_slot (fo);
            }
        }

        dprintf (3, ("total %zd free items", total_free_item));
    }
#endif // 0
}
#endif //TRACE_GC

// Diagnostic dump of this heap's generations.
//
// With STRESS_LOG, and when the stress log is on for LF_GC at LL_INFO1000, the
// segments of generations max_generation..0 are written to the stress log.
// With TRACE_GC, the heap address range, (for regions, on heap 0 only) the overall
// heap size/fragmentation/commit summary, and per generation the size, the
// fragmentation and every segment are written through dprintf.
//
// msg - caller supplied tag that prefixes the TRACE_GC output; unused otherwise.
void gc_heap::descr_generations (const char* msg)
{
#ifndef TRACE_GC
    UNREFERENCED_PARAMETER(msg);
#endif //!TRACE_GC

#ifdef STRESS_LOG
    if (StressLog::StressLogOn(LF_GC, LL_INFO1000))
    {
        // hp is only used to tag the log entry; it stays 0 without MULTIPLE_HEAPS.
        gc_heap* hp = 0;
#ifdef MULTIPLE_HEAPS
        hp= this;
#endif //MULTIPLE_HEAPS

        STRESS_LOG1(LF_GC, LL_INFO1000, "GC Heap %p\n", hp);
        for (int n = max_generation; n >= 0; --n)
        {
#ifndef USE_REGIONS
            STRESS_LOG4(LF_GC, LL_INFO1000, "    Generation %d [%p, %p] cur = %p\n",
                    n,
                    generation_allocation_start(generation_of(n)),
                    generation_allocation_limit(generation_of(n)),
                    generation_allocation_pointer(generation_of(n)));
#endif //USE_REGIONS

            heap_segment* seg = generation_start_segment(generation_of(n));
            while (seg)
            {
                STRESS_LOG4(LF_GC, LL_INFO1000, "        Segment mem %p alloc = %p used %p committed %p\n",
                        heap_segment_mem(seg),
                        heap_segment_allocated(seg),
                        heap_segment_used(seg),
                        heap_segment_committed(seg));
                seg = heap_segment_next(seg);
            }
        }
    }
#endif  // STRESS_LOG

#ifdef TRACE_GC
    dprintf (2, ("lowest_address: %zx highest_address: %zx",
             (size_t) lowest_address, (size_t) highest_address));
#ifdef BACKGROUND_GC
    dprintf (2, ("bgc lowest_address: %zx bgc highest_address: %zx",
             (size_t) background_saved_lowest_address, (size_t) background_saved_highest_address));
#endif //BACKGROUND_GC

    if (heap_number == 0)
    {
#ifdef USE_REGIONS
        // All sizes below are in whole megabytes.
        size_t alloc_size = get_total_heap_size () / 1024 / 1024;
        size_t commit_size = get_total_committed_size () / 1024 / 1024;
        size_t frag_size = get_total_fragmentation () / 1024 / 1024;
        int total_new_gen0_regions_in_plns = get_total_new_gen0_regions_in_plns ();
        int total_new_regions_in_prr = get_total_new_regions_in_prr ();
        int total_new_regions_in_threading = get_total_new_regions_in_threading ();
        uint64_t elapsed_time_so_far = GetHighPrecisionTimeStamp () - process_start_time;

        size_t idx = VolatileLoadWithoutBarrier (&settings.gc_index);

        // alloc_size is 0 while the heap is smaller than 1mb. Dividing by it and
        // converting the result to int would be undefined, so report 0% in that
        // case, the same way the per generation percentages below do.
        int frag_percent = (alloc_size ? (int)((double)frag_size * 100.0 / (double)alloc_size) : 0);

        // size_t arguments are printed with %zd like in the rest of this file; the
        // MSVC-only %Id prefix is not understood by other C runtimes.
        dprintf (REGIONS_LOG, ("[%s] GC#%5zd [%s] heap %zdmb (F: %zdmb %d%%) commit size: %zdmb, %0.3f min, %d,%d new in plan, %d in threading",
            msg, idx, (settings.promotion ? "PM" : "NPM"), alloc_size, frag_size,
            frag_percent,
            commit_size,
            (double)elapsed_time_so_far / (double)1000000 / (double)60,
            total_new_gen0_regions_in_plns, total_new_regions_in_prr, total_new_regions_in_threading));

        size_t total_gen_size_mb[loh_generation + 1] = { 0, 0, 0, 0 };
        size_t total_gen_fragmentation_mb[loh_generation + 1] = { 0, 0, 0, 0 };
        for (int i = 0; i < (loh_generation + 1); i++)
        {
            total_gen_size_mb[i] = get_total_generation_size (i) / 1024 / 1024;
            total_gen_fragmentation_mb[i] = get_total_gen_fragmentation (i) / 1024 / 1024;
        }

        int bgcs = VolatileLoadWithoutBarrier (&current_bgc_state);
#ifdef SIMPLE_DPRINTF
        dprintf (REGIONS_LOG, ("[%s] GC#%zd (bgcs: %d, %s) g0: %zdmb (f: %zdmb %d%%), g1: %zdmb (f: %zdmb %d%%), g2: %zdmb (f: %zdmb %d%%), g3: %zdmb (f: %zdmb %d%%)",
            msg, idx, bgcs, str_bgc_state[bgcs],
            total_gen_size_mb[0], total_gen_fragmentation_mb[0], (total_gen_size_mb[0] ? (int)((double)total_gen_fragmentation_mb[0] * 100.0 / (double)total_gen_size_mb[0]) : 0),
            total_gen_size_mb[1], total_gen_fragmentation_mb[1], (total_gen_size_mb[1] ? (int)((double)total_gen_fragmentation_mb[1] * 100.0 / (double)total_gen_size_mb[1]) : 0),
            total_gen_size_mb[2], total_gen_fragmentation_mb[2], (total_gen_size_mb[2] ? (int)((double)total_gen_fragmentation_mb[2] * 100.0 / (double)total_gen_size_mb[2]) : 0),
            total_gen_size_mb[3], total_gen_fragmentation_mb[3], (total_gen_size_mb[3] ? (int)((double)total_gen_fragmentation_mb[3] * 100.0 / (double)total_gen_size_mb[3]) : 0)));
#endif //SIMPLE_DPRINTF
        // print every 20 GCs so it's easy to see if we are making progress.
        if ((idx % 20) == 0)
        {
            dprintf (1, ("[%5s] GC#%5zd total heap size: %zdmb (F: %zdmb %d%%) commit size: %zdmb, %0.3f min, %d,%d new in plan, %d in threading\n",
                msg, idx, alloc_size, frag_size,
                frag_percent,
                commit_size,
                (double)elapsed_time_so_far / (double)1000000 / (double)60,
                total_new_gen0_regions_in_plns, total_new_regions_in_prr, total_new_regions_in_threading));
        }
#endif //USE_REGIONS
    }

    // Per generation summary followed by one line per segment, highest generation first.
    for (int curr_gen_number = total_generation_count - 1; curr_gen_number >= 0; curr_gen_number--)
    {
        size_t total_gen_size = generation_size (curr_gen_number);
#ifdef SIMPLE_DPRINTF
        dprintf (GTC_LOG, ("[%s][g%d]gen %d:, size: %zd, frag: %zd(L: %zd, O: %zd), f: %d%% %s %s %s",
                      msg,
                      settings.condemned_generation,
                      curr_gen_number,
                      total_gen_size,
                      dd_fragmentation (dynamic_data_of (curr_gen_number)),
                      generation_free_list_space (generation_of (curr_gen_number)),
                      generation_free_obj_space (generation_of (curr_gen_number)),
                      (total_gen_size ?
                        (int)(((double)dd_fragmentation (dynamic_data_of (curr_gen_number)) / (double)total_gen_size) * 100) :
                        0),
                      (settings.compaction ? "(compact)" : "(sweep)"),
                      (settings.heap_expansion ? "(EX)" : " "),
                      (settings.promotion ? "Promotion" : "NoPromotion")));
#else
        dprintf (2, ( "Generation %d: generation size: %zd, fragmentation: %zd",
                      curr_gen_number,
                      total_gen_size,
                      dd_fragmentation (dynamic_data_of (curr_gen_number))));
#endif //SIMPLE_DPRINTF

        generation* gen = generation_of (curr_gen_number);
        heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
#ifdef USE_REGIONS
        dprintf (GTC_LOG, ("g%d: start seg: %p alloc seg: %p, tail region: %p",
            curr_gen_number,
            heap_segment_mem (seg),
            (generation_allocation_segment (gen) ? heap_segment_mem (generation_allocation_segment (gen)) : 0),
            heap_segment_mem (generation_tail_region (gen))));
        while (seg)
        {
            dprintf (GTC_LOG, ("g%d: (%d:p %d) [%zx %zx(sa: %zx, pa: %zx)[-%zx[ (%zd) (%zd)",
                               curr_gen_number,
                               heap_segment_gen_num (seg),
                               heap_segment_plan_gen_num (seg),
                               (size_t)heap_segment_mem (seg),
                               (size_t)heap_segment_allocated (seg),
                               (size_t)heap_segment_saved_allocated (seg),
                               (size_t)heap_segment_plan_allocated (seg),
                               (size_t)heap_segment_committed (seg),
                               (size_t)(heap_segment_allocated (seg) - heap_segment_mem (seg)),
                               (size_t)(heap_segment_committed (seg) - heap_segment_allocated (seg))));
            print_free_list (curr_gen_number, seg);
            seg = heap_segment_next (seg);
        }
#else
        // Segments ahead of the ephemeral segment are printed whole.
        while (seg && (seg != ephemeral_heap_segment))
        {
            dprintf (GTC_LOG, ("g%d: [%zx %zx[-%zx[ (%zd) (%zd)",
                        curr_gen_number,
                        (size_t)heap_segment_mem (seg),
                        (size_t)heap_segment_allocated (seg),
                        (size_t)heap_segment_committed (seg),
                        (size_t)(heap_segment_allocated (seg) - heap_segment_mem (seg)),
                        (size_t)(heap_segment_committed (seg) - heap_segment_allocated (seg))));
            print_free_list (curr_gen_number, seg);
            seg = heap_segment_next (seg);
        }
        // seg is now either null or the ephemeral segment. If the generation did not
        // start on it, its part runs from the segment start to the allocation start
        // of the next younger generation.
        if (seg && (seg != generation_start_segment (gen)))
        {
            dprintf (GTC_LOG, ("g%d: [%zx %zx[",
                         curr_gen_number,
                         (size_t)heap_segment_mem (seg),
                         (size_t)generation_allocation_start (generation_of (curr_gen_number-1))));
            print_free_list (curr_gen_number, seg);

        }
        else if (seg)
        {
            // The generation starts on the ephemeral segment: print from its
            // allocation start to the next younger generation's allocation start,
            // or to the allocated end of the segment for gen0.
            dprintf (GTC_LOG, ("g%d: [%zx %zx[",
                         curr_gen_number,
                         (size_t)generation_allocation_start (generation_of (curr_gen_number)),
                         (size_t)(((curr_gen_number == 0)) ?
                                  (heap_segment_allocated
                                   (generation_start_segment
                                    (generation_of (curr_gen_number)))) :
                                  (generation_allocation_start
                                   (generation_of (curr_gen_number - 1))))
                         ));
            print_free_list (curr_gen_number, seg);
        }
#endif //USE_REGIONS
    }

#endif //TRACE_GC
}

//-----------------------------------------------------------------------------
//
//                                  VM Specific support
//
//-----------------------------------------------------------------------------

// Static member variables.
// Out-of-class definitions, with initial values, for the static data members of GCHeap.
VOLATILE(BOOL)    GCHeap::GcInProgress            = FALSE;
GCEvent           *GCHeap::WaitForGCEvent         = NULL;
unsigned          GCHeap::GcCondemnedGeneration   = 0;
size_t            GCHeap::totalSurvivedSize       = 0;
#ifdef FEATURE_PREMORTEM_FINALIZATION
// The members below are only defined when premortem finalization is compiled in.
CFinalize*        GCHeap::m_Finalize              = 0;
BOOL              GCHeap::GcCollectClasses        = FALSE;
VOLATILE(int32_t) GCHeap::m_GCFLock               = 0;

#ifndef FEATURE_NATIVEAOT // NativeAOT forces relocation a different way
#ifdef STRESS_HEAP
#ifndef MULTIPLE_HEAPS
// Heap-stress state; only defined for single-heap builds with STRESS_HEAP.
OBJECTHANDLE      GCHeap::m_StressObjs[NUM_HEAP_STRESS_OBJS];
int               GCHeap::m_CurStressObj          = 0;
#endif // !MULTIPLE_HEAPS
#endif // STRESS_HEAP
#endif // !FEATURE_NATIVEAOT

#endif //FEATURE_PREMORTEM_FINALIZATION

// Scoped holder for g_no_gc_lock: the constructor enters the spin lock and the
// destructor leaves it, so the lock is released on every exit path of the
// enclosing scope.
class NoGCRegionLockHolder
{
public:
    NoGCRegionLockHolder()
    {
        enter_spin_lock_noinstru(&g_no_gc_lock);
    }

    ~NoGCRegionLockHolder()
    {
        leave_spin_lock_noinstru(&g_no_gc_lock);
    }

    // A copy would leave the lock a second time when it is destroyed, so
    // copying is disallowed.
    NoGCRegionLockHolder(const NoGCRegionLockHolder&) = delete;
    NoGCRegionLockHolder& operator=(const NoGCRegionLockHolder&) = delete;
};

// Registers `callback` for the current no-GC region.
//
// With the EE suspended, this:
//  1) fails with not_started if no no-GC region is in progress, or with
//     already_registered if a callback is already recorded;
//  2) sums soh_allocation_no_gc / loh_allocation_no_gc over all heaps to get the
//     original budget, and fails with insufficient_budget if that total is
//     smaller than callback_threshold;
//  3) otherwise withholds (total budget - callback_threshold), split between SOH
//     and LOH in proportion to their original budgets (and evenly across heaps),
//     by subtracting it from dd_new_allocation of gen0 and LOH on every heap;
//  4) records the withheld amounts and the callback in current_no_gc_region_info.
//
// If any heap's remaining gen0 or LOH budget is not larger than the amount to
// withhold, nothing is modified and insufficient_budget is returned.
//
// Parameters:
//  callback           - work item stored in current_no_gc_region_info.callback on success.
//  callback_threshold - portion of the total no-GC budget that is NOT withheld.
//
// Returns the resulting enable_no_gc_region_callback_status.
enable_no_gc_region_callback_status gc_heap::enable_no_gc_callback(NoGCRegionCallbackFinalizerWorkItem* callback, uint64_t callback_threshold)
{
    dprintf(1, ("[no_gc_callback] calling enable_no_gc_callback with callback_threshold = %llu\n", callback_threshold));
    enable_no_gc_region_callback_status status = enable_no_gc_region_callback_status::succeed;
    suspend_EE();
    {
        if (!current_no_gc_region_info.started)
        {
            status = enable_no_gc_region_callback_status::not_started;
        }
        else if (current_no_gc_region_info.callback != nullptr)
        {
            status = enable_no_gc_region_callback_status::already_registered;
        }
        else
        {
            uint64_t total_original_soh_budget = 0;
            uint64_t total_original_loh_budget = 0;
            // The loop header / plain block is selected by the preprocessor; the body
            // and closing brace below are shared by both configurations.
#ifdef MULTIPLE_HEAPS
            for (int i = 0; i < gc_heap::n_heaps; i++)
            {
                gc_heap* hp = gc_heap::g_heaps [i];
#else
            {
                gc_heap* hp = pGenGCHeap;
#endif
                total_original_soh_budget += hp->soh_allocation_no_gc;
                total_original_loh_budget += hp->loh_allocation_no_gc;
            }
            uint64_t total_original_budget = total_original_soh_budget + total_original_loh_budget;
            if (total_original_budget >= callback_threshold)
            {
                uint64_t total_withheld = total_original_budget - callback_threshold;

                // Split the withheld amount between SOH and LOH in proportion to
                // their share of the original budget.
                float soh_ratio = ((float)total_original_soh_budget)/total_original_budget;
                float loh_ratio = ((float)total_original_loh_budget)/total_original_budget;

                size_t soh_withheld_budget = (size_t)(soh_ratio * total_withheld);
                size_t loh_withheld_budget = (size_t)(loh_ratio * total_withheld);

#ifdef MULTIPLE_HEAPS
                // The same amount is withheld from every heap.
                soh_withheld_budget = soh_withheld_budget / gc_heap::n_heaps;
                loh_withheld_budget = loh_withheld_budget / gc_heap::n_heaps;
#endif
                // Always withhold a non-zero SOH amount.
                soh_withheld_budget = max(soh_withheld_budget, (size_t)1);
                soh_withheld_budget = Align(soh_withheld_budget, get_alignment_constant (TRUE));
                loh_withheld_budget = Align(loh_withheld_budget, get_alignment_constant (FALSE));

                // First pass: make sure every heap still has more budget left than
                // what is about to be withheld; nothing is modified yet.
#ifdef MULTIPLE_HEAPS
                for (int i = 0; i < gc_heap::n_heaps; i++)
                {
                    gc_heap* hp = gc_heap::g_heaps [i];
#else
                {
                    gc_heap* hp = pGenGCHeap;
#endif
                    if (dd_new_allocation (hp->dynamic_data_of (soh_gen0)) <= (ptrdiff_t)soh_withheld_budget)
                    {
                        dprintf(1, ("[no_gc_callback] failed because of running out of soh budget= %zu\n", soh_withheld_budget));
                        status = enable_no_gc_region_callback_status::insufficient_budget;
                    }
                    if (dd_new_allocation (hp->dynamic_data_of (loh_generation)) <= (ptrdiff_t)loh_withheld_budget)
                    {
                        dprintf(1, ("[no_gc_callback] failed because of running out of loh budget= %zu\n", loh_withheld_budget));
                        status = enable_no_gc_region_callback_status::insufficient_budget;
                    }
                }

                if (status == enable_no_gc_region_callback_status::succeed)
                {
                    dprintf(1, ("[no_gc_callback] enabling succeed\n"));
                    // Second pass: actually take the withheld budget away from each heap.
#ifdef MULTIPLE_HEAPS
                    for (int i = 0; i < gc_heap::n_heaps; i++)
                    {
                        gc_heap* hp = gc_heap::g_heaps [i];
#else
                    {
                        gc_heap* hp = pGenGCHeap;
#endif
                        dd_new_allocation (hp->dynamic_data_of (soh_gen0)) -= soh_withheld_budget;
                        dd_new_allocation (hp->dynamic_data_of (loh_generation)) -= loh_withheld_budget;
                    }
                    current_no_gc_region_info.soh_withheld_budget = soh_withheld_budget;
                    current_no_gc_region_info.loh_withheld_budget = loh_withheld_budget;
                    current_no_gc_region_info.callback = callback;
                }
            }
            else
            {
                status = enable_no_gc_region_callback_status::insufficient_budget;
            }
        }
    }
    restart_EE();

    return status;
}

#ifdef BACKGROUND_GC
// Computes the part of a segment that overlaps the address range saved for
// background GC ([background_saved_lowest_address, background_saved_highest_address)).
//
//  seg         - segment to examine.
//  whole_seg_p - when set the segment end is its reserved end; otherwise it is the
//                allocated end aligned up to a mark word.
//  range_beg,
//  range_end   - receive the clamped overlap; only written when TRUE is returned.
//
// Returns TRUE if there is an overlap, FALSE otherwise.
BOOL gc_heap::bgc_mark_array_range (heap_segment* seg,
                                    BOOL whole_seg_p,
                                    uint8_t** range_beg,
                                    uint8_t** range_end)
{
    uint8_t* lo = heap_segment_mem (seg);
    uint8_t* hi;
    if (whole_seg_p)
    {
        hi = heap_segment_reserved (seg);
    }
    else
    {
        hi = align_on_mark_word (heap_segment_allocated (seg));
    }

    // The segment lies entirely outside of the saved range.
    if ((lo >= background_saved_highest_address) ||
        (hi <= background_saved_lowest_address))
    {
        return FALSE;
    }

    *range_beg = max (lo, background_saved_lowest_address);
    *range_end = min (hi, background_saved_highest_address);
    return TRUE;
}

// Debug-only check that the mark array covering a segment is all zero.
//
// The check runs when a background GC is running or when always_verify_p is set.
// With always_verify_p the whole [mem, reserved) range of the segment is checked;
// otherwise only the part overlapping the saved background GC range is. Any set
// mark word is a fatal GC error.
void gc_heap::bgc_verify_mark_array_cleared (heap_segment* seg, bool always_verify_p)
{
#ifdef _DEBUG
    if (!gc_heap::background_running_p() && !always_verify_p)
    {
        return;
    }

    uint8_t* verify_from = 0;
    uint8_t* verify_to = 0;
    BOOL overlaps_saved_range = bgc_mark_array_range (seg, TRUE, &verify_from, &verify_to);
    if (!overlaps_saved_range && !always_verify_p)
    {
        return;
    }

    if (always_verify_p)
    {
        // Ignore the clamped range and cover the entire segment.
        verify_from = heap_segment_mem (seg);
        verify_to = heap_segment_reserved (seg);
    }

    // Check whole mark words first.
    size_t word_limit = mark_word_of (verify_to);
    for (size_t word = mark_word_of (verify_from); word < word_limit; word++)
    {
        if (mark_array [word])
        {
            dprintf (1, ("The mark bits at 0x%zx:0x%u(addr: 0x%p) were not cleared",
                            word, mark_array [word], mark_word_address (word)));
            FATAL_GC_ERROR();
        }
    }

    // Then check, address by address, whatever is left past the last whole mark word.
    for (uint8_t* addr = mark_word_address (word_limit); addr < verify_to; addr++)
    {
        assert (!(mark_array_marked (addr)));
    }
#endif //_DEBUG
}

// Verifies (under VERIFY_HEAP) that the mark bits from the one following obj's own
// mark bit up to the mark bit of (obj + s) are all clear. A set bit in that range
// is a fatal GC error.
void gc_heap::verify_mark_bits_cleared (uint8_t* obj, size_t s)
{
#ifdef VERIFY_HEAP
    size_t first_bit = mark_bit_of (obj) + 1;
    size_t limit_bit = mark_bit_of (obj + s);
    unsigned int first_bit_in_word = mark_bit_bit (first_bit);
    unsigned int limit_bit_in_word = mark_bit_bit (limit_bit);
    size_t cur_word = mark_bit_word (first_bit);
    size_t limit_word = mark_bit_word (limit_bit);

    // Masks selecting the bits of interest in the first and in the last mark word.
    unsigned int head_mask = ~(lowbits (~0, first_bit_in_word));
    unsigned int tail_mask = ~(highbits (~0, limit_bit_in_word));

    // The whole range lives in a single mark word.
    if (cur_word == limit_word)
    {
        if (mark_array[cur_word] & (head_mask & tail_mask))
        {
            FATAL_GC_ERROR();
        }
        return;
    }

    // Partial first mark word.
    if (first_bit_in_word != 0)
    {
        if (mark_array[cur_word] & head_mask)
        {
            FATAL_GC_ERROR();
        }
        cur_word++;
    }

    // Full mark words in the middle.
    while (cur_word < limit_word)
    {
        if (mark_array[cur_word])
        {
            FATAL_GC_ERROR();
        }
        cur_word++;
    }

    // Partial last mark word.
    if (limit_bit_in_word != 0)
    {
        if (mark_array[limit_word] & tail_mask)
        {
            FATAL_GC_ERROR();
        }
    }
#endif //VERIFY_HEAP
}

// When a background GC is running and HEAPVERIFY_GC is enabled, runs
// bgc_verify_mark_array_cleared on every segment of every generation starting at
// get_start_generation_index().
void gc_heap::verify_mark_array_cleared()
{
#ifdef VERIFY_HEAP
    if (!gc_heap::background_running_p())
    {
        return;
    }
    if (!(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC))
    {
        return;
    }

    for (int gen_idx = get_start_generation_index(); gen_idx < total_generation_count; gen_idx++)
    {
        for (heap_segment* cur_seg = heap_segment_rw (generation_start_segment (generation_of (gen_idx)));
             cur_seg != nullptr;
             cur_seg = heap_segment_next_rw (cur_seg))
        {
            bgc_verify_mark_array_cleared (cur_seg);
        }
    }
#endif //VERIFY_HEAP
}
#endif //BACKGROUND_GC

// Sanity check of the SOH segment lists. Callers:
// 1) the beginning and the end of ephemeral GCs;
// 2) bgc sweep, whenever it switches segments.
//
// For each SOH generation the list is walked to its last segment, which must be
// the generation's tail region (regions) or the ephemeral segment (segments).
void gc_heap::verify_soh_segment_list()
{
#ifdef VERIFY_HEAP
    if (!(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC))
    {
        return;
    }

    for (int gen_idx = get_start_generation_index(); gen_idx <= max_generation; gen_idx++)
    {
        generation* gen = generation_of (gen_idx);

        // Find the last writable segment on this generation's list.
        heap_segment* list_tail = 0;
        for (heap_segment* cur = heap_segment_rw (generation_start_segment (gen));
             cur != nullptr;
             cur = heap_segment_next_rw (cur))
        {
            list_tail = cur;
        }

#ifdef USE_REGIONS
        heap_segment* expected_tail = generation_tail_region (gen);
#else
        heap_segment* expected_tail = ephemeral_heap_segment;
#endif //USE_REGIONS
        if (list_tail != expected_tail)
        {
            FATAL_GC_ERROR();
        }
    }
#endif //VERIFY_HEAP
}

// This function can be called during any foreground or blocking GC. For background GCs,
// it can be called at the end of final marking, and at any point during background
// sweep.
// NOTE - to be able to call this function during background sweep, we need to temporarily
// NOT clear the mark array bits as we go.
#ifdef BACKGROUND_GC
void gc_heap::verify_partial()
{
    // Walks every object in every segment of the generations starting at
    // get_start_generation_index(). For each object that is background-marked,
    // every non-null reference it contains is checked:
    //  - it must not point at a free object,
    //  - its method table must pass SanityCheck(),
    //  - during bgc_final_marking, the referenced object must itself be marked.
    // Any violation is a fatal GC error.

    // Different ways to fail.
    // These flags are set right before the corresponding FATAL_GC_ERROR and are not
    // read anywhere in this function.
    BOOL mark_missed_p = FALSE;
    BOOL bad_ref_p = FALSE;
    BOOL free_ref_p = FALSE;

    for (int i = get_start_generation_index(); i < total_generation_count; i++)
    {
        generation* gen = generation_of (i);
        // NOTE(review): the argument is true only when i == max_generation; confirm this
        // yields the intended alignment for every generation walked here.
        int align_const = get_alignment_constant (i == max_generation);
        heap_segment* seg = heap_segment_rw (generation_start_segment (gen));

        while (seg)
        {
            // Objects are laid out back to back in [mem, allocated).
            uint8_t* o = heap_segment_mem (seg);
            uint8_t* end = heap_segment_allocated (seg);

            while (o < end)
            {
                size_t s = size (o);

                BOOL marked_p = background_object_marked (o, FALSE);

                // Only the references of marked objects are verified.
                if (marked_p)
                {
                    // oo iterates over the reference slots of o.
                    go_through_object_cl (method_table (o), o, s, oo,
                        {
                            if (*oo)
                            {
                                //dprintf (3, ("VOM: verifying member %zx in obj %zx", (size_t)*oo, o));
                                MethodTable *pMT = method_table (*oo);

                                // A reference to a free object.
                                if (pMT == g_gc_pFreeObjectMethodTable)
                                {
                                    free_ref_p = TRUE;
                                    FATAL_GC_ERROR();
                                }

                                // A reference whose method table does not look valid.
                                if (!pMT->SanityCheck())
                                {
                                    bad_ref_p = TRUE;
                                    dprintf (1, ("Bad member of %zx %zx",
                                                (size_t)oo, (size_t)*oo));
                                    FATAL_GC_ERROR();
                                }

                                // During final marking a marked object must only reference
                                // marked objects (marked_p is already known to be true here).
                                if (current_bgc_state == bgc_final_marking)
                                {
                                    if (marked_p && !background_object_marked (*oo, FALSE))
                                    {
                                        mark_missed_p = TRUE;
                                        FATAL_GC_ERROR();
                                    }
                                }
                            }
                        }
                    );
                }

                // Advance to the next object.
                o = o + Align(s, align_const);
            }
            seg = heap_segment_next_rw (seg);
        }
    }
}
#endif //BACKGROUND_GC

#ifdef VERIFY_HEAP
// Verifies the free lists of every generation's allocator. For each bucket the
// list is walked from its head and each item is checked to
//  - be a free object,
//  - have a size that belongs in this bucket (bucket sizes double, starting at
//    first_bucket_size(); the first bucket has no lower bound and the last bucket
//    has no upper bound),
//  - have an empty undo slot (gen1..max_generation, unless the allocator
//    discards items that do not fit),
//  - live in the generation owning the list (SOH generations only),
//  - have a back link equal to the previously visited item (max_generation with
//    DOUBLY_LINKED_FL),
//  - belong to this heap (regions + multiple heaps; gen2 items are exempt while
//    trigger_bgc_for_rethreading_p is set).
// Finally the recorded tail of the bucket is checked against the last item
// visited. Any violation is a fatal GC error.
void
gc_heap::verify_free_lists ()
{
    for (int gen_num = 0; gen_num < total_generation_count; gen_num++)
    {
        dprintf (3, ("Verifying free list for gen:%d", gen_num));
        allocator* gen_alloc = generation_allocator (generation_of (gen_num));
        // Upper size bound (exclusive) of the current bucket; doubled per bucket.
        size_t sz = gen_alloc->first_bucket_size();
        bool verify_undo_slot = (gen_num != 0) && (gen_num <= max_generation) && !gen_alloc->discard_if_no_fit_p();

        for (unsigned int a_l_number = 0; a_l_number < gen_alloc->number_of_buckets(); a_l_number++)
        {
            uint8_t* free_list = gen_alloc->alloc_list_head_of (a_l_number);
            // The item visited right before free_list (0 at the head).
            uint8_t* prev = 0;
            while (free_list)
            {
                if (!((CObjectHeader*)free_list)->IsFree())
                {
                    dprintf (1, ("Verifiying Heap: curr free list item %zx isn't a free object",
                                 (size_t)free_list));
                    FATAL_GC_ERROR();
                }
                // Too big for this bucket (unless it is the last one), or too small
                // for this bucket (unless it is the first one).
                if (((a_l_number < (gen_alloc->number_of_buckets()-1))&& (unused_array_size (free_list) >= sz))
                    || ((a_l_number != 0) && (unused_array_size (free_list) < sz/2)))
                {
                    dprintf (1, ("Verifiying Heap: curr free list item %zx isn't in the right bucket",
                                 (size_t)free_list));
                    FATAL_GC_ERROR();
                }
                if (verify_undo_slot && (free_list_undo (free_list) != UNDO_EMPTY))
                {
                    dprintf (1, ("Verifiying Heap: curr free list item %zx has non empty undo slot",
                                 (size_t)free_list));
                    FATAL_GC_ERROR();
                }
                if ((gen_num <= max_generation) && (object_gennum (free_list)!= gen_num))
                {
                    dprintf (1, ("Verifiying Heap: curr free list item %zx is in the wrong generation free list",
                                 (size_t)free_list));
                    FATAL_GC_ERROR();
                }

#ifdef DOUBLY_LINKED_FL
                uint8_t* prev_free_item = free_list_prev (free_list);
                if (gen_num == max_generation)
                {
                    if (prev_free_item != prev)
                    {
                        // Expected: the item we just walked past; actual: the stored back link.
                        dprintf (1, ("%p prev should be: %p, actual: %p", free_list, prev, prev_free_item));
                        FATAL_GC_ERROR();
                    }
                }
#endif //DOUBLY_LINKED_FL

#if defined(USE_REGIONS) && defined(MULTIPLE_HEAPS)
                heap_segment* region = region_of (free_list);
                if ((region->heap != this) && ((gen_num != max_generation) || (!trigger_bgc_for_rethreading_p)))
                {
                    // The logic in change_heap_count depends on the coming BGC (or blocking gen 2) to rebuild the gen 2 free list.
                    // In that case, before the rebuild happens, the gen2 free list is expected to contain free list items that do not belong to the right heap.
                    dprintf (1, ("curr free item %p should be on heap %d, but actually is on heap %d", free_list, this->heap_number, region->heap->heap_number));
                    FATAL_GC_ERROR();
                }
#endif //USE_REGIONS && MULTIPLE_HEAPS
                prev = free_list;
                free_list = free_list_slot (free_list);
            }
            //verify the sanity of the tail
            uint8_t* tail = gen_alloc->alloc_list_tail_of (a_l_number);
            if (!((tail == 0) || (tail == prev)))
            {
                dprintf (1, ("Verifying Heap: tail of free list is not correct, tail %p, prev %p", tail, prev));
                FATAL_GC_ERROR();
            }
            // With no recorded tail the list may hold at most one item.
            if (tail == 0)
            {
                uint8_t* head = gen_alloc->alloc_list_head_of (a_l_number);
                if ((head != 0) && (free_list_slot (head) != 0))
                {
                    dprintf (1, ("Verifying Heap: head of free list is not correct, head %p -> %p",
                        head, free_list_slot (head)));
                    FATAL_GC_ERROR();
                }
            }

            sz *=2;
        }
    }
}

void gc_heap::verify_committed_bytes_per_heap()
{
    size_t committed_bookkeeping = 0; // unused
    for (int oh = soh; oh < total_oh_count; oh++)
    {
#ifdef MULTIPLE_HEAPS
        assert (committed_by_oh_per_heap[oh] == compute_committed_bytes_per_heap (oh, committed_bookkeeping));
#else
        assert (committed_by_oh[oh] == compute_committed_bytes_per_heap (oh, committed_bookkeeping));
#endif //MULTIPLE_HEAPS
    }
}

void gc_heap::verify_committed_bytes()
{
    size_t total_committed = 0;
    size_t committed_decommit; // unused
    size_t committed_free; // unused
    size_t committed_bookkeeping = 0;
    size_t new_current_total_committed;
    size_t new_current_total_committed_bookkeeping;
    size_t new_committed_by_oh[recorded_committed_bucket_counts];
    compute_committed_bytes(total_committed, committed_decommit, committed_free,
                            committed_bookkeeping, new_current_total_committed, new_current_total_committed_bookkeeping,
                            new_committed_by_oh);
#ifdef MULTIPLE_HEAPS
    for (int h = 0; h < n_heaps; h++)
    {
        for (int oh = soh; oh < total_oh_count; oh++)
        {
            assert (g_heaps[h]->committed_by_oh_per_heap[oh] == g_heaps[h]->committed_by_oh_per_heap_refresh[oh]);
        }
    }
    for (int i = 0; i < recorded_committed_bucket_counts; i++)
    {
        assert (new_committed_by_oh[i] == committed_by_oh[i]);
    }
#endif //MULTIPLE_HEAPS
    assert (new_current_total_committed_bookkeeping == current_total_committed_bookkeeping);
    assert (new_current_total_committed == current_total_committed);
}

#ifdef USE_REGIONS
// Debug-only verification of the region list of one generation.
//
//  gen_number         - generation whose regions are verified.
//  can_verify_gen_num - also check each region's gen_num / plan_gen_num.
//  can_verify_tail    - also check the generation's tail region.
void gc_heap::verify_regions (int gen_number, bool can_verify_gen_num, bool can_verify_tail)
{
#ifdef _DEBUG
    // Checks performed for the generation:
    //
    // 1) it owns at least one region.
    // 2) its tail region is the last region reached when following the region list.
    // 3) no region links to itself.
    // 4) when gen nums can be verified, every region's gen_num equals its
    //    plan_gen_num and is the expected generation.
    // In addition, no region may have allocated beyond its reserved end.
    generation* gen = generation_of (gen_number);
    heap_segment* recorded_tail = generation_tail_region (gen);
    heap_segment* last_visited = 0;
    int region_count = 0;

    for (heap_segment* region = heap_segment_rw (generation_start_segment (gen)); region != nullptr; )
    {
        if (can_verify_gen_num)
        {
            if (heap_segment_gen_num (region) != min (gen_number, (int)max_generation))
            {
                dprintf (REGIONS_LOG, ("h%d gen%d region %p(%p) gen is %d!",
                    heap_number, gen_number, region, heap_segment_mem (region),
                    heap_segment_gen_num (region)));
                FATAL_GC_ERROR();
            }
            if (heap_segment_gen_num (region) != heap_segment_plan_gen_num (region))
            {
                dprintf (REGIONS_LOG, ("h%d gen%d region %p(%p) gen is %d but plan gen is %d!!",
                    heap_number, gen_number, region, heap_segment_mem (region),
                    heap_segment_gen_num (region), heap_segment_plan_gen_num (region)));
                FATAL_GC_ERROR();
            }
        }

        if (heap_segment_allocated (region) > heap_segment_reserved (region))
        {
            dprintf (REGIONS_LOG, ("h%d gen%d region %p alloc %p > reserved %p!!",
                heap_number, gen_number, heap_segment_mem (region),
                heap_segment_allocated (region), heap_segment_reserved (region)));
            FATAL_GC_ERROR();
        }

        last_visited = region;
        region_count++;

        heap_segment* following = heap_segment_next (region);
        if (following == region)
        {
            dprintf (REGIONS_LOG, ("h%d gen%d region %p(%p) pointing to itself!!",
                heap_number, gen_number, region, heap_segment_mem (region)));
            FATAL_GC_ERROR();
        }
        region = following;
    }

    if (region_count == 0)
    {
        dprintf (REGIONS_LOG, ("h%d gen%d has no regions!!", heap_number, gen_number));
        FATAL_GC_ERROR();
    }

    if (can_verify_tail && (recorded_tail != last_visited))
    {
        dprintf (REGIONS_LOG, ("h%d gen%d tail region is %p(%p), diff from last region %p(%p)!!",
            heap_number, gen_number,
            recorded_tail, heap_segment_mem (recorded_tail),
            last_visited, heap_segment_mem (last_visited)));
        FATAL_GC_ERROR();
    }
#endif // _DEBUG
}

// Returns true when gen_number is soh_gen0, loh_generation or poh_generation.
inline bool is_user_alloc_gen (int gen_number)
{
    if (gen_number == soh_gen0)
    {
        return true;
    }
    if (gen_number == loh_generation)
    {
        return true;
    }
    return (gen_number == poh_generation);
}

// Debug-only: verifies the regions of every generation. When concurrent_p is set,
// the tail region is not verified for the generations reported by
// is_user_alloc_gen. Committed bytes per heap are additionally verified for
// generations >= max_generation when both gen nums and tails can be verified.
void gc_heap::verify_regions (bool can_verify_gen_num, bool concurrent_p)
{
#ifdef _DEBUG
    for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
    {
        bool tail_verifiable = true;
        if (concurrent_p)
        {
            tail_verifiable = !is_user_alloc_gen (gen_idx);
        }

        verify_regions (gen_idx, can_verify_gen_num, tail_verifiable);

        if (!can_verify_gen_num || !tail_verifiable)
        {
            continue;
        }
        if (gen_idx >= max_generation)
        {
            verify_committed_bytes_per_heap ();
        }
    }
#endif // _DEBUG
}
#endif // USE_REGIONS

// Tells whether a reference to child_obj needs a card.
//  Regions:  child_obj is in the heap range and its region's generation is lower
//            than gen_num_for_cards (low / high are not used).
//  Segments: child_obj lies in [low, high) (gen_num_for_cards is not used).
BOOL gc_heap::check_need_card (uint8_t* child_obj, int gen_num_for_cards,
                               uint8_t* low, uint8_t* high)
{
#ifdef USE_REGIONS
    if (!is_in_heap_range (child_obj))
    {
        return FALSE;
    }
    return (get_region_gen_num (child_obj) < gen_num_for_cards);
#else
    return ((child_obj >= low) && (child_obj < high));
#endif //USE_REGIONS
}

// Enters gc_lock when HEAPVERIFY_GC is enabled (VERIFY_HEAP builds only);
// otherwise does nothing.
void gc_heap::enter_gc_lock_for_verify_heap()
{
#ifdef VERIFY_HEAP
    if (!(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC))
    {
        return;
    }

    enter_spin_lock (&gc_heap::gc_lock);
    dprintf (SPINLOCK_LOG, ("enter gc_lock for verify_heap"));
#endif // VERIFY_HEAP
}

// Leaves gc_lock when HEAPVERIFY_GC is enabled (VERIFY_HEAP builds only);
// otherwise does nothing.
void gc_heap::leave_gc_lock_for_verify_heap()
{
#ifdef VERIFY_HEAP
    if (!(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC))
    {
        return;
    }

    dprintf (SPINLOCK_LOG, ("leave gc_lock taken for verify_heap"));
    leave_spin_lock (&gc_heap::gc_lock);
#endif // VERIFY_HEAP
}

void gc_heap::verify_heap (BOOL begin_gc_p)
{
    int heap_verify_level = static_cast<int>(GCConfig::GetHeapVerifyLevel());

#ifdef MULTIPLE_HEAPS
    t_join* current_join = &gc_t_join;
#ifdef BACKGROUND_GC
    if (settings.concurrent && (bgc_thread_id.IsCurrentThread()))
    {
        // We always call verify_heap on entry of GC on the SVR GC threads.
        current_join = &bgc_t_join;
    }
#endif //BACKGROUND_GC
#endif //MULTIPLE_HEAPS

#ifndef TRACE_GC
    UNREFERENCED_PARAMETER(begin_gc_p);
#endif //!TRACE_GC

#ifdef BACKGROUND_GC
    dprintf (2,("[%s]GC#%zu(%s): Verifying heap - begin",
        (begin_gc_p ? "BEG" : "END"),
        VolatileLoad(&settings.gc_index),
        get_str_gc_type()));
#else
    dprintf (2,("[%s]GC#%zu: Verifying heap - begin",
                (begin_gc_p ? "BEG" : "END"), VolatileLoad(&settings.gc_index)));
#endif //BACKGROUND_GC

#ifndef MULTIPLE_HEAPS
#ifndef USE_REGIONS
    if ((ephemeral_low != generation_allocation_start (generation_of (max_generation - 1))) ||
        (ephemeral_high != heap_segment_reserved (ephemeral_heap_segment)))
    {
        FATAL_GC_ERROR();
    }
#endif //!USE_REGIONS
#endif //MULTIPLE_HEAPS

#ifdef BACKGROUND_GC
    //don't touch the memory because the program is allocating from it.
    if (!settings.concurrent)
#endif //BACKGROUND_GC
    {
        if (!(heap_verify_level & GCConfig::HEAPVERIFY_NO_MEM_FILL))
        {
            // 0xaa the unused portions of segments.
            for (int i = get_start_generation_index(); i < total_generation_count; i++)
            {
                generation* gen1 = generation_of (i);
                heap_segment* seg1 = heap_segment_rw (generation_start_segment (gen1));

                while (seg1)
                {
                    uint8_t* clear_start = heap_segment_allocated (seg1) - plug_skew;
                    if (heap_segment_used (seg1) > clear_start)
                    {
                        dprintf (3, ("setting end of seg %p: [%p-[%p to 0xaa",
                            heap_segment_mem (seg1),
                            clear_start ,
                            heap_segment_used (seg1)));
                        memset (heap_segment_allocated (seg1) - plug_skew, 0xaa,
                            (heap_segment_used (seg1) - clear_start));
                    }
                    seg1 = heap_segment_next_rw (seg1);
                }
            }
        }
    }

#ifndef USE_REGIONS
#ifdef MULTIPLE_HEAPS
    current_join->join(this, gc_join_verify_copy_table);
    if (current_join->joined())
#endif //MULTIPLE_HEAPS
    {
        // in concurrent GC, new segment could be allocated when GC is working so the card brick table might not be updated at this point
        copy_brick_card_table_on_growth ();

#ifdef MULTIPLE_HEAPS
        current_join->restart();
#endif //MULTIPLE_HEAPS
    }
#endif //!USE_REGIONS

    //verify that the generation structures makes sense
    {
#ifdef _DEBUG
#ifdef USE_REGIONS
        verify_regions (true, settings.concurrent);
#else //USE_REGIONS
        generation* gen = generation_of (max_generation);

        assert (generation_allocation_start (gen) ==
                heap_segment_mem (heap_segment_rw (generation_start_segment (gen))));
        int gen_num = max_generation-1;
        generation* prev_gen = gen;
        while (gen_num >= 0)
        {
            gen = generation_of (gen_num);
            assert (generation_allocation_segment (gen) == ephemeral_heap_segment);
            assert (generation_allocation_start (gen) >= heap_segment_mem (ephemeral_heap_segment));
            assert (generation_allocation_start (gen) < heap_segment_allocated (ephemeral_heap_segment));

            if (generation_start_segment (prev_gen ) ==
                generation_start_segment (gen))
            {
                assert (generation_allocation_start (prev_gen) <
                        generation_allocation_start (gen));
            }
            prev_gen = gen;
            gen_num--;
        }
#endif //USE_REGIONS
#endif //_DEBUG
    }

    size_t          total_objects_verified = 0;
    size_t          total_objects_verified_deep = 0;

    BOOL            bCurrentBrickInvalid = FALSE;
    size_t          last_valid_brick = 0;
    size_t          curr_brick = 0;
    size_t          prev_brick = (size_t)-1;
    int             gen_num_for_cards = 0;
#ifdef USE_REGIONS
    int             gen_num_to_stop = 0;
    uint8_t*        e_high = 0;
    uint8_t*        next_boundary = 0;
#else //USE_REGIONS
    // For no regions the gen number is separately reduced when we detect the ephemeral seg.
    int gen_num_to_stop = max_generation;
    uint8_t*        e_high = ephemeral_high;
    uint8_t*        next_boundary = generation_allocation_start (generation_of (max_generation - 1));
    uint8_t*        begin_youngest = generation_allocation_start(generation_of(0));
#endif //!USE_REGIONS

    // go through all generations starting with the highest
    for (int curr_gen_num = total_generation_count - 1; curr_gen_num >= gen_num_to_stop; curr_gen_num--)
    {
        int             align_const = get_alignment_constant (curr_gen_num == max_generation);
        BOOL            large_brick_p = (curr_gen_num > max_generation);
#ifdef USE_REGIONS
        gen_num_for_cards = ((curr_gen_num >= max_generation) ? max_generation : curr_gen_num);
#endif //USE_REGIONS
        heap_segment*   seg = heap_segment_in_range (generation_start_segment (generation_of (curr_gen_num) ));

        while (seg)
        {
            uint8_t*        curr_object = heap_segment_mem (seg);
            uint8_t*        prev_object = 0;

            bool verify_bricks_p = true;
#ifdef USE_REGIONS
            if (heap_segment_read_only_p(seg))
            {
                dprintf(1, ("seg %zx is ro! Shouldn't happen with regions", (size_t)seg));
                FATAL_GC_ERROR();
            }
            if (heap_segment_gen_num (seg) != heap_segment_plan_gen_num (seg))
            {
                dprintf (1, ("Seg %p, gen num is %d, plan gen num is %d",
                    heap_segment_mem (seg), heap_segment_gen_num (seg), heap_segment_plan_gen_num (seg)));
                FATAL_GC_ERROR();
            }
#else //USE_REGIONS
            if (heap_segment_read_only_p(seg))
            {
                size_t current_brick = brick_of(max(heap_segment_mem(seg), lowest_address));
                size_t end_brick = brick_of(min(heap_segment_reserved(seg), highest_address) - 1);
                while (current_brick <= end_brick)
                {
                    if (brick_table[current_brick] != 0)
                    {
                        dprintf(1, ("Verifying Heap: %zx brick of a frozen segment is not zeroed", current_brick));
                        FATAL_GC_ERROR();
                    }
                    current_brick++;
                }
                verify_bricks_p = false;
            }
#endif //USE_REGIONS

#ifdef BACKGROUND_GC
            BOOL consider_bgc_mark_p    = FALSE;
            BOOL check_current_sweep_p  = FALSE;
            BOOL check_saved_sweep_p    = FALSE;
            should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC

            while (curr_object < heap_segment_allocated (seg))
            {
                if (is_mark_set (curr_object))
                {
                    dprintf (1, ("curr_object: %zx is marked!",(size_t)curr_object));
                    FATAL_GC_ERROR();
                }

                size_t s = size (curr_object);
                dprintf (3, ("o: %zx, s: %zu", (size_t)curr_object, s));
                if (s == 0)
                {
                    dprintf (1, ("Verifying Heap: size of current object %p == 0", curr_object));
                    FATAL_GC_ERROR();
                }

#ifndef USE_REGIONS
                // handle generation boundaries within ephemeral segment
                if (seg == ephemeral_heap_segment)
                {
                    if ((curr_gen_num > 0) && (curr_object >= next_boundary))
                    {
                        curr_gen_num--;
                        if (curr_gen_num > 0)
                        {
                            next_boundary = generation_allocation_start (generation_of (curr_gen_num - 1));
                        }
                    }
                }
#endif //!USE_REGIONS

#ifdef USE_REGIONS
                if (verify_bricks_p && curr_gen_num != 0)
#else
                // If object is not in the youngest generation, then lets
                // verify that the brick table is correct....
                if (verify_bricks_p && ((seg != ephemeral_heap_segment) ||
                     (brick_of(curr_object) < brick_of(begin_youngest))))
#endif //USE_REGIONS
                {
                    curr_brick = brick_of(curr_object);

                    // Brick Table Verification...
                    //
                    // On brick transition
                    //     if brick is negative
                    //          verify that brick indirects to previous valid brick
                    //     else
                    //          set current brick invalid flag to be flipped if we
                    //          encounter an object at the correct place
                    //
                    if (curr_brick != prev_brick)
                    {
                        // If the last brick we were examining had positive
                        // entry but we never found the matching object, then
                        // we have a problem
                        // If prev_brick was the last one of the segment
                        // it's ok for it to be invalid because it is never looked at
                        if (bCurrentBrickInvalid &&
                            (curr_brick != brick_of (heap_segment_mem (seg))) &&
                            !heap_segment_read_only_p (seg))
                        {
                            dprintf (1, ("curr brick %zx invalid", curr_brick));
                            FATAL_GC_ERROR();
                        }

                        if (large_brick_p)
                        {
                            //large objects verify the table only if they are in
                            //range.
                            if ((heap_segment_reserved (seg) <= highest_address) &&
                                (heap_segment_mem (seg) >= lowest_address) &&
                                brick_table [curr_brick] != 0)
                            {
                                dprintf (1, ("curr_brick %zx for large object %zx is set to %zx",
                                    curr_brick, (size_t)curr_object, (size_t)brick_table[curr_brick]));
                                FATAL_GC_ERROR();
                            }
                            else
                            {
                                bCurrentBrickInvalid = FALSE;
                            }
                        }
                        else
                        {
                            // If the current brick contains a negative value make sure
                            // that the indirection terminates at the last  valid brick
                            if (brick_table [curr_brick] <= 0)
                            {
                                if (brick_table [curr_brick] == 0)
                                {
                                    dprintf(1, ("curr_brick %zx for object %zx set to 0",
                                            curr_brick, (size_t)curr_object));
                                    FATAL_GC_ERROR();
                                }
                                ptrdiff_t i = curr_brick;
                                while ((i >= ((ptrdiff_t) brick_of (heap_segment_mem (seg)))) &&
                                       (brick_table[i] < 0))
                                {
                                    i = i + brick_table[i];
                                }
                                if (i <  ((ptrdiff_t)(brick_of (heap_segment_mem (seg))) - 1))
                                {
                                    dprintf (1, ("ptrdiff i: %zx < brick_of (heap_segment_mem (seg)):%zx - 1. curr_brick: %zx",
                                            i, brick_of (heap_segment_mem (seg)),
                                            curr_brick));
                                    FATAL_GC_ERROR();
                                }
                                bCurrentBrickInvalid = FALSE;
                            }
                            else if (!heap_segment_read_only_p (seg))
                            {
                                bCurrentBrickInvalid = TRUE;
                            }
                        }
                    }

                    if (bCurrentBrickInvalid)
                    {
                        if (curr_object == (brick_address(curr_brick) + brick_table[curr_brick] - 1))
                        {
                            bCurrentBrickInvalid = FALSE;
                            last_valid_brick = curr_brick;
                        }
                    }
                }

                if (*((uint8_t**)curr_object) != (uint8_t *) g_gc_pFreeObjectMethodTable)
                {
#ifdef FEATURE_LOH_COMPACTION
                    if ((curr_gen_num == loh_generation) && (prev_object != 0))
                    {
                        assert (method_table (prev_object) == g_gc_pFreeObjectMethodTable);
                    }
#endif //FEATURE_LOH_COMPACTION

                    total_objects_verified++;

                    BOOL can_verify_deep = TRUE;
#ifdef BACKGROUND_GC
                    can_verify_deep = fgc_should_consider_object (curr_object, seg, consider_bgc_mark_p, check_current_sweep_p, check_saved_sweep_p);
#endif //BACKGROUND_GC

                    BOOL deep_verify_obj = can_verify_deep;
                    if ((heap_verify_level & GCConfig::HEAPVERIFY_DEEP_ON_COMPACT) && !settings.compaction)
                        deep_verify_obj = FALSE;

                    ((CObjectHeader*)curr_object)->ValidateHeap(deep_verify_obj);

                    if (can_verify_deep)
                    {
                        if (curr_gen_num > 0)
                        {
                            BOOL need_card_p = FALSE;
                            if (contain_pointers_or_collectible (curr_object))
                            {
                                dprintf (4, ("curr_object: %zx", (size_t)curr_object));
                                size_t crd = card_of (curr_object);
                                BOOL found_card_p = card_set_p (crd);

#ifdef COLLECTIBLE_CLASS
                                if (is_collectible(curr_object))
                                {
                                    uint8_t* class_obj = get_class_object (curr_object);
                                    if (check_need_card (class_obj, gen_num_for_cards, next_boundary, e_high))
                                    {
                                        if (!found_card_p)
                                        {
                                            dprintf (1, ("Card not set, curr_object = [%zx:%zx pointing to class object %p",
                                                        card_of (curr_object), (size_t)curr_object, class_obj));
                                            FATAL_GC_ERROR();
                                        }
                                    }
                                }
#endif //COLLECTIBLE_CLASS

                                if (contain_pointers(curr_object))
                                {
                                    go_through_object_nostart
                                        (method_table(curr_object), curr_object, s, oo,
                                        {
                                            if (crd != card_of ((uint8_t*)oo))
                                            {
                                                crd = card_of ((uint8_t*)oo);
                                                found_card_p = card_set_p (crd);
                                                need_card_p = FALSE;
                                            }
                                            if (*oo && check_need_card (*oo, gen_num_for_cards, next_boundary, e_high))
                                            {
                                                need_card_p = TRUE;
                                            }

                                            if (need_card_p && !found_card_p)
                                            {
                                                dprintf (1, ("(in loop) Card not set, curr_object = [%zx:%zx, %zx:%zx[",
                                                            card_of (curr_object), (size_t)curr_object,
                                                            card_of (curr_object+Align(s, align_const)),
                                                            (size_t)(curr_object+Align(s, align_const))));
                                                FATAL_GC_ERROR();
                                            }
                                        }
                                            );
                                }
                                if (need_card_p && !found_card_p)
                                {
                                    dprintf (1, ("Card not set, curr_object = [%zx:%zx, %zx:%zx[",
                                        card_of (curr_object), (size_t)curr_object,
                                        card_of (curr_object + Align(s, align_const)),
                                        (size_t)(curr_object + Align(s, align_const))));
                                    FATAL_GC_ERROR();
                                }
                            }
                        }
                        total_objects_verified_deep++;
                    }
                }

                prev_object = curr_object;
                prev_brick = curr_brick;
                curr_object = curr_object + Align(s, align_const);
                if (curr_object < prev_object)
                {
                    dprintf (1, ("overflow because of a bad object size: %p size %zx", prev_object, s));
                    FATAL_GC_ERROR();
                }
            }

            if (curr_object > heap_segment_allocated(seg))
            {
                dprintf (1, ("Verifiying Heap: curr_object: %zx > heap_segment_allocated (seg: %zx) %p",
                        (size_t)curr_object, (size_t)seg, heap_segment_allocated (seg)));
                FATAL_GC_ERROR();
            }

            seg = heap_segment_next_in_range (seg);
        }
    }

#ifdef BACKGROUND_GC
    dprintf (2, ("(%s)(%s)(%s) total_objects_verified is %zd, total_objects_verified_deep is %zd",
                 get_str_gc_type(),
                 (begin_gc_p ? "BEG" : "END"),
                 ((current_c_gc_state == c_gc_state_planning) ? "in plan" : "not in plan"),
                 total_objects_verified, total_objects_verified_deep));
    if (current_c_gc_state != c_gc_state_planning)
    {
        assert (total_objects_verified == total_objects_verified_deep);
    }
#endif //BACKGROUND_GC

    verify_free_lists();

#ifdef FEATURE_PREMORTEM_FINALIZATION
    finalize_queue->CheckFinalizerObjects();
#endif // FEATURE_PREMORTEM_FINALIZATION

    {
        // to be consistent with handle table APIs pass a ScanContext*
        // to provide the heap number.  the SC isn't complete though so
        // limit its scope to handle table verification.
        ScanContext sc;
        sc.thread_number = heap_number;
        sc.thread_count = n_heaps;
        GCScan::VerifyHandleTable(max_generation, max_generation, &sc);
    }

#ifdef MULTIPLE_HEAPS
    current_join->join(this, gc_join_verify_objects_done);
    if (current_join->joined())
#endif //MULTIPLE_HEAPS
    {
        GCToEEInterface::VerifySyncTableEntry();
#ifdef MULTIPLE_HEAPS
#ifdef USE_REGIONS
        // check that the heaps not in use have not been inadvertently written to
        for (int hn = n_heaps; hn < n_max_heaps; hn++)
        {
            gc_heap* hp = g_heaps[hn];
            hp->check_decommissioned_heap();
        }
#endif //USE_REGIONS

        current_join->restart();
#endif //MULTIPLE_HEAPS
    }

#ifdef BACKGROUND_GC
    if (settings.concurrent)
    {
        verify_mark_array_cleared();
    }
    dprintf (2,("GC%zu(%s): Verifying heap - end",
        VolatileLoad(&settings.gc_index),
        get_str_gc_type()));
#else
    dprintf (2,("GC#d: Verifying heap - end", VolatileLoad(&settings.gc_index)));
#endif //BACKGROUND_GC
}
#endif  //VERIFY_HEAP


// Heap-verification helper. For each reference slot that go_through_object_cl
// visits for obj, a non-null referent must have a MethodTable that passes
// SanityCheck(); otherwise the failure is logged and FATAL_GC_ERROR is raised.
// The body is compiled out entirely when VERIFY_HEAP is not defined.
void GCHeap::ValidateObjectMember (Object* obj)
{
#ifdef VERIFY_HEAP
    uint8_t* obj_bytes = (uint8_t*)obj;
    size_t obj_size = size (obj);

    // NOTE: the braced block below is a macro argument, so it must not contain
    // commas outside of parentheses.
    go_through_object_cl (method_table (obj), obj_bytes, obj_size, oo,
        {
            // oo is the slot handed to us by the macro; empty slots are ignored.
            uint8_t* member = *oo;
            if (member != nullptr)
            {
                //dprintf (3, ("VOM: m: %zx obj %zx", (size_t)member, obj_bytes));
                MethodTable* member_mt = method_table (member);
                assert (member_mt);
                if (!member_mt->SanityCheck())
                {
                    // Logs the slot address followed by the bad referent.
                    dprintf (1, ("Bad member of %zx %zx",
                                (size_t)oo, (size_t)member));
                    FATAL_GC_ERROR();
                }
            }
        } );
#endif // VERIFY_HEAP
}

// Process-wide GC teardown. In order: calls deleteGCShadow(), marks the GC
// runtime structures as no longer valid, destroys the global card table if its
// refcount is zero, frees any standby segments (non-regions builds only),
// destroys every gc_heap and finally calls gc_heap::shutdown_gc().
// Always returns S_OK.
HRESULT GCHeap::StaticShutdown()
{
    deleteGCShadow();

    GCScan::GcRuntimeStructuresValid (FALSE);

    // Cannot assert this, since we use SuspendEE as the mechanism to quiesce all
    // threads except the one performing the shutdown.
    // ASSERT( !GcInProgress );

    // Guard against any more GC occurring and against any threads blocking
    // for GC to complete when the GC heap is gone.  This fixes a race condition
    // where a thread in GC is destroyed as part of process destruction and
    // the remaining threads block for GC complete.

    //GCTODO
    //EnterAllocLock();
    //Enter();
    //EnterFinalizeLock();
    //SetGCDone();

    // During shutdown a lot of threads are suspended on this event;
    // we don't want to wake them up just yet.
    //CloseHandle (WaitForGCEvent);

    // The global card table is only torn down here when nothing holds a
    // reference to it (refcount of zero).
    uint32_t* global_card_table = &g_gc_card_table[card_word (gcard_of (g_gc_lowest_address))];
    if (card_table_refcount (global_card_table) == 0)
    {
        destroy_card_table (global_card_table);
        g_gc_card_table = nullptr;

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
        g_gc_card_bundle_table = nullptr;
#endif
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        SoftwareWriteWatch::StaticClose();
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    }

#ifndef USE_REGIONS
    // Drain the standby list, deleting one segment per iteration.
    for (heap_segment* standby_seg = gc_heap::segment_standby_list;
         standby_seg != 0;
         standby_seg = gc_heap::segment_standby_list)
    {
        // Fetch the successor before the current segment is deleted.
        heap_segment* remaining = heap_segment_next (standby_seg);
#ifdef MULTIPLE_HEAPS
        (gc_heap::g_heaps[0])->delete_heap_segment (standby_seg, FALSE);
#else //MULTIPLE_HEAPS
        pGenGCHeap->delete_heap_segment (standby_seg, FALSE);
#endif //MULTIPLE_HEAPS
        gc_heap::segment_standby_list = remaining;
    }
#endif // USE_REGIONS

    // Destroy the per-heap GC state.
#ifdef MULTIPLE_HEAPS
    for (int heap_index = 0; heap_index < gc_heap::n_heaps; ++heap_index)
    {
        gc_heap::destroy_gc_heap (gc_heap::g_heaps[heap_index]);
    }
#else
    gc_heap::destroy_gc_heap (pGenGCHeap);
#endif //MULTIPLE_HEAPS

    gc_heap::shutdown_gc();

    return S_OK;
}

// Wait until a garbage collection is complete
// returns NOERROR if wait was OK, other error code if failure.
// WARNING: This will not undo the must complete state. If you are
// in a must complete when you call this, you'd better know what you're
// doing.

#ifdef FEATURE_PREMORTEM_FINALIZATION
// Allocates a CFinalize with the non-throwing new and runs Initialize() on it.
// *pCFinalize always receives the result of the allocation (NULL on allocation
// failure). Returns S_OK on success and E_OUTOFMEMORY if either the allocation
// or Initialize() fails; in the latter case the object is left in *pCFinalize
// and is not deleted here.
static
HRESULT AllocateCFinalize(CFinalize **pCFinalize)
{
    CFinalize* queue = new (nothrow) CFinalize();
    *pCFinalize = queue;

    if (queue == NULL)
    {
        return E_OUTOFMEMORY;
    }

    if (!queue->Initialize())
    {
        return E_OUTOFMEMORY;
    }

    return S_OK;
}
#endif // FEATURE_PREMORTEM_FINALIZATION

// Initializes this GCHeap instance by creating its gc_heap.
// With MULTIPLE_HEAPS the new gc_heap is created for heap number hn and stored
// in pGenGCHeap; otherwise hn is unused.
// Returns S_OK on success, E_OUTOFMEMORY if the gc_heap could not be made.
HRESULT GCHeap::Init(size_t hn)
{
#ifdef MULTIPLE_HEAPS
    pGenGCHeap = gc_heap::make_gc_heap(this, (int)hn);
    if (pGenGCHeap == 0)
    {
        return E_OUTOFMEMORY;
    }
#else
    UNREFERENCED_PARAMETER(hn);
    if (!gc_heap::make_gc_heap())
    {
        return E_OUTOFMEMORY;
    }
#endif //MULTIPLE_HEAPS

    // The heap was created successfully.
    return S_OK;
}

//System wide initialization
HRESULT GCHeap::Initialize()
{
#ifndef TRACE_GC
    STRESS_LOG_VA (1, (ThreadStressLog::gcLoggingIsOffMsg()));
#endif
    HRESULT hr = S_OK;

    qpf = (uint64_t)GCToOSInterface::QueryPerformanceFrequency();
    qpf_ms = 1000.0 / (double)qpf;
    qpf_us = 1000.0 * 1000.0 / (double)qpf;

    g_gc_pFreeObjectMethodTable = GCToEEInterface::GetFreeObjectMethodTable();
    g_num_processors = GCToOSInterface::GetTotalProcessorCount();
    assert(g_num_processors != 0);

    gc_heap::total_physical_mem = (size_t)GCConfig::GetGCTotalPhysicalMemory();
    if (gc_heap::total_physical_mem != 0)
    {
        gc_heap::is_restricted_physical_mem = true;
#ifdef FEATURE_EVENT_TRACE
        gc_heap::physical_memory_from_config = (size_t)gc_heap::total_physical_mem;
#endif //FEATURE_EVENT_TRACE
    }
    else
    {
        gc_heap::total_physical_mem = GCToOSInterface::GetPhysicalMemoryLimit (&gc_heap::is_restricted_physical_mem);
    }
    memset (gc_heap::committed_by_oh, 0, sizeof (gc_heap::committed_by_oh));
    if (!gc_heap::compute_hard_limit())
    {
        log_init_error_to_host ("compute_hard_limit failed, check your heap hard limit related configs");
        return CLR_E_GC_BAD_HARD_LIMIT;
    }

    uint32_t nhp = 1;
    uint32_t nhp_from_config = 0;
    uint32_t max_nhp_from_config = (uint32_t)GCConfig::GetMaxHeapCount();

#ifndef MULTIPLE_HEAPS
    GCConfig::SetServerGC(false);
#else //!MULTIPLE_HEAPS
    GCConfig::SetServerGC(true);
    AffinitySet config_affinity_set;
    GCConfigStringHolder cpu_index_ranges_holder(GCConfig::GetGCHeapAffinitizeRanges());

    uintptr_t config_affinity_mask = static_cast<uintptr_t>(GCConfig::GetGCHeapAffinitizeMask());
    if (!ParseGCHeapAffinitizeRanges(cpu_index_ranges_holder.Get(), &config_affinity_set, config_affinity_mask))
    {
        log_init_error_to_host ("ParseGCHeapAffinitizeRange failed, check your HeapAffinitizeRanges config");
        return CLR_E_GC_BAD_AFFINITY_CONFIG_FORMAT;
    }

    const AffinitySet* process_affinity_set = GCToOSInterface::SetGCThreadsAffinitySet(config_affinity_mask, &config_affinity_set);
    GCConfig::SetGCHeapAffinitizeMask(static_cast<int64_t>(config_affinity_mask));

    if (process_affinity_set->IsEmpty())
    {
        log_init_error_to_host ("This process is affinitize to 0 CPUs, check your GC heap affinity related configs");
        return CLR_E_GC_BAD_AFFINITY_CONFIG;
    }

    if ((cpu_index_ranges_holder.Get() != nullptr)
#ifdef TARGET_WINDOWS
        || (config_affinity_mask != 0)
#endif
    )
    {
        affinity_config_specified_p = true;
    }

    nhp_from_config = static_cast<uint32_t>(GCConfig::GetHeapCount());

    // The CPU count may be overridden by the user. Ensure that we create no more than g_num_processors
    // heaps as that is the number of slots we have allocated for handle tables.
    g_num_active_processors = min (GCToEEInterface::GetCurrentProcessCpuCount(), g_num_processors);

    if (nhp_from_config)
    {
        // Even when the user specifies a heap count, it should not be more
        // than the number of procs this process can use.
        nhp_from_config = min (nhp_from_config, g_num_active_processors);
    }

    nhp = ((nhp_from_config == 0) ? g_num_active_processors : nhp_from_config);

    nhp = min (nhp, (uint32_t)MAX_SUPPORTED_CPUS);

    gc_heap::gc_thread_no_affinitize_p = (gc_heap::heap_hard_limit ?
        !affinity_config_specified_p : (GCConfig::GetNoAffinitize() != 0));

    if (!(gc_heap::gc_thread_no_affinitize_p))
    {
        uint32_t num_affinitized_processors = (uint32_t)process_affinity_set->Count();

        if (num_affinitized_processors != 0)
        {
            nhp = min(nhp, num_affinitized_processors);
        }
    }
#endif //!MULTIPLE_HEAPS

    if (gc_heap::heap_hard_limit)
    {
        gc_heap::hard_limit_config_p = true;
    }

    size_t seg_size_from_config = 0;
    bool compute_memory_settings_succeed = gc_heap::compute_memory_settings(true, nhp, nhp_from_config, seg_size_from_config, 0);
    assert (compute_memory_settings_succeed);

    if ((!gc_heap::heap_hard_limit) && gc_heap::use_large_pages_p)
    {
        return CLR_E_GC_LARGE_PAGE_MISSING_HARD_LIMIT;
    }
    GCConfig::SetGCLargePages(gc_heap::use_large_pages_p);

#ifdef USE_REGIONS
    gc_heap::regions_range = (size_t)GCConfig::GetGCRegionRange();
    if (gc_heap::regions_range == 0)
    {
        if (gc_heap::heap_hard_limit)
        {
#ifndef HOST_64BIT
            // Regions are not supported on 32bit
            assert(false);
#endif //!HOST_64BIT

            if (gc_heap::heap_hard_limit_oh[soh])
            {
                gc_heap::regions_range = gc_heap::heap_hard_limit;
            }
            else
            {
                // We use this calculation because it's close to what we used for segments.
                gc_heap::regions_range = ((gc_heap::use_large_pages_p) ? (2 * gc_heap::heap_hard_limit)
                                                                       : (5 * gc_heap::heap_hard_limit));
            }
        }
        else
        {
            gc_heap::regions_range = 
#ifdef MULTIPLE_HEAPS
            // For SVR use max of 2x total_physical_memory or 256gb
            max(
#else // MULTIPLE_HEAPS
            // for WKS use min
            min(
#endif // MULTIPLE_HEAPS
                (size_t)256 * 1024 * 1024 * 1024, (size_t)(2 * gc_heap::total_physical_mem));
        }
        size_t virtual_mem_limit = GCToOSInterface::GetVirtualMemoryLimit();
        gc_heap::regions_range = min(gc_heap::regions_range, virtual_mem_limit/2);
        gc_heap::regions_range = align_on_page(gc_heap::regions_range);
    }
    GCConfig::SetGCRegionRange(gc_heap::regions_range);
#endif //USE_REGIONS

    size_t seg_size = 0;
    size_t large_seg_size = 0;
    size_t pin_seg_size = 0;
    seg_size = gc_heap::soh_segment_size;

#ifndef USE_REGIONS

    if (gc_heap::heap_hard_limit)
    {
        if (gc_heap::heap_hard_limit_oh[soh])
        {
            // On 32bit we have next guarantees:
            //   0 <= seg_size_from_config <= 1Gb (from max_heap_hard_limit/2)
            //   0 <= (heap_hard_limit = heap_hard_limit_oh[soh] + heap_hard_limit_oh[loh] + heap_hard_limit_oh[poh]) < 4Gb (from gc_heap::compute_hard_limit_from_heap_limits)
            //   0 <= heap_hard_limit_oh[loh] <= 1Gb or < 2Gb
            //   0 <= heap_hard_limit_oh[poh] <= 1Gb or < 2Gb
            //   0 <= large_seg_size <= 1Gb or <= 2Gb (alignment and round up)
            //   0 <= pin_seg_size <= 1Gb or <= 2Gb (alignment and round up)
            //   0 <= soh_segment_size + large_seg_size + pin_seg_size <= 4Gb
            // 4Gb overflow is ok, because 0 size allocation will fail
            large_seg_size = max (gc_heap::adjust_segment_size_hard_limit (gc_heap::heap_hard_limit_oh[loh], nhp), seg_size_from_config);
            pin_seg_size = max (gc_heap::adjust_segment_size_hard_limit (gc_heap::heap_hard_limit_oh[poh], nhp), seg_size_from_config);
        }
        else
        {
            // On 32bit we have next guarantees:
            //   0 <= heap_hard_limit <= 1Gb (from gc_heap::compute_hard_limit)
            //   0 <= soh_segment_size <= 1Gb
            //   0 <= large_seg_size <= 1Gb
            //   0 <= pin_seg_size <= 1Gb
            //   0 <= soh_segment_size + large_seg_size + pin_seg_size <= 3Gb
#ifdef HOST_64BIT
            large_seg_size = gc_heap::use_large_pages_p ? gc_heap::soh_segment_size : gc_heap::soh_segment_size * 2;
#else //HOST_64BIT
            assert (!gc_heap::use_large_pages_p);
            large_seg_size = gc_heap::soh_segment_size;
#endif //HOST_64BIT
            pin_seg_size = large_seg_size;
        }
        if (gc_heap::use_large_pages_p)
            gc_heap::min_segment_size = min_segment_size_hard_limit;
    }
    else
    {
        large_seg_size = get_valid_segment_size (TRUE);
        pin_seg_size = large_seg_size;
    }
    assert (g_theGCHeap->IsValidSegmentSize (seg_size));
    assert (g_theGCHeap->IsValidSegmentSize (large_seg_size));
    assert (g_theGCHeap->IsValidSegmentSize (pin_seg_size));

    dprintf (1, ("%d heaps, soh seg size: %zd mb, loh: %zd mb\n",
        nhp,
        (seg_size / (size_t)1024 / 1024),
        (large_seg_size / 1024 / 1024)));

    gc_heap::min_uoh_segment_size = min (large_seg_size, pin_seg_size);

    if (gc_heap::min_segment_size == 0)
    {
        gc_heap::min_segment_size = min (seg_size, gc_heap::min_uoh_segment_size);
    }
#endif //!USE_REGIONS

    GCConfig::SetHeapCount(static_cast<int64_t>(nhp));

    loh_size_threshold = (size_t)GCConfig::GetLOHThreshold();
    loh_size_threshold = max (loh_size_threshold, LARGE_OBJECT_SIZE);

#ifdef USE_REGIONS
    gc_heap::enable_special_regions_p = (bool)GCConfig::GetGCEnableSpecialRegions();
    size_t gc_region_size = (size_t)GCConfig::GetGCRegionSize();

    if (gc_region_size >= MAX_REGION_SIZE)
    {
        log_init_error_to_host ("The GC RegionSize config is set to %zd bytes (%zd GiB), it needs to be < %zd GiB",
            gc_region_size, gib (gc_region_size), gib (MAX_REGION_SIZE));
        return CLR_E_GC_BAD_REGION_SIZE;
    }

    // Adjust GCRegionSize based on how large each heap would be, for smaller heaps we would
    // like to keep Region sizes small. We choose between 4, 2 and 1mb based on the calculations
    // below (unless its configured explicitly) such that there are at least 2 regions available
    // except for the smallest case. Now the lowest limit possible is 4mb.
    if (gc_region_size == 0)
    {
        // We have a minimum amount of basic regions we have to fit per heap, and we'd like to have the initial
        // regions only take up half of the space.
        size_t max_region_size = gc_heap::regions_range / 2 / nhp / min_regions_per_heap;
        if (max_region_size >= (4 * 1024 * 1024))
        {
            gc_region_size = 4 * 1024 * 1024;
        }
        else if (max_region_size >= (2 * 1024 * 1024))
        {
            gc_region_size = 2 * 1024 * 1024;
        }
        else
        {
            gc_region_size = 1 * 1024 * 1024;
        }
    }

    if (!power_of_two_p(gc_region_size) || ((gc_region_size * nhp * min_regions_per_heap) > gc_heap::regions_range))
    {
        log_init_error_to_host ("Region size is %zd bytes, range is %zd bytes, (%d heaps * %d regions/heap = %d) regions needed initially",
            gc_region_size, gc_heap::regions_range, nhp, min_regions_per_heap, (nhp * min_regions_per_heap));
        return E_OUTOFMEMORY;
    }

    /*
     * Allocation requests less than loh_size_threshold will be allocated on the small object heap.
     *
     * An object cannot span more than one region and regions in small object heap are of the same size - gc_region_size.
     * However, the space available for actual allocations is reduced by the following implementation details -
     *
     * 1.) heap_segment_mem is set to the new pages + sizeof(aligned_plug_and_gap) in make_heap_segment.
     * 2.) a_fit_segment_end_p set pad to Align(min_obj_size, align_const).
     * 3.) a_size_fit_p requires the available space to be >= the allocated size + Align(min_obj_size, align_const)
     *
     * It is guaranteed that an allocation request with this amount or less will succeed unless
     * we cannot commit memory for it.
     */
    int align_const = get_alignment_constant (TRUE);
    size_t effective_max_small_object_size = gc_region_size - sizeof(aligned_plug_and_gap) - Align(min_obj_size, align_const) * 2;

#ifdef FEATURE_STRUCTALIGN
    /*
     * The above assumed FEATURE_STRUCTALIGN is not turned on for platforms where USE_REGIONS is supported, otherwise it is possible
     * that the allocation size is inflated by ComputeMaxStructAlignPad in GCHeap::Alloc and we have to compute an upper bound of that
     * function.
     *
     * Note that ComputeMaxStructAlignPad is defined to be 0 if FEATURE_STRUCTALIGN is turned off.
     */
#error "FEATURE_STRUCTALIGN is not supported for USE_REGIONS"
#endif //FEATURE_STRUCTALIGN

    loh_size_threshold = min (loh_size_threshold, effective_max_small_object_size);
    GCConfig::SetLOHThreshold(loh_size_threshold);

    gc_heap::min_segment_size_shr = index_of_highest_set_bit (gc_region_size);
#else
    gc_heap::min_segment_size_shr = index_of_highest_set_bit (gc_heap::min_segment_size);
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
    assert (nhp <= g_num_processors);
    if (max_nhp_from_config)
    {
        nhp = min (nhp, max_nhp_from_config);
    }
    gc_heap::n_max_heaps = nhp;
    gc_heap::n_heaps = nhp;
    hr = gc_heap::initialize_gc (seg_size, large_seg_size, pin_seg_size, nhp);
#else
    hr = gc_heap::initialize_gc (seg_size, large_seg_size, pin_seg_size);
#endif //MULTIPLE_HEAPS

    GCConfig::SetGCHeapHardLimit(static_cast<int64_t>(gc_heap::heap_hard_limit));
    GCConfig::SetGCHeapHardLimitSOH(static_cast<int64_t>(gc_heap::heap_hard_limit_oh[soh]));
    GCConfig::SetGCHeapHardLimitLOH(static_cast<int64_t>(gc_heap::heap_hard_limit_oh[loh]));
    GCConfig::SetGCHeapHardLimitPOH(static_cast<int64_t>(gc_heap::heap_hard_limit_oh[poh]));

    if (hr != S_OK)
        return hr;

    gc_heap::pm_stress_on = (GCConfig::GetGCProvModeStress() != 0);

#if defined(HOST_64BIT)
    gc_heap::youngest_gen_desired_th = gc_heap::mem_one_percent;
#endif // HOST_64BIT

    WaitForGCEvent = new (nothrow) GCEvent;

    if (!WaitForGCEvent)
    {
        return E_OUTOFMEMORY;
    }

    if (!WaitForGCEvent->CreateManualEventNoThrow(TRUE))
    {
        log_init_error_to_host ("Creation of WaitForGCEvent failed");
        return E_FAIL;
    }

#ifndef FEATURE_NATIVEAOT // NativeAOT forces relocation a different way
#if defined (STRESS_HEAP) && !defined (MULTIPLE_HEAPS)
    if (GCStress<cfg_any>::IsEnabled())
    {
        for (int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++)
        {
            m_StressObjs[i] = CreateGlobalHandle(0);
        }
        m_CurStressObj = 0;
    }
#endif //STRESS_HEAP && !MULTIPLE_HEAPS
#endif // FEATURE_NATIVEAOT

    initGCShadow();         // If we are debugging write barriers, initialize heap shadow

#ifdef USE_REGIONS
    gc_heap::ephemeral_low = MAX_PTR;

    gc_heap::ephemeral_high = nullptr;
#endif //!USE_REGIONS

#ifdef MULTIPLE_HEAPS

    for (uint32_t i = 0; i < nhp; i++)
    {
        GCHeap* Hp = new (nothrow) GCHeap();
        if (!Hp)
            return E_OUTOFMEMORY;

        if ((hr = Hp->Init (i))!= S_OK)
        {
            return hr;
        }
    }

    heap_select::init_numa_node_to_heap_map (nhp);

    // If we have more active processors than heaps we still want to initialize some of the
    // mapping for the rest of the active processors because user threads can still run on
    // them which means it's important to know their numa nodes and map them to a reasonable
    // heap, ie, we wouldn't want to have all such procs go to heap 0.
    if (g_num_active_processors > nhp)
    {
        bool distribute_all_p = false;
#ifdef DYNAMIC_HEAP_COUNT
        distribute_all_p = (gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes);
#endif //DYNAMIC_HEAP_COUNT
        heap_select::distribute_other_procs (distribute_all_p);
    }

    gc_heap* hp = gc_heap::g_heaps[0];

    dynamic_data* gen0_dd = hp->dynamic_data_of (0);
    gc_heap::min_gen0_balance_delta = (dd_min_size (gen0_dd) >> 6);

    bool can_use_cpu_groups = GCToOSInterface::CanEnableGCCPUGroups();
    GCConfig::SetGCCpuGroup(can_use_cpu_groups);

#ifdef HEAP_BALANCE_INSTRUMENTATION
    cpu_group_enabled_p = can_use_cpu_groups;

    if (!GCToOSInterface::GetNumaInfo (&total_numa_nodes_on_machine, &procs_per_numa_node))
    {
        total_numa_nodes_on_machine = 1;

        // Note that if we are in cpu groups we need to take the way proc index is calculated
        // into consideration. It would mean we have more than 64 procs on one numa node -
        // this is mostly for testing (if we want to simulate no numa on a numa system).
        // see vm\gcenv.os.cpp GroupProcNo implementation.
        if (GCToOSInterface::GetCPUGroupInfo (&total_cpu_groups_on_machine, &procs_per_cpu_group))
            procs_per_numa_node = procs_per_cpu_group + ((total_cpu_groups_on_machine - 1) << 6);
        else
            procs_per_numa_node = g_num_processors;
    }
    hb_info_numa_nodes = new (nothrow) heap_balance_info_numa[total_numa_nodes_on_machine];
    dprintf (HEAP_BALANCE_LOG, ("total: %d, numa: %d", g_num_processors, total_numa_nodes_on_machine));

    int hb_info_size_per_proc = sizeof (heap_balance_info_proc);

    for (int numa_node_index = 0; numa_node_index < total_numa_nodes_on_machine; numa_node_index++)
    {
        int hb_info_size_per_node = hb_info_size_per_proc * procs_per_numa_node;
        uint8_t* numa_mem = (uint8_t*)GCToOSInterface::VirtualReserve (hb_info_size_per_node, 0, 0, (uint16_t)numa_node_index);
        if (!numa_mem)
        {
            return E_FAIL;
        }
        if (!GCToOSInterface::VirtualCommit (numa_mem, hb_info_size_per_node, (uint16_t)numa_node_index))
        {
            return E_FAIL;
        }

        heap_balance_info_proc* hb_info_procs = (heap_balance_info_proc*)numa_mem;
        hb_info_numa_nodes[numa_node_index].hb_info_procs = hb_info_procs;

        for (int proc_index = 0; proc_index < (int)procs_per_numa_node; proc_index++)
        {
            heap_balance_info_proc* hb_info_proc = &hb_info_procs[proc_index];
            hb_info_proc->count = default_max_hb_heap_balance_info;
            hb_info_proc->index = 0;
        }
    }
#endif //HEAP_BALANCE_INSTRUMENTATION
#else
    hr = Init (0);
#endif //MULTIPLE_HEAPS
#ifdef USE_REGIONS
    if (initial_regions)
    {
        delete[] initial_regions;
    }
#endif //USE_REGIONS
    if (hr == S_OK)
    {
#ifdef MULTIPLE_HEAPS
        dprintf (6666, ("conserve mem %d, concurent %d, max heap %d", gc_heap::conserve_mem_setting, gc_heap::gc_can_use_concurrent, gc_heap::n_heaps));
#else
        dprintf (6666, ("conserve mem %d, concurent %d, WKS", gc_heap::conserve_mem_setting, gc_heap::gc_can_use_concurrent));
#endif

#ifdef DYNAMIC_HEAP_COUNT
        // if no heap count was specified, and we are told to adjust heap count dynamically ...
        if (gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
        {
            // start with only 1 heap
            gc_heap::smoothed_desired_total[0] /= gc_heap::n_heaps;
            int initial_n_heaps = 1;

            dprintf (6666, ("n_heaps is %d, initial n_heaps is %d, %d cores", gc_heap::n_heaps, initial_n_heaps, g_num_processors));

            {
                if (!gc_heap::prepare_to_change_heap_count (initial_n_heaps))
                {
                    // we don't have sufficient resources.
                    return E_FAIL;
                }

                gc_heap::dynamic_heap_count_data.new_n_heaps = initial_n_heaps;
                gc_heap::dynamic_heap_count_data.idle_thread_count = 0;
                gc_heap::dynamic_heap_count_data.init_only_p = true;

                int max_threads_to_wake = max (gc_heap::n_heaps, initial_n_heaps);
                gc_t_join.update_n_threads (max_threads_to_wake);
                gc_heap::gc_start_event.Set ();
            }

            gc_heap::g_heaps[0]->change_heap_count (initial_n_heaps);
            gc_heap::gc_start_event.Reset ();

            // This needs to be different from our initial heap count so we can make sure we wait for
            // the idle threads correctly in gc_thread_function.
            gc_heap::dynamic_heap_count_data.last_n_heaps = 0;

            int target_tcp = (int)GCConfig::GetGCDTargetTCP();
            if (target_tcp > 0)
            {
                gc_heap::dynamic_heap_count_data.target_tcp = (float)target_tcp;
            }
            // This should be adjusted based on the target tcp. See comments in gcpriv.h
            gc_heap::dynamic_heap_count_data.around_target_threshold = 10.0;

            int gen0_growth_soh_ratio_percent = (int)GCConfig::GetGCDGen0GrowthPercent();
            if (gen0_growth_soh_ratio_percent)
            {
                gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_percent = (int)GCConfig::GetGCDGen0GrowthPercent() * 0.01f;
            }
            // You can specify what sizes you want to allow DATAS to stay within wrt the SOH stable size.
            // By default DATAS allows 10x this size for gen0 budget when the size is small, and 0.1x when the size is large.
            int gen0_growth_min_permil = (int)GCConfig::GetGCDGen0GrowthMinFactor();
            int gen0_growth_max_permil = (int)GCConfig::GetGCDGen0GrowthMaxFactor();
            if (gen0_growth_min_permil)
            {
                gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_min = gen0_growth_min_permil * 0.001f;
            }
            if (gen0_growth_max_permil)
            {
                gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_max = gen0_growth_max_permil * 0.001f;
            }

            if (gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_min > gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_max)
            {
                log_init_error_to_host ("DATAS min permil for gen0 growth %d is greater than max %d, it needs to be lower",
                    gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_min, gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_max);
                return E_FAIL;
            }

            GCConfig::SetGCDTargetTCP ((int)gc_heap::dynamic_heap_count_data.target_tcp);
            GCConfig::SetGCDGen0GrowthPercent ((int)(gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_percent * 100.0f));
            GCConfig::SetGCDGen0GrowthMinFactor ((int)(gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_min * 1000.0f));
            GCConfig::SetGCDGen0GrowthMaxFactor ((int)(gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_max * 1000.0f));
            dprintf (6666, ("DATAS gen0 growth multiplier will be adjusted by %d%%, cap %.3f-%.3f, min budget %Id, max %Id",
                (int)GCConfig::GetGCDGen0GrowthPercent(),
                gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_min, gc_heap::dynamic_heap_count_data.gen0_growth_soh_ratio_max,
                gc_heap::dynamic_heap_count_data.min_gen0_new_allocation, gc_heap::dynamic_heap_count_data.max_gen0_new_allocation));
        }

        GCConfig::SetGCDynamicAdaptationMode (gc_heap::dynamic_adaptation_mode);
#endif //DYNAMIC_HEAP_COUNT
        GCScan::GcRuntimeStructuresValid (TRUE);

        GCToEEInterface::DiagUpdateGenerationBounds();

#if defined(STRESS_REGIONS) && defined(FEATURE_BASICFREEZE)
#ifdef MULTIPLE_HEAPS
        gc_heap* hp = gc_heap::g_heaps[0];
#else
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        // allocate some artificial ro seg datastructures.
        for (int i = 0; i < 2; i++)
        {
            size_t ro_seg_size = 1024 * 1024;
            // I'm not allocating this within the normal reserved range
            // because ro segs are supposed to always be out of range
            // for regions.
            uint8_t* seg_mem = new (nothrow) uint8_t [ro_seg_size];

            if (seg_mem == nullptr)
            {
                hr = E_FAIL;
                break;
            }

            segment_info seg_info;
            seg_info.pvMem = seg_mem;
            seg_info.ibFirstObject = 0; // nothing is there, don't fake it with sizeof(ObjHeader)
            seg_info.ibAllocated = 0;
            seg_info.ibCommit = ro_seg_size;
            seg_info.ibReserved = seg_info.ibCommit;

            if (!RegisterFrozenSegment(&seg_info))
            {
                hr = E_FAIL;
                break;
            }
        }
#endif //STRESS_REGIONS && FEATURE_BASICFREEZE
    }

    return hr;
}

////
// GC callback functions
// Convenience overload of IsPromoted2 that always passes true for
// bVerifyNextHeader.
bool GCHeap::IsPromoted(Object* object)
{
    const bool verify_next_header = true;
    return IsPromoted2 (object, verify_next_header);
}

// Reports whether 'object' is treated as promoted by the GC in progress.
//
// An address is reported as promoted when it lies outside the range being
// examined, or when its mark bit is set:
//   - condemned generation == max_generation: the range is the heap's
//     [lowest_address, highest_address) (or the background_saved_* pair when
//     settings.concurrent is set, in which case background_marked is consulted).
//   - otherwise: the range is the condemned range (is_in_gc_range +
//     is_in_condemned_gc with regions, [gc_low, gc_high) with segments).
//
// In _DEBUG builds a non-null object is additionally validated;
// bVerifyNextHeader is forwarded to CObjectHeader::Validate.
bool GCHeap::IsPromoted2(Object* object, bool bVerifyNextHeader)
{
    uint8_t* const addr = (uint8_t*)object;
    bool promoted_p;

    if (gc_heap::settings.condemned_generation != max_generation)
    {
#ifdef USE_REGIONS
        // The mark bit is only looked at for addresses that are both in the GC
        // range and in a condemned region.
        promoted_p = (!gc_heap::is_in_gc_range (addr) ||
                      !gc_heap::is_in_condemned_gc (addr) ||
                      gc_heap::is_mark_set (addr));
#else
        gc_heap* hp = gc_heap::heap_of (addr);
        promoted_p = ((addr >= hp->gc_high) ||
                      (addr < hp->gc_low) ||
                      hp->is_mark_set (addr));
#endif //USE_REGIONS
    }
    else
    {
#ifdef MULTIPLE_HEAPS
        gc_heap* hp = gc_heap::g_heaps[0];
#else
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

#ifdef BACKGROUND_GC
        if (gc_heap::settings.concurrent)
        {
            promoted_p = ((addr >= hp->background_saved_highest_address) ||
                          (addr < hp->background_saved_lowest_address) ||
                          hp->background_marked (addr));
        }
        else
#endif //BACKGROUND_GC
        {
            promoted_p = ((addr >= hp->highest_address) ||
                          (addr < hp->lowest_address) ||
                          hp->is_mark_set (addr));
        }
    }

// Walking refs when objects are marked seems unexpected
#ifdef _DEBUG
    if (addr != nullptr)
    {
        CObjectHeader* obj_header = (CObjectHeader*)addr;
        obj_header->Validate(TRUE, bVerifyNextHeader, promoted_p);

        // A frozen object should never be reported as "not promoted" here.
        assert(promoted_p || !IsInFrozenSegment(object));
    }
#endif //_DEBUG

    return promoted_p;
}

// Returns the promoted byte count for the heap identified by heap_index.
// When BACKGROUND_GC is compiled in and settings.concurrent is set, the value
// comes from bpromoted_bytes; otherwise it is read from the selected heap
// (g_heaps[heap_index] with MULTIPLE_HEAPS, pGenGCHeap otherwise).
size_t GCHeap::GetPromotedBytes(int heap_index)
{
#ifdef BACKGROUND_GC
    if (gc_heap::settings.concurrent)
    {
        return gc_heap::bpromoted_bytes (heap_index);
    }
#endif //BACKGROUND_GC

#ifdef MULTIPLE_HEAPS
    gc_heap* hp = gc_heap::g_heaps[heap_index];
#else
    gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

    return hp->get_promoted_bytes();
}

void GCHeap::SetYieldProcessorScalingFactor (float scalingFactor)
{
    if (!gc_heap::spin_count_unit_config_p)
    {
        assert (yp_spin_count_unit != 0);
        uint32_t saved_yp_spin_count_unit = yp_spin_count_unit;
        yp_spin_count_unit = (uint32_t)((float)original_spin_count_unit * scalingFactor / (float)9);

        // It's very suspicious if it becomes 0 and also, we don't want to spin too much.
        if ((yp_spin_count_unit == 0) || (yp_spin_count_unit > MAX_YP_SPIN_COUNT_UNIT))
        {
            yp_spin_count_unit = saved_yp_spin_count_unit;
        }
    }
}

// Returns the generation number of 'object' as reported by
// gc_heap::object_gennum on the heap that owns it.
//
// With FEATURE_BASICFREEZE, INT32_MAX is returned instead for addresses outside
// [g_gc_lowest_address, g_gc_highest_address) and, in non-regions builds, for
// objects that IsInFrozenSegment reports as frozen.
unsigned int GCHeap::WhichGeneration (Object* object)
{
    uint8_t* addr = (uint8_t*)object;
#ifdef FEATURE_BASICFREEZE
    bool outside_gc_range_p = ((addr >= g_gc_highest_address) || (addr < g_gc_lowest_address));
    if (outside_gc_range_p)
    {
        return INT32_MAX;
    }
#ifndef USE_REGIONS
    // An in-range frozen segment can only exist without regions;
    // for regions frozen segments are never in-range.
    if (GCHeap::IsInFrozenSegment (object))
    {
        return INT32_MAX;
    }
#endif //!USE_REGIONS
#endif //FEATURE_BASICFREEZE
    gc_heap* hp = gc_heap::heap_of (addr);
    unsigned int gen_number = hp->object_gennum (addr);
    dprintf (3, ("%zx is in gen %d", (size_t)object, gen_number));
    return gen_number;
}

// Forwards the callback work item and its threshold to
// gc_heap::enable_no_gc_callback and returns the resulting status unchanged.
enable_no_gc_region_callback_status GCHeap::EnableNoGCRegionCallback(NoGCRegionCallbackFinalizerWorkItem* callback, uint64_t callback_threshold)
{
    enable_no_gc_region_callback_status status = gc_heap::enable_no_gc_callback (callback, callback_threshold);
    return status;
}

// Takes whatever is currently stored in gc_heap::finalizer_work, replacing it
// with nullptr via Interlocked::ExchangePointer, and returns the previous value.
FinalizerWorkItem* GCHeap::GetExtraWorkForFinalization()
{
    FinalizerWorkItem* pending_work = Interlocked::ExchangePointer(&gc_heap::finalizer_work, nullptr);
    return pending_work;
}

// Reports the generation that contains 'object' together with the address
// range describing that generation's portion of the containing segment.
//
//   object       - address to look up; its segment is obtained via find_segment.
//   ppStart      - receives the start of the range.
//   ppAllocated  - receives the allocated end of the range.
//   ppReserved   - receives the reserved end of the range.
//
// Returns the generation number; max_generation is replaced by loh_generation
// or poh_generation when the segment is flagged as LOH / POH.
//
// With regions the range is simply the segment's mem/allocated/reserved. With
// segments, the ephemeral segment is split at each generation's allocation
// start, so the range of an older generation ends where the next younger one
// begins.
unsigned int GCHeap::GetGenerationWithRange (Object* object, uint8_t** ppStart, uint8_t** ppAllocated, uint8_t** ppReserved)
{
    uint8_t* const addr = (uint8_t*)object;
    int result_gen = -1;
    heap_segment* seg = gc_heap::find_segment (addr, FALSE);
#ifdef USE_REGIONS
    result_gen = heap_segment_gen_num (seg);
    if (result_gen == max_generation)
    {
        if (heap_segment_loh_p (seg))
        {
            result_gen = loh_generation;
        }
        else if (heap_segment_poh_p (seg))
        {
            result_gen = poh_generation;
        }
    }

    *ppStart = heap_segment_mem (seg);
    *ppAllocated = heap_segment_allocated (seg);
    *ppReserved = heap_segment_reserved (seg);
#else
#ifdef MULTIPLE_HEAPS
    gc_heap* hp = heap_segment_heap (seg);
#else
    gc_heap* hp = __this;
#endif //MULTIPLE_HEAPS
    if (seg != hp->ephemeral_heap_segment)
    {
        // Not the ephemeral segment: the whole segment belongs to one generation.
        if (heap_segment_loh_p (seg))
        {
            result_gen = loh_generation;
        }
        else if (heap_segment_poh_p (seg))
        {
            result_gen = poh_generation;
        }
        else
        {
            result_gen = max_generation;
        }
        *ppStart = heap_segment_mem (seg);
        *ppAllocated = heap_segment_allocated (seg);
        *ppReserved = heap_segment_reserved (seg);
    }
    else
    {
        // Walk from the youngest generation upwards; each generation's range is
        // capped by the allocation start of the generation examined before it.
        uint8_t* range_reserved = heap_segment_reserved (seg);
        uint8_t* range_allocated = heap_segment_allocated (seg);
        for (int gen = 0; gen < max_generation; gen++)
        {
            uint8_t* gen_start = generation_allocation_start (hp->generation_of (gen));
            if (addr >= gen_start)
            {
                result_gen = gen;
                *ppStart = gen_start;
                *ppAllocated = range_allocated;
                *ppReserved = range_reserved;
                break;
            }
            range_allocated = gen_start;
            range_reserved = gen_start;
        }
        if (result_gen == -1)
        {
            // Below every ephemeral generation start: the oldest generation's
            // part of the ephemeral segment.
            result_gen = max_generation;
            *ppStart = heap_segment_mem (seg);
            *ppAllocated = *ppReserved = generation_allocation_start (hp->generation_of (max_generation - 1));
        }
    }
#endif //USE_REGIONS
    return (unsigned int)result_gen;
}

// Returns whether 'object' is an ephemeral pointer according to the owning
// heap's ephemeral_pointer_p. With FEATURE_BASICFREEZE and USE_REGIONS,
// addresses outside the heap range are reported as not ephemeral without
// consulting any heap.
bool GCHeap::IsEphemeral (Object* object)
{
    uint8_t* addr = (uint8_t*)object;
#if defined(FEATURE_BASICFREEZE) && defined(USE_REGIONS)
    // Objects in frozen segments are not ephemeral
    if (!is_in_heap_range (addr))
    {
        return false;
    }
#endif
    gc_heap* hp = gc_heap::heap_of (addr);
    return (hp->ephemeral_pointer_p (addr) != 0);
}

// Return NULL if can't find next object. When EE is not suspended,
// the result is not accurate: if the input arg is in gen0, the function could
// return zeroed out memory as next object
//
// Only compiled with real logic under VERIFY_HEAP; otherwise it always returns
// nullptr. NULL is also returned when:
//   - (without FEATURE_BASICFREEZE) the address is outside the GC address range,
//   - no segment contains the address,
//   - the containing segment is a UOH segment,
//   - the computed next address does not advance, or falls below the segment's mem,
//   - the computed next address is at/after heap_segment_allocated and cannot be
//     shown to lie below alloc_allocated on the ephemeral segment.
Object * GCHeap::NextObj (Object * object)
{
#ifdef VERIFY_HEAP
    uint8_t* o = (uint8_t*)object;

#ifndef FEATURE_BASICFREEZE
    if (!((o < g_gc_highest_address) && (o >= g_gc_lowest_address)))
    {
        return NULL;
    }
#endif //!FEATURE_BASICFREEZE

    heap_segment * hs = gc_heap::find_segment (o, FALSE);
    if (!hs)
    {
        return NULL;
    }

    BOOL large_object_p = heap_segment_uoh_p (hs);
    if (large_object_p)
        return NULL; //could be racing with another core allocating.
#ifdef MULTIPLE_HEAPS
    gc_heap* hp = heap_segment_heap (hs);
#else //MULTIPLE_HEAPS
    // NOTE(review): hp is null here but is used with -> below; presumably the
    // gc_heap members accessed are static in the single-heap build - confirm.
    gc_heap* hp = 0;
#endif //MULTIPLE_HEAPS
    // NOTE(review): g is not read anywhere else in this function.
#ifdef USE_REGIONS
    unsigned int g = heap_segment_gen_num (hs);
#else
    unsigned int g = hp->object_gennum ((uint8_t*)object);
#endif
    // large_object_p is always FALSE at this point (the UOH case returned above).
    int align_const = get_alignment_constant (!large_object_p);
    uint8_t* nextobj = o + Align (size (o), align_const);
    if (nextobj <= o) // either overflow or 0 sized object.
    {
        return NULL;
    }

    if (nextobj < heap_segment_mem (hs))
    {
        return NULL;
    }

    // Snapshot both values once so the checks below operate on a consistent pair
    // of locals rather than re-reading fields that may change underneath us.
    uint8_t* saved_alloc_allocated = hp->alloc_allocated;
    heap_segment* saved_ephemeral_heap_segment = hp->ephemeral_heap_segment;

    // We still want to verify nextobj that lands between heap_segment_allocated and alloc_allocated
    // on the ephemeral segment. In regions these 2 could be changed by another thread so we need
    // to make sure they are still in sync by the time we check. If they are not in sync, we just
    // bail which means we don't validate the next object during that small window and that's fine.
    //
    // We also miss validating nextobj if it's in the segment that just turned into the new ephemeral
    // segment since we saved which is also a very small window and again that's fine.
    if ((nextobj >= heap_segment_allocated (hs)) &&
        ((hs != saved_ephemeral_heap_segment) ||
         !in_range_for_segment(saved_alloc_allocated, saved_ephemeral_heap_segment) ||
         (nextobj >= saved_alloc_allocated)))
    {
        return NULL;
    }

    return (Object *)nextobj;
#else
    return nullptr;
#endif // VERIFY_HEAP
}

// Returns true when gc_heap::find_segment locates a segment for vpObject.
// small_heap_only is passed straight through to find_segment. Without
// FEATURE_BASICFREEZE, addresses outside
// [g_gc_lowest_address, g_gc_highest_address) are rejected up front.
bool GCHeap::IsHeapPointer (void* vpObject, bool small_heap_only)
{
    uint8_t* addr = (uint8_t*) vpObject;
#ifndef FEATURE_BASICFREEZE
    if ((addr >= g_gc_highest_address) || (addr < g_gc_lowest_address))
    {
        return false;
    }
#endif //!FEATURE_BASICFREEZE

    return (gc_heap::find_segment (addr, small_heap_only) != nullptr);
}

// Promotion callback: marks the object referenced by *ppObject.
//
//   ppObject - location holding the reference; also handed to pin_object when
//              the object gets pinned.
//   sc       - scan context; NOTE(review): presumably consumed by
//              THREAD_NUMBER_FROM_CONTEXT to obtain the thread number - confirm.
//   flags    - GC_CALL_INTERIOR: *ppObject may point inside an object, so the
//              containing object is looked up with find_object first.
//              GC_CALL_PINNED: the object is pinned before being marked.
//
// Returns without doing anything when the address is outside the find-object
// range, is not in the condemned range, cannot be resolved to an object
// (interior case), or - under conservative GC - resolves to a free object.
void GCHeap::Promote(Object** ppObject, ScanContext* sc, uint32_t flags)
{
    THREAD_NUMBER_FROM_CONTEXT;
#ifndef MULTIPLE_HEAPS
    const int thread = 0;
#endif //!MULTIPLE_HEAPS

    uint8_t* o = (uint8_t*)*ppObject;

    if (!gc_heap::is_in_find_object_range (o))
    {
        return;
    }

#ifdef DEBUG_DestroyedHandleValue
    // we can race with destroy handle during concurrent scan
    if (o == (uint8_t*)DEBUG_DestroyedHandleValue)
        return;
#endif //DEBUG_DestroyedHandleValue

    // NOTE(review): presumably introduces 'hpt', which is used for
    // mark_object_simple below - confirm against the macro definition.
    HEAP_FROM_THREAD;

    gc_heap* hp = gc_heap::heap_of (o);

    // Nothing to do for objects that are not being collected by this GC.
#ifdef USE_REGIONS
    if (!gc_heap::is_in_condemned_gc (o))
#else //USE_REGIONS
    if ((o < hp->gc_low) || (o >= hp->gc_high))
#endif //USE_REGIONS
    {
        return;
    }

    dprintf (3, ("Promote %zx", (size_t)o));

    if (flags & GC_CALL_INTERIOR)
    {
        // Replace the interior pointer with the object find_object returns for it;
        // give up if none is found.
        if ((o = hp->find_object (o)) == 0)
        {
            return;
        }
    }

#ifdef FEATURE_CONSERVATIVE_GC
    // For conservative GC, a value on stack may point to middle of a free object.
    // In this case, we don't need to promote the pointer.
    if (GCConfig::GetConservativeGC()
        && ((CObjectHeader*)o)->IsFree())
    {
        return;
    }
#endif

#ifdef _DEBUG
    ((CObjectHeader*)o)->Validate();
#else
    UNREFERENCED_PARAMETER(sc);
#endif //_DEBUG

    if (flags & GC_CALL_PINNED)
        hp->pin_object (o, (uint8_t**) ppObject);

#ifdef STRESS_PINNING
    // Stress mode: additionally pin one out of every 20 promoted objects.
    if ((++n_promote % 20) == 1)
            hp->pin_object (o, (uint8_t**) ppObject);
#endif //STRESS_PINNING

    hpt->mark_object_simple (&o THREAD_NUMBER_ARG);

    STRESS_LOG_ROOT_PROMOTE(ppObject, o, o ? header(o)->GetMethodTable() : NULL);
}

// Relocation callback: updates *ppObject to the relocated address computed by
// gc_heap::relocate_address.
//
//   ppObject - location holding the reference to update in place.
//   sc       - not used directly in this function body (marked
//              UNREFERENCED_PARAMETER).
//   flags    - GC_CALL_INTERIOR marks *ppObject as a possible interior pointer.
//
// Addresses outside the find-object range are left untouched. For interior
// pointers while settings.loh_compaction is on, pointers outside the condemned
// range are left untouched, and pointers into LOH objects are relocated by
// relocating the containing object's start and re-applying the original offset.
void GCHeap::Relocate (Object** ppObject, ScanContext* sc,
                       uint32_t flags)
{
    UNREFERENCED_PARAMETER(sc);

    uint8_t* object = (uint8_t*)(Object*)(*ppObject);

    if (!gc_heap::is_in_find_object_range (object))
    {
        return;
    }

    THREAD_NUMBER_FROM_CONTEXT;

    //dprintf (3, ("Relocate location %zx\n", (size_t)ppObject));
    dprintf (3, ("R: %zx", (size_t)ppObject));

    gc_heap* hp = gc_heap::heap_of (object);

#ifdef _DEBUG
    if (!(flags & GC_CALL_INTERIOR))
    {
        // We cannot validate this object if it's in the condemned gen because it could
        // be one of the objects that were overwritten by an artificial gap due to a pinned plug.
#ifdef USE_REGIONS
        if (!gc_heap::is_in_condemned_gc (object))
#else //USE_REGIONS
        if (!((object >= hp->gc_low) && (object < hp->gc_high)))
#endif //USE_REGIONS
        {
            ((CObjectHeader*)object)->Validate(FALSE);
        }
    }
#endif //_DEBUG

    dprintf (3, ("Relocate %zx\n", (size_t)object));

    uint8_t* pheader;

    if ((flags & GC_CALL_INTERIOR) && gc_heap::settings.loh_compaction)
    {
        // Interior pointers outside the condemned range are not touched.
#ifdef USE_REGIONS
        if (!gc_heap::is_in_condemned_gc (object))
#else //USE_REGIONS
        if (!((object >= hp->gc_low) && (object < hp->gc_high)))
#endif //USE_REGIONS
        {
            return;
        }

        if (gc_heap::loh_object_p (object))
        {
            // Find the start of the containing object, relocate that, then add
            // back the offset of the interior pointer within the object.
            pheader = hp->find_object (object);
            if (pheader == 0)
            {
                return;
            }

            ptrdiff_t ref_offset = object - pheader;
            hp->relocate_address(&pheader THREAD_NUMBER_ARG);
            *ppObject = (Object*)(pheader + ref_offset);
            return;
        }
    }

    // General case: hand the address itself to relocate_address.
    {
        pheader = object;
        hp->relocate_address(&pheader THREAD_NUMBER_ARG);
        *ppObject = (Object*)pheader;
    }

    // The method table is only passed to the log for non-interior pointers.
    STRESS_LOG_ROOT_RELOCATE(ppObject, object, pheader, ((!(flags & GC_CALL_INTERIOR)) ? ((Object*)object)->GetGCSafeMethodTable() : 0));
}

// Returns true when the size of pObj is at least loh_size_threshold.
/*static*/ bool GCHeap::IsLargeObject(Object *pObj)
{
    size_t obj_bytes = size (pObj);
    return (obj_bytes >= loh_size_threshold);
}

#ifndef FEATURE_NATIVEAOT // NativeAOT forces relocation a different way
#ifdef STRESS_HEAP

void StressHeapDummy ();

// CLRRandom implementation can produce FPU exceptions if
// the test/application run by CLR is enabling any FPU exceptions.
// We want to avoid any unexpected exception coming from stress
// infrastructure, so CLRRandom is not an option.
// The code below is a replicate of CRT rand() implementation.
// Using CRT rand() is not an option because we will interfere with the user application
// that may also use it.
//
// Returns a pseudo-random value in [0, iMaxValue). iMaxValue must be positive
// (the result is produced with a modulo by iMaxValue). The generator is seeded
// from time() on first use and keeps its state in function-local statics; no
// synchronization is performed here.
//
// The state is held in an unsigned 32-bit integer so the multiply/add step
// wraps with well-defined modulo-2^32 semantics instead of overflowing a
// signed int. The produced values (bits 16..30 of the state) are the same as
// with wrapping signed arithmetic.
int StressRNG(int iMaxValue)
{
    static bool bisRandInit = false;
    static uint32_t lHoldrand = 1;

    if (!bisRandInit)
    {
        lHoldrand = (uint32_t)time(NULL);
        bisRandInit = true;
    }

    lHoldrand = lHoldrand * 214013u + 2531011u;
    int randValue = (int)((lHoldrand >> 16) & 0x7fff);
    return randValue % iMaxValue;
}
#endif // STRESS_HEAP
#endif // !FEATURE_NATIVEAOT

// free up object so that things will move and then do a GC
//return TRUE if GC actually happens, otherwise FALSE
//
// Only has a real implementation when STRESS_HEAP is defined and
// FEATURE_NATIVEAOT is not; otherwise it ignores 'context' and returns FALSE.
//
//   context - allocation context of the caller (must not be null); used for the
//             stress string allocations in the single-heap build.
//
// Outline of the real implementation:
//   1. Bail out (FALSE) if GC stress is disabled, filtered out for this thread or
//      stack, the current thread was created by the GC, or g_pStringClass is not
//      available yet.
//   2. (single-heap builds only) Keep a set of string objects in m_StressObjs and
//      repeatedly carve a free object off the tail of the current one.
//   3. Trigger a GC; the generation depends on the stress level and on whether
//      concurrent GC is enabled.
bool GCHeap::StressHeap(gc_alloc_context * context)
{
#if defined(STRESS_HEAP) && !defined(FEATURE_NATIVEAOT)
    alloc_context* acontext = static_cast<alloc_context*>(context);
    assert(context != nullptr);

    // if GC stress was dynamically disabled during this run we return FALSE
    if (!GCStressPolicy::IsEnabled())
        return FALSE;

#ifdef _DEBUG
    if (g_pConfig->FastGCStressLevel() && !GCToEEInterface::GetThread()->StressHeapIsEnabled()) {
        return FALSE;
    }
#endif //_DEBUG

    // With GCSTRESS_UNIQUE (or, in _DEBUG, a fast stress level above 1) only
    // proceed when Thread::UniqueStack accepts this call.
    if ((g_pConfig->GetGCStressLevel() & EEConfig::GCSTRESS_UNIQUE)
#ifdef _DEBUG
        || g_pConfig->FastGCStressLevel() > 1
#endif //_DEBUG
        ) {
        if (!Thread::UniqueStack(&acontext)) {
            return FALSE;
        }
    }

#ifdef BACKGROUND_GC
    // don't trigger a GC from the GC threads but still trigger GCs from user threads.
    if (GCToEEInterface::WasCurrentThreadCreatedByGC())
    {
        return FALSE;
    }
#endif //BACKGROUND_GC

    if (g_pStringClass == 0)
    {
        // If the String class has not been loaded, dont do any stressing. This should
        // be kept to a minimum to get as complete coverage as possible.
        _ASSERTE(g_fEEInit);
        return FALSE;
    }

#ifndef MULTIPLE_HEAPS
    // Starts at -1 so that the first thread to increment it observes 0 and
    // enters the block below; any thread arriving while it is still held sees a
    // positive value and skips it. Every caller decrements after the block,
    // whether or not it entered, to undo its own increment.
    static int32_t OneAtATime = -1;

    // Only bother with this if the stress level is big enough and if nobody else is
    // doing it right now.  Note that some callers are inside the AllocLock and are
    // guaranteed synchronized.  But others are using AllocationContexts and have no
    // particular synchronization.
    //
    // For this latter case, we want a very high-speed way of limiting this to one
    // at a time.  A secondary advantage is that we release part of our StressObjs
    // buffer sparingly but just as effectively.

    if (Interlocked::Increment(&OneAtATime) == 0 &&
        !TrackAllocations()) // Messing with object sizes can confuse the profiler (see ICorProfilerInfo::GetObjectSize)
    {
        StringObject* str;

        // If the current string is used up
        if (HndFetchHandle(m_StressObjs[m_CurStressObj]) == 0)
        {
            // Populate handles with strings
            int i = m_CurStressObj;
            while(HndFetchHandle(m_StressObjs[i]) == 0)
            {
                _ASSERTE(m_StressObjs[i] != 0);
                // String payload sized 32 bytes below loh_size_threshold.
                // NOTE(review): presumably so the allocation stays out of the LOH - confirm.
                unsigned strLen = ((unsigned)loh_size_threshold - 32) / sizeof(WCHAR);
                unsigned strSize = PtrAlign(StringObject::GetSize(strLen));

                // update the cached type handle before allocating
                SetTypeHandleOnThreadForAlloc(TypeHandle(g_pStringClass));
                str = (StringObject*) pGenGCHeap->allocate (strSize, acontext, /*flags*/ 0);
                if (str)
                {
                    str->SetMethodTable (g_pStringClass);
                    str->SetStringLength (strLen);
                    HndAssignHandle(m_StressObjs[i], ObjectToOBJECTREF(str));
                }
                // Move to the next slot, stopping once we have wrapped around to
                // where we started (this also ends the loop if allocation failed).
                i = (i + 1) % NUM_HEAP_STRESS_OBJS;
                if (i == m_CurStressObj) break;
            }

            // advance the current handle to the next string
            m_CurStressObj = (m_CurStressObj + 1) % NUM_HEAP_STRESS_OBJS;
        }

        // Get the current string
        str = (StringObject*) OBJECTREFToObject(HndFetchHandle(m_StressObjs[m_CurStressObj]));
        if (str)
        {
            // Chop off the end of the string and form a new object out of it.
            // This will 'free' an object at the beginning of the heap, which will
            // force data movement.  Note that we can only do this so many times.
            // before we have to move on to the next string.
            unsigned sizeOfNewObj = (unsigned)Align(min_obj_size * 31);
            if (str->GetStringLength() > sizeOfNewObj / sizeof(WCHAR))
            {
                // The free object occupies the last sizeOfNewObj bytes of the
                // string's aligned extent.
                unsigned sizeToNextObj = (unsigned)Align(size(str));
                uint8_t* freeObj = ((uint8_t*) str) + sizeToNextObj - sizeOfNewObj;
                pGenGCHeap->make_unused_array (freeObj, sizeOfNewObj);

#if !defined(TARGET_AMD64) && !defined(TARGET_X86)
                // ensure that the write to the new free object is seen by
                // background GC *before* the write to the string length below
                MemoryBarrier();
#endif

                str->SetStringLength(str->GetStringLength() - (sizeOfNewObj / sizeof(WCHAR)));
            }
            else
            {
                // Let the string itself become garbage.
                // will be realloced next time around
                HndAssignHandle(m_StressObjs[m_CurStressObj], 0);
            }
        }
    }
    Interlocked::Decrement(&OneAtATime);
#endif // !MULTIPLE_HEAPS

    if (g_pConfig->GetGCStressLevel() & EEConfig::GCSTRESS_INSTR_JIT)
    {
        // When GCSTRESS_INSTR_JIT is set we see lots of GCs - on every GC-eligible instruction.
        // We do not want all these GC to be gen2 because:
        // - doing only or mostly gen2 is very expensive in this mode
        // - doing only or mostly gen2 prevents coverage of generation-aware behaviors
        // - the main value of this stress mode is to catch stack scanning issues at various/rare locations
        //    in the code and gen2 is not needed for that.

        int rgen = StressRNG(100);

        // gen0:gen1:gen2 distribution: 90:8:2
        if (rgen >= 98)
            rgen = 2;
        else if (rgen >= 90)
            rgen = 1;
        else
            rgen = 0;

        GarbageCollectTry (rgen, FALSE, collection_gcstress);
    }
    else if (IsConcurrentGCEnabled())
    {
        int rgen = StressRNG(10);

        // gen0:gen1:gen2 distribution: 40:40:20
        if (rgen >= 8)
            rgen = 2;
        else if (rgen >= 4)
            rgen = 1;
        else
            rgen = 0;

        GarbageCollectTry (rgen, FALSE, collection_gcstress);
    }
    else
    {
        // Neither of the above: always collect max_generation.
        GarbageCollect(max_generation, FALSE, collection_gcstress);
    }

    return TRUE;
#else
    UNREFERENCED_PARAMETER(context);
    return FALSE;
#endif //STRESS_HEAP && !FEATURE_NATIVEAOT
}

// REGISTER_FOR_FINALIZATION(_object, _size)
//   With FEATURE_PREMORTEM_FINALIZATION: expands to a RegisterForFinalization
//   call (first argument 0) on the finalize queue of a variable named 'hp',
//   which therefore must be in scope wherever the macro is used.
//   Without it: expands to the constant true, so the check in the macro below
//   never fails because of registration.
#ifdef FEATURE_PREMORTEM_FINALIZATION
#define REGISTER_FOR_FINALIZATION(_object, _size) \
    hp->finalize_queue->RegisterForFinalization (0, (_object), (_size))
#else // FEATURE_PREMORTEM_FINALIZATION
#define REGISTER_FOR_FINALIZATION(_object, _size) true
#endif // FEATURE_PREMORTEM_FINALIZATION

// CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register)
//   Makes the *enclosing function* return NULL (after STRESS_LOG_OOM_STACK) when
//   _object is NULL, or when _register is set and REGISTER_FOR_FINALIZATION
//   evaluates to false. Registration is only attempted for a non-NULL _object
//   with _register set. Wrapped in do { } while (false) so it can be used as a
//   single statement.
#define CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register) do {  \
    if ((_object) == NULL || ((_register) && !REGISTER_FOR_FINALIZATION(_object, _size)))   \
    {                                                                                       \
        STRESS_LOG_OOM_STACK(_size);                                                        \
        return NULL;                                                                        \
    }                                                                                       \
} while (false)

// Allocate a small object whose header must sit at a particular offset within an 8-byte window.
// Returns the new object, or NULL when the underlying hp->allocate call returns NULL.
Object* AllocAlign8(alloc_context* acontext, gc_heap* hp, size_t size, uint32_t flags)
{
    CONTRACTL {
        NOTHROW;
        GC_TRIGGERS;
    } CONTRACTL_END;

    // The payload that needs 8-byte alignment may live at an offset inside the object, so the
    // header has to end up either on an 8-byte boundary (low three address bits == 0) or midway
    // between two such boundaries (low three address bits == 4). GC_ALLOC_ALIGN8_BIAS selects
    // the midway case.
    size_t wantedLowBits = (flags & GC_ALLOC_ALIGN8_BIAS) ? 4 : 0;

    // Where the context would place the next object (note that we're inside the alloc lock at
    // this point).
    uint8_t* nextFree = acontext->alloc_ptr;

    // Fast path: the next address already has the alignment we want and the object fits in what
    // is left of the context, so a plain allocation does the job.
    if ((((size_t)nextFree & 7) == wantedLowBits) && ((nextFree + size) <= acontext->alloc_limit))
    {
        Object* alignedObj = (Object*) hp->allocate (size, acontext, flags);
        ASSERT(((size_t)alignedObj & 7) == wantedLowBits);
        return alignedObj;
    }

    // Slow path: either the next address is mis-aligned or the context is too small. Allocate
    // room for the object plus one padding object (formatted as a free object). The padding's
    // size flips the alignment of whatever follows it (asserted below), so one of the two
    // layouts "object + padding" or "padding + object" is guaranteed to be aligned correctly.
    ASSERT((Align(min_obj_size) & 7) == 4);
    uint8_t* block = (uint8_t*) hp->allocate (Align(size) + Align(min_obj_size), acontext, flags);
    if (!block)
    {
        return NULL;
    }

    Object* alignedObj = NULL;
    CObjectHeader* padding = NULL;

    if (((size_t)block & 7) == wantedLowBits)
    {
        // The block itself is aligned as required: the real object goes first and the padding
        // occupies the tail of the block.
        alignedObj = (Object*)block;
        padding = (CObjectHeader*)(block + Align(size));
    }
    else
    {
        // The block is still mis-aligned: the padding goes first, which puts the real object on
        // the required alignment.
        padding = (CObjectHeader*)block;
        alignedObj = (Object*)(block + Align(min_obj_size));
        ASSERT(((size_t)alignedObj & 7) == wantedLowBits);
        if (flags & GC_ALLOC_ZEROING_OPTIONAL)
        {
            // Clear the pointer-sized slot just before the aligned object (its syncblock).
            *(((PTR_PTR)alignedObj)-1) = 0;
        }
    }

    padding->SetFree(min_obj_size);
    return alignedObj;
}

// Allocates an object of `size` bytes on behalf of the given allocation context.
//
// Parameters:
//   context - allocation context; it is downcast to alloc_context.
//   size    - requested size in bytes.
//   flags   - GC_ALLOC_* bits. The ones examined here are GC_ALLOC_USER_OLD_HEAP,
//             GC_ALLOC_PINNED_OBJECT_HEAP, GC_ALLOC_LARGE_OBJECT_HEAP (assert only),
//             GC_ALLOC_ALIGN8, GC_ALLOC_ALIGN8_BIAS (assert only) and GC_ALLOC_FINALIZE.
//   REQD_ALIGN_DCL - macro defined elsewhere; presumably declares the `requiredAlignment`
//             parameter used below when FEATURE_STRUCTALIGN is on - TODO confirm.
//
// Returns the new object, or NULL if the allocation failed or if GC_ALLOC_FINALIZE was requested
// and registering the object for finalization failed (see
// CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION).
Object*
GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_DCL)
{
    CONTRACTL {
        NOTHROW;
        GC_TRIGGERS;
    } CONTRACTL_END;

    TRIGGERSGC();

    Object* newAlloc = NULL;
    alloc_context* acontext = static_cast<alloc_context*>(context);

#ifdef MULTIPLE_HEAPS
    // A context that has no heap yet gets one assigned before its first allocation.
    if (acontext->get_alloc_heap() == 0)
    {
        AssignHeap (acontext);
        assert (acontext->get_alloc_heap());
    }
    gc_heap* hp = acontext->get_alloc_heap()->pGenGCHeap;
#else
    gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

    // Anything at or above the LOH threshold must have been flagged for the LOH by the caller.
    assert(size < loh_size_threshold || (flags & GC_ALLOC_LARGE_OBJECT_HEAP));

    if (flags & GC_ALLOC_USER_OLD_HEAP)
    {
        // The LOH always guarantees at least 8-byte alignment, regardless of platform. Moreover it doesn't
        // support mis-aligned object headers so we can't support biased headers. Luckily for us
        // we've managed to arrange things so the only case where we see a bias is for boxed value types and
        // these can never get large enough to be allocated on the LOH.
        ASSERT((flags & GC_ALLOC_ALIGN8_BIAS) == 0);
        ASSERT(65536 < loh_size_threshold);

        // The pinned-object-heap flag selects the POH generation; otherwise the LOH generation is used.
        int gen_num = (flags & GC_ALLOC_PINNED_OBJECT_HEAP) ? poh_generation : loh_generation;
        newAlloc = (Object*) hp->allocate_uoh_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, gen_num, acontext->alloc_bytes_uoh);
        ASSERT(((size_t)newAlloc & 7) == 0);

#ifdef MULTIPLE_HEAPS
        if (flags & GC_ALLOC_FINALIZE)
        {
            // the heap may have changed due to heap balancing - it's important
            // to register the object for finalization on the heap it was allocated on
            hp = gc_heap::heap_of ((uint8_t*)newAlloc);
        }
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_STRUCTALIGN
        newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size);
#endif // FEATURE_STRUCTALIGN
    }
    else
    {
        // Small-object path: either the 8-byte-aligned helper or a plain allocation.
        if (flags & GC_ALLOC_ALIGN8)
        {
            newAlloc = AllocAlign8 (acontext, hp, size, flags);
        }
        else
        {
            newAlloc = (Object*) hp->allocate (size + ComputeMaxStructAlignPad(requiredAlignment), acontext, flags);
        }

#ifdef MULTIPLE_HEAPS
        if (flags & GC_ALLOC_FINALIZE)
        {
            // the heap may have changed due to heap balancing or heaps going out of service - it's
            // important to register the object for finalization on the heap it was allocated on
#ifdef DYNAMIC_HEAP_COUNT
            // On a failed allocation there is no object to look up, so fall back to the context's heap.
            hp = (newAlloc == nullptr) ? acontext->get_alloc_heap()->pGenGCHeap : gc_heap::heap_of ((uint8_t*)newAlloc);
#else //DYNAMIC_HEAP_COUNT
            hp = acontext->get_alloc_heap()->pGenGCHeap;
            assert ((newAlloc == nullptr) || (hp == gc_heap::heap_of ((uint8_t*)newAlloc)));
#endif //DYNAMIC_HEAP_COUNT
        }
#endif //MULTIPLE_HEAPS

#ifdef FEATURE_STRUCTALIGN
        newAlloc = (Object*) hp->pad_for_alignment ((uint8_t*) newAlloc, requiredAlignment, size, acontext);
#endif // FEATURE_STRUCTALIGN
    }

    // This macro returns NULL from this function if newAlloc is NULL, or if finalization was
    // requested and registration (which uses `hp`) fails.
    CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(newAlloc, size, flags & GC_ALLOC_FINALIZE);
#ifdef USE_REGIONS
    assert (IsHeapPointer (newAlloc));
#endif //USE_REGIONS

    return newAlloc;
}

// Calls fix_allocation_context for the given allocation context on the heap that owns it.
//
//   context - the allocation context to fix; downcast to alloc_context.
//   arg     - only tested against 0: a non-zero value resets the context's alloc count (multi-heap
//             builds) and is forwarded as TRUE to fix_allocation_context.
//   heap    - when non-NULL, the context is only fixed if it belongs to this heap.
void
GCHeap::FixAllocContext (gc_alloc_context* context, void* arg, void *heap)
{
    alloc_context* acontext = static_cast<alloc_context*>(context);
    BOOL arg_set_p = ((arg != 0)? TRUE : FALSE);

#ifdef MULTIPLE_HEAPS
    if (arg_set_p)
    {
        acontext->init_alloc_count();
    }

    // A context with no allocation pointer has nothing to fix.
    uint8_t* current_alloc_ptr = acontext->alloc_ptr;
    if (!current_alloc_ptr)
    {
        return;
    }

    // Look the heap up from the pointer rather than from acontext->alloc_heap: the two can be
    // out of sync because allocate may re-assign the heap.
    gc_heap* hp = gc_heap::heap_of (current_alloc_ptr);
#else
    gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

    // The caller asked for a specific heap and this context lives on a different one.
    if ((heap != NULL) && (heap != hp))
    {
        return;
    }

    hp->fix_allocation_context (acontext, arg_set_p, TRUE);
}

// Returns what find_object reports for an interior pointer, or NULL when the pointer is outside
// the range that can be searched.
//
//   pInteriorPtr      - address to look up.
//   fCollectedGenOnly - when true, the pointer must additionally pass the "being collected" filter:
//                       is_in_condemned_gc with regions, [gc_low, gc_high) otherwise.
Object*
GCHeap::GetContainingObject (void *pInteriorPtr, bool fCollectedGenOnly)
{
    uint8_t* interior = (uint8_t*)pInteriorPtr;

    if (gc_heap::is_in_find_object_range (interior))
    {
        gc_heap* hp = gc_heap::heap_of (interior);

#ifdef USE_REGIONS
        if (!fCollectedGenOnly || gc_heap::is_in_condemned_gc (interior))
        {
            return (Object*)(hp->find_object (interior));
        }
#else //USE_REGIONS
        // Half-open address range the pointer has to fall into.
        uint8_t* range_start = (fCollectedGenOnly ? hp->gc_low : hp->lowest_address);
        uint8_t* range_end = (fCollectedGenOnly ? hp->gc_high : hp->highest_address);

        if ((interior >= range_start) && (interior < range_end))
        {
            return (Object*)(hp->find_object (interior));
        }
#endif //USE_REGIONS
    }

    return NULL;
}

// Decides whether an "optimized" collection request should go ahead for the generation described
// by dd. Returns TRUE when dd_new_allocation is negative, or when the ratio
// dd_new_allocation / dd_desired_allocation is below 0.7 (low_memory_p set) or 0.3 (otherwise).
BOOL should_collect_optimized (dynamic_data* dd, BOOL low_memory_p)
{
    bool collect_p = (dd_new_allocation (dd) < 0);

    if (!collect_p)
    {
        collect_p = (((float)(dd_new_allocation (dd)) / (float)dd_desired_allocation (dd)) < (low_memory_p ? 0.7 : 0.3));
    }

    return (collect_p ? TRUE : FALSE);
}

//----------------------------------------------------------------------------
// #GarbageCollector
//
//  API to ensure that a complete new garbage collection takes place
//
//  generation   - generation to collect; a negative value or anything above max_generation is
//                 treated as max_generation.
//  low_memory_p - the request comes from a low-memory condition; on 64-bit hosts the request is
//                 dropped early when very little has been allocated (see below).
//  mode         - bitmask of collection_* flags (optimized / blocking / non_blocking are looked at
//                 here; the rest is interpreted by GarbageCollectTry).
//
//  Always returns S_OK, including on the paths where no collection is performed.
//
HRESULT
GCHeap::GarbageCollect (int generation, bool low_memory_p, int mode)
{
#if defined(HOST_64BIT)
    if (low_memory_p)
    {
        // Sum, over all heaps, the gen0 desired allocation and how much of it has been consumed
        // (desired - new_allocation).
        size_t total_allocated = 0;
        size_t total_desired = 0;
#ifdef MULTIPLE_HEAPS
        int hn = 0;
        for (hn = 0; hn < gc_heap::n_heaps; hn++)
        {
            gc_heap* hp = gc_heap::g_heaps [hn];
            total_desired += dd_desired_allocation (hp->dynamic_data_of (0));
            total_allocated += dd_desired_allocation (hp->dynamic_data_of (0))-
                dd_new_allocation (hp->dynamic_data_of (0));
        }
#else
        gc_heap* hp = pGenGCHeap;
        total_desired = dd_desired_allocation (hp->dynamic_data_of (0));
        total_allocated = dd_desired_allocation (hp->dynamic_data_of (0))-
            dd_new_allocation (hp->dynamic_data_of (0));
#endif //MULTIPLE_HEAPS

        // Skip the GC when the gen0 budget is large but we have consumed only a small part of it.
        // NOTE(review): the message below says "< 10% of physical mem" while the comparison uses
        // gc_heap::mem_one_percent - confirm which one is intended.
        if ((total_desired > gc_heap::mem_one_percent) && (total_allocated < gc_heap::mem_one_percent))
        {
            dprintf (2, ("Async low mem but we've only allocated %zu (< 10%% of physical mem) out of %zu, returning",
                         total_allocated, total_desired));

            return S_OK;
        }
    }
#endif // HOST_64BIT

#ifdef MULTIPLE_HEAPS
    gc_heap* hpt = gc_heap::g_heaps[0];
#else
    // NOTE(review): hpt is null in this configuration; the hpt-> accesses below presumably
    // resolve to static members in the single-heap build - confirm.
    gc_heap* hpt = 0;
#endif //MULTIPLE_HEAPS

    // Clamp the requested generation into [0, max_generation]; negative means max_generation.
    generation = (generation < 0) ? max_generation : min (generation, (int)max_generation);
    dynamic_data* dd = hpt->dynamic_data_of (generation);

#ifdef BACKGROUND_GC
    if (gc_heap::background_running_p())
    {
        // A purely optimized or a non-blocking request is satisfied by the background GC that is
        // already in progress.
        if ((mode == collection_optimized) || (mode & collection_non_blocking))
        {
            return S_OK;
        }
        // A blocking request waits for the background GC; if it is also optimized, that wait is
        // all that is needed.
        if (mode & collection_blocking)
        {
            pGenGCHeap->background_gc_wait();
            if (mode & collection_optimized)
            {
                return S_OK;
            }
        }
    }
#endif //BACKGROUND_GC

    if (mode & collection_optimized)
    {
        if (pGenGCHeap->gc_started)
        {
            return S_OK;
        }
        else
        {
            // Only collect if should_collect_optimized says so for the requested generation on
            // some heap - or, for a max_generation request, for any of the UOH generations.
            BOOL should_collect = FALSE;
            BOOL should_check_uoh = (generation == max_generation);
#ifdef MULTIPLE_HEAPS
            for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++)
            {
                dynamic_data* dd1 = gc_heap::g_heaps [heap_number]->dynamic_data_of (generation);
                should_collect = should_collect_optimized (dd1, low_memory_p);
                if (should_check_uoh)
                {
                    for (int i = uoh_start_generation; i < total_generation_count && !should_collect; i++)
                    {
                        should_collect = should_collect_optimized (gc_heap::g_heaps [heap_number]->dynamic_data_of (i), low_memory_p);
                    }
                }

                if (should_collect)
                    break;
            }
#else
            should_collect = should_collect_optimized (dd, low_memory_p);
            if (should_check_uoh)
            {
                for (int i = uoh_start_generation; i < total_generation_count && !should_collect; i++)
                {
                    should_collect = should_collect_optimized (hpt->dynamic_data_of (i), low_memory_p);
                }
            }
#endif //MULTIPLE_HEAPS
            if (!should_collect)
            {
                return S_OK;
            }
        }
    }

    // Snapshot the counters that tell us afterwards whether the GC we asked for really happened.
    size_t CollectionCountAtEntry = dd_collection_count (dd);
    size_t BlockingCollectionCountAtEntry = gc_heap::full_gc_counts[gc_type_blocking];
    size_t CurrentCollectionCount = 0;

retry:

    CurrentCollectionCount = GarbageCollectTry(generation, low_memory_p, mode);

    // A blocking max_generation request is only satisfied once the blocking full GC count has
    // moved; otherwise wait for any background GC in progress and try again.
    if ((mode & collection_blocking) &&
        (generation == max_generation) &&
        (gc_heap::full_gc_counts[gc_type_blocking] == BlockingCollectionCountAtEntry))
    {
#ifdef BACKGROUND_GC
        if (gc_heap::background_running_p())
        {
            pGenGCHeap->background_gc_wait();
        }
#endif //BACKGROUND_GC

        goto retry;
    }

    // The value returned by GarbageCollectTry still equals the collection count taken at entry,
    // so try again.
    if (CollectionCountAtEntry == CurrentCollectionCount)
    {
        goto retry;
    }

    return S_OK;
}

// Translates (low_memory_p, mode) into a gc_reason and hands the request to
// GarbageCollectGeneration, whose result is returned unchanged.
//
//   generation   - negative or above max_generation is treated as max_generation.
//   low_memory_p - selects the low-memory reasons; otherwise an "induced" reason is used.
//   mode         - collection_* bitmask refining the reason.
size_t
GCHeap::GarbageCollectTry (int generation, BOOL low_memory_p, int mode)
{
    int gen = (generation < 0) ?
               max_generation : min (generation, (int)max_generation);

    gc_reason reason = reason_empty;

    if (low_memory_p)
    {
        // Low memory: only the blocking bit matters.
        if (mode & collection_blocking)
            reason = reason_lowmemory_blocking;
        else
            reason = reason_lowmemory;
    }
    // Induced GC: the first matching mode bit, in this priority order, picks the reason.
    else if (mode & collection_aggressive)
    {
        reason = reason_induced_aggressive;
    }
    else if (mode & collection_compacting)
    {
        reason = reason_induced_compacting;
    }
    else if (mode & collection_non_blocking)
    {
        reason = reason_induced_noforce;
    }
#ifdef STRESS_HEAP
    else if (mode & collection_gcstress)
    {
        reason = reason_gcstress;
    }
#endif
    else
    {
        reason = reason_induced;
    }

    return GarbageCollectGeneration (gen, reason);
}

#ifdef BACKGROUND_GC
void gc_heap::add_bgc_pause_duration_0()
{
    if (settings.concurrent)
    {
        uint64_t suspended_end_ts = GetHighPrecisionTimeStamp();
        size_t pause_duration = (size_t)(suspended_end_ts - suspended_start_time);
        last_recorded_gc_info* last_gc_info = &(last_bgc_info[last_bgc_info_index]);
        last_gc_info->pause_durations[0] = pause_duration;
        if (last_gc_info->index < last_ephemeral_gc_info.index)
        {
            last_gc_info->pause_durations[0] -= last_ephemeral_gc_info.pause_durations[0];
        }

        total_suspended_time += last_gc_info->pause_durations[0];
    }
}

// Returns the last_bgc_info slot of the most recently completed background GC: the slot other
// than last_bgc_info_index while a background GC is running, last_bgc_info_index itself otherwise.
last_recorded_gc_info* gc_heap::get_completed_bgc_info()
{
    int slot;

    if (gc_heap::background_running_p())
    {
        slot = (int)(!(gc_heap::last_bgc_info_index));
    }
    else
    {
        slot = (int)gc_heap::last_bgc_info_index;
    }

    return &gc_heap::last_bgc_info[slot];
}
#endif //BACKGROUND_GC

// Short label for the current GC used in trace output: "BGC" when settings.concurrent is set,
// "FGC" when a background GC is running but this GC is not the concurrent one, "NGC" otherwise
// (always "NGC" when BACKGROUND_GC is not compiled in).
const char* gc_heap::get_str_gc_type()
{
#ifdef BACKGROUND_GC
    if (settings.concurrent)
    {
        return "BGC";
    }

    if (gc_heap::background_running_p ())
    {
        return "FGC";
    }
#endif // BACKGROUND_GC

    return "NGC";
}

// Bookkeeping performed at the start of a GC: writes the stress-log start entry, records the
// background GC state, emits trace output (TRACE_GC builds), updates counters/events and bumps
// the per-type GC counters.
void gc_heap::do_pre_gc()
{
    STRESS_LOG_GC_STACK;

#ifdef STRESS_LOG
    STRESS_LOG_GC_START(VolatileLoad(&settings.gc_index),
                        (uint32_t)settings.condemned_generation,
                        (uint32_t)settings.reason);
#endif // STRESS_LOG

#ifdef MULTIPLE_HEAPS
    gc_heap* hp = g_heaps[0];
#else
    // NOTE(review): hp is null in this configuration; the hp-> accesses below presumably resolve
    // to static members in the single-heap build - confirm.
    gc_heap* hp = 0;
#endif //MULTIPLE_HEAPS

#ifdef BACKGROUND_GC
    settings.b_state = hp->current_bgc_state;
    if (settings.concurrent)
    {
        // A new background GC starts: flip to the other last_bgc_info slot and stamp it with
        // this GC's index.
        last_bgc_info_index = !last_bgc_info_index;
        last_bgc_info[last_bgc_info_index].index = settings.gc_index;
    }
#endif //BACKGROUND_GC

#ifdef TRACE_GC
    size_t total_allocated_since_last_gc[total_oh_count];
    get_total_allocated_since_last_gc (total_allocated_since_last_gc);
    // Selects which variant of the detailed start message is used below.
    bool compatibleWithStressLog = true;
#ifdef SIMPLE_DPRINTF
    compatibleWithStressLog = false;
#endif //SIMPLE_DPRINTF
    bgc_state b_state = bgc_not_in_process;
#ifdef BACKGROUND_GC
    b_state = settings.b_state;
#endif //BACKGROUND_GC

    size_t heap_size_before = get_total_heap_size();
    uint64_t start_gc_time = GetHighPrecisionTimeStamp();
    uint64_t elapsed_since_last_gc_us = start_gc_time - last_alloc_reset_suspended_end_time;
    max_peak_heap_size = max (max_peak_heap_size, heap_size_before);

    // The per-heap-type rates below guard against a zero elapsed time to avoid dividing by zero.
    dprintf (6666, (ThreadStressLog::gcDetailedStartMsg(compatibleWithStressLog),
        VolatileLoad(&settings.gc_index),
        dd_collection_count (hp->dynamic_data_of (0)),
        settings.condemned_generation,
        (elapsed_since_last_gc_us / 1000.0),
        total_allocated_since_last_gc[gc_oh_num::soh],
        (dd_desired_allocation (hp->dynamic_data_of (0)) * n_heaps),
        dd_desired_allocation (hp->dynamic_data_of (0)),
        (elapsed_since_last_gc_us ? (total_allocated_since_last_gc[gc_oh_num::soh] / 1000.0 / elapsed_since_last_gc_us) : 0),
        total_allocated_since_last_gc[gc_oh_num::loh],
        (elapsed_since_last_gc_us ? (total_allocated_since_last_gc[gc_oh_num::loh] / 1000.0 / elapsed_since_last_gc_us) : 0),
        total_allocated_since_last_gc[gc_oh_num::poh],
        (elapsed_since_last_gc_us ? (total_allocated_since_last_gc[gc_oh_num::poh] / 1000.0 / elapsed_since_last_gc_us) : 0),
        get_str_gc_type(),
        b_state,
        n_heaps
        SIMPLE_DPRINTF_ARG(heap_size_before / 1000.0 / 1000.0)
        SIMPLE_DPRINTF_ARG(max_peak_heap_size / 1000.0 / 1000.0)));

    if (heap_hard_limit)
    {
        // With a hard limit configured, log the committed size computed now next to the
        // recorded value (current_total_committed minus the bookkeeping part).
        size_t total_heap_committed = get_total_committed_size();
        size_t total_heap_committed_recorded = current_total_committed - current_total_committed_bookkeeping;
        dprintf (1, ("(%d)GC commit BEG #%zd: %zd (recorded: %zd = %zd-%zd)",
            settings.condemned_generation,
            (size_t)settings.gc_index, total_heap_committed, total_heap_committed_recorded,
            current_total_committed, current_total_committed_bookkeeping));
    }
#endif //TRACE_GC

    GCHeap::UpdatePreGCCounters();
    fire_committed_usage_event();

#if defined(__linux__)
    // Pass the currently enabled level and keywords of both GC event providers to the EE.
    GCToEEInterface::UpdateGCEventStatus(static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Default)),
                                         static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Default)),
                                         static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Private)),
                                         static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Private)));
#endif // __linux__

    // Count this GC by type: background, blocking full, or (while settings.background_p is set)
    // an ephemeral GC of the condemned generation.
    if (settings.concurrent)
    {
#ifdef BACKGROUND_GC
        full_gc_counts[gc_type_background]++;
#endif // BACKGROUND_GC
    }
    else
    {
        if (settings.condemned_generation == max_generation)
        {
            full_gc_counts[gc_type_blocking]++;
        }
        else
        {
#ifdef BACKGROUND_GC
            if (settings.background_p)
            {
                ephemeral_fgc_counts[settings.condemned_generation]++;
            }
#endif //BACKGROUND_GC
        }
    }
}

#ifdef GC_CONFIG_DRIVEN
// Folds this GC's per-heap diagnostic data into the running per-heap totals and prints one table
// row for the GC via cprintf.
//
// - interesting_data_per_gc is accumulated into interesting_data_per_heap for non-concurrent GCs only.
// - The compact reason, expand mechanism and every set mechanism bit each bump their counter.
void gc_heap::record_interesting_info_per_heap()
{
    // datapoints are always from the last blocking GC so don't record again
    // for BGCs.
    if (!(settings.concurrent))
    {
        for (int i = 0; i < max_idp_count; i++)
        {
            interesting_data_per_heap[i] += interesting_data_per_gc[i];
        }
    }

    // A negative mechanism value is skipped: it is never used as an array index.
    int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact);
    if (compact_reason >= 0)
        (compact_reasons_per_heap[compact_reason])++;
    int expand_mechanism = get_gc_data_per_heap()->get_mechanism (gc_heap_expand);
    if (expand_mechanism >= 0)
        (expand_mechanisms_per_heap[expand_mechanism])++;

    for (int i = 0; i < max_gc_mechanism_bits_count; i++)
    {
        if (get_gc_data_per_heap()->is_mechanism_bit_set ((gc_mechanism_bit_per_heap)i))
            (interesting_mechanism_bits_per_heap[i])++;
    }

    // The size_t-sized arguments use the portable %zd length modifier (as the rest of this file
    // does): the GC index is passed as size_t so it must not be printed with plain %d, and the
    // MSVC-only %Id prefix is not understood by other C runtimes.
    //         h#  | GC  | gen | C   | EX  | NF  | BF  | ML  | DM  || PreS | PostS | Merge | Conv | Pre | Post | PrPo | PreP | PostP |
    cprintf (("%2d | %6zd | %1d | %1s | %2s | %2s | %2s | %2s | %2s || %5zd | %5zd | %5zd | %5zd | %5zd | %5zd | %5zd | %5zd | %5zd |",
            heap_number,
            (size_t)settings.gc_index,
            settings.condemned_generation,
            // TEMP - I am just doing this for wks GC 'cause I wanna see the pattern of doing C/S GCs.
            (settings.compaction ? (((compact_reason >= 0) && gc_heap_compact_reason_mandatory_p[compact_reason]) ? "M" : "W") : ""), // compaction
            ((expand_mechanism >= 0)? "X" : ""), // EX
            ((expand_mechanism == expand_reuse_normal) ? "X" : ""), // NF
            ((expand_mechanism == expand_reuse_bestfit) ? "X" : ""), // BF
            (get_gc_data_per_heap()->is_mechanism_bit_set (gc_mark_list_bit) ? "X" : ""), // ML
            (get_gc_data_per_heap()->is_mechanism_bit_set (gc_demotion_bit) ? "X" : ""), // DM
            interesting_data_per_gc[idp_pre_short],
            interesting_data_per_gc[idp_post_short],
            interesting_data_per_gc[idp_merged_pin],
            interesting_data_per_gc[idp_converted_pin],
            interesting_data_per_gc[idp_pre_pin],
            interesting_data_per_gc[idp_post_pin],
            interesting_data_per_gc[idp_pre_and_post_pin],
            interesting_data_per_gc[idp_pre_short_padded],
            interesting_data_per_gc[idp_post_short_padded]));
}

// Calls ::record_global_mechanism for every global mechanism that gc_data_global reports as set.
void gc_heap::record_global_mechanisms()
{
    for (int mechanism = 0; mechanism < max_global_mechanisms_count; mechanism++)
    {
        if (!gc_data_global.get_mechanism_p ((gc_global_mechanism_p)mechanism))
        {
            continue;
        }

        ::record_global_mechanism (mechanism);
    }
}

// Decides whether this GC should sweep (returns non-zero) or compact (returns zero).
//
// compact_p is the decision already made by the caller. With no compact_ratio configured, or with
// three or fewer GCs counted so far, that decision is kept. Otherwise it is flipped when keeping
// it would push the percentage of compacting (resp. sweeping) GCs above compact_ratio
// (resp. 100 - compact_ratio).
BOOL gc_heap::should_do_sweeping_gc (BOOL compact_p)
{
    if (!compact_ratio)
        return (!compact_p);

    size_t compacting_gcs = compact_or_sweep_gcs[0];
    size_t sweeping_gcs = compact_or_sweep_gcs[1];
    size_t counted_gcs = compacting_gcs + sweeping_gcs;

    // Not enough history yet to steer towards the ratio.
    if (counted_gcs <= 3)
        return (!compact_p);

    if (compact_p)
    {
        // Percentage of compacting GCs if this one compacts too.
        int projected_pct = (int)((compacting_gcs + 1) * 100 / (counted_gcs + 1));
        if (projected_pct > compact_ratio)
        {
            // Too many compacting GCs: sweep instead.
            return TRUE;
        }
        return FALSE;
    }

    // Percentage of sweeping GCs if this one sweeps too.
    int projected_pct = (int)((sweeping_gcs + 1) * 100 / (counted_gcs + 1));
    if (projected_pct > (100 - compact_ratio))
    {
        // Too many sweeping GCs: compact instead.
        return FALSE;
    }
    return TRUE;
}
#endif //GC_CONFIG_DRIVEN

#ifdef BGC_SERVO_TUNING
// Updates the BGC servo-tuning state for one generation after a GC.
//
//   gen_number      - generation being tuned; the tuning tables are indexed with
//                     gen_number - max_generation.
//   physical_size   - current total size of that generation.
//   virtual_fl_size - only used for NGC2 (the blocking max_generation branch below), where it is
//                     added to both the size and the free list size before computing the ratio.
void gc_heap::check_and_adjust_bgc_tuning (int gen_number, size_t physical_size, ptrdiff_t virtual_fl_size)
{
    // For LOH we need to check more often to catch things like when the size grows too much.
    int min_gen_to_check = ((gen_number == max_generation) ? (max_generation - 1) : 0);

    if (settings.condemned_generation >= min_gen_to_check)
    {
#ifdef MULTIPLE_HEAPS
        gc_heap* hp = g_heaps[0];
#else
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        size_t total_gen_size = physical_size;
        size_t total_generation_fl_size = get_total_generation_fl_size (gen_number);
        // Free list size as a percentage of the generation size.
        double gen_flr = (double)total_generation_fl_size * 100.0 / (double)total_gen_size;
        size_t gen1_index = dd_collection_count (hp->dynamic_data_of (max_generation - 1));
        size_t gen2_index = dd_collection_count (hp->dynamic_data_of (max_generation));

        bgc_tuning::tuning_calculation* current_gen_calc = &bgc_tuning::gen_calc[gen_number - max_generation];
        bgc_tuning::tuning_stats* current_gen_stats = &bgc_tuning::gen_stats[gen_number - max_generation];

        // Has the generation grown since the size recorded in last_bgc_size?
        bool gen_size_inc_p = (total_gen_size > current_gen_calc->last_bgc_size);

        // Non-max_generation GCs. (The first clause repeats the enclosing if's condition.)
        if ((settings.condemned_generation >= min_gen_to_check) &&
            (settings.condemned_generation != max_generation))
        {
            if (gen_size_inc_p)
            {
                current_gen_stats->last_gen_increase_flr = gen_flr;
                dprintf (BGC_TUNING_LOG, ("BTLp[g1: %zd, g2: %zd]: gen%d size inc %s %zd->%zd, flr: %.3f",
                        gen1_index, gen2_index, gen_number,
                        (gc_heap::background_running_p() ? "during bgc" : ""),
                        current_gen_stats->last_bgc_physical_size, total_gen_size, gen_flr));
            }

            // Decide whether to request the next BGC: only while free-list tuning is enabled but
            // has not been triggered yet, and no BGC is running or initialized.
            if (!bgc_tuning::fl_tuning_triggered)
            {
                if (bgc_tuning::enable_fl_tuning)
                {
                    if (!((gc_heap::background_running_p() || (hp->current_bgc_state == bgc_initialized))))
                    {
                        assert (settings.entry_memory_load);

                        // We start when we are 2/3 way there so we don't overshoot.
                        if ((settings.entry_memory_load >= (bgc_tuning::memory_load_goal * 2 / 3)) &&
                            (full_gc_counts[gc_type_background] >= 2))
                        {
                            bgc_tuning::next_bgc_p = true;
                            current_gen_calc->first_alloc_to_trigger = get_total_servo_alloc (gen_number);
                            dprintf (BGC_TUNING_LOG, ("BTL[g1: %zd] mem high enough: %d(goal: %d), gen%d fl alloc: %zd, trigger BGC!",
                                gen1_index, settings.entry_memory_load, bgc_tuning::memory_load_goal,
                                gen_number, current_gen_calc->first_alloc_to_trigger));
                        }
                    }
                }
            }
        }

        // Blocking (non-concurrent) max_generation GC: reset the tuning records from its results.
        if ((settings.condemned_generation == max_generation) && !(settings.concurrent))
        {
            size_t total_survived = get_total_surv_size (gen_number);
            size_t total_begin = get_total_begin_data_size (gen_number);
            // Survived size as a percentage of the begin data size.
            double current_gc_surv_rate = (double)total_survived * 100.0 / (double)total_begin;

            // calculate the adjusted gen_flr.
            double total_virtual_size = (double)physical_size + (double)virtual_fl_size;
            double total_fl_size = (double)total_generation_fl_size + (double)virtual_fl_size;
            double new_gen_flr = total_fl_size * 100.0 / total_virtual_size;

            dprintf (BGC_TUNING_LOG, ("BTL%d NGC2 size %zd->%zd, fl %zd(%.3f)->%zd(%.3f)",
                gen_number, physical_size, (size_t)total_virtual_size,
                total_generation_fl_size, gen_flr,
                (size_t)total_fl_size, new_gen_flr));

            dprintf (BGC_TUNING_LOG, ("BTL%d* %zd, %.3f, %.3f, %.3f, %.3f, %.3f, %d, %d, %d, %zd",
                                    gen_number,
                                    (size_t)total_virtual_size,
                                    0.0,
                                    0.0,
                                    new_gen_flr,
                                    current_gen_stats->last_gen_increase_flr,
                                    current_gc_surv_rate,
                                    0,
                                    0,
                                    0,
                                    current_gen_calc->alloc_to_trigger));

            bgc_tuning::gen1_index_last_bgc_end = gen1_index;

            // Record this GC's results in the "last BGC" fields and zero the allocation counters.
            current_gen_calc->last_bgc_size = total_gen_size;
            current_gen_calc->last_bgc_flr = new_gen_flr;
            current_gen_calc->last_sweep_above_p = false;
            current_gen_calc->last_bgc_end_alloc = 0;

            current_gen_stats->last_alloc_end_to_start = 0;
            current_gen_stats->last_alloc_start_to_sweep = 0;
            current_gen_stats->last_alloc_sweep_to_end = 0;
            current_gen_stats->last_bgc_fl_size = total_generation_fl_size;
            current_gen_stats->last_bgc_surv_rate = current_gc_surv_rate;
            current_gen_stats->last_gen_increase_flr = 0;
        }
    }
}
#endif //BGC_SERVO_TUNING

#ifdef BACKGROUND_GC
void gc_heap::get_and_reset_uoh_alloc_info()
{
    total_uoh_a_last_bgc = 0;

    uint64_t total_uoh_a_no_bgc = 0;
    uint64_t total_uoh_a_bgc_marking = 0;
    uint64_t total_uoh_a_bgc_planning = 0;
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        // We need to adjust size_before for UOH allocations that occurred during marking
        // before we lose the values here.
        gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap();
        // loh/poh_a_bgc_planning should be the same as they were when init_records set size_before.
        for (int i = uoh_start_generation; i < total_generation_count; i++)
        {
            current_gc_data_per_heap->gen_data[i].size_before += hp->uoh_a_bgc_marking[i - uoh_start_generation];

            total_uoh_a_no_bgc += hp->uoh_a_no_bgc[i - uoh_start_generation];
            hp->uoh_a_no_bgc[i - uoh_start_generation] = 0;

            total_uoh_a_bgc_marking += hp->uoh_a_bgc_marking[i - uoh_start_generation];
            hp->uoh_a_bgc_marking[i - uoh_start_generation] = 0;

            total_uoh_a_bgc_planning += hp->uoh_a_bgc_planning[i - uoh_start_generation];
            hp->uoh_a_bgc_planning[i - uoh_start_generation] = 0;
        }
    }
    dprintf (2, ("LOH alloc: outside bgc: %zd; bm: %zd; bp: %zd",
        total_uoh_a_no_bgc,
        total_uoh_a_bgc_marking,
        total_uoh_a_bgc_planning));

    total_uoh_a_last_bgc = total_uoh_a_no_bgc + total_uoh_a_bgc_marking + total_uoh_a_bgc_planning;
}
#endif //BACKGROUND_GC

bool gc_heap::is_pm_ratio_exceeded()
{
    size_t maxgen_frag = 0;
    size_t maxgen_size = 0;
    size_t total_heap_size = get_total_heap_size();

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        maxgen_frag += dd_fragmentation (hp->dynamic_data_of (max_generation));
        maxgen_size += hp->generation_size (max_generation);
    }

    double maxgen_ratio = (double)maxgen_size / (double)total_heap_size;
    double maxgen_frag_ratio = (double)maxgen_frag / (double)maxgen_size;
    dprintf (GTC_LOG, ("maxgen %zd(%d%% total heap), frag: %zd (%d%% maxgen)",
        maxgen_size, (int)(maxgen_ratio * 100.0),
        maxgen_frag, (int)(maxgen_frag_ratio * 100.0)));

    bool maxgen_highfrag_p = ((maxgen_ratio > 0.5) && (maxgen_frag_ratio > 0.1));

    // We need to adjust elevation here because if there's enough fragmentation it's not
    // unproductive.
    if (maxgen_highfrag_p)
    {
        settings.should_lock_elevation = FALSE;
        dprintf (GTC_LOG, ("high frag gen2, turn off elevation"));
    }

    return maxgen_highfrag_p;
}

// Rebuilds gc_info->gen_info from scratch: zeroes it, then adds up, per generation and across all
// heaps, the before/after sizes and the before/after fragmentation (free list space plus free
// object space) taken from each heap's per-heap GC history.
void gc_heap::update_recorded_gen_data (last_recorded_gc_info* gc_info)
{
    memset (gc_info->gen_info, 0, sizeof (gc_info->gen_info));

#ifdef MULTIPLE_HEAPS
    for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
    {
        gc_heap* hp = gc_heap::g_heaps[heap_index];
#else //MULTIPLE_HEAPS
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS

        gc_history_per_heap* heap_history = hp->get_gc_data_per_heap();
        for (int gen = 0; gen < total_generation_count; gen++)
        {
            gc_generation_data& per_heap = heap_history->gen_data[gen];
            recorded_generation_info& totals = gc_info->gen_info[gen];

            totals.size_before += per_heap.size_before;
            totals.fragmentation_before += per_heap.free_list_space_before + per_heap.free_obj_space_before;
            totals.size_after += per_heap.size_after;
            totals.fragmentation_after += per_heap.free_list_space_after + per_heap.free_obj_space_after;
        }
    }
}

// Bookkeeping that runs once a GC has finished:
//  - tells the EE the GC is done (GcDone / DiagGCEnd) and adds the GC to the history;
//  - when BGC servo tuning is enabled, feeds the current gen2/LOH sizes to the tuning code;
//  - fills in the last_recorded_gc_info entry for this kind of GC (background, full blocking
//    or ephemeral) - this is the data GCHeap::GetMemoryInfo hands out;
//  - at the end of a full blocking GC, decides whether provisional mode is turned on or off;
//  - fires the committed usage event / post GC counters and resets the pinned object count;
//  - grows the mark list if it overflowed during this GC.
void gc_heap::do_post_gc()
{
    // hp is only used to reach per-heap members below.
    // NOTE(review): in the single heap build hp is a null pointer, so the members accessed
    // through it are presumably static in that build - confirm against gcpriv.h.
#ifdef MULTIPLE_HEAPS
    gc_heap* hp = g_heaps[0];
#else
    gc_heap* hp = 0;
#endif //MULTIPLE_HEAPS

    GCToEEInterface::GcDone(settings.condemned_generation);

    GCToEEInterface::DiagGCEnd(VolatileLoad(&settings.gc_index),
                         (uint32_t)settings.condemned_generation,
                         (uint32_t)settings.reason,
                         !!settings.concurrent);

    add_to_history();

    // Only filled in by the servo tuning code below; otherwise it is logged as 0.
    uint32_t current_memory_load = 0;

#ifdef BGC_SERVO_TUNING
    if (bgc_tuning::enable_fl_tuning)
    {
        uint64_t current_available_physical = 0;
        size_t gen2_physical_size = 0;
        size_t gen3_physical_size = 0;
        ptrdiff_t gen2_virtual_fl_size = 0;
        ptrdiff_t gen3_virtual_fl_size = 0;
        ptrdiff_t vfl_from_kp = 0;
        ptrdiff_t vfl_from_ki = 0;

        // "gen3" in this block refers to the LOH generation.
        gen2_physical_size = get_total_generation_size (max_generation);
        gen3_physical_size = get_total_generation_size (loh_generation);

        get_memory_info (&current_memory_load, &current_available_physical);
        // The virtual free list sizes are only computed after a full blocking GC; for any
        // other GC they stay 0.
        if ((settings.condemned_generation == max_generation) && !settings.concurrent)
        {
            double gen2_size_ratio = (double)gen2_physical_size / ((double)gen2_physical_size + (double)gen3_physical_size);

            // Split the total virtual free list size between gen2 and LOH in proportion to
            // their physical sizes.
            double total_virtual_fl_size = bgc_tuning::calculate_ml_tuning (current_available_physical, true, &vfl_from_kp, &vfl_from_ki);
            gen2_virtual_fl_size = (ptrdiff_t)(total_virtual_fl_size * gen2_size_ratio);
            gen3_virtual_fl_size = (ptrdiff_t)(total_virtual_fl_size * (1.0 - gen2_size_ratio));

#ifdef SIMPLE_DPRINTF
            dprintf (BGC_TUNING_LOG, ("BTL: ml: %d (g: %d)(%s), a: %zd (g: %zd, elg: %zd+%zd=%zd, %zd+%zd=%zd), vfl: %zd=%zd+%zd(NGC2)",
                current_memory_load, bgc_tuning::memory_load_goal,
                ((current_available_physical > bgc_tuning::available_memory_goal) ? "above" : "below"),
                current_available_physical, bgc_tuning::available_memory_goal,
                gen2_physical_size, gen2_virtual_fl_size, (gen2_physical_size + gen2_virtual_fl_size),
                gen3_physical_size, gen3_virtual_fl_size, (gen3_physical_size + gen3_virtual_fl_size),
                (ptrdiff_t)total_virtual_fl_size, vfl_from_kp, vfl_from_ki));
#endif //SIMPLE_DPRINTF
        }

        check_and_adjust_bgc_tuning (max_generation, gen2_physical_size, gen2_virtual_fl_size);
        check_and_adjust_bgc_tuning (loh_generation, gen3_physical_size, gen3_virtual_fl_size);
    }
#endif //BGC_SERVO_TUNING

    dprintf (6666, (ThreadStressLog::gcDetailedEndMsg(),
        VolatileLoad (&settings.gc_index),
        dd_collection_count (hp->dynamic_data_of (0)),
        (get_total_heap_size() / 1000.0 / 1000.0),
        settings.condemned_generation,
        get_str_gc_type(),
        (settings.compaction ? "C" : "S"),
        (settings.promotion ? "P" : "S"),
        settings.entry_memory_load,
        current_memory_load));

#if defined(TRACE_GC) && defined(SIMPLE_DPRINTF)
    flush_gc_log (false);
#endif //TRACE_GC && SIMPLE_DPRINTF

    // Now record the gc info.
    // Pick the slot that matches this GC: the current BGC slot for a concurrent GC (its index
    // is expected to have been set already, see the assert), otherwise the full blocking or
    // the ephemeral slot, whose index is stamped here.
    last_recorded_gc_info* last_gc_info = 0;
#ifdef BACKGROUND_GC
    if (settings.concurrent)
    {
        last_gc_info = &last_bgc_info[last_bgc_info_index];
        assert (last_gc_info->index == settings.gc_index);
    }
    else
#endif //BACKGROUND_GC
    {
        last_gc_info = ((settings.condemned_generation == max_generation) ?
                        &last_full_blocking_gc_info : &last_ephemeral_gc_info);
        last_gc_info->index = settings.gc_index;
    }
    size_t total_heap_committed = get_total_committed_size();
    last_gc_info->total_committed = total_heap_committed;
    last_gc_info->promoted = get_total_promoted();
    last_gc_info->pinned_objects = get_total_pinned_objects();
    last_gc_info->finalize_promoted_objects = GCHeap::GetFinalizablePromotedCount();

    if (!settings.concurrent)
    {
        // If it's a normal blocking GC with its own SuspendEE, we simply get the elapsed time recorded
        // and add the time between SuspendEE start and GC start.
        dynamic_data* dd = hp->dynamic_data_of (settings.condemned_generation);
        uint64_t gc_start_ts = dd_time_clock (dd);
        size_t pause_duration = (size_t)(end_gc_time - dd_time_clock (dd));

#ifdef BACKGROUND_GC
        // The suspension time is not added when the BGC state is bgc_initialized or when this
        // is the full GC triggered by provisional mode.
        // NOTE(review): presumably because those GCs do not do their own SuspendEE - confirm.
        if ((hp->current_bgc_state != bgc_initialized) && (settings.reason != reason_pm_full_gc))
        {
            pause_duration += (size_t)(gc_start_ts - suspended_start_time);
        }
#endif //BACKGROUND_GC

        last_gc_info->pause_durations[0] = pause_duration;
        total_suspended_time += pause_duration;
        last_gc_info->pause_durations[1] = 0;
    }

    // Percentage of the time elapsed since process_start_time that was spent suspended;
    // 0 if no time has elapsed.
    uint64_t total_process_time = end_gc_time - process_start_time;
    last_gc_info->pause_percentage = (float)(total_process_time ?
        ((double)total_suspended_time / (double)total_process_time * 100.0) : 0);

    update_recorded_gen_data (last_gc_info);
    last_gc_info->heap_size = get_total_heap_size();
    last_gc_info->fragmentation = get_total_fragmentation();
    // Prefer the memory load observed at the end of the GC and fall back to the one observed
    // at the start. If both are 0, memory_load keeps whatever value the slot already had.
    if (settings.exit_memory_load != 0)
        last_gc_info->memory_load = settings.exit_memory_load;
    else if (settings.entry_memory_load != 0)
        last_gc_info->memory_load = settings.entry_memory_load;
    last_gc_info->condemned_generation = (uint8_t)settings.condemned_generation;
    last_gc_info->compaction = settings.compaction;
    last_gc_info->concurrent = settings.concurrent;

#ifdef BACKGROUND_GC
    // Remembered so that a gc_kind_any query in GCHeap::GetMemoryInfo knows to return BGC info.
    is_last_recorded_bgc = settings.concurrent;
#endif //BACKGROUND_GC

#ifdef TRACE_GC
    if (heap_hard_limit)
    {
        size_t total_heap_committed_recorded = current_total_committed - current_total_committed_bookkeeping;
        dprintf (1, ("(%d)GC commit END #%zd: %zd (recorded: %zd=%zd-%zd), heap %zd, frag: %zd",
            settings.condemned_generation,
            (size_t)settings.gc_index, total_heap_committed, total_heap_committed_recorded,
            current_total_committed, current_total_committed_bookkeeping,
            last_gc_info->heap_size, last_gc_info->fragmentation));
    }
#endif //TRACE_GC

    // Note we only do this at the end of full blocking GCs because we do not want
    // to turn on this provisional mode during the middle of a BGC.
    if ((settings.condemned_generation == max_generation) && (!settings.concurrent))
    {
        if (pm_stress_on)
        {
            // Stress mode: flip provisional mode once the number of full compacting GCs since
            // the last flip reaches a value obtained from gc_rand::get_rand.
            size_t full_compacting_gc_count = full_gc_counts[gc_type_compacting];
            if (provisional_mode_triggered)
            {
                uint64_t r = gc_rand::get_rand(10);
                if ((full_compacting_gc_count - provisional_triggered_gc_count) >= r)
                {
                    provisional_mode_triggered = false;
                    provisional_off_gc_count = full_compacting_gc_count;
                    dprintf (GTC_LOG, ("%zd NGC2s when turned on, %zd NGCs since(%zd)",
                        provisional_triggered_gc_count, (full_compacting_gc_count - provisional_triggered_gc_count),
                        num_provisional_triggered));
                }
            }
            else
            {
                uint64_t r = gc_rand::get_rand(5);
                if ((full_compacting_gc_count - provisional_off_gc_count) >= r)
                {
                    provisional_mode_triggered = true;
                    provisional_triggered_gc_count = full_compacting_gc_count;
                    num_provisional_triggered++;
                    dprintf (GTC_LOG, ("%zd NGC2s when turned off, %zd NGCs since(%zd)",
                        provisional_off_gc_count, (full_compacting_gc_count - provisional_off_gc_count),
                        num_provisional_triggered));
                }
            }
        }
        else
        {
            // Normal mode: provisional mode is on only while the entry memory load is at or
            // above high_memory_load_th AND is_pm_ratio_exceeded() holds.
            if (provisional_mode_triggered)
            {
                if ((settings.entry_memory_load < high_memory_load_th) ||
                    !is_pm_ratio_exceeded())
                {
                    dprintf (GTC_LOG, ("turning off PM"));
                    provisional_mode_triggered = false;
                }
            }
            else if ((settings.entry_memory_load >= high_memory_load_th) && is_pm_ratio_exceeded())
            {
                dprintf (GTC_LOG, ("highmem && highfrag - turning on PM"));
                provisional_mode_triggered = true;
                num_provisional_triggered++;
            }
        }
    }

    if (!settings.concurrent)
    {
        fire_committed_usage_event ();
    }
    GCHeap::UpdatePostGCCounters();

    // We need to reinitialize the number of pinned objects because it's used in the GCHeapStats
    // event fired in GCHeap::UpdatePostGCCounters. For BGC, we will get that event following an
    // FGC's GCHeapStats and we wouldn't want that FGC's info to carry over to the BGC.
    reinit_pinned_objects();

#ifdef STRESS_LOG
    STRESS_LOG_GC_END(VolatileLoad(&settings.gc_index),
                      (uint32_t)settings.condemned_generation,
                      (uint32_t)settings.reason);
#endif // STRESS_LOG

#ifdef GC_CONFIG_DRIVEN
    // Slot 0 counts compacting GCs, slot 1 counts sweeping GCs (blocking GCs only).
    if (!settings.concurrent)
    {
        if (settings.compaction)
            (compact_or_sweep_gcs[0])++;
        else
            (compact_or_sweep_gcs[1])++;
    }

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < n_heaps; i++)
        g_heaps[i]->record_interesting_info_per_heap();
#else
    record_interesting_info_per_heap();
#endif //MULTIPLE_HEAPS

    record_global_mechanisms();
#endif //GC_CONFIG_DRIVEN

    // If the mark list overflowed during this GC, grow it and clear the flag.
    if (mark_list_overflow)
    {
        grow_mark_list();
        mark_list_overflow = false;
    }
}

// Returns the current GC index (settings.gc_index), read with VolatileLoad and converted
// to unsigned int.
unsigned GCHeap::GetGcCount()
{
    return static_cast<unsigned int>(VolatileLoad(&pGenGCHeap->settings.gc_index));
}

// Triggers a GC of generation `gen` for the given `reason` on behalf of the calling thread.
//
// Returns the collection count of generation `gen` observed when the function exits. If the
// collection count changed while this thread was waiting for gc_lock, another GC has already
// happened; in that case no new GC is triggered and the new count is returned right away.
//
// Single heap build: this thread suspends the EE, runs the GC itself (unless
// should_proceed_with_gc() says not to), restarts the EE and releases gc_lock.
// Multiple heaps build: this thread signals ee_suspend_event and waits for the GC to be done.
// NOTE(review): in the multiple heaps build gc_lock is not released in this function -
// presumably the GC threads release it; confirm.
size_t
GCHeap::GarbageCollectGeneration (unsigned int gen, gc_reason reason)
{
    dprintf (2, ("triggered a GC!"));

#ifdef COMMITTED_BYTES_SHADOW
    // This stress the refresh memory limit work by
    // refreshing all the time when a GC happens.
    GCHeap::RefreshMemoryLimit();
#endif //COMMITTED_BYTES_SHADOW

#ifdef MULTIPLE_HEAPS
    gc_heap* hpt = gc_heap::g_heaps[0];
#else
    gc_heap* hpt = 0;
#endif //MULTIPLE_HEAPS
    bool cooperative_mode = true;
    dynamic_data* dd = hpt->dynamic_data_of (gen);
    // Snapshot the collection count before taking the lock so we can tell afterwards whether
    // a GC happened while we were waiting.
    size_t localCount = dd_collection_count (dd);

    enter_spin_lock (&gc_heap::gc_lock);
    dprintf (SPINLOCK_LOG, ("GC Egc"));
    ASSERT_HOLDING_SPIN_LOCK(&gc_heap::gc_lock);

    //don't trigger another GC if one was already in progress
    //while waiting for the lock
    {
        size_t col_count = dd_collection_count (dd);

        if (localCount != col_count)
        {
#ifdef SYNCHRONIZATION_STATS
            gc_lock_contended++;
#endif //SYNCHRONIZATION_STATS
            dprintf (SPINLOCK_LOG, ("no need GC Lgc"));
            leave_spin_lock (&gc_heap::gc_lock);

            // We don't need to release msl here 'cause this means a GC
            // has happened and would have release all msl's.
            return col_count;
         }
    }

    // Low memory status is set for the two low memory reasons, and always when the latency
    // level is latency_level_memory_footprint.
    gc_heap::g_low_memory_status = (reason == reason_lowmemory) ||
                                    (reason == reason_lowmemory_blocking) ||
                                    (gc_heap::latency_level == latency_level_memory_footprint);

    gc_trigger_reason = reason;

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap::g_heaps[i]->reset_gc_done();
    }
#else
    gc_heap::reset_gc_done();
#endif //MULTIPLE_HEAPS

    gc_heap::gc_started = TRUE;

    {
        init_sync_log_stats();

#ifndef MULTIPLE_HEAPS
        // Single heap: suspend the EE from this thread. The thread is switched to preemptive
        // mode around the suspension and restored to its previous mode afterwards.
        cooperative_mode = gc_heap::enable_preemptive ();

        dprintf (2, ("Suspending EE"));
        gc_heap::suspended_start_time = GetHighPrecisionTimeStamp();
        BEGIN_TIMING(suspend_ee_during_log);
        GCToEEInterface::SuspendEE(SUSPEND_FOR_GC);
        END_TIMING(suspend_ee_during_log);
        gc_heap::proceed_with_gc_p = gc_heap::should_proceed_with_gc();
        gc_heap::disable_preemptive (cooperative_mode);
        // If we are not proceeding, only the collection counts are updated - no GC is done.
        if (gc_heap::proceed_with_gc_p)
            pGenGCHeap->settings.init_mechanisms();
        else
            gc_heap::update_collection_counts_for_no_gc();

#endif //!MULTIPLE_HEAPS
    }

    unsigned int condemned_generation_number = gen;

    // We want to get a stack from the user thread that triggered the GC
    // instead of on the GC thread which is the case for Server GC.
    // But we are doing it for Workstation GC as well to be uniform.
    FIRE_EVENT(GCTriggered, static_cast<uint32_t>(reason));

#ifdef MULTIPLE_HEAPS
    // Multiple heaps: publish the requested generation, signal ee_suspend_event and wait
    // (in preemptive mode) until the GC is done. GcCondemnedGeneration is read back
    // afterwards.
    GcCondemnedGeneration = condemned_generation_number;

    cooperative_mode = gc_heap::enable_preemptive ();

    BEGIN_TIMING(gc_during_log);
    gc_heap::ee_suspend_event.Set();
    gc_heap::wait_for_gc_done();
    END_TIMING(gc_during_log);

    gc_heap::disable_preemptive (cooperative_mode);

    condemned_generation_number = GcCondemnedGeneration;
#else
    if (gc_heap::proceed_with_gc_p)
    {
        BEGIN_TIMING(gc_during_log);
        pGenGCHeap->garbage_collect (condemned_generation_number);
        // The GC we just did may have requested a follow-up full GC for provisional mode.
        if (gc_heap::pm_trigger_full_gc)
        {
            pGenGCHeap->garbage_collect_pm_full_gc();
        }
        END_TIMING(gc_during_log);
    }
#endif //MULTIPLE_HEAPS

#ifndef MULTIPLE_HEAPS
    // Restart the EE, unless (with BACKGROUND_GC) dont_restart_ee_p says it must stay
    // suspended.
#ifdef BACKGROUND_GC
    if (!gc_heap::dont_restart_ee_p)
#endif //BACKGROUND_GC
    {
#ifdef BACKGROUND_GC
        gc_heap::add_bgc_pause_duration_0();
#endif //BACKGROUND_GC
        BEGIN_TIMING(restart_ee_during_log);
        GCToEEInterface::RestartEE(TRUE);
        END_TIMING(restart_ee_during_log);
    }
#endif //!MULTIPLE_HEAPS

#ifndef MULTIPLE_HEAPS
    // Single heap: mark the GC as done and release the lock taken at the top.
    process_sync_log_stats();
    gc_heap::gc_started = FALSE;
    gc_heap::set_gc_done();
    dprintf (SPINLOCK_LOG, ("GC Lgc"));
    leave_spin_lock (&gc_heap::gc_lock);
#endif //!MULTIPLE_HEAPS

#ifdef FEATURE_PREMORTEM_FINALIZATION
    // Passes true only when the GC was not concurrent and it found finalizers.
    GCToEEInterface::EnableFinalization(!pGenGCHeap->settings.concurrent && pGenGCHeap->settings.found_finalizers);
#endif // FEATURE_PREMORTEM_FINALIZATION

    return dd_collection_count (dd);
}

// Returns the sum of ApproxTotalBytesInUse() over all heaps (or of the single heap),
// computed while holding gc_lock.
size_t GCHeap::GetTotalBytesInUse ()
{
    size_t bytes_in_use = 0;

    // Holding the lock keeps gc_heap::n_heaps from changing while we walk the heaps.
    enter_spin_lock (&pGenGCHeap->gc_lock);

#ifdef MULTIPLE_HEAPS
    // Add up what each heap reports.
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        GCHeap* cur_vm_heap = gc_heap::g_heaps [heap_idx]->vm_heap;
        bytes_in_use += cur_vm_heap->ApproxTotalBytesInUse();
    }
#else
    bytes_in_use = ApproxTotalBytesInUse();
#endif //MULTIPLE_HEAPS

    leave_spin_lock (&pGenGCHeap->gc_lock);

    return bytes_in_use;
}

// Returns the total allocated bytes: the SOH and UOH allocation counters
// (total_alloc_bytes_soh / total_alloc_bytes_uoh) summed over every heap.
uint64_t GCHeap::GetTotalAllocatedBytes()
{
#ifdef MULTIPLE_HEAPS
    uint64_t allocated_so_far = 0;
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        gc_heap* cur_heap = gc_heap::g_heaps[heap_idx];
        allocated_so_far += cur_heap->total_alloc_bytes_soh;
        allocated_so_far += cur_heap->total_alloc_bytes_uoh;
    }
    return allocated_so_far;
#else
    return (pGenGCHeap->total_alloc_bytes_soh + pGenGCHeap->total_alloc_bytes_uoh);
#endif //MULTIPLE_HEAPS
}

// Returns a collection count for `generation`.
//
// generation        - generation to query; values outside [0, max_generation] yield 0.
// get_bgc_fgc_count - when 0, returns the collection count recorded in the generation's
//                     dynamic data. When non-zero, returns (with BACKGROUND_GC) the number of
//                     background GCs for max_generation, or ephemeral_fgc_counts[generation]
//                     for a younger generation; without BACKGROUND_GC it returns 0.
int GCHeap::CollectionCount (int generation, int get_bgc_fgc_count)
{
    // Validate once, before any path uses `generation` as an index. Previously only the
    // non-BGC path checked the upper bound, and nothing checked for a negative value, so an
    // out-of-range generation could index ephemeral_fgc_counts / the dynamic data out of
    // bounds.
    if ((generation < 0) || (generation > max_generation))
        return 0;

    if (get_bgc_fgc_count != 0)
    {
#ifdef BACKGROUND_GC
        if (generation == max_generation)
        {
            return (int)(gc_heap::full_gc_counts[gc_type_background]);
        }
        else
        {
            return (int)(gc_heap::ephemeral_fgc_counts[generation]);
        }
#else
        return 0;
#endif //BACKGROUND_GC
    }

#ifdef MULTIPLE_HEAPS
    gc_heap* hp = gc_heap::g_heaps [0];
#else  //MULTIPLE_HEAPS
    gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
    return (int)dd_collection_count (hp->dynamic_data_of (generation));
}

// Returns an approximation of the bytes in use on this heap: for each generation, its size
// minus its free list space and free object space.
//
// small_heap_only - when TRUE, the UOH generations (uoh_start_generation and up) are left
//                   out of the total.
//
// No lock is taken here; the result is computed from whatever the fields hold at the time
// they are read.
size_t GCHeap::ApproxTotalBytesInUse(BOOL small_heap_only)
{
    size_t totsize = 0;

    // For gen0 it's a bit complicated because we are currently allocating in it. We get the fragmentation first
    // just so that we don't give a negative number for the resulting size.
    generation* gen = pGenGCHeap->generation_of (0);
    size_t gen0_frag = generation_free_list_space (gen) + generation_free_obj_space (gen);
    uint8_t* current_alloc_allocated = pGenGCHeap->alloc_allocated;
    heap_segment* current_eph_seg = pGenGCHeap->ephemeral_heap_segment;
    size_t gen0_size = 0;
#ifdef USE_REGIONS
    // Walk gen0's regions up to and including the ephemeral one. A region that contains
    // alloc_allocated is counted up to that pointer; any other region is counted up to its
    // own allocated pointer.
    heap_segment* gen0_seg = generation_start_segment (gen);
    while (gen0_seg)
    {
        uint8_t* end = in_range_for_segment (current_alloc_allocated, gen0_seg) ?
                       current_alloc_allocated : heap_segment_allocated (gen0_seg);
        gen0_size += end - heap_segment_mem (gen0_seg);

        if (gen0_seg == current_eph_seg)
        {
            break;
        }

        gen0_seg = heap_segment_next (gen0_seg);
    }
#else //USE_REGIONS
    // For segments ephemeral seg does not change.
    gen0_size = current_alloc_allocated - heap_segment_mem (current_eph_seg);
#endif //USE_REGIONS

    totsize = gen0_size - gen0_frag;

    // Last SOH generation handled by the loop below; lowered when gen2 is accounted for
    // separately in the BGC case.
    int stop_gen_index = max_generation;

#ifdef BACKGROUND_GC
    if (gc_heap::current_c_gc_state == c_gc_state_planning)
    {
        // During BGC sweep since we can be deleting SOH segments, we avoid walking the segment
        // list.
        // Note this overwrites (rather than adds to) the gen0 figure computed above, using
        // the SOH size recorded at the end of the background mark phase instead.
        generation* oldest_gen = pGenGCHeap->generation_of (max_generation);
        totsize = pGenGCHeap->background_soh_size_end_mark - generation_free_list_space (oldest_gen) - generation_free_obj_space (oldest_gen);
        stop_gen_index--;
    }
#endif //BACKGROUND_GC

    // Adds generation (max_generation - 1), and max_generation as well unless it was excluded
    // above.
    for (int i = (max_generation - 1); i <= stop_gen_index; i++)
    {
        generation* gen = pGenGCHeap->generation_of (i);
        totsize += pGenGCHeap->generation_size (i) - generation_free_list_space (gen) - generation_free_obj_space (gen);
    }

    if (!small_heap_only)
    {
        for (int i = uoh_start_generation; i < total_generation_count; i++)
        {
            generation* gen = pGenGCHeap->generation_of (i);
            totsize += pGenGCHeap->generation_size (i) - generation_free_list_space (gen) - generation_free_obj_space (gen);
        }
    }

    return totsize;
}

#ifdef MULTIPLE_HEAPS
// Gives the allocation context a heap: the heap chosen by heap_select::select_heap becomes
// its alloc heap, the home heap is set to that alloc heap, and the context's handle info is
// initialized.
void GCHeap::AssignHeap (alloc_context* acontext)
{
    // Assign heap based on processor
    GCHeap* selected_heap = GetHeap(heap_select::select_heap(acontext));
    acontext->set_alloc_heap(selected_heap);
    acontext->set_home_heap(acontext->get_alloc_heap());
    acontext->init_handle_info();
}

// Returns the GCHeap (vm_heap) of heap number n. n must be below gc_heap::n_heaps.
GCHeap* GCHeap::GetHeap (int n)
{
    assert (n < gc_heap::n_heaps);
    gc_heap* requested_heap = gc_heap::g_heaps[n];
    return requested_heap->vm_heap;
}
#endif //MULTIPLE_HEAPS

// Tells whether the allocation context's home heap is the heap with number thread_number.
// A context that has no home heap yet is treated as belonging to heap 0.
// With a single heap the answer is always true.
bool GCHeap::IsThreadUsingAllocationContextHeap(gc_alloc_context* context, int thread_number)
{
    alloc_context* acontext = static_cast<alloc_context*>(context);
#ifdef MULTIPLE_HEAPS
    // the thread / heap number must be in range
    assert (thread_number < gc_heap::n_heaps);
    assert ((acontext->get_home_heap() == 0) ||
            (acontext->get_home_heap()->pGenGCHeap->heap_number < gc_heap::n_heaps));

    if (acontext->get_home_heap() == GetHeap(thread_number))
    {
        return true;
    }

    // No match: the only other acceptable case is "no home heap yet" on heap 0.
    return ((acontext->get_home_heap() == 0) && (thread_number == 0));
#else
    UNREFERENCED_PARAMETER(acontext);
    UNREFERENCED_PARAMETER(thread_number);
    return true;
#endif //MULTIPLE_HEAPS
}

// Returns the number of GC heaps: gc_heap::n_heaps when built with MULTIPLE_HEAPS,
// otherwise 1.
int GCHeap::GetNumberOfHeaps ()
{
    int heap_count = 1;
#ifdef MULTIPLE_HEAPS
    heap_count = gc_heap::n_heaps;
#endif //MULTIPLE_HEAPS
    return heap_count;
}

/*
  Returns the heap number of the home heap of the current allocation context (as returned by
  GCToEEInterface::GetAllocContext), or 0 when there is no allocation context, no home heap,
  or only a single heap.

  in this way we spend extra time cycling through all the heaps while create the handle
  it ought to be changed by keeping alloc_context.home_heap as number (equals heap_number)
*/
int GCHeap::GetHomeHeapNumber ()
{
#ifdef MULTIPLE_HEAPS
    gc_alloc_context* ctx = GCToEEInterface::GetAllocContext();
    if (ctx)
    {
        GCHeap* home = static_cast<alloc_context*>(ctx)->get_home_heap();
        if (home)
        {
            return home->pGenGCHeap->heap_number;
        }
    }

    return 0;
#else
    return 0;
#endif //MULTIPLE_HEAPS
}

// Returns gc_heap::settings.condemned_generation converted to unsigned int.
unsigned int GCHeap::GetCondemnedGeneration()
{
    return static_cast<unsigned int>(gc_heap::settings.condemned_generation);
}

// Copies the recorded info of the last GC of the requested kind into the out parameters.
//
// kind - a gc_kind value: gc_kind_ephemeral, gc_kind_full_blocking, gc_kind_background
//        (with BACKGROUND_GC) or gc_kind_any. For gc_kind_any the last recorded BGC is used
//        if the last recorded GC was a BGC; otherwise whichever of the ephemeral / full
//        blocking records has the higher index.
//
// genInfoRaw   - receives 4 values per generation (size before, fragmentation before,
//                size after, fragmentation after), so it must have room for
//                4 * total_generation_count entries.
// pauseInfoRaw - receives 2 pause durations, each multiplied by 10.
// pauseTimePct - receives pause_percentage * 100 truncated to an integer; pause_percentage
//                is stored as a percentage by do_post_gc, so this is in hundredths of a
//                percent.
//
// Every out pointer is written unconditionally, so none of them may be null.
void GCHeap::GetMemoryInfo(uint64_t* highMemLoadThresholdBytes,
                           uint64_t* totalAvailableMemoryBytes,
                           uint64_t* lastRecordedMemLoadBytes,
                           uint64_t* lastRecordedHeapSizeBytes,
                           uint64_t* lastRecordedFragmentationBytes,
                           uint64_t* totalCommittedBytes,
                           uint64_t* promotedBytes,
                           uint64_t* pinnedObjectCount,
                           uint64_t* finalizationPendingCount,
                           uint64_t* index,
                           uint32_t* generation,
                           uint32_t* pauseTimePct,
                           bool* isCompaction,
                           bool* isConcurrent,
                           uint64_t* genInfoRaw,
                           uint64_t* pauseInfoRaw,
                           int kind)
{
    last_recorded_gc_info* last_gc_info = 0;

    // Select the record that matches the requested kind.
    if ((gc_kind)kind == gc_kind_ephemeral)
    {
        last_gc_info = &gc_heap::last_ephemeral_gc_info;
    }
    else if ((gc_kind)kind == gc_kind_full_blocking)
    {
        last_gc_info = &gc_heap::last_full_blocking_gc_info;
    }
#ifdef BACKGROUND_GC
    else if ((gc_kind)kind == gc_kind_background)
    {
        last_gc_info = gc_heap::get_completed_bgc_info();
    }
#endif //BACKGROUND_GC
    else
    {
        assert ((gc_kind)kind == gc_kind_any);
#ifdef BACKGROUND_GC
        if (gc_heap::is_last_recorded_bgc)
        {
            last_gc_info = gc_heap::get_completed_bgc_info();
        }
        else
#endif //BACKGROUND_GC
        {
            last_gc_info = ((gc_heap::last_ephemeral_gc_info.index > gc_heap::last_full_blocking_gc_info.index) ?
                &gc_heap::last_ephemeral_gc_info : &gc_heap::last_full_blocking_gc_info);
        }
    }

    // The memory loads are percentages; they are converted to bytes relative to the total
    // physical memory.
    *highMemLoadThresholdBytes = (uint64_t) (((double)(gc_heap::high_memory_load_th)) / 100 * gc_heap::total_physical_mem);
    // The hard limit, when one is set, takes the place of the total physical memory.
    *totalAvailableMemoryBytes = gc_heap::heap_hard_limit != 0 ? gc_heap::heap_hard_limit : gc_heap::total_physical_mem;
    *lastRecordedMemLoadBytes = (uint64_t) (((double)(last_gc_info->memory_load)) / 100 * gc_heap::total_physical_mem);
    *lastRecordedHeapSizeBytes = last_gc_info->heap_size;
    *lastRecordedFragmentationBytes = last_gc_info->fragmentation;
    *totalCommittedBytes = last_gc_info->total_committed;
    *promotedBytes = last_gc_info->promoted;
    *pinnedObjectCount = last_gc_info->pinned_objects;
    *finalizationPendingCount = last_gc_info->finalize_promoted_objects;
    *index = last_gc_info->index;
    *generation = last_gc_info->condemned_generation;
    *pauseTimePct = (int)(last_gc_info->pause_percentage * 100);
    *isCompaction = last_gc_info->compaction;
    *isConcurrent = last_gc_info->concurrent;
    // Flatten the per generation info: 4 consecutive entries per generation.
    int genInfoIndex = 0;
    for (int i = 0; i < total_generation_count; i++)
    {
        genInfoRaw[genInfoIndex++] = last_gc_info->gen_info[i].size_before;
        genInfoRaw[genInfoIndex++] = last_gc_info->gen_info[i].fragmentation_before;
        genInfoRaw[genInfoIndex++] = last_gc_info->gen_info[i].size_after;
        genInfoRaw[genInfoIndex++] = last_gc_info->gen_info[i].fragmentation_after;
    }
    for (int i = 0; i < 2; i++)
    {
        // convert it to 100-ns units that TimeSpan needs.
        // NOTE(review): the factor of 10 implies pause_durations is in microseconds - confirm.
        pauseInfoRaw[i] = (uint64_t)(last_gc_info->pause_durations[i]) * 10;
    }

#ifdef _DEBUG
    // Sanity check that the record matches the requested kind. Skipped while index is 0.
    // NOTE(review): index 0 presumably means no GC of this kind has been recorded yet - confirm.
    if (VolatileLoadWithoutBarrier (&last_gc_info->index) != 0)
    {
        if ((gc_kind)kind == gc_kind_ephemeral)
        {
            assert (last_gc_info->condemned_generation < max_generation);
        }
        else if ((gc_kind)kind == gc_kind_full_blocking)
        {
            assert (last_gc_info->condemned_generation == max_generation);
            assert (last_gc_info->concurrent == false);
        }
#ifdef BACKGROUND_GC
        else if ((gc_kind)kind == gc_kind_background)
        {
            assert (last_gc_info->condemned_generation == max_generation);
            assert (last_gc_info->concurrent == true);
        }
#endif //BACKGROUND_GC
    }
#endif //_DEBUG
}

// Returns gc_heap::total_suspended_time multiplied by 10.
// NOTE(review): GetMemoryInfo applies the same factor to convert pause durations to 100-ns
// units, so this is presumably the same conversion - confirm.
int64_t GCHeap::GetTotalPauseDuration()
{
    return static_cast<int64_t>(gc_heap::total_suspended_time * 10);
}

// Forwards to GCConfig::EnumerateConfigurationValues with the caller's context and callback,
// both passed through unchanged.
void GCHeap::EnumerateConfigurationValues(void* context, ConfigurationValueFunc configurationValueFunc)
{
    GCConfig::EnumerateConfigurationValues(
        context,
        configurationValueFunc);
}

// Returns the memory load from the current GC settings: the exit memory load if it is
// non-zero, else the entry memory load if that is non-zero, else 0.
uint32_t GCHeap::GetMemoryLoad()
{
    if (gc_heap::settings.exit_memory_load != 0)
    {
        return gc_heap::settings.exit_memory_load;
    }

    if (gc_heap::settings.entry_memory_load != 0)
    {
        return gc_heap::settings.entry_memory_load;
    }

    return 0;
}

// Returns the current pause mode (settings.pause_mode) as an int.
int GCHeap::GetGcLatencyMode()
{
    return static_cast<int>(pGenGCHeap->settings.pause_mode);
}

// Requests a new pause (latency) mode.
//
// Returns set_pause_mode_no_gc, without changing anything, if the current mode is
// pause_no_gc; otherwise returns set_pause_mode_success. Success is returned even when the
// requested mode was not applied:
//  - pause_low_latency is only applied in the single heap build;
//  - pause_sustained_low_latency is only applied when built with BACKGROUND_GC and
//    concurrent GC can be used.
int GCHeap::SetGcLatencyMode (int newLatencyMode)
{
    if (gc_heap::settings.pause_mode == pause_no_gc)
    {
        return static_cast<int>(set_pause_mode_no_gc);
    }

    gc_pause_mode new_mode = static_cast<gc_pause_mode>(newLatencyMode);

    switch (new_mode)
    {
    case pause_low_latency:
#ifndef MULTIPLE_HEAPS
        pGenGCHeap->settings.pause_mode = new_mode;
#endif //!MULTIPLE_HEAPS
        break;

    case pause_sustained_low_latency:
#ifdef BACKGROUND_GC
        if (gc_heap::gc_can_use_concurrent)
        {
            pGenGCHeap->settings.pause_mode = new_mode;
        }
#endif //BACKGROUND_GC
        break;

    default:
        pGenGCHeap->settings.pause_mode = new_mode;
        break;
    }

#ifdef BACKGROUND_GC
    // If we get here while a background GC is running, it means we are doing an FGC. If the
    // pause mode was altered we will need to save it in the BGC settings.
    if (gc_heap::background_running_p() &&
        (gc_heap::saved_bgc_settings.pause_mode != new_mode))
    {
        gc_heap::saved_bgc_settings.pause_mode = new_mode;
    }
#endif //BACKGROUND_GC

    return static_cast<int>(set_pause_mode_success);
}

// Returns the current LOH compaction mode, or loh_compaction_default when the build has no
// FEATURE_LOH_COMPACTION.
int GCHeap::GetLOHCompactionMode()
{
#ifndef FEATURE_LOH_COMPACTION
    return loh_compaction_default;
#else
    return pGenGCHeap->loh_compaction_mode;
#endif //FEATURE_LOH_COMPACTION
}

// Stores the requested LOH compaction mode. Does nothing when the build has no
// FEATURE_LOH_COMPACTION. The value is not validated.
void GCHeap::SetLOHCompactionMode (int newLOHCompactionMode)
{
#ifdef FEATURE_LOH_COMPACTION
    pGenGCHeap->loh_compaction_mode = static_cast<gc_loh_compaction_mode>(newLOHCompactionMode);
#endif //FEATURE_LOH_COMPACTION
}

// Registers for full GC notification.
//
// gen2Percentage - stored as fgn_maxgen_percent on every heap.
// lohPercentage  - stored as fgn_loh_percent.
//
// Also snapshots each heap's current gen0 new allocation into fgn_last_alloc, resets both
// full GC events and clears full_gc_approach_event_set. Always returns true.
bool GCHeap::RegisterForFullGCNotification(uint32_t gen2Percentage,
                                           uint32_t lohPercentage)
{
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        gc_heap* cur_heap = gc_heap::g_heaps [heap_idx];
        cur_heap->fgn_last_alloc = dd_new_allocation (cur_heap->dynamic_data_of (0));
        cur_heap->fgn_maxgen_percent = gen2Percentage;
    }
#else //MULTIPLE_HEAPS
    pGenGCHeap->fgn_last_alloc = dd_new_allocation (pGenGCHeap->dynamic_data_of (0));
    pGenGCHeap->fgn_maxgen_percent = gen2Percentage;
#endif //MULTIPLE_HEAPS

    // Start from a clean state: neither event signaled, approach not yet reported.
    pGenGCHeap->full_gc_approach_event.Reset();
    pGenGCHeap->full_gc_end_event.Reset();
    pGenGCHeap->full_gc_approach_event_set = false;

    pGenGCHeap->fgn_loh_percent = lohPercentage;

    return true;
}

// Cancels full GC notification: zeroes the gen2 and LOH percentages and sets both full GC
// events. Always returns true.
// NOTE(review): setting the events presumably releases threads blocked in
// WaitForFullGCApproach / WaitForFullGCComplete - confirm against full_gc_wait.
bool GCHeap::CancelFullGCNotification()
{
#ifdef MULTIPLE_HEAPS
    for (int heap_idx = 0; heap_idx < gc_heap::n_heaps; heap_idx++)
    {
        gc_heap::g_heaps [heap_idx]->fgn_maxgen_percent = 0;
    }
#else //MULTIPLE_HEAPS
    pGenGCHeap->fgn_maxgen_percent = 0;
#endif //MULTIPLE_HEAPS

    pGenGCHeap->fgn_loh_percent = 0;
    pGenGCHeap->full_gc_approach_event.Set();
    pGenGCHeap->full_gc_end_event.Set();

    return true;
}

// Waits on full_gc_approach_event for up to millisecondsTimeout and returns the result of
// gc_heap::full_gc_wait.
int GCHeap::WaitForFullGCApproach(int millisecondsTimeout)
{
    dprintf (2, ("WFGA: Begin wait"));
    const int wait_status = gc_heap::full_gc_wait (&(pGenGCHeap->full_gc_approach_event), millisecondsTimeout);
    dprintf (2, ("WFGA: End wait"));

    return wait_status;
}

// Waits on full_gc_end_event for up to millisecondsTimeout and returns the result of
// gc_heap::full_gc_wait.
int GCHeap::WaitForFullGCComplete(int millisecondsTimeout)
{
    dprintf (2, ("WFGE: Begin wait"));
    const int wait_status = gc_heap::full_gc_wait (&(pGenGCHeap->full_gc_end_event), millisecondsTimeout);
    dprintf (2, ("WFGE: End wait"));

    return wait_status;
}

// Tries to enter a no GC region.
//
// Prepares for the region with the given sizes; if that succeeds, does a GC of
// max_generation and takes the status from get_start_no_gc_region_status(). Any status
// other than start_no_gc_success goes through handle_failure_for_no_gc().
// Returns the final start_no_gc_region_status as an int. A NoGCRegionLockHolder is held for
// the duration of the call.
int GCHeap::StartNoGCRegion(uint64_t totalSize, bool lohSizeKnown, uint64_t lohSize, bool disallowFullBlockingGC)
{
    NoGCRegionLockHolder lh;

    dprintf (1, ("begin no gc called"));
    start_no_gc_region_status region_status =
        gc_heap::prepare_for_no_gc_region (totalSize, lohSizeKnown, lohSize, disallowFullBlockingGC);

    if (region_status == start_no_gc_success)
    {
        // Preparation worked; the GC decides whether the region can actually start.
        GarbageCollect (max_generation);
        region_status = gc_heap::get_start_no_gc_region_status();
    }

    if (region_status != start_no_gc_success)
    {
        gc_heap::handle_failure_for_no_gc();
    }

    return static_cast<int>(region_status);
}

// Ends the no GC region while holding a NoGCRegionLockHolder, and returns the status from
// gc_heap::end_no_gc_region() as an int.
int GCHeap::EndNoGCRegion()
{
    NoGCRegionLockHolder lh;
    return static_cast<int>(gc_heap::end_no_gc_region());
}

// With BACKGROUND_GC: looks up the heap that owns Obj, reports the UOH allocation of Obj as
// done on that heap's bgc_alloc_lock and untracks the UOH allocation. Without BACKGROUND_GC
// this does nothing.
void GCHeap::PublishObject (uint8_t* Obj)
{
#ifdef BACKGROUND_GC
    gc_heap* owning_heap = gc_heap::heap_of (Obj);
    owning_heap->bgc_alloc_lock->uoh_alloc_done (Obj);
    owning_heap->bgc_untrack_uoh_alloc();
#endif //BACKGROUND_GC
}

// Get the segment size to use, making sure it conforms.
// With regions this is the (large) region alignment of the global region allocator;
// with segments it is min_uoh_segment_size for a large segment and soh_segment_size otherwise.
size_t GCHeap::GetValidSegmentSize(bool large_seg)
{
#ifdef USE_REGIONS
    if (large_seg)
    {
        return global_region_allocator.get_large_region_alignment();
    }

    return global_region_allocator.get_region_alignment();
#else
    if (large_seg)
    {
        return gc_heap::min_uoh_segment_size;
    }

    return gc_heap::soh_segment_size;
#endif //USE_REGIONS
}

// Computes the minimum gen0 size and returns it aligned with Align().
//
// If the GCgen0size config value is non-zero and passes IsValidGen0MaxSize, it is used
// (capped at half the SOH segment size). Otherwise the size is derived from the per logical
// CPU cache size:
//   1. start from the cache size (Server GC) or 4/5 of it (Workstation GC), at least 256KB;
//   2. with DYNAMIC_HEAP_COUNT and adaptation to application sizes, cap it at 4MB;
//   3. halve it while the total over all heaps exceeds 1/6 of physical memory, but never go
//      below the true cache size;
//   4. cap it at half the SOH segment size, and at 1/8 of it when a hard limit is set;
//   5. scale the result to 5/8.
//
// Side effects: sets llc_size when the size is derived from the cache size, and (with
// FEATURE_EVENT_TRACE) sets gen0_min_budget_from_config when the config value is used.
size_t gc_heap::get_gen0_min_size()
{
    size_t gen0size = static_cast<size_t>(GCConfig::GetGen0Size());
    bool is_config_invalid = ((gen0size == 0) || !g_theGCHeap->IsValidGen0MaxSize(gen0size));
    if (is_config_invalid)
    {
#ifdef SERVER_GC
        // performance data seems to indicate halving the size results
        // in optimal perf.  Ask for adjusted gen0 size.
        gen0size = max(GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE), (size_t)(256*1024));

        // if gen0 size is too large given the available memory, reduce it.
        // Get true cache size, as we don't want to reduce below this.
        size_t trueSize = max(GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE), (size_t)(256*1024));
        dprintf (1, ("cache: %zd-%zd",
            GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE),
            GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)));

        int n_heaps = gc_heap::n_heaps;
#else //SERVER_GC
        size_t trueSize = GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE);
        gen0size = max((4*trueSize/5),(size_t)(256*1024));
        trueSize = max(trueSize, (size_t)(256*1024));
        int n_heaps = 1;
#endif //SERVER_GC

        // Remember the (floored) true cache size.
        llc_size = trueSize;

#ifdef DYNAMIC_HEAP_COUNT
        if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
        {
            // if we are asked to be stingy with memory, limit gen 0 size
            gen0size = min (gen0size, (size_t)(4*1024*1024));
        }
#endif //DYNAMIC_HEAP_COUNT

        dprintf (1, ("gen0size: %zd * %d = %zd, physical mem: %zd / 6 = %zd",
                gen0size, n_heaps, (gen0size * n_heaps),
                gc_heap::total_physical_mem,
                gc_heap::total_physical_mem / 6));

        // if the total min GC across heaps will exceed 1/6th of available memory,
        // then reduce the min GC size until it either fits or has been reduced to cache size.
        while ((gen0size * n_heaps) > (gc_heap::total_physical_mem / 6))
        {
            gen0size = gen0size / 2;
            if (gen0size <= trueSize)
            {
                gen0size = trueSize;
                break;
            }
        }
    }
#ifdef FEATURE_EVENT_TRACE
    else
    {
        // The config value is valid; record that the budget came from config.
        gen0_min_budget_from_config = gen0size;
    }
#endif //FEATURE_EVENT_TRACE

    size_t seg_size = gc_heap::soh_segment_size;
    assert (seg_size);

    // Generation 0 must never be more than 1/2 the segment size.
    if (gen0size >= (seg_size / 2))
        gen0size = seg_size / 2;

    // If the value from config is valid we use it as is without this adjustment.
    if (is_config_invalid)
    {
        if (heap_hard_limit)
        {
            // With a hard limit, gen0 is further limited to 1/8 of the segment size.
            size_t gen0size_seg = seg_size / 8;
            if (gen0size >= gen0size_seg)
            {
                dprintf (1, ("gen0 limited by seg size %zd->%zd", gen0size, gen0size_seg));
                gen0size = gen0size_seg;
            }
        }

        // Scale to 5/8 of the computed size (integer division first, then multiply).
        gen0size = gen0size / 8 * 5;
    }

#ifdef STRESS_REGIONS
    // This is just so we can test allocation using more than one region on machines with very
    // small caches.
    gen0size = ((size_t)1 << min_segment_size_shr) * 3;
#endif //STRESS_REGIONS

    gen0size = Align (gen0size);

    return gen0size;
}

// Records vmlimit as gc_heap::reserved_memory_limit.
void GCHeap::SetReservedVMLimit (size_t vmlimit)
{
    gc_heap::reserved_memory_limit = vmlimit;
}

//versions of same method on each heap

#ifdef FEATURE_PREMORTEM_FINALIZATION

// Returns the next object whose finalizer should be run, or 0 when none of
// the finalize queues has one to hand out.
Object* GCHeap::GetNextFinalizableObject()
{
#ifdef MULTIPLE_HEAPS
    // Two sweeps over all heaps, in heap order: the first sweep asks every
    // queue for non-critical entries only, the second sweep accepts critical
    // entries as well.
    for (int pass = 0; pass < 2; pass++)
    {
        BOOL only_non_critical = (pass == 0) ? TRUE : FALSE;

        for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
        {
            gc_heap* heap = gc_heap::g_heaps [heap_index];
            Object* candidate = heap->finalize_queue->GetNextFinalizableObject (only_non_critical);
            if (candidate)
            {
                return candidate;
            }
        }
    }

    return 0;
#else //MULTIPLE_HEAPS
    return pGenGCHeap->finalize_queue->GetNextFinalizableObject();
#endif //MULTIPLE_HEAPS
}

// Sums GetNumberFinalizableObjects() over the finalize queue of every heap
// (or returns the single heap's count when there is only one heap).
size_t GCHeap::GetNumberFinalizableObjects()
{
#ifdef MULTIPLE_HEAPS
    size_t total = 0;
    int heap_index = 0;
    while (heap_index < gc_heap::n_heaps)
    {
        total += gc_heap::g_heaps [heap_index]->finalize_queue->GetNumberFinalizableObjects();
        heap_index++;
    }
    return total;
#else //MULTIPLE_HEAPS
    return pGenGCHeap->finalize_queue->GetNumberFinalizableObjects();
#endif //MULTIPLE_HEAPS
}

// Sums GetPromotedCount() over the finalize queue of every heap (or returns
// the single heap's count when there is only one heap).
size_t GCHeap::GetFinalizablePromotedCount()
{
#ifdef MULTIPLE_HEAPS
    size_t promoted_total = 0;
    for (int heap_index = 0; heap_index < gc_heap::n_heaps; heap_index++)
    {
        CFinalize* queue = gc_heap::g_heaps [heap_index]->finalize_queue;
        promoted_total += queue->GetPromotedCount();
    }
    return promoted_total;
#else //MULTIPLE_HEAPS
    return pGenGCHeap->finalize_queue->GetPromotedCount();
#endif //MULTIPLE_HEAPS
}

//---------------------------------------------------------------------------
// Finalized class tracking
//---------------------------------------------------------------------------

bool GCHeap::RegisterForFinalization (int gen, Object* obj)
{
    if (gen == -1)
        gen = 0;
    if (((((CObjectHeader*)obj)->GetHeader()->GetBits()) & BIT_SBLK_FINALIZER_RUN))
    {
        ((CObjectHeader*)obj)->GetHeader()->ClrBit(BIT_SBLK_FINALIZER_RUN);
        return true;
    }
    else
    {
        gc_heap* hp = gc_heap::heap_of ((uint8_t*)obj);
        return hp->finalize_queue->RegisterForFinalization (gen, obj);
    }
}

// Sets BIT_SBLK_FINALIZER_RUN in obj's header. RegisterForFinalization and
// CFinalize::ScanForFinalization test and clear this same bit.
void GCHeap::SetFinalizationRun (Object* obj)
{
    ((CObjectHeader*)obj)->GetHeader()->SetBit(BIT_SBLK_FINALIZER_RUN);
}


//--------------------------------------------------------------------
//
//          Support for finalization
//
//--------------------------------------------------------------------

inline
unsigned int gen_segment (int gen)
{
    // Generation numbers map to finalization-queue segments in reverse:
    // segment = total_generation_count - gen - 1, so a larger generation
    // number yields a smaller segment index.
    const int segment_index = (signed)total_generation_count - gen - 1;
    assert (segment_index >= 0);
    return (unsigned int)segment_index;
}

bool CFinalize::Initialize()
{
    CONTRACTL {
        NOTHROW;
        GC_NOTRIGGER;
    } CONTRACTL_END;

    const int INITIAL_FINALIZER_ARRAY_SIZE = 100;
    m_Array = new (nothrow)(Object*[INITIAL_FINALIZER_ARRAY_SIZE]);

    if (!m_Array)
    {
        ASSERT (m_Array);
        STRESS_LOG_OOM_STACK(sizeof(Object*[INITIAL_FINALIZER_ARRAY_SIZE]));
        if (GCConfig::GetBreakOnOOM())
        {
            GCToOSInterface::DebugBreak();
        }
        return false;
    }
    m_EndArray = &m_Array[INITIAL_FINALIZER_ARRAY_SIZE];

    for (int i =0; i < FreeList; i++)
    {
        SegQueueLimit (i) = m_Array;
    }
    m_PromotedCount = 0;
    lock = -1;
#ifdef _DEBUG
    lockowner_threadid.Clear();
#endif // _DEBUG

    return true;
}

// Frees the finalization array (allocated with new[] in Initialize and
// replaced by GrowArray / MergeFinalizationData / SplitFinalizationData).
CFinalize::~CFinalize()
{
    delete[] m_Array;
}

// Returns m_PromotedCount, which ScanForFinalization resets and then
// increments for each entry it moves to a finalizer list.
size_t CFinalize::GetPromotedCount ()
{
    return m_PromotedCount;
}

// An explanation of locking for finalization:
//
// Multiple threads allocate objects.  During the allocation, they are serialized by
// the AllocLock above.  But they release that lock before they register the object
// for finalization.  That's because there is much contention for the alloc lock, but
// finalization is presumed to be a rare case.
//
// So registering an object for finalization must be protected by the FinalizeLock.
//
// There is another logical queue that involves finalization.  When objects registered
// for finalization become unreachable, they are moved from the "registered" queue to
// the "unreachable" queue.  Note that this only happens inside a GC, so no other
// threads can be manipulating either queue at that time.  Once the GC is over and
// threads are resumed, the Finalizer thread will dequeue objects from the "unreachable"
// queue and call their finalizers.  This dequeue operation is also protected with
// the finalize lock.
//
// At first, this seems unnecessary.  Only one thread is ever enqueuing or dequeuing
// on the unreachable queue (either the GC thread during a GC or the finalizer thread
// when a GC is not in progress).  The reason we share a lock with threads enqueuing
// on the "registered" queue is that the "registered" and "unreachable" queues are
// interrelated.
//
// They are actually two regions of a longer list, which can only grow at one end.
// So to enqueue an object to the "registered" list, you actually rotate an unreachable
// object at the boundary between the logical queues, out to the other end of the
// unreachable queue -- where all growing takes place.  Then you move the boundary
// pointer so that the gap we created at the boundary is now on the "registered"
// side rather than the "unreachable" side.  Now the object can be placed into the
// "registered" side at that point.  This is much more efficient than doing moves
// of arbitrarily long regions, but it causes the two queues to require a shared lock.
//
// Notice that Enter/LeaveFinalizeLock is not a GC-aware spin lock.  Instead, it relies
// on the fact that the lock will only be taken for a brief period and that it will
// never provoke or allow a GC while the lock is held.  This is critical.  If the
// FinalizeLock used enter_spin_lock (and thus sometimes enters preemptive mode to
// allow a GC), then the Alloc client would have to GC protect a finalizable object
// to protect against that eventuality.  That is too slow!
inline
void CFinalize::EnterFinalizeLock()
{
    // Acquires the finalize lock. The lock word is -1 when free (see
    // Initialize / LeaveFinalizeLock); acquisition swaps it from -1 to 0.
    // On contention this spins, yields and occasionally sleeps until the
    // lock word reads negative again, then retries the swap.
    _ASSERTE(dbgOnly_IsSpecialEEThread() ||
             GCToEEInterface::GetThread() == 0 ||
             GCToEEInterface::IsPreemptiveGCDisabled());

retry:
    // A non-negative result means the swap did not find the lock free
    // (presumably CompareExchange returns the previous value -- confirm).
    if (Interlocked::CompareExchange(&lock, 0, -1) >= 0)
    {
        unsigned int i = 0;
        while (lock >= 0)
        {
            // Busy-spinning is only attempted when there is more than one processor.
            if (g_num_processors > 1)
            {
                int spin_count = 128 * yp_spin_count_unit;
                for (int j = 0; j < spin_count; j++)
                {
                    if (lock < 0)
                        break;
                    // give the HT neighbor a chance to run
                    YieldProcessor ();
                }
            }
            if (lock < 0)
                break;
            // Seven out of every eight rounds yield the thread; the eighth
            // calls Sleep (5) instead.
            if (++i & 7)
                GCToOSInterface::YieldThread (0);
            else
                GCToOSInterface::Sleep (5);
        }
        // The lock looked free; go back and try to actually take it.
        goto retry;
    }

#ifdef _DEBUG
    lockowner_threadid.SetToCurrentThread();
#endif // _DEBUG
}

inline
void CFinalize::LeaveFinalizeLock()
{
    // Releases the finalize lock by storing -1, the "free" value that
    // EnterFinalizeLock swaps out when acquiring.
    _ASSERTE(dbgOnly_IsSpecialEEThread() ||
             GCToEEInterface::GetThread() == 0 ||
             GCToEEInterface::IsPreemptiveGCDisabled());

#ifdef _DEBUG
    // Clear the debug-only owner record before the lock word is released.
    lockowner_threadid.Clear();
#endif // _DEBUG
    lock = -1;
}

// Adds obj to the finalization queue segment that corresponds to generation
// gen. The whole operation runs under the finalize lock.
//
// Parameters:
//   gen  - generation whose segment receives the object.
//   obj  - object to register.
//   size - only used on the failure path: when the array cannot be grown and
//          obj has no method table yet, obj is turned into a free object of
//          this size.
//
// Returns true on success, false if the finalization array was full and
// could not be grown.
bool
CFinalize::RegisterForFinalization (int gen, Object* obj, size_t size)
{
    CONTRACTL {
        NOTHROW;
        GC_NOTRIGGER;
    } CONTRACTL_END;

    EnterFinalizeLock();

    // Adjust gen
    unsigned int dest = gen_segment (gen);

    // Adjust boundary for segments so that GC will keep objects alive.
    Object*** s_i = &SegQueue (FreeListSeg);
    if ((*s_i) == SegQueueLimit(FreeListSeg))
    {
        // The free list segment is empty, so there is no spare slot left.
        if (!GrowArray())
        {
            LeaveFinalizeLock();
            if (method_table(obj) == NULL)
            {
                // If the object is uninitialized, a valid size should have been passed.
                assert (size >= Align (min_obj_size));
                // Log the byte range [obj, obj + size[. The end address must be
                // computed in bytes; arithmetic on Object* would scale by sizeof(Object).
                dprintf (3, (ThreadStressLog::gcMakeUnusedArrayMsg(), (size_t)obj, (size_t)((uint8_t*)obj + size)));
                ((CObjectHeader*)obj)->SetFree(size);
            }
            STRESS_LOG_OOM_STACK(0);
            if (GCConfig::GetBreakOnOOM())
            {
                GCToOSInterface::DebugBreak();
            }
            return false;
        }
    }
    // Walk the segment boundaries from the free list down to the destination
    // segment, opening a one-slot gap that ends up at the end of dest.
    Object*** end_si = &SegQueueLimit (dest);
    do
    {
        //is the segment empty?
        if (!(*s_i == *(s_i-1)))
        {
            //no, move the first element of the segment to the (new) last location in the segment
            *(*s_i) = *(*(s_i-1));
        }
        //increment the fill pointer
        (*s_i)++;
        //go to the next segment.
        s_i--;
    } while (s_i > end_si);

    // We have reached the destination segment
    // store the object
    **s_i = obj;
    // increment the fill pointer
    (*s_i)++;

    LeaveFinalizeLock();

    return true;
}

// Removes and returns the last entry of the finalizer list. When that list
// is empty and only_non_critical is false, the last entry of the critical
// finalizer list is removed and returned instead. Returns 0 if nothing was
// taken. Runs under the finalize lock.
Object*
CFinalize::GetNextFinalizableObject (BOOL only_non_critical)
{
    Object* next = 0;
    EnterFinalizeLock();

    if (!IsSegEmpty(FinalizerListSeg))
    {
        Object**& finalizer_limit = SegQueueLimit (FinalizerListSeg);
        --finalizer_limit;
        next = *finalizer_limit;
    }
    else if (!only_non_critical && !IsSegEmpty(CriticalFinalizerListSeg))
    {
        // The finalizer list is empty, so both limits can simply be pulled
        // back by one rather than moving the entry to the free list.
        Object**& critical_limit = SegQueueLimit (CriticalFinalizerListSeg);
        --critical_limit;
        next = *critical_limit;
        --SegQueueLimit (FinalizerListSeg);
    }

    if (next)
    {
        dprintf (3, ("running finalizer for %p (mt: %p)", next, method_table (next)));
    }

    LeaveFinalizeLock();
    return next;
}

// Returns the number of entries between the start of FinalizerStartSeg and
// the limit of FinalizerMaxSeg (the same range GcScanRoots walks).
// Note: reads the boundaries without taking the finalize lock.
size_t
CFinalize::GetNumberFinalizableObjects()
{
    return SegQueueLimit(FinalizerMaxSeg) - SegQueue(FinalizerStartSeg);
}

// Moves the entry at fromIndex from segment fromSeg to segment toSeg by
// swapping it across one segment boundary at a time and shifting each
// boundary by one slot, so no bulk copying is needed. fromSeg and toSeg
// must differ. Entries at the crossed boundaries change position within
// their own segments.
void
CFinalize::MoveItem (Object** fromIndex,
                     unsigned int fromSeg,
                     unsigned int toSeg)
{

    int step;
    ASSERT (fromSeg != toSeg);
    // step is the direction of travel through the segment indices.
    if (fromSeg > toSeg)
        step = -1;
    else
        step = +1;
    // Each iteration places the element at the boundary closest to dest
    // and then adjusts the boundary to move that element one segment closer
    // to dest.
    Object** srcIndex = fromIndex;
    for (unsigned int i = fromSeg; i != toSeg; i+= step)
    {
        // Select SegQueue[i] for step==-1, SegQueueLimit[i] for step==1
        Object**& destFill = m_FillPointers[i+(step - 1 )/2];
        // Select SegQueue[i] for step==-1, SegQueueLimit[i]-1 for step==1
        //   (SegQueueLimit[i]-1 is the last entry in segment i)
        Object** destIndex = destFill - (step + 1)/2;
        if (srcIndex != destIndex)
        {
            // Swap the travelling entry with the entry sitting at the boundary.
            Object* tmp = *srcIndex;
            *srcIndex = *destIndex;
            *destIndex = tmp;
        }
        // Shift the boundary so the travelling entry now belongs to the
        // neighbouring segment.
        destFill -= step;
        srcIndex = destIndex;
    }
}

// Reports every slot from the start of FinalizerStartSeg up to the limit of
// FinalizerMaxSeg to fn. When pSC is 0 a local ScanContext is used instead;
// in either case the context's thread_number is set to hn first.
void
CFinalize::GcScanRoots (promote_func* fn, int hn, ScanContext *pSC)
{
    ScanContext local_context;
    ScanContext* context = (pSC != 0) ? pSC : &local_context;

    context->thread_number = hn;

    // Both bounds are read once, before any callback runs.
    Object** slot = SegQueue (FinalizerStartSeg);
    Object** const end_slot = SegQueueLimit (FinalizerMaxSeg);

    while (slot < end_slot)
    {
        Object* o = *slot;
        dprintf (3, ("scan f %zx", (size_t)o));

        (*fn)(slot, context, 0);
        slot++;
    }
}

// Calls fn for every entry of the finalizer list (flagged as non-critical)
// and then for every entry of the critical finalizer list (flagged as
// critical).
void CFinalize::WalkFReachableObjects (fq_walk_fn fn)
{
    // Non-critical finalizers first.
    Object** cursor = SegQueue (FinalizerListSeg);
    Object** limit = SegQueueLimit (FinalizerListSeg);
    bool isCriticalFinalizer = false;
    while (cursor < limit)
    {
        fn(isCriticalFinalizer, *cursor);
        cursor++;
    }

    // Then the critical ones.
    cursor = SegQueue (CriticalFinalizerListSeg);
    limit = SegQueueLimit (CriticalFinalizerListSeg);
    isCriticalFinalizer = true;
    while (cursor < limit)
    {
        fn(isCriticalFinalizer, *cursor);
        cursor++;
    }
}

// Scans the registered-object segments for generation gen and every younger
// generation. Each entry that IsPromoted reports as not promoted is either
//   - moved to the free list (eagerly finalized by the EE, or its
//     BIT_SBLK_FINALIZER_RUN bit is set, in which case the bit is cleared), or
//   - moved to the critical / non-critical finalizer list and counted in
//     m_PromotedCount.
// If either finalizer list is non-empty afterwards, the entries of both
// lists are reported to pfn through GcScanRoots and
// hp->settings.found_finalizers is set.
//
// Returns TRUE when either finalizer list is non-empty after the scan.
BOOL
CFinalize::ScanForFinalization (promote_func* pfn, int gen, gc_heap* hp)
{
    ScanContext sc;
    sc.promotion = TRUE;
#ifdef MULTIPLE_HEAPS
    sc.thread_number = hp->heap_number;
    sc.thread_count = gc_heap::n_heaps;
#else
    UNREFERENCED_PARAMETER(hp);
    sc.thread_count = 1;
#endif //MULTIPLE_HEAPS

    BOOL finalizedFound = FALSE;

    //start with gen and explore all the younger generations.
    // (gen_segment maps younger generations to higher segment indices, so
    // the segment loop below counts upwards to gen_segment(0).)
    unsigned int startSeg = gen_segment (gen);
    {
        m_PromotedCount = 0;
        for (unsigned int Seg = startSeg; Seg <= gen_segment(0); Seg++)
        {
            // Walk the segment from its last entry down to its first one.
            Object** endIndex = SegQueue (Seg);
            for (Object** i = SegQueueLimit (Seg)-1; i >= endIndex ;i--)
            {
                CObjectHeader* obj = (CObjectHeader*)*i;
                dprintf (3, ("scanning: %zx", (size_t)obj));
                if (!g_theGCHeap->IsPromoted (obj))
                {
                    dprintf (3, ("freacheable: %zx", (size_t)obj));

                    assert (method_table(obj)->HasFinalizer());

                    if (GCToEEInterface::EagerFinalized(obj))
                    {
                        // The EE reports the object as eagerly finalized, so
                        // its entry is simply dropped.
                        MoveItem (i, Seg, FreeListSeg);
                    }
                    else if ((obj->GetHeader()->GetBits()) & BIT_SBLK_FINALIZER_RUN)
                    {
                        //remove the object because we don't want to
                        //run the finalizer
                        MoveItem (i, Seg, FreeListSeg);

                        //Reset the bit so it will be put back on the queue
                        //if resurrected and re-registered.
                        obj->GetHeader()->ClrBit (BIT_SBLK_FINALIZER_RUN);

                    }
                    else
                    {
                        m_PromotedCount++;

                        // Queue the object on the list matching its finalizer kind.
                        if (method_table(obj)->HasCriticalFinalizer())
                        {
                            MoveItem (i, Seg, CriticalFinalizerListSeg);
                        }
                        else
                        {
                            MoveItem (i, Seg, FinalizerListSeg);
                        }
                    }
                }
#ifdef BACKGROUND_GC
                else
                {
                    if ((gen == max_generation) && (gc_heap::background_running_p()))
                    {
                        // TODO - fix the following line.
                        //assert (gc_heap::background_object_marked ((uint8_t*)obj, FALSE));
                        dprintf (3, ("%zx is marked", (size_t)obj));
                    }
                }
#endif //BACKGROUND_GC
            }
        }
    }
    // The lists may also hold entries that were already there before this scan.
    finalizedFound = !IsSegEmpty(FinalizerListSeg) ||
                     !IsSegEmpty(CriticalFinalizerListSeg);

    if (finalizedFound)
    {
        //Promote the f-reachable objects
        GcScanRoots (pfn,
#ifdef MULTIPLE_HEAPS
                     hp->heap_number
#else
                     0
#endif //MULTIPLE_HEAPS
                     , 0);

        hp->settings.found_finalizers = TRUE;

#ifdef BACKGROUND_GC
        if (hp->settings.concurrent)
        {
            // For a concurrent GC the flag is recomputed from the current
            // state of the two finalizer lists.
            hp->settings.found_finalizers = !(IsSegEmpty(FinalizerListSeg) && IsSegEmpty(CriticalFinalizerListSeg));
        }
#endif //BACKGROUND_GC
        if (hp->settings.concurrent && hp->settings.found_finalizers)
        {
            GCToEEInterface::EnableFinalization(true);
        }
    }

    return finalizedFound;
}

//Relocates all of the objects in the finalization array
void
CFinalize::RelocateFinalizationData (int gen, gc_heap* hp)
{
    ScanContext sc;
    sc.promotion = FALSE;
#ifdef MULTIPLE_HEAPS
    sc.thread_number = hp->heap_number;
    sc.thread_count = gc_heap::n_heaps;
#else
    UNREFERENCED_PARAMETER(hp);
    sc.thread_count = 1;
#endif //MULTIPLE_HEAPS

    unsigned int Seg = gen_segment (gen);

    Object** startIndex = SegQueue (Seg);

    dprintf (3, ("RelocateFinalizationData gen=%d, [%p,%p[", gen, startIndex, SegQueue (FreeList)));

    for (Object** po = startIndex; po < SegQueue (FreeList);po++)
    {
        GCHeap::Relocate (po, &sc);
    }
}

// Re-buckets the registered entries after a GC of generation gen so that
// each entry sits in the segment of the generation it now belongs to.
//
//   gen_0_empty_p != 0: no per-object test is done; the fill pointers are
//                       shifted so each condemned generation's entries fall
//                       into the next older generation's segment.
//   gen_0_empty_p == 0: every entry in generations gen..0 is checked with
//                       WhichGeneration and moved if its generation changed.
void
CFinalize::UpdatePromotedGenerations (int gen, BOOL gen_0_empty_p)
{
    dprintf(3, ("UpdatePromotedGenerations gen=%d, gen_0_empty_p=%d", gen, gen_0_empty_p));

    // update the generation fill pointers.
    // if gen_0_empty is FALSE, test each object to find out if
    // it was promoted or not
    if (gen_0_empty_p)
    {
        // Copy each generation's fill pointer from the next younger
        // generation, starting at min(gen+1, max_generation) and going down
        // to generation 1.
        for (int i = min (gen+1, (int)max_generation); i > 0; i--)
        {
            m_FillPointers [gen_segment(i)] = m_FillPointers [gen_segment(i-1)];
        }
    }
    else
    {
        //Look for demoted or promoted objects
        for (int i = gen; i >= 0; i--)
        {
            unsigned int Seg = gen_segment (i);
            Object** startIndex = SegQueue (Seg);

            // The limit is re-read on every iteration because MoveItem
            // changes the segment boundaries.
            for (Object** po = startIndex;
                 po < SegQueueLimit (gen_segment(i)); po++)
            {
                int new_gen = g_theGCHeap->WhichGeneration (*po);
                if (new_gen != i)
                {
                    // We never promote objects to a non-GC heap
                    assert (new_gen <= max_generation);

                    dprintf (3, ("Moving object %p->%p from gen %d to gen %d", po, *po, i, new_gen));

                    if (new_gen > i)
                    {
                        //promotion
                        MoveItem (po, gen_segment (i), gen_segment (new_gen));
                    }
                    else
                    {
                        //demotion
                        MoveItem (po, gen_segment (i), gen_segment (new_gen));
                        //back down in order to see all objects.
                        po--;
                    }
                }
            }
        }
    }
}

// Replaces the finalization array with a larger one (about 20% bigger),
// copying all existing entries and rebasing every fill pointer onto the new
// array.
//
// Returns TRUE on success. Returns FALSE, leaving the queue untouched, if
// the new array could not be allocated.
BOOL
CFinalize::GrowArray()
{
    size_t oldArraySize = (m_EndArray - m_Array);

    // Grow by 20% using integer arithmetic (no float precision loss for very
    // large arrays) and make sure the array always gains at least one slot,
    // since the caller writes into the freed-up space right after we return.
    size_t newArraySize = oldArraySize + (oldArraySize / 5);
    if (newArraySize <= oldArraySize)
    {
        newArraySize = oldArraySize + 1;
    }

    Object** newArray = new (nothrow) Object*[newArraySize];
    if (!newArray)
    {
        return FALSE;
    }
    memcpy (newArray, m_Array, oldArraySize*sizeof(Object*));

    // Log the old range and the new range (the end of the new range is
    // relative to newArray, not to the array being replaced).
    dprintf (3, ("Grow finalizer array [%p,%p[ -> [%p,%p[", m_Array, m_EndArray, newArray, newArray + newArraySize));

    //adjust the fill pointers
    for (int i = 0; i < FreeList; i++)
    {
        // Rebase via the offset inside the old array; this avoids subtracting
        // pointers that belong to two different allocations.
        m_FillPointers [i] = newArray + (m_FillPointers [i] - m_Array);
    }
    delete[] m_Array;
    m_Array = newArray;
    m_EndArray = &m_Array [newArraySize];

    return TRUE;
}

// merge finalization data from another queue into this one
// return false in case of failure - in this case, move no items
//
// For every segment below FreeList, the merged queue holds this queue's
// entries followed by other_fq's entries for that segment. On success
// other_fq is left empty (all of its segment limits point at its array base)
// but keeps its own array.
bool CFinalize::MergeFinalizationData (CFinalize* other_fq)
{
    // compute how much space we will need for the merged data
    size_t otherNeededArraySize = other_fq->UsedCount();
    if (otherNeededArraySize == 0)
    {
        // the other queue is empty - nothing to do!
        return true;
    }
    size_t thisArraySize = (m_EndArray - m_Array);
    size_t thisNeededArraySize = UsedCount();
    size_t neededArraySize = thisNeededArraySize + otherNeededArraySize;

    // Merge in place unless the current array is too small.
    Object ** newArray = m_Array;

    // check if the space we have is sufficient
    if (thisArraySize < neededArraySize)
    {
        // if not allocate new array
        newArray = new (nothrow) Object*[neededArraySize];

        // if unsuccessful, return false without changing anything
        if (!newArray)
        {
            dprintf (3, ("ran out of space merging finalization data"));
            return false;
        }
    }

    // Since the target might be the original array (with the original data),
    // the order of copying must not overwrite any data until it has been
    // copied.

    // copy the finalization data from this and the other finalize queue
    // (segments are processed from the highest index down; every segment
    // only moves towards higher addresses, so this order is safe in place)
    for (int i = FreeList - 1; i >= 0; i--)
    {
        size_t thisIndex = SegQueue (i) - m_Array;
        size_t otherIndex = other_fq->SegQueue (i) - other_fq->m_Array;
        size_t thisLimit = SegQueueLimit (i) - m_Array;
        size_t otherLimit = other_fq->SegQueueLimit (i) - other_fq->m_Array;
        size_t thisSize = thisLimit - thisIndex;
        size_t otherSize = otherLimit - otherIndex;

        // This queue's segment i lands after everything from lower segments
        // of both queues; the other queue's segment i follows directly.
        memmove (&newArray[thisIndex + otherIndex],           &m_Array[thisIndex ], sizeof(newArray[0])*thisSize );
        memmove (&newArray[thisLimit + otherIndex], &other_fq->m_Array[otherIndex], sizeof(newArray[0])*otherSize);
    }

    // adjust the m_FillPointers to reflect the sum of both queues on this queue,
    // and reflect that the other queue is now empty
    for (int i = FreeList - 1; i >= 0; i--)
    {
        size_t thisLimit = SegQueueLimit (i) - m_Array;
        size_t otherLimit = other_fq->SegQueueLimit (i) - other_fq->m_Array;

        SegQueueLimit (i) = &newArray[thisLimit + otherLimit];

        other_fq->SegQueueLimit (i) = other_fq->m_Array;
    }
    // If a bigger array was allocated, switch over to it and free the old one.
    if (m_Array != newArray)
    {
        delete[] m_Array;
        m_Array = newArray;
        m_EndArray = &m_Array [neededArraySize];
    }
    return true;
}

// split finalization data from this queue with another queue
// return false in case of failure - in this case, move no items
//
// For every segment below FreeList, the second half (rounded down) of the
// entries is moved to the same segment of other_fq and the remaining entries
// are compacted towards the start of this queue's array. other_fq must be
// empty on entry; its array is replaced if it is too small.
bool CFinalize::SplitFinalizationData (CFinalize* other_fq)
{
    // the other finalization queue is assumed to be empty at this point
    size_t otherCurrentArraySize = other_fq->UsedCount();
    assert (otherCurrentArraySize == 0);

    size_t thisCurrentArraySize = UsedCount();
    if (thisCurrentArraySize == 0)
    {
        // this queue is empty - nothing to split!
        return true;
    }

    // Upper bound on what gets moved: each segment moves half of its entries
    // (rounded down), which sums to at most half of the total.
    size_t otherNeededArraySize = thisCurrentArraySize / 2;

    // do we have a big enough array allocated on the other queue to move the intended size?
    size_t otherArraySize = other_fq->m_EndArray - other_fq->m_Array;
    if (otherArraySize < otherNeededArraySize)
    {
        // if not, allocate new array
        Object ** newArray = new (nothrow) Object*[otherNeededArraySize];
        if (!newArray)
        {
            // if unsuccessful, return false without changing anything
            return false;
        }
        // other_fq's fill pointers still refer to the old array here; the
        // loop below rewrites every one of them before it is read.
        delete[] other_fq->m_Array;
        other_fq->m_Array = newArray;
        other_fq->m_EndArray = &other_fq->m_Array[otherNeededArraySize];
    }

    // move half of the items in each section over to the other queue
    // NOTE(review): newFillPointers is filled for indices below FreeList but
    // copied back for indices below MaxSeg; this relies on MaxSeg not
    // exceeding FreeList -- confirm against the class declaration.
    PTR_PTR_Object newFillPointers[MaxSeg];
    PTR_PTR_Object segQueue = m_Array;
    for (int i = 0; i < FreeList; i++)
    {
        size_t thisIndex = SegQueue (i) - m_Array;
        size_t thisLimit = SegQueueLimit (i) - m_Array;
        size_t thisSize = thisLimit - thisIndex;

        // we move half to the other queue
        size_t otherSize = thisSize / 2;
        size_t otherIndex = other_fq->SegQueue (i) - other_fq->m_Array;
        size_t thisNewSize = thisSize - otherSize;

        memmove (&other_fq->m_Array[otherIndex], &m_Array[thisIndex + thisNewSize], sizeof(other_fq->m_Array[0])*otherSize);
        other_fq->SegQueueLimit (i) = &other_fq->m_Array[otherIndex + otherSize];

        // slide the unmoved half to its new position in the queue
        // (this will delete the moved half once copies and m_FillPointers updates are completed)
        memmove (segQueue, &m_Array[thisIndex], sizeof(m_Array[0])*thisNewSize);
        segQueue += thisNewSize;
        newFillPointers[i] = segQueue;
    }

    // finally update the fill pointers from the new copy we generated
    for (int i = 0; i < MaxSeg; i++)
    {
        m_FillPointers[i] = newFillPointers[i];
    }

    return true;
}

#ifdef VERIFY_HEAP
void CFinalize::CheckFinalizerObjects()
{
    for (int i = 0; i <= max_generation; i++)
    {
        Object **startIndex = SegQueue (gen_segment (i));
        Object **stopIndex  = SegQueueLimit (gen_segment (i));

        for (Object **po = startIndex; po < stopIndex; po++)
        {
            if ((int)g_theGCHeap->WhichGeneration (*po) < i)
                FATAL_GC_ERROR ();
            ((CObjectHeader*)*po)->Validate();
        }
    }
}
#endif //VERIFY_HEAP

#endif // FEATURE_PREMORTEM_FINALIZATION


//------------------------------------------------------------------------------
//
//                      End of VM specific support
//
//------------------------------------------------------------------------------
// Walks the objects of this heap starting at generation gen_number and calls
// fn (object, context) for every object that is not a free object. The walk
// stops as soon as fn returns false.
//
// When walk_large_object_heap_p is set, the segments of
// large_object_generation and then pinned_object_generation are walked as
// well after the small object heap portion is done.
void gc_heap::walk_heap_per_heap (walk_fn fn, void* context, int gen_number, BOOL walk_large_object_heap_p)
{
    generation* gen = gc_heap::generation_of (gen_number);
    heap_segment*    seg = generation_start_segment (gen);
    // For max_generation start at the beginning of the segment; otherwise
    // start at the position returned by get_soh_start_object.
    uint8_t* x = ((gen_number == max_generation) ? heap_segment_mem (seg) : get_soh_start_object (seg, gen));
    uint8_t*       end = heap_segment_allocated (seg);
    int align_const = get_alignment_constant (TRUE);
    // The pinned object heap is walked under the same flag as the large object heap.
    BOOL walk_pinned_object_heap = walk_large_object_heap_p;

    while (1)
    {
        if (x >= end)
        {
            // Current segment exhausted: move on to the next segment, the
            // next lower generation (regions only), the large/pinned object
            // generations, or stop.
            if ((seg = heap_segment_next (seg)) != 0)
            {
                x = heap_segment_mem (seg);
                end = heap_segment_allocated (seg);
                continue;
            }
#ifdef USE_REGIONS
            else if (gen_number > 0)
            {
                // advance to next lower generation
                gen_number--;
                gen = gc_heap::generation_of (gen_number);
                seg = generation_start_segment (gen);

                x = heap_segment_mem (seg);
                end = heap_segment_allocated (seg);
                continue;
            }
#endif // USE_REGIONS
            else
            {
                // Each of the two flags is cleared once consumed, so the
                // large and pinned object generations are each visited at
                // most once.
                if (walk_large_object_heap_p)
                {
                    walk_large_object_heap_p = FALSE;
                    seg = generation_start_segment (large_object_generation);
                }
                else if (walk_pinned_object_heap)
                {
                    walk_pinned_object_heap = FALSE;
                    seg = generation_start_segment (pinned_object_generation);
                }
                else
                {
                    break;
                }

                // These generations use the alignment constant obtained with FALSE.
                align_const = get_alignment_constant (FALSE);
                x = heap_segment_mem (seg);
                end = heap_segment_allocated (seg);
                continue;
            }
        }

        size_t s = size (x);
        CObjectHeader* o = (CObjectHeader*)x;

        if (!o->IsFree())

        {
            _ASSERTE(((size_t)o & 0x3) == 0); // Last two bits should never be set at this point

            // A false return from the callback aborts the whole walk.
            if (!fn (o->GetObjectBase(), context))
                return;
        }
        // Step to the next object using the aligned size of this one.
        x = x + Align (s, align_const);
    }
}

// Passes fn to this heap's finalize queue via WalkFReachableObjects.
// Does nothing when FEATURE_PREMORTEM_FINALIZATION is not defined.
void gc_heap::walk_finalize_queue (fq_walk_fn fn)
{
#ifdef FEATURE_PREMORTEM_FINALIZATION
    finalize_queue->WalkFReachableObjects (fn);
#endif //FEATURE_PREMORTEM_FINALIZATION
}

// Runs walk_heap_per_heap with the given arguments on every heap (or on the
// single heap when MULTIPLE_HEAPS is not defined).
void gc_heap::walk_heap (walk_fn fn, void* context, int gen_number, BOOL walk_large_object_heap_p)
{
#ifdef MULTIPLE_HEAPS
    int heap_index = 0;
    while (heap_index < gc_heap::n_heaps)
    {
        gc_heap::g_heaps [heap_index]->walk_heap_per_heap (fn, context, gen_number, walk_large_object_heap_p);
        heap_index++;
    }
#else
    walk_heap_per_heap (fn, context, gen_number, walk_large_object_heap_p);
#endif //MULTIPLE_HEAPS
}

// Calls fn (referenced object, context) for each non-null slot that
// go_through_object_cl visits in obj (presumably the object's reference
// fields -- the macro is defined outside this section). Stops as soon as fn
// returns false. Does nothing for a null obj.
void GCHeap::DiagWalkObject (Object* obj, walk_fn fn, void* context)
{
    uint8_t* o = (uint8_t*)obj;
    if (o)
    {
        go_through_object_cl (method_table (o), o, size(o), oo,
                                    {
                                        if (*oo)
                                        {
                                            Object *oh = (Object*)*oo;
                                            // A false return ends the walk of this object.
                                            if (!fn (oh, context))
                                                return;
                                        }
                                    }
            );
    }
}

// Variant of DiagWalkObject whose callback receives the containing object
// and the slot itself: fn (obj, slot, context) is called for each non-null
// slot that go_through_object_cl visits. Stops as soon as fn returns false.
// Does nothing for a null obj.
void GCHeap::DiagWalkObject2 (Object* obj, walk_fn2 fn, void* context)
{
    uint8_t* o = (uint8_t*)obj;
    if (o)
    {
        go_through_object_cl (method_table (o), o, size(o), oo,
                                    {
                                        if (*oo)
                                        {
                                            // A false return ends the walk of this object.
                                            if (!fn (obj, oo, context))
                                                return;
                                        }
                                    }
            );
    }
}

void GCHeap::DiagWalkSurvivorsWithType (void* gc_context, record_surv_fn fn, void* diag_context, walk_surv_type type, int gen_number)
{
    gc_heap* hp = (gc_heap*)gc_context;

    if (type == walk_for_uoh)
    {
        hp->walk_survivors_for_uoh (diag_context, fn, gen_number);
    }
    else
    {
        hp->walk_survivors (fn, diag_context, type);
    }
}

// Forwards to gc_heap::walk_heap with the same arguments.
void GCHeap::DiagWalkHeap (walk_fn fn, void* context, int gen_number, bool walk_large_object_heap_p)
{
    gc_heap::walk_heap (fn, context, gen_number, walk_large_object_heap_p);
}

// Walking the GC Heap requires that the EE is suspended and all heap allocation contexts are fixed.
// DiagWalkHeap is invoked only during a GC, where both requirements are met.
// So DiagWalkHeapWithACHandling facilitates a GC Heap walk outside of a GC by handling allocation contexts logic,
// and it leaves the responsibility of suspending and resuming EE to the callers.
void GCHeap::DiagWalkHeapWithACHandling (walk_fn fn, void* context, int gen_number, bool walk_large_object_heap_p)
{
    // Step 1: fix the allocation contexts of every heap. The #ifdef only
    // selects the loop header / heap pointer; the body and closing brace are
    // shared between both configurations.
#ifdef MULTIPLE_HEAPS
    for (int hn = 0; hn < gc_heap::n_heaps; hn++)
    {
        gc_heap* hp = gc_heap::g_heaps [hn];
#else
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        hp->fix_allocation_contexts (FALSE);
    }

    // Step 2: do the actual walk.
    DiagWalkHeap (fn, context, gen_number, walk_large_object_heap_p);


    // Step 3: repair the allocation contexts of every heap again.
#ifdef MULTIPLE_HEAPS
    for (int hn = 0; hn < gc_heap::n_heaps; hn++)
    {
        gc_heap* hp = gc_heap::g_heaps [hn];
#else
    {
        gc_heap* hp = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        hp->repair_allocation_contexts (TRUE);
    }
}

// Treats gc_context as a gc_heap* and walks that heap's finalize queue with fn.
void GCHeap::DiagWalkFinalizeQueue (void* gc_context, fq_walk_fn fn)
{
    gc_heap* hp = (gc_heap*)gc_context;
    hp->walk_finalize_queue (fn);
}

// Runs GcScanRoots with fn and sc on the finalize queue of every heap,
// passing the heap's index as the heap number (0 for the single-heap build).
void GCHeap::DiagScanFinalizeQueue (fq_scan_fn fn, ScanContext* sc)
{
#ifdef MULTIPLE_HEAPS
    int heap_index = 0;
    while (heap_index < gc_heap::n_heaps)
    {
        CFinalize* queue = gc_heap::g_heaps [heap_index]->finalize_queue;
        queue->GcScanRoots(fn, heap_index, sc);
        heap_index++;
    }
#else
    pGenGCHeap->finalize_queue->GcScanRoots(fn, 0, sc);
#endif //MULTIPLE_HEAPS
}

// Forwards to GCScan::GcScanHandlesForProfilerAndETW (note the argument
// order differs from this function's parameter order).
void GCHeap::DiagScanHandles (handle_scan_fn fn, int gen_number, ScanContext* context)
{
    GCScan::GcScanHandlesForProfilerAndETW (gen_number, context, fn);
}

// Forwards to GCScan::GcScanDependentHandlesForProfilerAndETW (note the
// argument order differs from this function's parameter order).
void GCHeap::DiagScanDependentHandles (handle_scan_fn fn, int gen_number, ScanContext* context)
{
    GCScan::GcScanDependentHandlesForProfilerAndETW (gen_number, context, fn);
}

// Returns the current value of loh_size_threshold.
size_t GCHeap::GetLOHThreshold()
{
    return loh_size_threshold;
}

// Fills etw_settings with a snapshot of the GC settings. The body is compiled only
// when FEATURE_EVENT_TRACE is defined; without it the function does nothing and
// etw_settings is left untouched.
void GCHeap::DiagGetGCSettings(EtwGCSettingsInfo* etw_settings)
{
#ifdef FEATURE_EVENT_TRACE
    // Limits and thresholds.
    etw_settings->heap_hard_limit = gc_heap::heap_hard_limit;
    etw_settings->loh_threshold = loh_size_threshold;

    // Values that were recorded from the configuration.
    etw_settings->physical_memory_from_config = gc_heap::physical_memory_from_config;
    etw_settings->gen0_min_budget_from_config = gc_heap::gen0_min_budget_from_config;
    etw_settings->gen0_max_budget_from_config = gc_heap::gen0_max_budget_from_config;
    etw_settings->high_mem_percent_from_config = gc_heap::high_mem_percent_from_config;

    // Concurrent GC is reported as disabled when the build has no background GC.
#ifdef BACKGROUND_GC
    etw_settings->concurrent_gc_p = gc_heap::gc_can_use_concurrent;
#else //BACKGROUND_GC
    etw_settings->concurrent_gc_p = false;
#endif //BACKGROUND_GC

    etw_settings->use_large_pages_p = gc_heap::use_large_pages_p;
    etw_settings->use_frozen_segments_p = gc_heap::use_frozen_segments_p;
    etw_settings->hard_limit_config_p = gc_heap::hard_limit_config_p;

    // Without MULTIPLE_HEAPS "no affinitize" is always reported as true.
#ifdef MULTIPLE_HEAPS
    etw_settings->no_affinitize_p = gc_heap::gc_thread_no_affinitize_p;
#else //MULTIPLE_HEAPS
    etw_settings->no_affinitize_p = true;
#endif //MULTIPLE_HEAPS
#endif //FEATURE_EVENT_TRACE
}

// Forwards to Ref_NullBridgeObjectsWeakRefs when FEATURE_JAVAMARSHAL is defined.
// In builds without that feature this entry point is not expected to be called and
// asserts.
void GCHeap::NullBridgeObjectsWeakRefs(size_t length, void* unreachableObjectHandles)
{
#ifndef FEATURE_JAVAMARSHAL
    assert(false);
#else // !FEATURE_JAVAMARSHAL
    Ref_NullBridgeObjectsWeakRefs(length, unreachableObjectHandles);
#endif // !FEATURE_JAVAMARSHAL
}

#if defined(WRITE_BARRIER_CHECK) && !defined (SERVER_GC)
// This code is designed to catch the failure to update the write barrier
// The way it works is to copy the whole heap right after every GC.  The write
// barrier code has been modified so that it updates the shadow as well as the
// real GC heap.  Before doing the next GC, we walk the heap, looking for pointers
// that were updated in the real heap, but not the shadow.  A mismatch indicates
// an error.  The offending code can be found by breaking after the correct GC,
// and then placing a data breakpoint on the Heap location that was updated without
// going through the write barrier.

// Called at process shutdown (and by initGCShadow before it re-creates the shadow).
// Releases the shadow heap, if there is one, and resets both shadow pointers so that
// the shadow reads as "absent" afterwards.
void deleteGCShadow()
{
    if (g_GCShadow != 0)
    {
        // The size handed to VirtualRelease is the currently tracked shadow extent.
        GCToOSInterface::VirtualRelease (g_GCShadow, g_GCShadowEnd - g_GCShadow);
    }

    g_GCShadow = 0;
    g_GCShadowEnd = 0;
}

// Called at startup and right after a GC, get a snapshot of the GC Heap
void initGCShadow()
{
    if (!(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_BARRIERCHECK))
        return;

    uint8_t* highest = nullptr;

#ifdef USE_REGIONS
    highest = global_region_allocator.get_left_used_unsafe();
#else
    highest = g_gc_highest_address;
#endif

    size_t len = g_gc_highest_address - g_gc_lowest_address;
    size_t commit_len = highest - g_gc_lowest_address;
    if (len > (size_t)(g_GCShadowEnd - g_GCShadow))
    {
        deleteGCShadow();
        g_GCShadowEnd = g_GCShadow = (uint8_t *)GCToOSInterface::VirtualReserve (len, 0, VirtualReserveFlags::None);
        if (g_GCShadow == NULL || !GCToOSInterface::VirtualCommit (g_GCShadow, commit_len))
        {
            _ASSERTE(!"Not enough memory to run HeapVerify level 2");
            // If after the assert we decide to allow the program to continue
            // running we need to be in a state that will not trigger any
            // additional AVs while we fail to allocate a shadow segment, i.e.
            // ensure calls to updateGCShadow() checkGCWriteBarrier() don't AV
            deleteGCShadow();
            return;
        }

        g_GCShadowEnd += commit_len;
    }

    // save the value of g_gc_lowest_address at this time.  If this value changes before
    // the next call to checkGCWriteBarrier() it means we extended the heap (with a
    // large object segment most probably), and the whole shadow segment is inconsistent.
    g_shadow_lowest_address = g_gc_lowest_address;

    //****** Copy the whole GC heap ******
    //
    // NOTE: This is the one situation where the combination of heap_segment_rw(gen_start_segment())
    // can produce a NULL result.  This is because the initialization has not completed.
    //
    for (int i = get_start_generation_index(); i < total_generation_count; i++)
    {
        generation* gen = gc_heap::generation_of (i);
        heap_segment* seg = heap_segment_rw (generation_start_segment (gen));

        ptrdiff_t delta = g_GCShadow - g_gc_lowest_address;
        while (seg)
        {
            // Copy the segment
            uint8_t* start = heap_segment_mem (seg);
            uint8_t* end = heap_segment_allocated (seg);
            memcpy (start + delta, start, end - start);
            seg = heap_segment_next_rw (seg);
        }
    }
}

#define INVALIDGCVALUE (void*)((size_t)0xcccccccd)

// Tests whether the heap slot 'ptr' was only updated via the write barrier by comparing
// it with the corresponding slot in the GC shadow. Asserts on a mismatch that cannot be
// explained by one of the known benign cases below.
inline void testGCShadow(Object** ptr)
{
    Object** shadow_slot = (Object**) &g_GCShadow[((uint8_t*) ptr - g_gc_lowest_address)];

    // Null references are not checked.
    if (*ptr == 0)
    {
        return;
    }

    // Slots whose shadow address is not below g_GCShadowEnd are not checked.
    if ((uint8_t*) shadow_slot >= g_GCShadowEnd)
    {
        return;
    }

    // The heap and the shadow agree: nothing to report.
    if (*ptr == *shadow_slot)
    {
        return;
    }

    // If you get the assertion below, someone updated a GC pointer in the heap without
    // using the write barrier. To find out who, check the value of
    // dd_collection_count (dynamic_data_of (0)) and note the value of 'ptr'. Rerun the
    // app until the previous GC has just occurred, then put a data breakpoint on the
    // value of 'ptr' and check every write to that pointer between the two GCs. The
    // last one is not using the write barrier.

    // If the memory of interest does not exist at system startup, you need to set the
    // data breakpoint right after the memory gets committed. Set a breakpoint at the
    // end of grow_heap_segment, put the value of 'ptr' in the memory window and run
    // until the memory gets mapped. Then you can set your breakpoint.

    // Note a recent change: we've identified race conditions when updating the gc
    // shadow. Throughout the runtime, code will update an address in the gc heap and
    // then erect the write barrier, which calls updateGCShadow. With an app that pounds
    // one heap location from multiple threads, you can hit this assert even though all
    // involved are using the write barrier properly. Thus, we detect the race and set
    // this location to INVALIDGCVALUE.
    // TODO: the code in jithelp.asm doesn't call updateGCShadow, and hasn't been
    // TODO: fixed to detect the race. We've only seen this race from VolatileWritePtr,
    // TODO: so elect not to fix jithelp.asm at this time. It should be done if we start
    // TODO: hitting erroneous asserts in here.
    if (*shadow_slot == INVALIDGCVALUE)
    {
        // (Debugging aid that used to live here, kept disabled:
        //  printf("saw a INVALIDGCVALUE. (just to let you know)\n");)
        return;
    }

#ifdef FEATURE_BASICFREEZE
    // Write barriers for stores of references to frozen objects may be optimized away.
    if (g_theGCHeap->IsInFrozenSegment (*ptr))
    {
        return;
    }
#endif // FEATURE_BASICFREEZE

    _ASSERTE(!"Pointer updated without using write barrier");
}

void testGCShadowHelper (uint8_t* x)
{
    size_t s = size (x);
    if (contain_pointers (x))
    {
        go_through_object_nostart (method_table(x), x, s, oo,
                           { testGCShadow((Object**) oo); });
    }
}

// Walks the whole heap, looking for pointers that were not updated with the write
// barrier. Every object of every segment of every generation is handed to
// testGCShadowHelper.
void checkGCWriteBarrier()
{
    // An empty shadow range means there is no shadow heap.
    const bool no_shadow = (g_GCShadowEnd <= g_GCShadow);
    // g_shadow_lowest_address != g_gc_lowest_address means the GC heap was extended by
    // a segment and the GC shadow segment did not track that change!
    const bool shadow_out_of_date = (g_shadow_lowest_address != g_gc_lowest_address);

    if (no_shadow || shadow_out_of_date)
    {
        // Nothing (valid) to check against.
        return;
    }

    for (int gen_index = get_start_generation_index(); gen_index < total_generation_count; gen_index++)
    {
        int align_const = get_alignment_constant(gen_index <= max_generation);

        generation* gen = gc_heap::generation_of (gen_index);
        heap_segment* seg = heap_segment_rw (generation_start_segment (gen));

        _ASSERTE(seg != NULL);

        for (; seg; seg = heap_segment_next_rw (seg))
        {
            uint8_t* obj = heap_segment_mem (seg);
            while (obj < heap_segment_allocated (seg))
            {
                size_t obj_size = size (obj);
                testGCShadowHelper (obj);
                obj += Align (obj_size, align_const);
            }
        }
    }
}
#endif //WRITE_BARRIER_CHECK && !SERVER_GC

#ifdef FEATURE_BASICFREEZE
// Walks every object in the given segment, from heap_segment_mem up to
// heap_segment_allocated.
//   pfnMethodTable - invoked once per object with (pvContext, object address).
//   pfnObjRef      - invoked for every non-null pointer slot of objects that contain
//                    pointers, with (pvContext, slot address).
void gc_heap::walk_read_only_segment(heap_segment *seg, void *pvContext, object_callback_func pfnMethodTable, object_callback_func pfnObjRef)
{
    int align_const = get_alignment_constant(TRUE);

    for (uint8_t *obj = heap_segment_mem(seg);
         obj < heap_segment_allocated(seg);
         obj += Align(size(obj), align_const))
    {
        pfnMethodTable(pvContext, obj);

        if (!contain_pointers (obj))
        {
            continue;
        }

        go_through_object_nostart (method_table (obj), obj, size(obj), ref_slot,
               {
                   if (*ref_slot)
                       pfnObjRef(pvContext, ref_slot);
               }
        );
    }
}
#endif // FEATURE_BASICFREEZE

// Waits up to millisecondsTimeout for a running background GC to finish.
// Returns:
//   S_OK                                - no background GC is running (or the build has
//                                         no BACKGROUND_GC), or the wait was satisfied.
//   HRESULT_FROM_WIN32(ERROR_TIMEOUT)   - the wait timed out.
//   E_FAIL                              - the wait failed for any other reason.
HRESULT GCHeap::WaitUntilConcurrentGCCompleteAsync(int millisecondsTimeout)
{
#ifdef BACKGROUND_GC
    if (gc_heap::background_running_p())
    {
        uint32_t wait_result = pGenGCHeap->background_gc_wait(awr_ignored, millisecondsTimeout);

        if (wait_result == WAIT_OBJECT_0)
        {
            return S_OK;
        }

        if (wait_result == WAIT_TIMEOUT)
        {
            return HRESULT_FROM_WIN32(ERROR_TIMEOUT);
        }

        // It is not clear what the last error would be if the wait failed, as there
        // are too many layers in between. The best we can do is to return E_FAIL.
        return E_FAIL;
    }
#endif //BACKGROUND_GC

    return S_OK;
}

// Clears gc_heap::temp_disable_concurrent_p, undoing TemporaryDisableConcurrentGC.
// No-op in builds without BACKGROUND_GC.
void GCHeap::TemporaryEnableConcurrentGC()
{
#ifdef BACKGROUND_GC
    const bool disable_concurrent = false;
    gc_heap::temp_disable_concurrent_p = disable_concurrent;
#endif //BACKGROUND_GC
}

// Sets gc_heap::temp_disable_concurrent_p, which makes IsConcurrentGCEnabled report
// false until TemporaryEnableConcurrentGC is called. No-op in builds without
// BACKGROUND_GC.
void GCHeap::TemporaryDisableConcurrentGC()
{
#ifdef BACKGROUND_GC
    const bool disable_concurrent = true;
    gc_heap::temp_disable_concurrent_p = disable_concurrent;
#endif //BACKGROUND_GC
}

// Returns true when concurrent GC can be used (gc_can_use_concurrent) and has not been
// temporarily disabled (temp_disable_concurrent_p). Always false in builds without
// BACKGROUND_GC.
bool GCHeap::IsConcurrentGCEnabled()
{
#ifdef BACKGROUND_GC
    if (!gc_heap::gc_can_use_concurrent)
    {
        return false;
    }

    return !(gc_heap::temp_disable_concurrent_p);
#else //BACKGROUND_GC
    return false;
#endif //BACKGROUND_GC
}

#ifdef GC_DESCRIPTOR
// Contract descriptors with C linkage, defined outside of this file. Only their
// addresses are used here: PopulateDacVars publishes the WKS or SVR descriptor through
// GcDacVars::gc_descriptor, so an incomplete ContractDescriptor type is sufficient.
extern "C"
{
    struct ContractDescriptor;
    extern ContractDescriptor GCContractDescriptorWKS;
    // NOTE(review): this uses `#if` while the rest of the file tests FEATURE_SVR_GC with
    // `defined(...)`/`#ifdef`; presumably the macro is always defined to a non-zero
    // value when set - confirm.
#if FEATURE_SVR_GC
    extern ContractDescriptor GCContractDescriptorSVR;
#endif // FEATURE_SVR_GC
}
#endif // GC_DESCRIPTOR

// Fills *gcDacVars with the addresses, sizes and field-offset tables that describe this
// GC build, and records the build variant in g_build_variant.
//
// The minor_version_number found in *gcDacVars on entry is read before it is
// overwritten; fields guarded by v2/v4/v6 are only written when that incoming minor
// version is at least 2/4/6.
// NOTE(review): presumably the caller pre-populates minor_version_number with the
// version of GcDacVars it was built against - confirm against the caller.
void PopulateDacVars(GcDacVars *gcDacVars)
{
    // Validate the argument before it is dereferenced for the version checks below.
    assert(gcDacVars != nullptr);

    bool v2 = gcDacVars->minor_version_number >= 2;
    bool v4 = gcDacVars->minor_version_number >= 4;
    bool v6 = gcDacVars->minor_version_number >= 6;

    // The dac_*_fields.h headers are lists of DEFINE_* entries; with the definitions
    // below each entry expands to the offset of the field inside CLASS_NAME (or -1 for
    // a field that does not exist in this build).
#define DEFINE_FIELD(field_name, field_type) offsetof(CLASS_NAME, field_name),
#define DEFINE_DPTR_FIELD(field_name, field_type) offsetof(CLASS_NAME, field_name),
#define DEFINE_ARRAY_FIELD(field_name, field_type, array_length) offsetof(CLASS_NAME, field_name),
#define DEFINE_MISSING_FIELD(field_name) -1,

#ifdef MULTIPLE_HEAPS
    static int gc_heap_field_offsets[] = {
#define CLASS_NAME gc_heap
#include "dac_gcheap_fields.h"
#undef CLASS_NAME
    };
#endif //MULTIPLE_HEAPS
    static int generation_field_offsets[] = {

#define CLASS_NAME generation
#include "dac_generation_fields.h"
#undef CLASS_NAME

#undef DEFINE_MISSING_FIELD
#undef DEFINE_ARRAY_FIELD
#undef DEFINE_DPTR_FIELD
#undef DEFINE_FIELD
    };

    // Note: These version numbers do not need to be checked in the .Net dac/SOS because
    // we always match the compiled dac and GC to the version used.  NativeAOT's SOS may
    // work differently than .Net SOS.  When making breaking changes here you may need to
    // find NativeAOT's equivalent of SOS_BREAKING_CHANGE_VERSION and increment it.
    gcDacVars->major_version_number = 2;
    gcDacVars->minor_version_number = 4;
    if (v2)
    {
        gcDacVars->total_bookkeeping_elements = total_bookkeeping_elements;
        gcDacVars->card_table_info_size = sizeof(card_table_info);
    }

    // Accumulate the build variant bits for the features this GC was compiled with.
    g_build_variant = 0;
#ifdef USE_REGIONS
    g_build_variant |= build_variant_use_region;
    if (v2)
    {
        gcDacVars->count_free_region_kinds = count_free_region_kinds;
        gcDacVars->global_regions_to_decommit = reinterpret_cast<dac_region_free_list**>(&gc_heap::global_regions_to_decommit);
        gcDacVars->global_free_huge_regions = reinterpret_cast<dac_region_free_list**>(&gc_heap::global_free_huge_regions);
    }
#endif //USE_REGIONS
#ifdef BACKGROUND_GC
    g_build_variant |= build_variant_background_gc;
#endif //BACKGROUND_GC
#ifdef DYNAMIC_HEAP_COUNT
    g_build_variant |= build_variant_dynamic_heap_count;
#endif //DYNAMIC_HEAP_COUNT
    gcDacVars->built_with_svr = &g_built_with_svr_gc;
    gcDacVars->build_variant = &g_build_variant;
    gcDacVars->gc_structures_invalid_cnt = const_cast<int32_t*>(&GCScan::m_GcStructuresInvalidCnt);
    gcDacVars->generation_size = sizeof(generation);
    gcDacVars->total_generation_count = total_generation_count;
    gcDacVars->max_gen = &g_max_generation;
#ifdef BACKGROUND_GC
    gcDacVars->current_c_gc_state = const_cast<c_gc_state*>(&gc_heap::current_c_gc_state);
#else //BACKGROUND_GC
    gcDacVars->current_c_gc_state = 0;
#endif //BACKGROUND_GC
#ifndef MULTIPLE_HEAPS
    // Single heap: the per-heap state is published as individual addresses.
    gcDacVars->ephemeral_heap_segment = reinterpret_cast<dac_heap_segment**>(&gc_heap::ephemeral_heap_segment);
#ifdef USE_REGIONS
    if (v2)
    {
        gcDacVars->free_regions = reinterpret_cast<dac_region_free_list**>(&gc_heap::free_regions);
    }
#endif
#ifdef BACKGROUND_GC
    gcDacVars->mark_array = &gc_heap::mark_array;
    gcDacVars->background_saved_lowest_address = &gc_heap::background_saved_lowest_address;
    gcDacVars->background_saved_highest_address = &gc_heap::background_saved_highest_address;
    if (v2)
    {
        gcDacVars->freeable_soh_segment = reinterpret_cast<dac_heap_segment**>(&gc_heap::freeable_soh_segment);
        gcDacVars->freeable_uoh_segment = reinterpret_cast<dac_heap_segment**>(&gc_heap::freeable_uoh_segment);
    }
    gcDacVars->next_sweep_obj = &gc_heap::next_sweep_obj;
#ifdef USE_REGIONS
    gcDacVars->saved_sweep_ephemeral_seg = 0;
    gcDacVars->saved_sweep_ephemeral_start = 0;
#else
    gcDacVars->saved_sweep_ephemeral_seg = reinterpret_cast<dac_heap_segment**>(&gc_heap::saved_sweep_ephemeral_seg);
    gcDacVars->saved_sweep_ephemeral_start = &gc_heap::saved_sweep_ephemeral_start;
#endif //USE_REGIONS
#else //BACKGROUND_GC
    // No background GC in this build: publish null for all background GC state.
    gcDacVars->mark_array = 0;
    gcDacVars->background_saved_lowest_address = 0;
    gcDacVars->background_saved_highest_address = 0;
    if (v2)
    {
        gcDacVars->freeable_soh_segment = 0;
        gcDacVars->freeable_uoh_segment = 0;
    }
    gcDacVars->next_sweep_obj = 0;
    gcDacVars->saved_sweep_ephemeral_seg = 0;
    gcDacVars->saved_sweep_ephemeral_start = 0;
#endif //BACKGROUND_GC
    gcDacVars->alloc_allocated = &gc_heap::alloc_allocated;
    gcDacVars->oom_info = &gc_heap::oom_info;
    gcDacVars->finalize_queue = reinterpret_cast<dac_finalize_queue**>(&gc_heap::finalize_queue);
    gcDacVars->generation_table = reinterpret_cast<unused_generation**>(&gc_heap::generation_table);
#ifdef GC_CONFIG_DRIVEN
    gcDacVars->gc_global_mechanisms = reinterpret_cast<size_t**>(&gc_global_mechanisms);
    gcDacVars->interesting_data_per_heap = reinterpret_cast<size_t**>(&gc_heap::interesting_data_per_heap);
    gcDacVars->compact_reasons_per_heap = reinterpret_cast<size_t**>(&gc_heap::compact_reasons_per_heap);
    gcDacVars->expand_mechanisms_per_heap = reinterpret_cast<size_t**>(&gc_heap::expand_mechanisms_per_heap);
    gcDacVars->interesting_mechanism_bits_per_heap = reinterpret_cast<size_t**>(&gc_heap::interesting_mechanism_bits_per_heap);
#endif // GC_CONFIG_DRIVEN
#ifdef HEAP_ANALYZE
    gcDacVars->internal_root_array = &gc_heap::internal_root_array;
    gcDacVars->internal_root_array_index = &gc_heap::internal_root_array_index;
    gcDacVars->heap_analyze_success = &gc_heap::heap_analyze_success;
#endif // HEAP_ANALYZE
#else
    // Multiple heaps: publish the heap array plus the gc_heap field offset table.
    gcDacVars->n_heaps = &gc_heap::n_heaps;
    gcDacVars->g_heaps = reinterpret_cast<unused_gc_heap***>(&gc_heap::g_heaps);
    gcDacVars->gc_heap_field_offsets = reinterpret_cast<int**>(&gc_heap_field_offsets);
#endif // MULTIPLE_HEAPS
    gcDacVars->generation_field_offsets = reinterpret_cast<int**>(&generation_field_offsets);
    if (v2)
    {
        gcDacVars->bookkeeping_start = &gc_heap::bookkeeping_start;
    }
    if (v4)
    {
#ifdef DYNAMIC_HEAP_COUNT
        gcDacVars->dynamic_adaptation_mode = &gc_heap::dynamic_adaptation_mode;
#else
        gcDacVars->dynamic_adaptation_mode = nullptr;
#endif //DYNAMIC_HEAP_COUNT
    }
    if (v6)
    {
        // Without GC_DESCRIPTOR the gc_descriptor field is left as the caller set it.
#ifdef GC_DESCRIPTOR
#ifdef MULTIPLE_HEAPS
        gcDacVars->gc_descriptor = (void*)&GCContractDescriptorSVR;
#else // MULTIPLE_HEAPS
        gcDacVars->gc_descriptor = (void*)&GCContractDescriptorWKS;
#endif // MULTIPLE_HEAPS
#endif // GC_DESCRIPTOR
    }
}

// Public entry point: delegates to gc_heap::refresh_memory_limit and returns its
// status code unchanged.
int GCHeap::RefreshMemoryLimit()
{
    int refresh_status = gc_heap::refresh_memory_limit();
    return refresh_status;
}

// Sets heap_hard_limit to the sum of the SOH, LOH and POH hard limits.
// On non-64-bit hosts the sum is only computed when it cannot overflow: at most one of
// the three limits may exceed max_heap_hard_limit / 2, and that one must stay below
// max_heap_hard_limit. Returns false (leaving heap_hard_limit untouched) otherwise.
bool gc_heap::compute_hard_limit_from_heap_limits()
{
#ifndef HOST_64BIT
    // need to consider overflows:
    const size_t soh_limit = heap_hard_limit_oh[soh];
    const size_t loh_limit = heap_hard_limit_oh[loh];
    const size_t poh_limit = heap_hard_limit_oh[poh];

    // "small" = no more than half of the maximum; "bounded" = below the maximum.
    const bool soh_small = (soh_limit <= max_heap_hard_limit / 2);
    const bool loh_small = (loh_limit <= max_heap_hard_limit / 2);
    const bool poh_small = (poh_limit <= max_heap_hard_limit / 2);

    const bool soh_may_be_large = (soh_limit < max_heap_hard_limit) && loh_small && poh_small;
    const bool loh_may_be_large = soh_small && (loh_limit < max_heap_hard_limit) && poh_small;
    const bool poh_may_be_large = soh_small && loh_small && (poh_limit < max_heap_hard_limit);

    if (!soh_may_be_large && !loh_may_be_large && !poh_may_be_large)
    {
        return false;
    }
#endif //!HOST_64BIT

    heap_hard_limit = heap_hard_limit_oh[soh] + heap_hard_limit_oh[loh] + heap_hard_limit_oh[poh];
    return true;
}

// On 32bit the following guarantees hold for the limits:
// 1) heap-specific limits:
//   0 <= (heap_hard_limit = heap_hard_limit_oh[soh] + heap_hard_limit_oh[loh] + heap_hard_limit_oh[poh]) < 4Gb
//   a) 0 <= heap_hard_limit_oh[soh] < 2Gb, 0 <= heap_hard_limit_oh[loh] <= 1Gb, 0 <= heap_hard_limit_oh[poh] <= 1Gb
//   b) 0 <= heap_hard_limit_oh[soh] <= 1Gb, 0 <= heap_hard_limit_oh[loh] < 2Gb, 0 <= heap_hard_limit_oh[poh] <= 1Gb
//   c) 0 <= heap_hard_limit_oh[soh] <= 1Gb, 0 <= heap_hard_limit_oh[loh] <= 1Gb, 0 <= heap_hard_limit_oh[poh] < 2Gb
// 2) same limit for all heaps:
//   0 <= heap_hard_limit <= 1Gb
//
// These ranges guarantee that the calculation of soh_segment_size, loh_segment_size and poh_segment_size with
// alignment and round up won't overflow, and neither will the calculation of their sum (overflow to 0 is allowed,
// because an allocation with size 0 will fail later).
//
// Reads the hard limit configuration (absolute values first, then percentages of
// total_physical_mem) into heap_hard_limit and heap_hard_limit_oh[]. Returns false when
// the configured combination is invalid.
bool gc_heap::compute_hard_limit()
{
    heap_hard_limit_oh[soh] = 0;

    heap_hard_limit = (size_t)GCConfig::GetGCHeapHardLimit();
    heap_hard_limit_oh[soh] = (size_t)GCConfig::GetGCHeapHardLimitSOH();
    heap_hard_limit_oh[loh] = (size_t)GCConfig::GetGCHeapHardLimitLOH();
    heap_hard_limit_oh[poh] = (size_t)GCConfig::GetGCHeapHardLimitPOH();

#ifdef HOST_64BIT
    use_large_pages_p = GCConfig::GetGCLargePages();
#endif //HOST_64BIT

    const bool any_oh_limit_configured =
        heap_hard_limit_oh[soh] || heap_hard_limit_oh[loh] || heap_hard_limit_oh[poh];

    if (any_oh_limit_configured)
    {
        // Per-object-heap limits: both the SOH and the LOH limit must be given.
        if (!heap_hard_limit_oh[soh] || !heap_hard_limit_oh[loh])
        {
            return false;
        }

        if (!compute_hard_limit_from_heap_limits())
        {
            return false;
        }
    }
    else
    {
        // No absolute per-object-heap limits; try the percentage settings instead.
        uint32_t soh_percent = (uint32_t)GCConfig::GetGCHeapHardLimitSOHPercent();
        uint32_t loh_percent = (uint32_t)GCConfig::GetGCHeapHardLimitLOHPercent();
        uint32_t poh_percent = (uint32_t)GCConfig::GetGCHeapHardLimitPOHPercent();

        if (soh_percent || loh_percent || poh_percent)
        {
            // The percentages are unsigned, so "not positive" simply means zero.
            // SOH and LOH must be in (0, 100); POH may be zero but must be below 100.
            if ((soh_percent == 0) || (soh_percent >= 100))
            {
                return false;
            }
            if ((loh_percent == 0) || (loh_percent >= 100))
            {
                return false;
            }
            if (poh_percent >= 100)
            {
                return false;
            }
            // Together they must stay below 100 percent.
            if ((soh_percent + loh_percent + poh_percent) >= 100)
            {
                return false;
            }

            heap_hard_limit_oh[soh] = (size_t)(total_physical_mem * (uint64_t)soh_percent / (uint64_t)100);
            heap_hard_limit_oh[loh] = (size_t)(total_physical_mem * (uint64_t)loh_percent / (uint64_t)100);
            heap_hard_limit_oh[poh] = (size_t)(total_physical_mem * (uint64_t)poh_percent / (uint64_t)100);

            if (!compute_hard_limit_from_heap_limits())
            {
                return false;
            }
        }
#ifndef HOST_64BIT
        else
        {
            // need to consider overflows
            if (heap_hard_limit > max_heap_hard_limit / 2)
            {
                return false;
            }
        }
#endif //!HOST_64BIT
    }

    // An SOH limit without a POH limit is only accepted when large pages are in use.
    const bool soh_without_poh = heap_hard_limit_oh[soh] && (!heap_hard_limit_oh[poh]);
    if (soh_without_poh && (!use_large_pages_p))
    {
        return false;
    }

    // Still no overall limit: fall back to the overall percentage setting, which is
    // only honored when it is in (0, 100).
    if (!(heap_hard_limit))
    {
        uint32_t overall_percent = (uint32_t)GCConfig::GetGCHeapHardLimitPercent();
        if ((overall_percent > 0) && (overall_percent < 100))
        {
            heap_hard_limit = (size_t)(total_physical_mem * (uint64_t)overall_percent / (uint64_t)100);

#ifndef HOST_64BIT
            // need to consider overflows
            if (heap_hard_limit > max_heap_hard_limit / 2)
            {
                return false;
            }
#endif //!HOST_64BIT
        }
    }

    return true;
}

// Derives the memory related settings (heap_hard_limit for restricted-memory processes,
// soh_segment_size, mem_one_percent and the memory load thresholds) from
// total_physical_mem, heap_hard_limit and the configuration.
//
//   is_initialization           - true when called while initializing; some settings
//                                 are only (re)computed in that case (see below).
//   nhp                         - in/out heap count; may be adjusted by
//                                 adjust_heaps_hard_limit when a hard limit is in effect.
//   nhp_from_config             - heap count from the configuration, 0 if not set.
//   seg_size_from_config        - out: segment size from the configuration after
//                                 alignment/rounding; only written when a hard limit is
//                                 in effect.
//   new_current_total_committed - committed bytes the hard limit is validated against.
//
// Returns false when a hard limit is in effect that is smaller than
// new_current_total_committed; true otherwise.
bool gc_heap::compute_memory_settings(bool is_initialization, uint32_t& nhp, uint32_t nhp_from_config, size_t& seg_size_from_config, size_t new_current_total_committed)
{
#ifdef HOST_64BIT
    // If the hard limit is specified, the user is saying even if the process is already
    // running in a container, use this limit for the GC heap.
    if (!hard_limit_config_p)
    {
        if (is_restricted_physical_mem)
        {
            // Default limit for a memory-restricted process: 75% of physical memory,
            // but no less than 20MB.
            uint64_t physical_mem_for_gc = total_physical_mem * (uint64_t)75 / (uint64_t)100;
#ifndef USE_REGIONS
            // Establishing a heap_hard_limit when we don't already have one requires
            // us to figure out how many bytes are committed for what purposes. This is going
            // to be very tedious for segments and therefore we chose not to support this scenario.
            if (is_initialization)
#endif //USE_REGIONS
            {
                heap_hard_limit = (size_t)max ((uint64_t)(20 * 1024 * 1024), physical_mem_for_gc);
            }
        }
    }
#endif //HOST_64BIT

    // A hard limit below what is already committed cannot be honored.
    if (heap_hard_limit && (heap_hard_limit < new_current_total_committed))
    {
        return false;
    }

    // With regions the block below always runs; with segments only at initialization.
#ifdef USE_REGIONS
    {
#else
    // Changing segment size in the hard limit case for segments is not supported
    if (is_initialization)
    {
#endif //USE_REGIONS
        if (heap_hard_limit)
        {
            // The heap count is only adjusted at initialization and only when it was
            // not given by the configuration.
            if (is_initialization && (!nhp_from_config))
            {
                nhp = adjust_heaps_hard_limit (nhp);
            }

            seg_size_from_config = (size_t)GCConfig::GetSegmentSize();
            if (seg_size_from_config)
            {
                // Large pages: align via align_on_segment_hard_limit. Otherwise round to
                // a power of 2 - up on 64-bit hosts, down (and capped at
                // max_heap_hard_limit / 2) on other hosts.
                seg_size_from_config = use_large_pages_p ? align_on_segment_hard_limit (seg_size_from_config) :
#ifdef HOST_64BIT
                    round_up_power2 (seg_size_from_config);
#else //HOST_64BIT
                    round_down_power2 (seg_size_from_config);
                seg_size_from_config = min (seg_size_from_config, max_heap_hard_limit / 2);
#endif //HOST_64BIT
            }

            // On 32bit we have next guarantees:
            //   0 <= seg_size_from_config <= 1Gb (from max_heap_hard_limit/2)
            // a) heap-specific limits:
            //   0 <= (heap_hard_limit = heap_hard_limit_oh[soh] + heap_hard_limit_oh[loh] + heap_hard_limit_oh[poh]) < 4Gb (from gc_heap::compute_hard_limit_from_heap_limits)
            //   0 <= heap_hard_limit_oh[soh] <= 1Gb or < 2Gb
            //   0 <= soh_segment_size <= 1Gb or <= 2Gb (alignment and round up)
            // b) same limit for all heaps:
            //   0 <= heap_hard_limit <= 1Gb
            //   0 <= soh_segment_size <= 1Gb
            // Prefer the SOH specific limit when there is one, else the overall limit.
            size_t limit_to_check = (heap_hard_limit_oh[soh] ? heap_hard_limit_oh[soh] : heap_hard_limit);
            soh_segment_size = max (adjust_segment_size_hard_limit (limit_to_check, nhp), seg_size_from_config);
        }
        else
        {
            soh_segment_size = get_valid_segment_size();
        }
    }

    mem_one_percent = total_physical_mem / 100;
#ifndef MULTIPLE_HEAPS
    mem_one_percent /= g_num_processors;
#endif //!MULTIPLE_HEAPS

    uint32_t highmem_th_from_config = (uint32_t)GCConfig::GetGCHighMemPercent();
    if (highmem_th_from_config)
    {
        // Configured threshold: capped at 99; the "very high" threshold is 7 above it,
        // also capped at 99.
        high_memory_load_th = min (99u, highmem_th_from_config);
        v_high_memory_load_th = min (99u, (high_memory_load_th + 7));
#ifdef FEATURE_EVENT_TRACE
        high_mem_percent_from_config = highmem_th_from_config;
#endif //FEATURE_EVENT_TRACE
    }
    else
    {
        // We should only use this if we are in the "many process" mode which really is only applicable
        // to very powerful machines - before that's implemented, temporarily I am only enabling this for 80GB+ memory.
        // For now I am using an estimate to calculate these numbers but this should really be obtained
        // programmatically going forward.
        // I am assuming 47 processes using WKS GC and 3 using SVR GC.
        // I am assuming 3 in part due to the "very high memory load" is 97%.
        int available_mem_th = 10;
        if (total_physical_mem >= ((uint64_t)80 * 1024 * 1024 * 1024))
        {
            int adjusted_available_mem_th = 3 + (int)((float)47 / (float)g_num_processors);
            available_mem_th = min (available_mem_th, adjusted_available_mem_th);
        }

        high_memory_load_th = 100 - available_mem_th;
        v_high_memory_load_th = 97;
    }

    // The "medium high" threshold is 5 above the high threshold but never above the
    // "very high" one; the "almost high" threshold is 5 below the high threshold.
    m_high_memory_load_th = min ((high_memory_load_th + 5), v_high_memory_load_th);
    almost_high_memory_load_th = (high_memory_load_th > 5) ? (high_memory_load_th - 5) : 1; // avoid underflow of high_memory_load_th - 5

    // Write the effective high memory threshold back to the configuration.
    GCConfig::SetGCHighMemPercent (high_memory_load_th);

    return true;
}

size_t gc_heap::compute_committed_bytes_per_heap(int oh, size_t& committed_bookkeeping)
{
#ifdef USE_REGIONS
    int start_generation = (oh == 0) ? 0 : oh + max_generation;
#else
    int start_generation = oh + max_generation;
#endif
    int end_generation = oh + max_generation;

    size_t total_committed_per_heap = 0;
    for (int gen = start_generation; gen <= end_generation; gen++)
    {
        accumulate_committed_bytes (generation_start_segment (generation_of (gen)), total_committed_per_heap, committed_bookkeeping);
    }

#ifdef BACKGROUND_GC
    if (oh == soh)
    {
        accumulate_committed_bytes (freeable_soh_segment, total_committed_per_heap, committed_bookkeeping);
    }
    else
#endif //BACKGROUND_GC
    {
        accumulate_committed_bytes (freeable_uoh_segment, total_committed_per_heap, committed_bookkeeping, (gc_oh_num)oh);
    }

    return total_committed_per_heap;
}

// Recomputes the committed byte totals by walking the heap data structures.
//
//   total_committed                        - in/out: every bucket computed here is
//                                            added to it.
//   committed_decommit                     - out (regions only): bytes in
//                                            global_regions_to_decommit.
//   committed_free                         - out: bytes in the per-heap free region
//                                            lists (0 without regions).
//   committed_bookkeeping                  - in/out: bookkeeping bytes; added to by
//                                            accumulate_committed_bytes and by the
//                                            card table accounting below.
//   new_current_total_committed            - out: final value of total_committed.
//   new_current_total_committed_bookkeeping- out: final value of committed_bookkeeping.
//   new_committed_by_oh                    - out: per-bucket totals, indexed by object
//                                            heap and by the recorded_committed_*
//                                            bucket indices.
void gc_heap::compute_committed_bytes(size_t& total_committed, size_t& committed_decommit, size_t& committed_free,
                                      size_t& committed_bookkeeping, size_t& new_current_total_committed, size_t& new_current_total_committed_bookkeeping,
                                      size_t* new_committed_by_oh)
{
    // Accounting for the bytes committed for the regions
    for (int oh = soh; oh < total_oh_count; oh++)
    {
        size_t total_committed_per_oh = 0;
#ifdef MULTIPLE_HEAPS
        for (int h = 0; h < n_heaps; h++)
        {
            gc_heap* heap = g_heaps[h];
#else
        {
            gc_heap* heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS
            size_t total_committed_per_heap = heap->compute_committed_bytes_per_heap (oh, committed_bookkeeping);
#if defined(MULTIPLE_HEAPS) && defined(_DEBUG)
            heap->committed_by_oh_per_heap_refresh[oh] = total_committed_per_heap;
#endif // MULTIPLE_HEAPS && _DEBUG
            total_committed_per_oh += total_committed_per_heap;
        }
        new_committed_by_oh[oh] = total_committed_per_oh;
        total_committed += total_committed_per_oh;
    }

#ifdef USE_REGIONS
    // Accounting for the bytes committed for the free lists
    size_t committed_old_free = 0;
    committed_free = 0;
#ifdef MULTIPLE_HEAPS
    for (int h = 0; h < n_heaps; h++)
    {
        gc_heap* heap = g_heaps[h];
#else
    {
        gc_heap* heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        for (int i = 0; i < count_free_region_kinds; i++)
        {
            heap_segment* seg = heap->free_regions[i].get_first_free_region();
            heap->accumulate_committed_bytes (seg, committed_free, committed_bookkeeping);
        }
    }
    committed_old_free += committed_free;
    committed_decommit = 0;
    for (int i = 0; i < count_free_region_kinds; i++)
    {
        heap_segment* seg = global_regions_to_decommit[i].get_first_free_region();
        // The global lists do not belong to a particular heap; the call is made through
        // the same heap pointer the rest of this function uses for its configuration
        // (previously the single-heap path spelled this as a literal nullptr).
#ifdef MULTIPLE_HEAPS
        gc_heap* heap = g_heaps[0];
#else
        gc_heap* heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        heap->accumulate_committed_bytes (seg, committed_decommit, committed_bookkeeping);
    }
    committed_old_free += committed_decommit;
    {
        // Huge free regions are counted directly into the combined free bucket.
        heap_segment* seg = global_free_huge_regions.get_first_free_region();
#ifdef MULTIPLE_HEAPS
        gc_heap* heap = g_heaps[0];
#else
        gc_heap* heap = pGenGCHeap;
#endif //MULTIPLE_HEAPS
        heap->accumulate_committed_bytes (seg, committed_old_free, committed_bookkeeping);
    }

    new_committed_by_oh[recorded_committed_free_bucket] = committed_old_free;
    total_committed += committed_old_free;

    // Accounting for the bytes committed for the book keeping elements
    uint8_t* commit_begins[total_bookkeeping_elements];
    size_t commit_sizes[total_bookkeeping_elements];
    size_t new_sizes[total_bookkeeping_elements];
    bool get_card_table_commit_layout_result = get_card_table_commit_layout(g_gc_lowest_address, bookkeeping_covered_committed, commit_begins, commit_sizes, new_sizes);
    assert (get_card_table_commit_layout_result);

    for (int i = card_table_element; i <= seg_mapping_table_element; i++)
    {
        // In case background GC is disabled - the software write watch table is still there
        // but with size 0
        assert (commit_sizes[i] >= 0);
        committed_bookkeeping += commit_sizes[i];
    }

    new_current_total_committed_bookkeeping = committed_bookkeeping;
    new_committed_by_oh[recorded_committed_bookkeeping_bucket] = committed_bookkeeping;
#else
    new_committed_by_oh[recorded_committed_ignored_bucket] = committed_free = 0;

    // Walk the chain of card tables, adding each one's bookkeeping size.
    uint32_t* ct = &g_gc_card_table[card_word (gcard_of (g_gc_lowest_address))];
    while (ct)
    {
        uint8_t* lowest = card_table_lowest_address (ct);
        uint8_t* highest = card_table_highest_address (ct);
        get_card_table_element_layout(lowest, highest, card_table_element_layout);
        size_t result = card_table_element_layout[seg_mapping_table_element + 1];
        committed_bookkeeping += result;
        ct = card_table_next (ct);
    }
    // If we don't put the mark array committed in the ignored bucket, calculate the committed memory for mark array here
    new_committed_by_oh[recorded_committed_bookkeeping_bucket] = new_current_total_committed_bookkeeping = committed_bookkeeping;
#endif //USE_REGIONS
    total_committed += committed_bookkeeping;
    new_current_total_committed = total_committed;
}

// Re-queries the physical memory limit from the OS and recomputes the GC memory
// settings that depend on it. The work is done with the EE suspended.
//
// Returns a refresh_memory_limit_status value cast to int:
//   refresh_success            - the settings were recomputed, or GCTotalPhysicalMemory
//                                is configured (non-zero), in which case the refresh
//                                is skipped entirely.
//   refresh_hard_limit_invalid - (USE_REGIONS only) compute_hard_limit failed.
//   refresh_hard_limit_too_low - compute_memory_settings failed.
//
// On either failure the limit-related fields snapshotted below are restored to the
// values they had on entry.
int gc_heap::refresh_memory_limit()
{
    refresh_memory_limit_status status = refresh_success;

    // When GCTotalPhysicalMemory is configured the refresh is skipped and success
    // is reported without suspending the EE.
    if (GCConfig::GetGCTotalPhysicalMemory() != 0)
    {
        return (int)status;
    }

    GCToEEInterface::SuspendEE(SUSPEND_FOR_GC);

    uint32_t nhp_from_config = static_cast<uint32_t>(GCConfig::GetHeapCount());
#ifdef MULTIPLE_HEAPS
    uint32_t nhp = n_heaps;
#else
    uint32_t nhp = 1;
#endif //MULTIPLE_HEAPS
    // Handed to compute_memory_settings uninitialized and never read here afterwards -
    // presumably an out parameter; confirm against its declaration.
    size_t seg_size_from_config;

    // Snapshot everything that may be overwritten below so it can be rolled back
    // if the new limits turn out to be unusable.
    bool old_is_restricted_physical_mem = is_restricted_physical_mem;
    uint64_t old_total_physical_mem = total_physical_mem;
    size_t old_heap_hard_limit = heap_hard_limit;
    size_t old_heap_hard_limit_soh = heap_hard_limit_oh[soh];
    size_t old_heap_hard_limit_loh = heap_hard_limit_oh[loh];
    size_t old_heap_hard_limit_poh = heap_hard_limit_oh[poh];
    bool old_hard_limit_config_p = hard_limit_config_p;

    total_physical_mem = GCToOSInterface::GetPhysicalMemoryLimit (&is_restricted_physical_mem);

    bool succeed = true;

#ifdef USE_REGIONS
    // The hard limit is only re-read and recomputed when regions are in use.
    GCConfig::RefreshHeapHardLimitSettings();

    if (!compute_hard_limit())
    {
        succeed = false;
        status = refresh_hard_limit_invalid;
    }
    hard_limit_config_p = heap_hard_limit != 0;
#endif //USE_REGIONS

    // Skipped if the hard limit computation above already failed, so that the
    // more specific status is preserved.
    if (succeed && !compute_memory_settings(false, nhp, nhp_from_config, seg_size_from_config, current_total_committed))
    {
        succeed = false;
        status = refresh_hard_limit_too_low;
    }

    if (!succeed)
    {
        // Roll back to the values captured on entry.
        is_restricted_physical_mem = old_is_restricted_physical_mem;
        total_physical_mem = old_total_physical_mem;
        heap_hard_limit = old_heap_hard_limit;
        heap_hard_limit_oh[soh] = old_heap_hard_limit_soh;
        heap_hard_limit_oh[loh] = old_heap_hard_limit_loh;
        heap_hard_limit_oh[poh] = old_heap_hard_limit_poh;
        hard_limit_config_p = old_hard_limit_config_p;
    }
#ifdef COMMITTED_BYTES_SHADOW
    else
    {
        // On success, run the committed bytes verification while holding decommit_lock.
        decommit_lock.Enter();
        verify_committed_bytes ();
        decommit_lock.Leave();
    }
#endif //COMMITTED_BYTES_SHADOW

    GCToEEInterface::RestartEE(TRUE);

    return (int)status;
}

// Walks the segment list that starts at heap_segment_rw (seg) and adds the committed
// size of every matching segment to committed_bytes.
//
//   seg                        - head of the list to walk.
//   committed_bytes            - incremented by (heap_segment_committed - start) for
//                                each matching segment; start is get_region_start with
//                                USE_REGIONS and the segment address itself otherwise.
//   mark_array_committed_bytes - with USE_REGIONS, incremented by get_mark_array_size
//                                for each matching segment; untouched otherwise.
//   oh                         - only segments whose heap_segment_oh equals this value
//                                are counted; unknown counts every segment.
void gc_heap::accumulate_committed_bytes(heap_segment* seg, size_t& committed_bytes, size_t& mark_array_committed_bytes, gc_oh_num oh)
{
    for (heap_segment* current = heap_segment_rw (seg);
         current != nullptr;
         current = heap_segment_next_rw (current))
    {
        // Filter out segments that belong to a different object heap.
        if ((oh != unknown) && (heap_segment_oh (current) != oh))
        {
            continue;
        }

#ifdef USE_REGIONS
        mark_array_committed_bytes += get_mark_array_size (current);
        uint8_t* const base = get_region_start (current);
#else
        uint8_t* const base = (uint8_t*)current;
#endif
        committed_bytes += (heap_segment_committed (current) - base);
    }
}

#ifdef USE_REGIONS

// Returns the number of bytes of the mark array that cover seg, rounded out to page
// boundaries, when the segment has heap_segment_flags_ma_committed set.
// Returns 0 when the flag is clear or when BACKGROUND_GC is not defined.
size_t gc_heap::get_mark_array_size (heap_segment* seg)
{
#ifdef BACKGROUND_GC
    if ((seg->flags & heap_segment_flags_ma_committed) != 0)
    {
        uint32_t* const ma = mark_array;

        // Address range of the segment: [start address, reserved end).
        uint8_t* const range_low = get_start_address (seg);
        uint8_t* const range_high = heap_segment_reserved (seg);

        // Mark array word indices for that range; the upper bound is first aligned
        // up to a mark word boundary.
        const size_t first_word = mark_word_of (range_low);
        const size_t limit_word = mark_word_of (align_on_mark_word (range_high));

        // Widen the mark array span to whole pages: down at the start, up at the end.
        uint8_t* const page_low = align_lower_page ((uint8_t*)(ma + first_word));
        uint8_t* const page_high = align_on_page ((uint8_t*)(ma + limit_word));

        return (size_t)(page_high - page_low);
    }
#endif //BACKGROUND_GC
    return 0;
}
#endif //USE_REGIONS

}