// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // // #Overview // // GC automatically manages memory allocated by managed code. // The design doc for GC can be found at Documentation/botr/garbage-collection.md // // This file includes both the code for GC and the allocator. The most common // case for a GC to be triggered is from the allocator code. See // code:#try_allocate_more_space where it calls GarbageCollectGeneration. // // Entry points for the allocator are GCHeap::Alloc* which are called by the // allocation helpers in gcscan.cpp // #include "gcpriv.h" #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) #define USE_VXSORT #else #define USE_INTROSORT #endif // We just needed a simple random number generator for testing. class gc_rand { public: static uint64_t x; static uint64_t get_rand() { x = (314159269*x+278281) & 0x7FFFFFFF; return x; } // obtain random number in the range 0 .. r-1 static uint64_t get_rand(uint64_t r) { // require r >= 0 uint64_t x = (uint64_t)((get_rand() * r) >> 31); return x; } }; uint64_t gc_rand::x = 0; #if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) BOOL bgc_heap_walk_for_etw_p = FALSE; #endif //BACKGROUND_GC && FEATURE_EVENT_TRACE #define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0)) #define commit_min_th (16*OS_PAGE_SIZE) #define MIN_SOH_CROSS_GEN_REFS (400) #define MIN_LOH_CROSS_GEN_REFS (800) #ifdef SERVER_GC #define partial_size_th 100 #define num_partial_refs 64 #else //SERVER_GC #define partial_size_th 100 #define num_partial_refs 32 #endif //SERVER_GC #ifdef USE_REGIONS // If the pinned survived is 1+% of the region size, we don't demote. #define demotion_pinned_ratio_th (1) // If the survived / region_size is 90+%, we don't compact this region. #define sip_surv_ratio_th (90) // If the survived due to cards from old generations / region_size is 90+%, // we don't compact this region, also we immediately promote it to gen2. #define sip_old_card_surv_ratio_th (90) #else #define demotion_plug_len_th (6*1024*1024) #endif //USE_REGIONS #ifdef HOST_64BIT #define MARK_STACK_INITIAL_LENGTH 1024 #else #define MARK_STACK_INITIAL_LENGTH 128 #endif // HOST_64BIT #define LOH_PIN_QUEUE_LENGTH 100 #define LOH_PIN_DECAY 10 uint32_t yp_spin_count_unit = 0; size_t loh_size_threshold = LARGE_OBJECT_SIZE; #ifdef GC_CONFIG_DRIVEN int compact_ratio = 0; #endif //GC_CONFIG_DRIVEN // See comments in reset_memory. 
BOOL reset_mm_p = TRUE; #ifdef FEATURE_SVR_GC bool g_built_with_svr_gc = true; #else bool g_built_with_svr_gc = false; #endif // FEATURE_SVR_GC #if defined(BUILDENV_DEBUG) uint8_t g_build_variant = 0; #elif defined(BUILDENV_CHECKED) uint8_t g_build_variant = 1; #else uint8_t g_build_variant = 2; #endif //BUILDENV_DEBUG VOLATILE(int32_t) g_no_gc_lock = -1; #if defined (TRACE_GC) && !defined (DACCESS_COMPILE) const char * const allocation_state_str[] = { "start", "can_allocate", "cant_allocate", "retry_allocate", "try_fit", "try_fit_new_seg", "try_fit_after_cg", "try_fit_after_bgc", "try_free_full_seg_in_bgc", "try_free_after_bgc", "try_seg_end", "acquire_seg", "acquire_seg_after_cg", "acquire_seg_after_bgc", "check_and_wait_for_bgc", "trigger_full_compact_gc", "trigger_ephemeral_gc", "trigger_2nd_ephemeral_gc", "check_retry_seg" }; const char * const msl_take_state_str[] = { "get_large_seg", "bgc_loh_sweep", "wait_bgc", "block_gc", "clr_mem", "clr_large_mem", "t_eph_gc", "t_full_gc", "alloc_small", "alloc_large", "alloc_small_cant", "alloc_large_cant", "try_alloc", "try_budget" }; #endif //TRACE_GC && !DACCESS_COMPILE // Keep this in sync with the definition of gc_reason #if (defined(DT_LOG) || defined(TRACE_GC)) && !defined (DACCESS_COMPILE) static const char* const str_gc_reasons[] = { "alloc_soh", "induced", "lowmem", "empty", "alloc_loh", "oos_soh", "oos_loh", "induced_noforce", "gcstress", "induced_lowmem", "induced_compacting", "lowmemory_host", "pm_full_gc", "lowmemory_host_blocking" }; static const char* const str_gc_pause_modes[] = { "batch", "interactive", "low_latency", "sustained_low_latency", "no_gc" }; static const char* const str_root_kinds[] = { "Stack", "FinalizeQueue", "Handles", "OlderGen", "SizedRef", "Overflow", "DependentHandles", "NewFQ", "Steal", "BGC" }; #endif //DT_LOG || TRACE_GC inline BOOL is_induced (gc_reason reason) { return ((reason == reason_induced) || (reason == reason_induced_noforce) || (reason == reason_lowmemory) || (reason == reason_lowmemory_blocking) || (reason == reason_induced_compacting) || (reason == reason_lowmemory_host) || (reason == reason_lowmemory_host_blocking)); } inline BOOL is_induced_blocking (gc_reason reason) { return ((reason == reason_induced) || (reason == reason_lowmemory_blocking) || (reason == reason_induced_compacting) || (reason == reason_lowmemory_host_blocking)); } gc_oh_num gen_to_oh(int gen) { switch (gen) { case soh_gen0: return gc_oh_num::soh; case soh_gen1: return gc_oh_num::soh; case soh_gen2: return gc_oh_num::soh; case loh_generation: return gc_oh_num::loh; case poh_generation: return gc_oh_num::poh; default: return gc_oh_num::none; } } #ifndef DACCESS_COMPILE uint64_t qpf; double qpf_ms; double qpf_us; uint64_t GetHighPrecisionTimeStamp() { int64_t ts = GCToOSInterface::QueryPerformanceCounter(); return (uint64_t)((double)ts * qpf_us); } uint64_t RawGetHighPrecisionTimeStamp() { return (uint64_t)GCToOSInterface::QueryPerformanceCounter(); } #endif #ifdef BGC_SERVO_TUNING bool gc_heap::bgc_tuning::enable_fl_tuning = false; uint32_t gc_heap::bgc_tuning::memory_load_goal = 0; uint32_t gc_heap::bgc_tuning::memory_load_goal_slack = 0; uint64_t gc_heap::bgc_tuning::available_memory_goal = 0; bool gc_heap::bgc_tuning::panic_activated_p = false; double gc_heap::bgc_tuning::accu_error_panic = 0.0; double gc_heap::bgc_tuning::above_goal_kp = 0.0; double gc_heap::bgc_tuning::above_goal_ki = 0.0; bool gc_heap::bgc_tuning::enable_kd = false; bool gc_heap::bgc_tuning::enable_ki = false; bool gc_heap::bgc_tuning::enable_smooth 
= false;
bool gc_heap::bgc_tuning::enable_tbh = false;
bool gc_heap::bgc_tuning::enable_ff = false;
bool gc_heap::bgc_tuning::enable_gradual_d = false;
double gc_heap::bgc_tuning::above_goal_kd = 0.0;
double gc_heap::bgc_tuning::above_goal_ff = 0.0;
double gc_heap::bgc_tuning::num_gen1s_smooth_factor = 0.0;
double gc_heap::bgc_tuning::ml_kp = 0.0;
double gc_heap::bgc_tuning::ml_ki = 0.0;
double gc_heap::bgc_tuning::accu_error = 0.0;
bool gc_heap::bgc_tuning::fl_tuning_triggered = false;
size_t gc_heap::bgc_tuning::num_bgcs_since_tuning_trigger = 0;
bool gc_heap::bgc_tuning::next_bgc_p = false;
size_t gc_heap::bgc_tuning::gen1_index_last_bgc_end;
size_t gc_heap::bgc_tuning::gen1_index_last_bgc_start;
size_t gc_heap::bgc_tuning::gen1_index_last_bgc_sweep;
size_t gc_heap::bgc_tuning::actual_num_gen1s_to_trigger;
gc_heap::bgc_tuning::tuning_calculation gc_heap::bgc_tuning::gen_calc[2];
gc_heap::bgc_tuning::tuning_stats gc_heap::bgc_tuning::gen_stats[2];
gc_heap::bgc_tuning::bgc_size_data gc_heap::bgc_tuning::current_bgc_end_data[2];
size_t gc_heap::bgc_tuning::last_stepping_bgc_count = 0;
uint32_t gc_heap::bgc_tuning::last_stepping_mem_load = 0;
uint32_t gc_heap::bgc_tuning::stepping_interval = 0;
bool gc_heap::bgc_tuning::use_stepping_trigger_p = true;
double gc_heap::bgc_tuning::gen2_ratio_correction = 0.0;
double gc_heap::bgc_tuning::ratio_correction_step = 0.0;

int gc_heap::saved_bgc_tuning_reason = -1;
#endif //BGC_SERVO_TUNING

inline
size_t round_up_power2 (size_t size)
{
    // Get the 0-based index of the most-significant bit in size-1.
    // If the call failed (because size-1 is zero), size must be 1,
    // so return 1 (because 1 rounds up to itself).
    DWORD highest_set_bit_index;
    if (0 ==
#ifdef HOST_64BIT
        BitScanReverse64(
#else
        BitScanReverse(
#endif
            &highest_set_bit_index, size - 1))
    {
        return 1;
    }

    // The size == 0 case (which would have overflowed to SIZE_MAX when decremented)
    // is handled below by relying on the fact that highest_set_bit_index is the maximum value
    // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that
    // number of bits shifts in zeros from the right, resulting in an output of zero.
    return static_cast<size_t>(2) << highest_set_bit_index;
}

inline
size_t round_down_power2 (size_t size)
{
    // Get the 0-based index of the most-significant bit in size.
    // If the call failed, size must be zero so return zero.
    DWORD highest_set_bit_index;
    if (0 ==
#ifdef HOST_64BIT
        BitScanReverse64(
#else
        BitScanReverse(
#endif
            &highest_set_bit_index, size))
    {
        return 0;
    }

    // Left-shift 1 by highest_set_bit_index to get back a value containing only
    // the most-significant set bit of size, i.e. size rounded down
    // to the next power-of-two value.
    return static_cast<size_t>(1) << highest_set_bit_index;
}

// Get the 0-based index of the most-significant bit in the value.
// Returns -1 if the input value is zero (i.e. has no set bits).
inline
int index_of_highest_set_bit (size_t value)
{
    // Get the 0-based index of the most-significant bit in the value.
    // If the call failed (because value is zero), return -1.
    DWORD highest_set_bit_index;
    return (0 ==
#ifdef HOST_64BIT
        BitScanReverse64(
#else
        BitScanReverse(
#endif
            &highest_set_bit_index, value)) ? -1 : static_cast<int>(highest_set_bit_index);
}

inline
int relative_index_power2_plug (size_t power2)
{
    int index = index_of_highest_set_bit (power2);
    assert (index <= MAX_INDEX_POWER2);

    return ((index < MIN_INDEX_POWER2) ?
0 : (index - MIN_INDEX_POWER2)); } inline int relative_index_power2_free_space (size_t power2) { int index = index_of_highest_set_bit (power2); assert (index <= MAX_INDEX_POWER2); return ((index < MIN_INDEX_POWER2) ? -1 : (index - MIN_INDEX_POWER2)); } #ifdef BACKGROUND_GC uint32_t bgc_alloc_spin_count = 140; uint32_t bgc_alloc_spin_count_loh = 16; uint32_t bgc_alloc_spin = 2; inline void c_write (uint32_t& place, uint32_t value) { Interlocked::Exchange (&place, value); } #ifndef DACCESS_COMPILE // If every heap's gen2 or gen3 size is less than this threshold we will do a blocking GC. const size_t bgc_min_per_heap = 4*1024*1024; int gc_heap::gchist_index = 0; gc_mechanisms_store gc_heap::gchist[max_history_count]; #ifndef MULTIPLE_HEAPS VOLATILE(bgc_state) gc_heap::current_bgc_state = bgc_not_in_process; int gc_heap::gchist_index_per_heap = 0; gc_heap::gc_history gc_heap::gchist_per_heap[max_history_count]; #endif //MULTIPLE_HEAPS void gc_heap::add_to_history_per_heap() { #if defined(GC_HISTORY) && defined(BACKGROUND_GC) gc_history* current_hist = &gchist_per_heap[gchist_index_per_heap]; current_hist->gc_index = settings.gc_index; current_hist->current_bgc_state = current_bgc_state; size_t elapsed = dd_gc_elapsed_time (dynamic_data_of (0)); current_hist->gc_time_ms = (uint32_t)(elapsed / 1000); current_hist->gc_efficiency = (elapsed ? (total_promoted_bytes / elapsed) : total_promoted_bytes); #ifndef USE_REGIONS current_hist->eph_low = generation_allocation_start (generation_of (max_generation - 1)); current_hist->gen0_start = generation_allocation_start (generation_of (0)); current_hist->eph_high = heap_segment_allocated (ephemeral_heap_segment); #endif //!USE_REGIONS #ifdef BACKGROUND_GC current_hist->bgc_lowest = background_saved_lowest_address; current_hist->bgc_highest = background_saved_highest_address; #endif //BACKGROUND_GC current_hist->fgc_lowest = lowest_address; current_hist->fgc_highest = highest_address; current_hist->g_lowest = g_gc_lowest_address; current_hist->g_highest = g_gc_highest_address; gchist_index_per_heap++; if (gchist_index_per_heap == max_history_count) { gchist_index_per_heap = 0; } #endif //GC_HISTORY && BACKGROUND_GC } void gc_heap::add_to_history() { #if defined(GC_HISTORY) && defined(BACKGROUND_GC) gc_mechanisms_store* current_settings = &gchist[gchist_index]; current_settings->store (&settings); gchist_index++; if (gchist_index == max_history_count) { gchist_index = 0; } #endif //GC_HISTORY && BACKGROUND_GC } #endif //DACCESS_COMPILE #endif //BACKGROUND_GC #if defined(TRACE_GC) && !defined(DACCESS_COMPILE) BOOL gc_log_on = TRUE; FILE* gc_log = NULL; size_t gc_log_file_size = 0; size_t gc_buffer_index = 0; size_t max_gc_buffers = 0; static CLRCriticalSection gc_log_lock; // we keep this much in a buffer and only flush when the buffer is full #define gc_log_buffer_size (1024*1024) uint8_t* gc_log_buffer = 0; size_t gc_log_buffer_offset = 0; void log_va_msg(const char *fmt, va_list args) { gc_log_lock.Enter(); const int BUFFERSIZE = 4096; static char rgchBuffer[BUFFERSIZE]; char * pBuffer = &rgchBuffer[0]; pBuffer[0] = '\n'; int buffer_start = 1; int pid_len = sprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, "[%5d]", (uint32_t)GCToOSInterface::GetCurrentThreadIdForLogging()); buffer_start += pid_len; memset(&pBuffer[buffer_start], '-', BUFFERSIZE - buffer_start); int msg_len = _vsnprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, _TRUNCATE, fmt, args); if (msg_len == -1) { msg_len = BUFFERSIZE - buffer_start; } msg_len += buffer_start; 
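    // The message just formatted into pBuffer is appended to gc_log_buffer below. If it
    // would not fit (12 bytes are reserved for a trailing newline plus an 8-character
    // buffer-index stamp), the full buffer is stamped with gc_buffer_index, written to
    // gc_log and reset; once more than max_gc_buffers buffers have been written, the
    // file position is rewound to the start, so the on-disk log behaves as a fixed-size ring.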
if ((gc_log_buffer_offset + msg_len) > (gc_log_buffer_size - 12)) { char index_str[8]; memset (index_str, '-', 8); sprintf_s (index_str, _countof(index_str), "%d", (int)gc_buffer_index); gc_log_buffer[gc_log_buffer_offset] = '\n'; memcpy (gc_log_buffer + (gc_log_buffer_offset + 1), index_str, 8); gc_buffer_index++; if (gc_buffer_index > max_gc_buffers) { fseek (gc_log, 0, SEEK_SET); gc_buffer_index = 0; } fwrite(gc_log_buffer, gc_log_buffer_size, 1, gc_log); fflush(gc_log); memset (gc_log_buffer, '*', gc_log_buffer_size); gc_log_buffer_offset = 0; } memcpy (gc_log_buffer + gc_log_buffer_offset, pBuffer, msg_len); gc_log_buffer_offset += msg_len; gc_log_lock.Leave(); } void GCLog (const char *fmt, ... ) { if (gc_log_on && (gc_log != NULL)) { va_list args; va_start(args, fmt); log_va_msg (fmt, args); va_end(args); } } #endif // TRACE_GC && !DACCESS_COMPILE #if defined(GC_CONFIG_DRIVEN) && !defined(DACCESS_COMPILE) BOOL gc_config_log_on = FALSE; FILE* gc_config_log = NULL; // we keep this much in a buffer and only flush when the buffer is full #define gc_config_log_buffer_size (1*1024) // TEMP uint8_t* gc_config_log_buffer = 0; size_t gc_config_log_buffer_offset = 0; // For config since we log so little we keep the whole history. Also it's only // ever logged by one thread so no need to synchronize. void log_va_msg_config(const char *fmt, va_list args) { const int BUFFERSIZE = 256; static char rgchBuffer[BUFFERSIZE]; char * pBuffer = &rgchBuffer[0]; pBuffer[0] = '\n'; int buffer_start = 1; int msg_len = _vsnprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, _TRUNCATE, fmt, args ); assert (msg_len != -1); msg_len += buffer_start; if ((gc_config_log_buffer_offset + msg_len) > gc_config_log_buffer_size) { fwrite(gc_config_log_buffer, gc_config_log_buffer_offset, 1, gc_config_log); fflush(gc_config_log); gc_config_log_buffer_offset = 0; } memcpy (gc_config_log_buffer + gc_config_log_buffer_offset, pBuffer, msg_len); gc_config_log_buffer_offset += msg_len; } void GCLogConfig (const char *fmt, ... ) { if (gc_config_log_on && (gc_config_log != NULL)) { va_list args; va_start( args, fmt ); log_va_msg_config (fmt, args); } } #endif // GC_CONFIG_DRIVEN && !DACCESS_COMPILE void GCHeap::Shutdown() { #if defined(TRACE_GC) && !defined(DACCESS_COMPILE) && !defined(BUILD_AS_STANDALONE) if (gc_log_on && (gc_log != NULL)) { fwrite(gc_log_buffer, gc_log_buffer_offset, 1, gc_log); fflush(gc_log); fclose(gc_log); gc_log_buffer_offset = 0; } #endif //TRACE_GC && !DACCESS_COMPILE && !BUILD_AS_STANDALONE } #ifdef SYNCHRONIZATION_STATS // Number of GCs have we done since we last logged. static unsigned int gc_count_during_log; // In ms. This is how often we print out stats. static const unsigned int log_interval = 5000; // Time (in ms) when we start a new log interval. static unsigned int log_start_tick; static unsigned int gc_lock_contended; static int64_t log_start_hires; // Cycles accumulated in SuspendEE during log_interval. static uint64_t suspend_ee_during_log; // Cycles accumulated in RestartEE during log_interval. 
static uint64_t restart_ee_during_log; static uint64_t gc_during_log; #endif //SYNCHRONIZATION_STATS void init_sync_log_stats() { #ifdef SYNCHRONIZATION_STATS if (gc_count_during_log == 0) { gc_heap::init_sync_stats(); suspend_ee_during_log = 0; restart_ee_during_log = 0; gc_during_log = 0; gc_lock_contended = 0; log_start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); log_start_hires = GCToOSInterface::QueryPerformanceCounter(); } gc_count_during_log++; #endif //SYNCHRONIZATION_STATS } void process_sync_log_stats() { #ifdef SYNCHRONIZATION_STATS unsigned int log_elapsed = GCToOSInterface::GetLowPrecisionTimeStamp() - log_start_tick; if (log_elapsed > log_interval) { uint64_t total = GCToOSInterface::QueryPerformanceCounter() - log_start_hires; // Print out the cycles we spent on average in each suspend and restart. printf("\n_________________________________________________________________________________\n" "Past %d(s): #%3d GCs; Total gc_lock contended: %8u; GC: %12u\n" "SuspendEE: %8u; RestartEE: %8u GC %.3f%%\n", log_interval / 1000, gc_count_during_log, gc_lock_contended, (unsigned int)(gc_during_log / gc_count_during_log), (unsigned int)(suspend_ee_during_log / gc_count_during_log), (unsigned int)(restart_ee_during_log / gc_count_during_log), (double)(100.0f * gc_during_log / total)); gc_heap::print_sync_stats(gc_count_during_log); gc_count_during_log = 0; } #endif //SYNCHRONIZATION_STATS } #ifdef MULTIPLE_HEAPS #ifndef DACCESS_COMPILE uint32_t g_num_active_processors = 0; // Note that when a join is no longer used we still keep the values here because // tooling already recognized them as having the meaning they were assigned originally. // It doesn't break tooling if we stop using them but does if we assign a new meaning // to them. enum gc_join_stage { gc_join_init_cpu_mapping = 0, gc_join_done = 1, gc_join_generation_determined = 2, gc_join_begin_mark_phase = 3, gc_join_scan_dependent_handles = 4, gc_join_rescan_dependent_handles = 5, gc_join_scan_sizedref_done = 6, gc_join_null_dead_short_weak = 7, gc_join_scan_finalization = 8, gc_join_null_dead_long_weak = 9, gc_join_null_dead_syncblk = 10, gc_join_decide_on_compaction = 11, gc_join_rearrange_segs_compaction = 12, gc_join_adjust_handle_age_compact = 13, gc_join_adjust_handle_age_sweep = 14, gc_join_begin_relocate_phase = 15, gc_join_relocate_phase_done = 16, gc_join_verify_objects_done = 17, gc_join_start_bgc = 18, gc_join_restart_ee = 19, gc_join_concurrent_overflow = 20, gc_join_suspend_ee = 21, gc_join_bgc_after_ephemeral = 22, gc_join_allow_fgc = 23, gc_join_bgc_sweep = 24, gc_join_suspend_ee_verify = 25, gc_join_restart_ee_verify = 26, gc_join_set_state_free = 27, gc_r_join_update_card_bundle = 28, gc_join_after_absorb = 29, gc_join_verify_copy_table = 30, gc_join_after_reset = 31, gc_join_after_ephemeral_sweep = 32, gc_join_after_profiler_heap_walk = 33, gc_join_minimal_gc = 34, gc_join_after_commit_soh_no_gc = 35, gc_join_expand_loh_no_gc = 36, gc_join_final_no_gc = 37, // No longer in use but do not remove, see comments for this enum. 
gc_join_disable_software_write_watch = 38,
    gc_join_max = 39
};

enum gc_join_flavor
{
    join_flavor_server_gc = 0,
    join_flavor_bgc = 1
};

#define first_thread_arrived 2
#pragma warning(push)
#pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads
struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure
{
    // Shared non volatile keep on separate line to prevent eviction
    int n_threads;

    // Keep polling/wait structures on separate line write once per join
    DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE)
    GCEvent joined_event[3]; // the last event in the array is only used for first_thread_arrived.
    Volatile<int32_t> lock_color;
    VOLATILE(BOOL) wait_done;
    VOLATILE(BOOL) joined_p;

    // Keep volatile counted locks on separate cache line write many per join
    DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE)
    VOLATILE(int) join_lock;
    VOLATILE(int) r_join_lock;
};
#pragma warning(pop)

enum join_type
{
    type_last_join = 0,
    type_join = 1,
    type_restart = 2,
    type_first_r_join = 3,
    type_r_join = 4
};

enum join_time
{
    time_start = 0,
    time_end = 1
};

enum join_heap_index
{
    join_heap_restart = 100,
    join_heap_r_restart = 200
};

class t_join
{
    join_structure join_struct;

    int id;
    gc_join_flavor flavor;

#ifdef JOIN_STATS
    uint64_t start[MAX_SUPPORTED_CPUS], end[MAX_SUPPORTED_CPUS], start_seq;
    // remember join id and last thread to arrive so restart can use these
    int thd;
    // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval
    uint32_t start_tick;
    // counters for joins, in 1000's of clock cycles
    uint64_t elapsed_total[gc_join_max], wake_total[gc_join_max], seq_loss_total[gc_join_max], par_loss_total[gc_join_max], in_join_total[gc_join_max];
#endif //JOIN_STATS

public:
    BOOL init (int n_th, gc_join_flavor f)
    {
        dprintf (JOIN_LOG, ("Initializing join structure"));
        join_struct.n_threads = n_th;
        join_struct.lock_color = 0;
        for (int i = 0; i < 3; i++)
        {
            if (!join_struct.joined_event[i].IsValid())
            {
                join_struct.joined_p = FALSE;
                dprintf (JOIN_LOG, ("Creating join event %d", i));
                // TODO - changing this to a non OS event
                // because this is also used by BGC threads which are
                // managed threads and WaitEx does not allow you to wait
                // for an OS event on a managed thread.
                // But we are not sure if this plays well in the hosting
                // environment.
//join_struct.joined_event[i].CreateOSManualEventNoThrow(FALSE); if (!join_struct.joined_event[i].CreateManualEventNoThrow(FALSE)) return FALSE; } } join_struct.join_lock = join_struct.n_threads; join_struct.r_join_lock = join_struct.n_threads; join_struct.wait_done = FALSE; flavor = f; #ifdef JOIN_STATS start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); #endif //JOIN_STATS return TRUE; } void destroy () { dprintf (JOIN_LOG, ("Destroying join structure")); for (int i = 0; i < 3; i++) { if (join_struct.joined_event[i].IsValid()) join_struct.joined_event[i].CloseEvent(); } } inline void fire_event (int heap, join_time time, join_type type, int join_id) { FIRE_EVENT(GCJoin_V2, heap, time, type, join_id); } void join (gc_heap* gch, int join_id) { #ifdef JOIN_STATS // parallel execution ends here end[gch->heap_number] = get_ts(); #endif //JOIN_STATS assert (!join_struct.joined_p); int color = join_struct.lock_color.LoadWithoutBarrier(); if (Interlocked::Decrement(&join_struct.join_lock) != 0) { dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d", flavor, join_id, (int32_t)(join_struct.join_lock))); fire_event (gch->heap_number, time_start, type_join, join_id); //busy wait around the color if (color == join_struct.lock_color.LoadWithoutBarrier()) { respin: int spin_count = 128 * yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (color != join_struct.lock_color.LoadWithoutBarrier()) { break; } YieldProcessor(); // indicate to the processor that we are spinning } // we've spun, and if color still hasn't changed, fall into hard wait if (color == join_struct.lock_color.LoadWithoutBarrier()) { dprintf (JOIN_LOG, ("join%d(%d): Join() hard wait on reset event %d, join_lock is now %d", flavor, join_id, color, (int32_t)(join_struct.join_lock))); uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE); if (dwJoinWait != WAIT_OBJECT_0) { STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %Ix", dwJoinWait); FATAL_GC_ERROR (); } } // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() if (color == join_struct.lock_color.LoadWithoutBarrier()) { goto respin; } dprintf (JOIN_LOG, ("join%d(%d): Join() done, join_lock is %d", flavor, join_id, (int32_t)(join_struct.join_lock))); } fire_event (gch->heap_number, time_end, type_join, join_id); #ifdef JOIN_STATS // parallel execution starts here start[gch->heap_number] = get_ts(); Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number])); #endif //JOIN_STATS } else { fire_event (gch->heap_number, time_start, type_last_join, join_id); join_struct.joined_p = TRUE; dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id)); join_struct.joined_event[!color].Reset(); id = join_id; #ifdef JOIN_STATS // remember the join id, the last thread arriving, the start of the sequential phase, // and keep track of the cycles spent waiting in the join thd = gch->heap_number; start_seq = get_ts(); Interlocked::ExchangeAdd(&in_join_total[join_id], (start_seq - end[gch->heap_number])); #endif //JOIN_STATS } } // Reverse join - first thread gets here does the work; other threads will only proceed // after the work is done. // Note that you cannot call this twice in a row on the same thread. Plus there's no // need to call it twice in row - you should just merge the work. 
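    // Illustrative usage sketch only (not part of the build); the join id passed in is
    // one of the gc_join_stage values defined above:
    //
    //     if (gc_t_join.r_join (this, gc_r_join_update_card_bundle))
    //     {
    //         // only the first thread to arrive gets TRUE and does the work here...
    //         gc_t_join.r_restart();  // ...then wakes the threads that got FALSE
    //     }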
BOOL r_join (gc_heap* gch, int join_id) { if (join_struct.n_threads == 1) { return TRUE; } if (Interlocked::CompareExchange(&join_struct.r_join_lock, 0, join_struct.n_threads) == 0) { fire_event (gch->heap_number, time_start, type_join, join_id); dprintf (JOIN_LOG, ("r_join() Waiting...")); //busy wait around the color respin: int spin_count = 256 * yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (join_struct.wait_done) { break; } YieldProcessor(); // indicate to the processor that we are spinning } // we've spun, and if color still hasn't changed, fall into hard wait if (!join_struct.wait_done) { dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived)); uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE); if (dwJoinWait != WAIT_OBJECT_0) { STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %Ix", dwJoinWait); FATAL_GC_ERROR (); } } // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() if (!join_struct.wait_done) { goto respin; } dprintf (JOIN_LOG, ("r_join() done")); fire_event (gch->heap_number, time_end, type_join, join_id); return FALSE; } else { fire_event (gch->heap_number, time_start, type_first_r_join, join_id); return TRUE; } } #ifdef JOIN_STATS uint64_t get_ts() { return GCToOSInterface::QueryPerformanceCounter(); } void start_ts (gc_heap* gch) { // parallel execution ends here start[gch->heap_number] = get_ts(); } #endif //JOIN_STATS void restart() { #ifdef JOIN_STATS uint64_t elapsed_seq = get_ts() - start_seq; uint64_t max = 0, sum = 0, wake = 0; uint64_t min_ts = start[0]; for (int i = 1; i < join_struct.n_threads; i++) { if(min_ts > start[i]) min_ts = start[i]; } for (int i = 0; i < join_struct.n_threads; i++) { uint64_t wake_delay = start[i] - min_ts; uint64_t elapsed = end[i] - start[i]; if (max < elapsed) max = elapsed; sum += elapsed; wake += wake_delay; } uint64_t seq_loss = (join_struct.n_threads - 1)*elapsed_seq; uint64_t par_loss = join_struct.n_threads*max - sum; double efficiency = 0.0; if (max > 0) efficiency = sum*100.0/(join_struct.n_threads*max); const double ts_scale = 1e-6; // enable this printf to get statistics on each individual join as it occurs //printf("join #%3d seq_loss = %5g par_loss = %5g efficiency = %3.0f%%\n", join_id, ts_scale*seq_loss, ts_scale*par_loss, efficiency); elapsed_total[id] += sum; wake_total[id] += wake; seq_loss_total[id] += seq_loss; par_loss_total[id] += par_loss; // every 10 seconds, print a summary of the time spent in each type of join if (GCToOSInterface::GetLowPrecisionTimeStamp() - start_tick > 10*1000) { printf("**** summary *****\n"); for (int i = 0; i < 16; i++) { printf("join #%3d elapsed_total = %8g wake_loss = %8g seq_loss = %8g par_loss = %8g in_join_total = %8g\n", i, ts_scale*elapsed_total[i], ts_scale*wake_total[i], ts_scale*seq_loss_total[i], ts_scale*par_loss_total[i], ts_scale*in_join_total[i]); elapsed_total[i] = wake_total[i] = seq_loss_total[i] = par_loss_total[i] = in_join_total[i] = 0; } start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); } #endif //JOIN_STATS fire_event (join_heap_restart, time_start, type_restart, -1); assert (join_struct.joined_p); join_struct.joined_p = FALSE; join_struct.join_lock = join_struct.n_threads; dprintf (JOIN_LOG, ("join%d(%d): Restarting from join: join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); int color = join_struct.lock_color.LoadWithoutBarrier(); join_struct.lock_color = !color; 
join_struct.joined_event[color].Set(); fire_event (join_heap_restart, time_end, type_restart, -1); #ifdef JOIN_STATS start[thd] = get_ts(); #endif //JOIN_STATS } BOOL joined() { dprintf (JOIN_LOG, ("join%d(%d): joined, join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); return join_struct.joined_p; } void r_restart() { if (join_struct.n_threads != 1) { fire_event (join_heap_r_restart, time_start, type_restart, -1); join_struct.wait_done = TRUE; join_struct.joined_event[first_thread_arrived].Set(); fire_event (join_heap_r_restart, time_end, type_restart, -1); } } void r_init() { if (join_struct.n_threads != 1) { join_struct.r_join_lock = join_struct.n_threads; join_struct.wait_done = FALSE; join_struct.joined_event[first_thread_arrived].Reset(); } } }; t_join gc_t_join; #ifdef BACKGROUND_GC t_join bgc_t_join; #endif //BACKGROUND_GC #endif // DACCESS_COMPILE #endif //MULTIPLE_HEAPS #define spin_and_switch(count_to_spin, expr) \ { \ for (int j = 0; j < count_to_spin; j++) \ { \ if (expr) \ { \ break;\ } \ YieldProcessor(); \ } \ if (!(expr)) \ { \ GCToOSInterface::YieldThread(0); \ } \ } #if defined(BACKGROUND_GC) && !(DACCESS_COMPILE) #define max_pending_allocs 64 class exclusive_sync { VOLATILE(uint8_t*) rwp_object; VOLATILE(int32_t) needs_checking; int spin_count; uint8_t cache_separator[HS_CACHE_LINE_SIZE - sizeof (int) - sizeof (int32_t)]; // TODO - perhaps each object should be on its own cache line... VOLATILE(uint8_t*) alloc_objects[max_pending_allocs]; int find_free_index () { for (int i = 0; i < max_pending_allocs; i++) { if (alloc_objects [i] == (uint8_t*)0) { return i; } } return -1; } public: void init() { spin_count = 32 * (g_num_processors - 1); rwp_object = 0; needs_checking = 0; for (int i = 0; i < max_pending_allocs; i++) { alloc_objects [i] = (uint8_t*)0; } } void check() { for (int i = 0; i < max_pending_allocs; i++) { if (alloc_objects [i] != (uint8_t*)0) { FATAL_GC_ERROR(); } } } void bgc_mark_set (uint8_t* obj) { dprintf (3, ("cm: probing %Ix", obj)); retry: if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0) { // If we spend too much time spending all the allocs, // consider adding a high water mark and scan up // to that; we'll need to interlock in done when // we update the high watermark. 
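            // The loop below walks the small table of UOH allocations currently in
            // flight (slots registered via uoh_alloc_set). If the object the background
            // marker wants to touch is one of them, we drop needs_checking, spin until
            // that slot is released (see uoh_alloc_done_with_index) and retry from the
            // top; otherwise we publish the object as rwp_object so allocators will
            // wait on us instead.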
for (int i = 0; i < max_pending_allocs; i++) { if (obj == alloc_objects[i]) { needs_checking = 0; dprintf (3, ("cm: will spin")); spin_and_switch (spin_count, (obj != alloc_objects[i])); goto retry; } } rwp_object = obj; needs_checking = 0; dprintf (3, ("cm: set %Ix", obj)); return; } else { spin_and_switch (spin_count, (needs_checking == 0)); goto retry; } } int uoh_alloc_set (uint8_t* obj) { if (!gc_heap::cm_in_progress) { return -1; } retry: dprintf (3, ("uoh alloc: probing %Ix", obj)); if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0) { if (obj == rwp_object) { needs_checking = 0; spin_and_switch (spin_count, (obj != rwp_object)); goto retry; } else { int cookie = find_free_index(); if (cookie != -1) { alloc_objects[cookie] = obj; needs_checking = 0; //if (cookie >= 4) //{ // GCToOSInterface::DebugBreak(); //} dprintf (3, ("uoh alloc: set %Ix at %d", obj, cookie)); return cookie; } else { needs_checking = 0; dprintf (3, ("uoh alloc: setting %Ix will spin to acquire a free index", obj)); spin_and_switch (spin_count, (find_free_index () != -1)); goto retry; } } } else { dprintf (3, ("uoh alloc: will spin on checking %Ix", obj)); spin_and_switch (spin_count, (needs_checking == 0)); goto retry; } } void bgc_mark_done () { dprintf (3, ("cm: release lock on %Ix", (uint8_t *)rwp_object)); rwp_object = 0; } void uoh_alloc_done_with_index (int index) { dprintf (3, ("uoh alloc: release lock on %Ix based on %d", (uint8_t *)alloc_objects[index], index)); assert ((index >= 0) && (index < max_pending_allocs)); alloc_objects[index] = (uint8_t*)0; } void uoh_alloc_done (uint8_t* obj) { #ifdef BACKGROUND_GC if (!gc_heap::cm_in_progress) { return; } for (int i = 0; i < max_pending_allocs; i++) { if (alloc_objects [i] == obj) { uoh_alloc_done_with_index(i); return; } } #endif //BACKGROUND_GC } }; #endif //BACKGROUND_GC && !DACCESS_COMPILE void reset_memory (uint8_t* o, size_t sizeo); #ifdef WRITE_WATCH #ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP static bool virtual_alloc_hardware_write_watch = false; #endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP static bool hardware_write_watch_capability = false; #ifndef DACCESS_COMPILE void hardware_write_watch_api_supported() { if (GCToOSInterface::SupportsWriteWatch()) { hardware_write_watch_capability = true; dprintf (2, ("WriteWatch supported")); } else { dprintf (2,("WriteWatch not supported")); } } #endif //!DACCESS_COMPILE inline bool can_use_hardware_write_watch() { return hardware_write_watch_capability; } inline bool can_use_write_watch_for_gc_heap() { #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP return true; #else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP return can_use_hardware_write_watch(); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } inline bool can_use_write_watch_for_card_table() { #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES return true; #else return can_use_hardware_write_watch(); #endif } #else #define mem_reserve (MEM_RESERVE) #endif //WRITE_WATCH //check if the low memory notification is supported #ifndef DACCESS_COMPILE void WaitLongerNoInstru (int i) { // every 8th attempt: bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); // if we're waiting for gc to finish, we should block immediately if (g_fSuspensionPending == 0) { if (g_num_processors > 1) { YieldProcessor(); // indicate to the processor that we are spinning if (i & 0x01f) GCToOSInterface::YieldThread (0); else GCToOSInterface::Sleep (5); } else GCToOSInterface::Sleep (5); } // If CLR is hosted, a thread may reach here while it 
is in preemptive GC mode, // or it has no Thread object, in order to force a task to yield, or to triger a GC. // It is important that the thread is going to wait for GC. Otherwise the thread // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. if (bToggleGC) { #ifdef _DEBUG // In debug builds, all enter_spin_lock operations go through this code. If a GC has // started, it is important to block until the GC thread calls set_gc_done (since it is // guaranteed to have cleared g_TrapReturningThreads by this point). This avoids livelock // conditions which can otherwise occur if threads are allowed to spin in this function // (and therefore starve the GC thread) between the point when the GC thread sets the // WaitForGC event and the point when the GC thread clears g_TrapReturningThreads. if (gc_heap::gc_started) { gc_heap::wait_for_gc_done(); } #endif // _DEBUG GCToEEInterface::DisablePreemptiveGC(); } else if (g_fSuspensionPending > 0) { g_theGCHeap->WaitUntilGCComplete(); } } inline static void safe_switch_to_thread() { bool cooperative_mode = gc_heap::enable_preemptive(); GCToOSInterface::YieldThread(0); gc_heap::disable_preemptive(cooperative_mode); } // // We need the following methods to have volatile arguments, so that they can accept // raw pointers in addition to the results of the & operator on Volatile. // inline static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) { retry: if (Interlocked::CompareExchange(lock, 0, -1) >= 0) { unsigned int i = 0; while (VolatileLoad(lock) >= 0) { if ((++i & 7) && !IsGCInProgress()) { if (g_num_processors > 1) { #ifndef MULTIPLE_HEAPS int spin_count = 32 * yp_spin_count_unit; #else //!MULTIPLE_HEAPS int spin_count = yp_spin_count_unit; #endif //!MULTIPLE_HEAPS for (int j = 0; j < spin_count; j++) { if (VolatileLoad(lock) < 0 || IsGCInProgress()) break; YieldProcessor(); // indicate to the processor that we are spinning } if (VolatileLoad(lock) >= 0 && !IsGCInProgress()) { safe_switch_to_thread(); } } else { safe_switch_to_thread(); } } else { WaitLongerNoInstru(i); } } goto retry; } } inline static BOOL try_enter_spin_lock_noinstru(RAW_KEYWORD(volatile) int32_t* lock) { return (Interlocked::CompareExchange(&*lock, 0, -1) < 0); } inline static void leave_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) { VolatileStore((int32_t*)lock, -1); } #ifdef _DEBUG inline static void enter_spin_lock(GCSpinLock *pSpinLock) { enter_spin_lock_noinstru(&pSpinLock->lock); assert (pSpinLock->holding_thread == (Thread*)-1); pSpinLock->holding_thread = GCToEEInterface::GetThread(); } inline static BOOL try_enter_spin_lock(GCSpinLock *pSpinLock) { BOOL ret = try_enter_spin_lock_noinstru(&pSpinLock->lock); if (ret) pSpinLock->holding_thread = GCToEEInterface::GetThread(); return ret; } inline static void leave_spin_lock(GCSpinLock *pSpinLock) { bool gc_thread_p = GCToEEInterface::WasCurrentThreadCreatedByGC(); pSpinLock->released_by_gc_p = gc_thread_p; pSpinLock->holding_thread = (Thread*) -1; if (pSpinLock->lock != -1) leave_spin_lock_noinstru(&pSpinLock->lock); } #define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) \ _ASSERTE((pSpinLock)->holding_thread == GCToEEInterface::GetThread()); #define ASSERT_NOT_HOLDING_SPIN_LOCK(pSpinLock) \ _ASSERTE((pSpinLock)->holding_thread != GCToEEInterface::GetThread()); #else //_DEBUG //In the concurrent version, the Enable/DisablePreemptiveGC is optional because //the gc thread call WaitLonger. 
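// The GC spin locks below use a single int32_t word: -1 means free, 0 means held.
// enter_spin_lock takes it with CompareExchange(lock, 0, -1); a contending thread
// spins with YieldProcessor, yields its time slice, and on every 8th iteration (or
// once a GC has started) falls into WaitLonger, which switches to preemptive mode
// and either sleeps briefly or blocks until the GC is done, so spinners cannot
// starve the thread that owns the lock.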
void WaitLonger (int i #ifdef SYNCHRONIZATION_STATS , GCSpinLock* spin_lock #endif //SYNCHRONIZATION_STATS ) { #ifdef SYNCHRONIZATION_STATS (spin_lock->num_wait_longer)++; #endif //SYNCHRONIZATION_STATS // every 8th attempt: bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); assert (bToggleGC); // if we're waiting for gc to finish, we should block immediately if (!gc_heap::gc_started) { #ifdef SYNCHRONIZATION_STATS (spin_lock->num_switch_thread_w)++; #endif //SYNCHRONIZATION_STATS if (g_num_processors > 1) { YieldProcessor(); // indicate to the processor that we are spinning if (i & 0x01f) GCToOSInterface::YieldThread (0); else GCToOSInterface::Sleep (5); } else GCToOSInterface::Sleep (5); } // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, // or it has no Thread object, in order to force a task to yield, or to triger a GC. // It is important that the thread is going to wait for GC. Otherwise the thread // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. if (gc_heap::gc_started) { gc_heap::wait_for_gc_done(); } if (bToggleGC) { #ifdef SYNCHRONIZATION_STATS (spin_lock->num_disable_preemptive_w)++; #endif //SYNCHRONIZATION_STATS GCToEEInterface::DisablePreemptiveGC(); } } inline static void enter_spin_lock (GCSpinLock* spin_lock) { retry: if (Interlocked::CompareExchange(&spin_lock->lock, 0, -1) >= 0) { unsigned int i = 0; while (spin_lock->lock >= 0) { if ((++i & 7) && !gc_heap::gc_started) { if (g_num_processors > 1) { #ifndef MULTIPLE_HEAPS int spin_count = 32 * yp_spin_count_unit; #else //!MULTIPLE_HEAPS int spin_count = yp_spin_count_unit; #endif //!MULTIPLE_HEAPS for (int j = 0; j < spin_count; j++) { if (spin_lock->lock < 0 || gc_heap::gc_started) break; YieldProcessor(); // indicate to the processor that we are spinning } if (spin_lock->lock >= 0 && !gc_heap::gc_started) { #ifdef SYNCHRONIZATION_STATS (spin_lock->num_switch_thread)++; #endif //SYNCHRONIZATION_STATS bool cooperative_mode = gc_heap::enable_preemptive (); GCToOSInterface::YieldThread(0); gc_heap::disable_preemptive (cooperative_mode); } } else GCToOSInterface::YieldThread(0); } else { WaitLonger(i #ifdef SYNCHRONIZATION_STATS , spin_lock #endif //SYNCHRONIZATION_STATS ); } } goto retry; } } inline static BOOL try_enter_spin_lock(GCSpinLock* spin_lock) { return (Interlocked::CompareExchange(&spin_lock->lock, 0, -1) < 0); } inline static void leave_spin_lock (GCSpinLock * spin_lock) { spin_lock->lock = -1; } #define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) #endif //_DEBUG bool gc_heap::enable_preemptive () { return GCToEEInterface::EnablePreemptiveGC(); } void gc_heap::disable_preemptive (bool restore_cooperative) { if (restore_cooperative) { GCToEEInterface::DisablePreemptiveGC(); } } #endif // !DACCESS_COMPILE typedef void ** PTR_PTR; inline void memclr ( uint8_t* mem, size_t size) { dprintf (3, ("MEMCLR: %Ix, %d", mem, size)); assert ((size & (sizeof(PTR_PTR)-1)) == 0); assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); memset (mem, 0, size); } void memcopy (uint8_t* dmem, uint8_t* smem, size_t size) { const size_t sz4ptr = sizeof(PTR_PTR)*4; const size_t sz2ptr = sizeof(PTR_PTR)*2; const size_t sz1ptr = sizeof(PTR_PTR)*1; assert ((size & (sizeof (PTR_PTR)-1)) == 0); assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); // copy in groups of four pointer sized things at a time if (size >= sz4ptr) { do { ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0]; ((PTR_PTR)dmem)[1] = ((PTR_PTR)smem)[1]; ((PTR_PTR)dmem)[2] = ((PTR_PTR)smem)[2]; ((PTR_PTR)dmem)[3] = ((PTR_PTR)smem)[3]; dmem 
+= sz4ptr; smem += sz4ptr; } while ((size -= sz4ptr) >= sz4ptr); } // still two pointer sized things or more left to copy? if (size & sz2ptr) { ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0]; ((PTR_PTR)dmem)[1] = ((PTR_PTR)smem)[1]; dmem += sz2ptr; smem += sz2ptr; } // still one pointer sized thing left to copy? if (size & sz1ptr) { ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0]; } } inline ptrdiff_t round_down (ptrdiff_t add, int pitch) { return ((add / pitch) * pitch); } #if defined(FEATURE_STRUCTALIGN) && defined(RESPECT_LARGE_ALIGNMENT) // FEATURE_STRUCTALIGN allows the compiler to dictate the alignment, // i.e, if a larger alignment matters or is beneficial, the compiler // generated info tells us so. RESPECT_LARGE_ALIGNMENT is just the // converse - it's a heuristic for the GC to use a larger alignment. #error FEATURE_STRUCTALIGN should imply !RESPECT_LARGE_ALIGNMENT #endif #if defined(FEATURE_STRUCTALIGN) && defined(FEATURE_LOH_COMPACTION) #error FEATURE_STRUCTALIGN and FEATURE_LOH_COMPACTION are mutually exclusive #endif // Returns true if two pointers have the same large (double than normal) alignment. inline BOOL same_large_alignment_p (uint8_t* p1, uint8_t* p2) { #ifdef RESPECT_LARGE_ALIGNMENT const size_t LARGE_ALIGNMENT_MASK = 2 * DATA_ALIGNMENT - 1; return ((((size_t)p1 ^ (size_t)p2) & LARGE_ALIGNMENT_MASK) == 0); #else UNREFERENCED_PARAMETER(p1); UNREFERENCED_PARAMETER(p2); return TRUE; #endif // RESPECT_LARGE_ALIGNMENT } // Determines the padding size required to fix large alignment during relocation. inline size_t switch_alignment_size (BOOL already_padded_p) { #ifndef RESPECT_LARGE_ALIGNMENT assert (!"Should not be called"); #endif // RESPECT_LARGE_ALIGNMENT if (already_padded_p) return DATA_ALIGNMENT; else return Align (min_obj_size) | DATA_ALIGNMENT; } #ifdef FEATURE_STRUCTALIGN void set_node_aligninfo (uint8_t *node, int requiredAlignment, ptrdiff_t pad); void clear_node_aligninfo (uint8_t *node); #else // FEATURE_STRUCTALIGN #define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1) void set_node_realigned (uint8_t* node); void clear_node_realigned(uint8_t* node); #endif // FEATURE_STRUCTALIGN inline size_t AlignQword (size_t nbytes) { #ifdef FEATURE_STRUCTALIGN // This function is used to align everything on the large object // heap to an 8-byte boundary, to reduce the number of unaligned // accesses to (say) arrays of doubles. With FEATURE_STRUCTALIGN, // the compiler dictates the optimal alignment instead of having // a heuristic in the GC. return Align (nbytes); #else // FEATURE_STRUCTALIGN return (nbytes + 7) & ~7; #endif // FEATURE_STRUCTALIGN } inline BOOL Aligned (size_t n) { return (n & ALIGNCONST) == 0; } #define OBJECT_ALIGNMENT_OFFSET (sizeof(MethodTable *)) #ifdef FEATURE_STRUCTALIGN #define MAX_STRUCTALIGN OS_PAGE_SIZE #else // FEATURE_STRUCTALIGN #define MAX_STRUCTALIGN 0 #endif // FEATURE_STRUCTALIGN #ifdef FEATURE_STRUCTALIGN inline ptrdiff_t AdjustmentForMinPadSize(ptrdiff_t pad, int requiredAlignment) { // The resulting alignpad must be either 0 or at least min_obj_size. // Note that by computing the following difference on unsigned types, // we can do the range check 0 < alignpad < min_obj_size with a // single conditional branch. 
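    // Worked example of the single unsigned compare (illustrative numbers only; the
    // real constants are platform dependent): with DATA_ALIGNMENT == 8 and
    // Align (min_obj_size) == 24, the test (size_t)(pad - 8) < 16 passes for pad == 8
    // or 16, in which case requiredAlignment is returned as extra padding, and fails
    // for pad == 0 (which wraps around to a huge unsigned value) and for pad >= 24,
    // in which case 0 is returned.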
if ((size_t)(pad - DATA_ALIGNMENT) < Align (min_obj_size) - DATA_ALIGNMENT) { return requiredAlignment; } return 0; } inline uint8_t* StructAlign (uint8_t* origPtr, int requiredAlignment, ptrdiff_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET) { // required alignment must be a power of two _ASSERTE(((size_t)origPtr & ALIGNCONST) == 0); _ASSERTE(((requiredAlignment - 1) & requiredAlignment) == 0); _ASSERTE(requiredAlignment >= sizeof(void *)); _ASSERTE(requiredAlignment <= MAX_STRUCTALIGN); // When this method is invoked for individual objects (i.e., alignmentOffset // is just the size of the PostHeader), what needs to be aligned when // we're done is the pointer to the payload of the object (which means // the actual resulting object pointer is typically not aligned). uint8_t* result = (uint8_t*)Align ((size_t)origPtr + alignmentOffset, requiredAlignment-1) - alignmentOffset; ptrdiff_t alignpad = result - origPtr; return result + AdjustmentForMinPadSize (alignpad, requiredAlignment); } inline ptrdiff_t ComputeStructAlignPad (uint8_t* plug, int requiredAlignment, size_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET) { return StructAlign (plug, requiredAlignment, alignmentOffset) - plug; } BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment) { return StructAlign (ptr, requiredAlignment) == ptr; } inline ptrdiff_t ComputeMaxStructAlignPad (int requiredAlignment) { if (requiredAlignment == DATA_ALIGNMENT) return 0; // Since a non-zero alignment padding cannot be less than min_obj_size (so we can fit the // alignment padding object), the worst-case alignment padding is correspondingly larger // than the required alignment. return requiredAlignment + Align (min_obj_size) - DATA_ALIGNMENT; } inline ptrdiff_t ComputeMaxStructAlignPadLarge (int requiredAlignment) { if (requiredAlignment <= get_alignment_constant (TRUE)+1) return 0; // This is the same as ComputeMaxStructAlignPad, except that in addition to leaving space // for padding before the actual object, it also leaves space for filling a gap after the // actual object. This is needed on the large object heap, as the outer allocation functions // don't operate on an allocation context (which would have left space for the final gap). 
return requiredAlignment + Align (min_obj_size) * 2 - DATA_ALIGNMENT; } uint8_t* gc_heap::pad_for_alignment (uint8_t* newAlloc, int requiredAlignment, size_t size, alloc_context* acontext) { uint8_t* alignedPtr = StructAlign (newAlloc, requiredAlignment); if (alignedPtr != newAlloc) { make_unused_array (newAlloc, alignedPtr - newAlloc); } acontext->alloc_ptr = alignedPtr + Align (size); return alignedPtr; } uint8_t* gc_heap::pad_for_alignment_large (uint8_t* newAlloc, int requiredAlignment, size_t size) { uint8_t* alignedPtr = StructAlign (newAlloc, requiredAlignment); if (alignedPtr != newAlloc) { make_unused_array (newAlloc, alignedPtr - newAlloc); } if (alignedPtr < newAlloc + ComputeMaxStructAlignPadLarge (requiredAlignment)) { make_unused_array (alignedPtr + AlignQword (size), newAlloc + ComputeMaxStructAlignPadLarge (requiredAlignment) - alignedPtr); } return alignedPtr; } #else // FEATURE_STRUCTALIGN #define ComputeMaxStructAlignPad(requiredAlignment) 0 #define ComputeMaxStructAlignPadLarge(requiredAlignment) 0 #endif // FEATURE_STRUCTALIGN //CLR_SIZE is the max amount of bytes from gen0 that is set to 0 in one chunk #ifdef SERVER_GC #define CLR_SIZE ((size_t)(8*1024)) #else //SERVER_GC #define CLR_SIZE ((size_t)(8*1024)) #endif //SERVER_GC #define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN) // When we fit into the free list we need an extra of a min obj #define END_SPACE_AFTER_GC_FL (END_SPACE_AFTER_GC + Align (min_obj_size)) #if defined(BACKGROUND_GC) && !defined(USE_REGIONS) #define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE) #else #define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE) #endif //BACKGROUND_GC && !USE_REGIONS // This is always power of 2. const size_t min_segment_size_hard_limit = 1024*1024*16; inline size_t align_on_segment_hard_limit (size_t add) { return ((size_t)(add + (min_segment_size_hard_limit - 1)) & ~(min_segment_size_hard_limit - 1)); } #ifdef SERVER_GC #ifdef HOST_64BIT #define INITIAL_ALLOC ((size_t)((size_t)4*1024*1024*1024)) #define LHEAP_ALLOC ((size_t)(1024*1024*256)) #else #define INITIAL_ALLOC ((size_t)(1024*1024*64)) #define LHEAP_ALLOC ((size_t)(1024*1024*32)) #endif // HOST_64BIT #else //SERVER_GC #ifdef HOST_64BIT #define INITIAL_ALLOC ((size_t)(1024*1024*256)) #define LHEAP_ALLOC ((size_t)(1024*1024*128)) #else #define INITIAL_ALLOC ((size_t)(1024*1024*16)) #define LHEAP_ALLOC ((size_t)(1024*1024*16)) #endif // HOST_64BIT #endif //SERVER_GC const size_t etw_allocation_tick = 100*1024; const size_t low_latency_alloc = 256*1024; const size_t fgn_check_quantum = 2*1024*1024; #ifdef MH_SC_MARK const int max_snoop_level = 128; #endif //MH_SC_MARK #ifdef CARD_BUNDLE //threshold of heap size to turn on card bundles. 
#define SH_TH_CARD_BUNDLE (40*1024*1024) #define MH_TH_CARD_BUNDLE (180*1024*1024) #endif //CARD_BUNDLE // min size to decommit to make the OS call worthwhile #define MIN_DECOMMIT_SIZE (100*OS_PAGE_SIZE) // max size to decommit per millisecond #define DECOMMIT_SIZE_PER_MILLISECOND (160*1024) // time in milliseconds between decommit steps #define DECOMMIT_TIME_STEP_MILLISECONDS (100) inline size_t align_on_page (size_t add) { return ((add + OS_PAGE_SIZE - 1) & ~((size_t)OS_PAGE_SIZE - 1)); } inline uint8_t* align_on_page (uint8_t* add) { return (uint8_t*)align_on_page ((size_t) add); } inline size_t align_lower_page (size_t add) { return (add & ~((size_t)OS_PAGE_SIZE - 1)); } inline uint8_t* align_lower_page (uint8_t* add) { return (uint8_t*)align_lower_page ((size_t)add); } inline size_t align_write_watch_lower_page (size_t add) { return (add & ~(WRITE_WATCH_UNIT_SIZE - 1)); } inline uint8_t* align_write_watch_lower_page (uint8_t* add) { return (uint8_t*)align_lower_page ((size_t)add); } inline BOOL power_of_two_p (size_t integer) { return !(integer & (integer-1)); } inline BOOL oddp (size_t integer) { return (integer & 1) != 0; } // we only ever use this for WORDs. size_t logcount (size_t word) { //counts the number of high bits in a 16 bit word. assert (word < 0x10000); size_t count; count = (word & 0x5555) + ( (word >> 1 ) & 0x5555); count = (count & 0x3333) + ( (count >> 2) & 0x3333); count = (count & 0x0F0F) + ( (count >> 4) & 0x0F0F); count = (count & 0x00FF) + ( (count >> 8) & 0x00FF); return count; } #ifndef DACCESS_COMPILE void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_bounds_check) { WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::StompResize; args.is_runtime_suspended = is_runtime_suspended; args.requires_upper_bounds_check = requires_upper_bounds_check; args.card_table = g_gc_card_table; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES args.card_bundle_table = g_gc_card_bundle_table; #endif args.lowest_address = g_gc_lowest_address; args.highest_address = g_gc_highest_address; #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP if (SoftwareWriteWatch::IsEnabledForGCHeap()) { args.write_watch_table = g_gc_sw_ww_table; } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP GCToEEInterface::StompWriteBarrier(&args); } void stomp_write_barrier_ephemeral(uint8_t* ephemeral_low, uint8_t* ephemeral_high) { initGCShadow(); WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::StompEphemeral; args.is_runtime_suspended = true; args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; GCToEEInterface::StompWriteBarrier(&args); } void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high) { WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::Initialize; args.is_runtime_suspended = true; args.requires_upper_bounds_check = false; args.card_table = g_gc_card_table; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES args.card_bundle_table = g_gc_card_bundle_table; #endif args.lowest_address = g_gc_lowest_address; args.highest_address = g_gc_highest_address; args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; GCToEEInterface::StompWriteBarrier(&args); } #endif // DACCESS_COMPILE //extract the low bits [0,low[ of a uint32_t #define lowbits(wrd, bits) ((wrd) & ((1 << (bits))-1)) //extract the high bits [high, 32] of a uint32_t #define highbits(wrd, bits) ((wrd) & ~((1 << (bits))-1)) // Things we need to manually initialize: // gen0 min_size - based on cache // 
gen0/1 max_size - based on segment size static static_data static_data_table[latency_level_last - latency_level_first + 1][total_generation_count] = { // latency_level_memory_footprint { // gen0 {0, 0, 40000, 0.5f, 9.0f, 20.0f, (1000 * 1000), 1}, // gen1 {160*1024, 0, 80000, 0.5f, 2.0f, 7.0f, (10 * 1000 * 1000), 10}, // gen2 {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, (100 * 1000 * 1000), 100}, // loh {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}, // poh {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}, }, // latency_level_balanced { // gen0 {0, 0, 40000, 0.5f, #ifdef MULTIPLE_HEAPS 20.0f, 40.0f, #else 9.0f, 20.0f, #endif //MULTIPLE_HEAPS (1000 * 1000), 1}, // gen1 {256*1024, 0, 80000, 0.5f, 2.0f, 7.0f, (10 * 1000 * 1000), 10}, // gen2 {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, (100 * 1000 * 1000), 100}, // loh {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}, // poh {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0} }, }; class mark; class generation; class heap_segment; class CObjectHeader; class dynamic_data; class l_heap; class sorted_table; class c_synchronize; #ifdef FEATURE_PREMORTEM_FINALIZATION #ifndef DACCESS_COMPILE static HRESULT AllocateCFinalize(CFinalize **pCFinalize); #endif //!DACCESS_COMPILE #endif // FEATURE_PREMORTEM_FINALIZATION uint8_t* tree_search (uint8_t* tree, uint8_t* old_address); #ifdef USE_INTROSORT #define _sort introsort::sort #elif defined(USE_VXSORT) // in this case we have do_vxsort which takes an additional range that // all items to be sorted are contained in // so do not #define _sort #else //USE_INTROSORT #define _sort qsort1 void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); #endif //USE_INTROSORT void* virtual_alloc (size_t size); void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node = NUMA_NODE_UNDEFINED); /* per heap static initialization */ #if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) uint32_t* gc_heap::mark_array; #endif //BACKGROUND_GC && !MULTIPLE_HEAPS uint8_t** gc_heap::g_mark_list; uint8_t** gc_heap::g_mark_list_copy; size_t gc_heap::mark_list_size; bool gc_heap::mark_list_overflow; #ifdef USE_REGIONS uint8_t*** gc_heap::g_mark_list_piece; size_t gc_heap::g_mark_list_piece_size; #endif //USE_REGIONS seg_mapping* seg_mapping_table; #ifdef FEATURE_BASICFREEZE sorted_table* gc_heap::seg_table; #endif //FEATURE_BASICFREEZE #ifdef MULTIPLE_HEAPS GCEvent gc_heap::ee_suspend_event; size_t gc_heap::min_gen0_balance_delta = 0; size_t gc_heap::min_balance_threshold = 0; #endif //MULTIPLE_HEAPS VOLATILE(BOOL) gc_heap::gc_started; #ifdef MULTIPLE_HEAPS GCEvent gc_heap::gc_start_event; bool gc_heap::gc_thread_no_affinitize_p = false; uintptr_t process_mask = 0; int gc_heap::n_heaps; gc_heap** gc_heap::g_heaps; #if !defined(USE_REGIONS) || defined(_DEBUG) size_t* gc_heap::g_promoted; #endif //!USE_REGIONS || _DEBUG #ifdef MH_SC_MARK int* gc_heap::g_mark_stack_busy; #endif //MH_SC_MARK #ifdef BACKGROUND_GC size_t* gc_heap::g_bpromoted; #endif //BACKGROUND_GC BOOL gc_heap::gradual_decommit_in_progress_p = FALSE; size_t gc_heap::max_decommit_step_size = 0; #else //MULTIPLE_HEAPS #if !defined(USE_REGIONS) || defined(_DEBUG) size_t gc_heap::g_promoted; #endif //!USE_REGIONS || _DEBUG #ifdef BACKGROUND_GC size_t gc_heap::g_bpromoted; #endif //BACKGROUND_GC #endif //MULTIPLE_HEAPS size_t gc_heap::reserved_memory = 0; size_t gc_heap::reserved_memory_limit = 0; BOOL gc_heap::g_low_memory_status; #ifndef DACCESS_COMPILE static gc_reason gc_trigger_reason = reason_empty; #endif 
//DACCESS_COMPILE gc_latency_level gc_heap::latency_level = latency_level_default; gc_mechanisms gc_heap::settings; gc_history_global gc_heap::gc_data_global; uint64_t gc_heap::gc_last_ephemeral_decommit_time = 0; CLRCriticalSection gc_heap::check_commit_cs; size_t gc_heap::current_total_committed = 0; size_t gc_heap::committed_by_oh[total_oh_count] = {0, 0, 0, 0}; size_t gc_heap::current_total_committed_bookkeeping = 0; #ifdef FEATURE_EVENT_TRACE bool gc_heap::informational_event_enabled_p = false; uint64_t* gc_heap::gc_time_info = 0; #ifdef BACKGROUND_GC uint64_t* gc_heap::bgc_time_info = 0; #endif //BACKGROUND_GC size_t gc_heap::physical_memory_from_config = 0; size_t gc_heap::gen0_min_budget_from_config = 0; size_t gc_heap::gen0_max_budget_from_config = 0; int gc_heap::high_mem_percent_from_config = 0; bool gc_heap::use_frozen_segments_p = false; bool gc_heap::hard_limit_config_p = false; #ifdef FEATURE_LOH_COMPACTION gc_heap::etw_loh_compact_info* gc_heap::loh_compact_info; #endif //FEATURE_LOH_COMPACTION #endif //FEATURE_EVENT_TRACE #ifdef SHORT_PLUGS double gc_heap::short_plugs_pad_ratio = 0; #endif //SHORT_PLUGS int gc_heap::generation_skip_ratio_threshold = 0; int gc_heap::conserve_mem_setting = 0; uint64_t gc_heap::suspended_start_time = 0; uint64_t gc_heap::end_gc_time = 0; uint64_t gc_heap::total_suspended_time = 0; uint64_t gc_heap::process_start_time = 0; last_recorded_gc_info gc_heap::last_ephemeral_gc_info; last_recorded_gc_info gc_heap::last_full_blocking_gc_info; #ifdef BACKGROUND_GC last_recorded_gc_info gc_heap::last_bgc_info[2]; VOLATILE(bool) gc_heap::is_last_recorded_bgc = false; VOLATILE(int) gc_heap::last_bgc_info_index = 0; #endif //BACKGROUND_GC #if defined(HOST_64BIT) #define MAX_ALLOWED_MEM_LOAD 85 // consider putting this in dynamic data - // we may want different values for workstation // and server GC. 
#define MIN_YOUNGEST_GEN_DESIRED (16*1024*1024) size_t gc_heap::youngest_gen_desired_th; #endif //HOST_64BIT uint64_t gc_heap::mem_one_percent = 0; uint32_t gc_heap::high_memory_load_th = 0; uint32_t gc_heap::m_high_memory_load_th; uint32_t gc_heap::v_high_memory_load_th; bool gc_heap::is_restricted_physical_mem; uint64_t gc_heap::total_physical_mem = 0; uint64_t gc_heap::entry_available_physical_mem = 0; size_t gc_heap::heap_hard_limit = 0; size_t gc_heap::heap_hard_limit_oh[total_oh_count - 1] = {0, 0, 0}; #ifdef USE_REGIONS size_t gc_heap::regions_range = 0; #endif //USE_REGIONS bool affinity_config_specified_p = false; #ifdef USE_REGIONS region_allocator global_region_allocator; uint8_t*(*initial_regions)[total_generation_count][2] = nullptr; size_t gc_heap::region_count = 0; #endif //USE_REGIONS #ifdef BACKGROUND_GC GCEvent gc_heap::bgc_start_event; gc_mechanisms gc_heap::saved_bgc_settings; gc_history_global gc_heap::bgc_data_global; GCEvent gc_heap::background_gc_done_event; GCEvent gc_heap::ee_proceed_event; bool gc_heap::gc_can_use_concurrent = false; bool gc_heap::temp_disable_concurrent_p = false; uint32_t gc_heap::cm_in_progress = FALSE; BOOL gc_heap::dont_restart_ee_p = FALSE; BOOL gc_heap::keep_bgc_threads_p = FALSE; GCEvent gc_heap::bgc_threads_sync_event; BOOL gc_heap::do_ephemeral_gc_p = FALSE; BOOL gc_heap::do_concurrent_p = FALSE; size_t gc_heap::ephemeral_fgc_counts[max_generation]; BOOL gc_heap::alloc_wait_event_p = FALSE; VOLATILE(c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free; VOLATILE(BOOL) gc_heap::gc_background_running = FALSE; #endif //BACKGROUND_GC #ifndef MULTIPLE_HEAPS #ifdef SPINLOCK_HISTORY int gc_heap::spinlock_info_index = 0; spinlock_info gc_heap::last_spinlock_info[max_saved_spinlock_info + 8]; #endif //SPINLOCK_HISTORY uint32_t gc_heap::fgn_maxgen_percent = 0; size_t gc_heap::fgn_last_alloc = 0; int gc_heap::generation_skip_ratio = 100; #ifdef FEATURE_CARD_MARKING_STEALING VOLATILE(size_t) gc_heap::n_eph_soh = 0; VOLATILE(size_t) gc_heap::n_gen_soh = 0; VOLATILE(size_t) gc_heap::n_eph_loh = 0; VOLATILE(size_t) gc_heap::n_gen_loh = 0; #endif //FEATURE_CARD_MARKING_STEALING uint64_t gc_heap::loh_alloc_since_cg = 0; BOOL gc_heap::elevation_requested = FALSE; BOOL gc_heap::last_gc_before_oom = FALSE; BOOL gc_heap::sufficient_gen0_space_p = FALSE; #ifdef BACKGROUND_GC uint8_t* gc_heap::background_saved_lowest_address = 0; uint8_t* gc_heap::background_saved_highest_address = 0; uint8_t* gc_heap::next_sweep_obj = 0; uint8_t* gc_heap::current_sweep_pos = 0; #ifdef DOUBLY_LINKED_FL heap_segment* gc_heap::current_sweep_seg = 0; #endif //DOUBLY_LINKED_FL exclusive_sync* gc_heap::bgc_alloc_lock; #endif //BACKGROUND_GC oom_history gc_heap::oom_info; int gc_heap::oomhist_index_per_heap = 0; oom_history gc_heap::oomhist_per_heap[max_oom_history_count]; fgm_history gc_heap::fgm_result; size_t gc_heap::allocated_since_last_gc[gc_oh_num::total_oh_count - 1]; BOOL gc_heap::ro_segments_in_range; #ifndef USE_REGIONS uint8_t* gc_heap::ephemeral_low; uint8_t* gc_heap::ephemeral_high; #endif //!USE_REGIONS uint8_t* gc_heap::lowest_address; uint8_t* gc_heap::highest_address; BOOL gc_heap::ephemeral_promotion; uint8_t* gc_heap::saved_ephemeral_plan_start[ephemeral_generation_count]; size_t gc_heap::saved_ephemeral_plan_start_size[ephemeral_generation_count]; short* gc_heap::brick_table; uint32_t* gc_heap::card_table; #ifdef CARD_BUNDLE uint32_t* gc_heap::card_bundle_table; #endif //CARD_BUNDLE uint8_t* gc_heap::gc_low = 0; uint8_t* gc_heap::gc_high = 0; #ifndef 
USE_REGIONS uint8_t* gc_heap::demotion_low; uint8_t* gc_heap::demotion_high; #endif //!USE_REGIONS BOOL gc_heap::demote_gen1_p = TRUE; uint8_t* gc_heap::last_gen1_pin_end; gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons; size_t gc_heap::etw_allocation_running_amount[gc_oh_num::total_oh_count - 1]; uint64_t gc_heap::total_alloc_bytes_soh = 0; uint64_t gc_heap::total_alloc_bytes_uoh = 0; int gc_heap::gc_policy = 0; size_t gc_heap::allocation_running_time; size_t gc_heap::allocation_running_amount; heap_segment* gc_heap::ephemeral_heap_segment = 0; #ifdef USE_REGIONS #ifdef STRESS_REGIONS OBJECTHANDLE* gc_heap::pinning_handles_for_alloc = 0; int gc_heap::ph_index_per_heap = 0; int gc_heap::pinning_seg_interval = 2; size_t gc_heap::num_gen0_regions = 0; int gc_heap::sip_seg_interval = 0; int gc_heap::sip_seg_maxgen_interval = 0; size_t gc_heap::num_condemned_regions = 0; #endif //STRESS_REGIONS region_free_list gc_heap::free_regions[count_free_region_kinds]; int gc_heap::num_regions_freed_in_sweep = 0; int gc_heap::regions_per_gen[max_generation + 1]; int gc_heap::sip_maxgen_regions_per_gen[max_generation + 1]; heap_segment* gc_heap::reserved_free_regions_sip[max_generation]; int gc_heap::num_sip_regions = 0; size_t gc_heap::committed_in_free = 0; size_t gc_heap::end_gen0_region_space = 0; size_t gc_heap::gen0_pinned_free_space = 0; bool gc_heap::gen0_large_chunk_found = false; size_t* gc_heap::survived_per_region = nullptr; size_t* gc_heap::old_card_survived_per_region = nullptr; #endif //USE_REGIONS BOOL gc_heap::blocking_collection = FALSE; heap_segment* gc_heap::freeable_uoh_segment = 0; uint64_t gc_heap::time_bgc_last = 0; size_t gc_heap::mark_stack_tos = 0; size_t gc_heap::mark_stack_bos = 0; size_t gc_heap::mark_stack_array_length = 0; mark* gc_heap::mark_stack_array = 0; #if defined (_DEBUG) && defined (VERIFY_HEAP) BOOL gc_heap::verify_pinned_queue_p = FALSE; #endif //_DEBUG && VERIFY_HEAP uint8_t* gc_heap::oldest_pinned_plug = 0; size_t gc_heap::num_pinned_objects = 0; #ifdef FEATURE_LOH_COMPACTION size_t gc_heap::loh_pinned_queue_tos = 0; size_t gc_heap::loh_pinned_queue_bos = 0; size_t gc_heap::loh_pinned_queue_length = 0; mark* gc_heap::loh_pinned_queue = 0; BOOL gc_heap::loh_compacted_p = FALSE; #endif //FEATURE_LOH_COMPACTION #ifdef BACKGROUND_GC EEThreadId gc_heap::bgc_thread_id; uint8_t* gc_heap::background_written_addresses [array_size+2]; heap_segment* gc_heap::freeable_soh_segment = 0; size_t gc_heap::bgc_overflow_count = 0; size_t gc_heap::bgc_begin_loh_size = 0; size_t gc_heap::end_loh_size = 0; size_t gc_heap::bgc_begin_poh_size = 0; size_t gc_heap::end_poh_size = 0; #ifdef BGC_SERVO_TUNING uint64_t gc_heap::loh_a_no_bgc = 0; uint64_t gc_heap::loh_a_bgc_marking = 0; uint64_t gc_heap::loh_a_bgc_planning = 0; size_t gc_heap::bgc_maxgen_end_fl_size = 0; #endif //BGC_SERVO_TUNING uint32_t gc_heap::bgc_alloc_spin_uoh = 0; size_t gc_heap::bgc_loh_size_increased = 0; size_t gc_heap::bgc_poh_size_increased = 0; size_t gc_heap::background_soh_alloc_count = 0; size_t gc_heap::background_uoh_alloc_count = 0; uint8_t** gc_heap::background_mark_stack_tos = 0; uint8_t** gc_heap::background_mark_stack_array = 0; size_t gc_heap::background_mark_stack_array_length = 0; uint8_t* gc_heap::background_min_overflow_address =0; uint8_t* gc_heap::background_max_overflow_address =0; BOOL gc_heap::processed_eph_overflow_p = FALSE; #ifndef USE_REGIONS uint8_t* gc_heap::background_min_soh_overflow_address =0; uint8_t* gc_heap::background_max_soh_overflow_address =0; heap_segment* 
gc_heap::saved_overflow_ephemeral_seg = 0; heap_segment* gc_heap::saved_sweep_ephemeral_seg = 0; uint8_t* gc_heap::saved_sweep_ephemeral_start = 0; #endif //!USE_REGIONS Thread* gc_heap::bgc_thread = 0; uint8_t** gc_heap::c_mark_list = 0; size_t gc_heap::c_mark_list_length = 0; size_t gc_heap::c_mark_list_index = 0; gc_history_per_heap gc_heap::bgc_data_per_heap; BOOL gc_heap::bgc_thread_running; CLRCriticalSection gc_heap::bgc_threads_timeout_cs; #endif //BACKGROUND_GC uint8_t** gc_heap::mark_list; uint8_t** gc_heap::mark_list_index; uint8_t** gc_heap::mark_list_end; #ifdef SNOOP_STATS snoop_stats_data gc_heap::snoop_stat; #endif //SNOOP_STATS uint8_t* gc_heap::min_overflow_address = MAX_PTR; uint8_t* gc_heap::max_overflow_address = 0; uint8_t* gc_heap::shigh = 0; uint8_t* gc_heap::slow = MAX_PTR; size_t gc_heap::ordered_free_space_indices[MAX_NUM_BUCKETS]; size_t gc_heap::saved_ordered_free_space_indices[MAX_NUM_BUCKETS]; size_t gc_heap::ordered_plug_indices[MAX_NUM_BUCKETS]; size_t gc_heap::saved_ordered_plug_indices[MAX_NUM_BUCKETS]; BOOL gc_heap::ordered_plug_indices_init = FALSE; BOOL gc_heap::use_bestfit = FALSE; uint8_t* gc_heap::bestfit_first_pin = 0; BOOL gc_heap::commit_end_of_seg = FALSE; size_t gc_heap::max_free_space_items = 0; size_t gc_heap::free_space_buckets = 0; size_t gc_heap::free_space_items = 0; int gc_heap::trimmed_free_space_index = 0; size_t gc_heap::total_ephemeral_plugs = 0; seg_free_spaces* gc_heap::bestfit_seg = 0; size_t gc_heap::total_ephemeral_size = 0; #ifdef HEAP_ANALYZE size_t gc_heap::internal_root_array_length = initial_internal_roots; uint8_t** gc_heap::internal_root_array = 0; size_t gc_heap::internal_root_array_index = 0; BOOL gc_heap::heap_analyze_success = TRUE; uint8_t* gc_heap::current_obj = 0; size_t gc_heap::current_obj_size = 0; #endif //HEAP_ANALYZE #ifdef GC_CONFIG_DRIVEN size_t gc_heap::interesting_data_per_gc[max_idp_count]; //size_t gc_heap::interesting_data_per_heap[max_idp_count]; //size_t gc_heap::interesting_mechanisms_per_heap[max_im_count]; #endif //GC_CONFIG_DRIVEN #endif //MULTIPLE_HEAPS no_gc_region_info gc_heap::current_no_gc_region_info; BOOL gc_heap::proceed_with_gc_p = FALSE; GCSpinLock gc_heap::gc_lock; #ifdef BGC_SERVO_TUNING uint64_t gc_heap::total_loh_a_last_bgc = 0; #endif //BGC_SERVO_TUNING size_t gc_heap::eph_gen_starts_size = 0; heap_segment* gc_heap::segment_standby_list; #if defined(USE_REGIONS) region_free_list gc_heap::global_regions_to_decommit[count_free_region_kinds]; region_free_list gc_heap::global_free_huge_regions; #endif //USE_REGIONS bool gc_heap::use_large_pages_p = 0; #ifdef HEAP_BALANCE_INSTRUMENTATION size_t gc_heap::last_gc_end_time_us = 0; #endif //HEAP_BALANCE_INSTRUMENTATION #ifndef USE_REGIONS size_t gc_heap::min_segment_size = 0; size_t gc_heap::min_uoh_segment_size = 0; #endif //!USE_REGIONS size_t gc_heap::min_segment_size_shr = 0; size_t gc_heap::soh_segment_size = 0; size_t gc_heap::segment_info_size = 0; #ifdef GC_CONFIG_DRIVEN size_t gc_heap::compact_or_sweep_gcs[2]; #endif //GC_CONFIG_DRIVEN #ifdef FEATURE_LOH_COMPACTION BOOL gc_heap::loh_compaction_always_p = FALSE; gc_loh_compaction_mode gc_heap::loh_compaction_mode = loh_compaction_default; int gc_heap::loh_pinned_queue_decay = LOH_PIN_DECAY; #endif //FEATURE_LOH_COMPACTION GCEvent gc_heap::full_gc_approach_event; GCEvent gc_heap::full_gc_end_event; uint32_t gc_heap::fgn_loh_percent = 0; #ifdef BACKGROUND_GC BOOL gc_heap::fgn_last_gc_was_concurrent = FALSE; #endif //BACKGROUND_GC VOLATILE(bool) gc_heap::full_gc_approach_event_set; 
bool gc_heap::special_sweep_p = false; size_t gc_heap::full_gc_counts[gc_type_max]; bool gc_heap::maxgen_size_inc_p = false; BOOL gc_heap::should_expand_in_full_gc = FALSE; // Provisional mode related stuff. bool gc_heap::provisional_mode_triggered = false; bool gc_heap::pm_trigger_full_gc = false; size_t gc_heap::provisional_triggered_gc_count = 0; size_t gc_heap::provisional_off_gc_count = 0; size_t gc_heap::num_provisional_triggered = 0; bool gc_heap::pm_stress_on = false; #ifdef HEAP_ANALYZE BOOL gc_heap::heap_analyze_enabled = FALSE; #endif //HEAP_ANALYZE #ifndef MULTIPLE_HEAPS alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST-1]; alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST-1]; alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST-1]; #ifdef DOUBLY_LINKED_FL // size we removed with no undo; only for recording purpose size_t gc_heap::gen2_removed_no_undo = 0; size_t gc_heap::saved_pinned_plug_index = 0; #endif //DOUBLY_LINKED_FL #ifdef FEATURE_EVENT_TRACE etw_bucket_info gc_heap::bucket_info[NUM_GEN2_ALIST]; #endif //FEATURE_EVENT_TRACE dynamic_data gc_heap::dynamic_data_table [total_generation_count]; gc_history_per_heap gc_heap::gc_data_per_heap; size_t gc_heap::total_promoted_bytes = 0; size_t gc_heap::finalization_promoted_bytes = 0; size_t gc_heap::maxgen_pinned_compact_before_advance = 0; uint8_t* gc_heap::alloc_allocated = 0; size_t gc_heap::allocation_quantum = CLR_SIZE; GCSpinLock gc_heap::more_space_lock_soh; GCSpinLock gc_heap::more_space_lock_uoh; #ifdef BACKGROUND_GC VOLATILE(int32_t) gc_heap::uoh_alloc_thread_count = 0; #endif //BACKGROUND_GC #ifdef SYNCHRONIZATION_STATS unsigned int gc_heap::good_suspension = 0; unsigned int gc_heap::bad_suspension = 0; uint64_t gc_heap::total_msl_acquire = 0; unsigned int gc_heap::num_msl_acquired = 0; unsigned int gc_heap::num_high_msl_acquire = 0; unsigned int gc_heap::num_low_msl_acquire = 0; #endif //SYNCHRONIZATION_STATS size_t gc_heap::alloc_contexts_used = 0; size_t gc_heap::soh_allocation_no_gc = 0; size_t gc_heap::loh_allocation_no_gc = 0; bool gc_heap::no_gc_oom_p = false; heap_segment* gc_heap::saved_loh_segment_no_gc = 0; #endif //MULTIPLE_HEAPS #ifndef MULTIPLE_HEAPS BOOL gc_heap::gen0_bricks_cleared = FALSE; int gc_heap::gen0_must_clear_bricks = 0; #ifdef FEATURE_PREMORTEM_FINALIZATION CFinalize* gc_heap::finalize_queue = 0; #endif // FEATURE_PREMORTEM_FINALIZATION #ifdef FEATURE_CARD_MARKING_STEALING VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh; VOLATILE(bool) gc_heap::card_mark_done_soh; VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh; VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh; VOLATILE(bool) gc_heap::card_mark_done_uoh; #endif // FEATURE_CARD_MARKING_STEALING generation gc_heap::generation_table [total_generation_count]; size_t gc_heap::interesting_data_per_heap[max_idp_count]; size_t gc_heap::compact_reasons_per_heap[max_compact_reasons_count]; size_t gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count]; size_t gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count]; #endif // MULTIPLE_HEAPS /* end of per heap static initialization */ // budget smoothing size_t gc_heap::smoothed_desired_per_heap[total_generation_count]; /* end of static initialization */ #ifndef DACCESS_COMPILE // This is for methods that need to iterate through all SOH heap segments/regions. 
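// A minimal sketch (not compiled) of the loop shape these helpers are meant for: with
// USE_REGIONS every SOH generation has its own list of regions, so walks start at gen0;
// without regions the ephemeral generations live on the segment chain reachable from
// max_generation, so starting there already covers all SOH segments. The function name
// below is hypothetical.
#if 0 // illustration only
void walk_all_soh_segments_sketch (gc_heap* hp)
{
    for (int gen_idx = get_start_generation_index(); gen_idx <= max_generation; gen_idx++)
    {
        heap_segment* seg = heap_segment_rw (generation_start_segment (hp->generation_of (gen_idx)));
        while (seg)
        {
            // per segment/region work goes here
            seg = heap_segment_next_rw (seg);
        }
    }
}
#endif // illustration only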
inline int get_start_generation_index() { #ifdef USE_REGIONS return 0; #else return max_generation; #endif //USE_REGIONS } inline int get_stop_generation_index (int condemned_gen_number) { #ifdef USE_REGIONS return 0; #else return condemned_gen_number; #endif //USE_REGIONS } void gen_to_condemn_tuning::print (int heap_num) { #ifdef DT_LOG dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition)); dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header)); gc_condemn_reason_gen r_gen; for (int i = 0; i < gcrg_max; i++) { r_gen = (gc_condemn_reason_gen)(i); str_reasons_gen[i * 2] = get_gen_char (get_gen (r_gen)); } dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_gen)); dprintf (DT_LOG_0, ("%s", record_condemn_reasons_condition_header)); gc_condemn_reason_condition r_condition; for (int i = 0; i < gcrc_max; i++) { r_condition = (gc_condemn_reason_condition)(i); str_reasons_condition[i * 2] = get_condition_char (get_condition (r_condition)); } dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_condition)); #else UNREFERENCED_PARAMETER(heap_num); #endif //DT_LOG } void gc_generation_data::print (int heap_num, int gen_num) { #if defined(SIMPLE_DPRINTF) && defined(DT_LOG) dprintf (DT_LOG_0, ("[%2d]gen%d beg %Id fl %Id fo %Id end %Id fl %Id fo %Id in %Id p %Id np %Id alloc %Id", heap_num, gen_num, size_before, free_list_space_before, free_obj_space_before, size_after, free_list_space_after, free_obj_space_after, in, pinned_surv, npinned_surv, new_allocation)); #else UNREFERENCED_PARAMETER(heap_num); UNREFERENCED_PARAMETER(gen_num); #endif //SIMPLE_DPRINTF && DT_LOG } void gc_history_per_heap::set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value) { uint32_t* mechanism = &mechanisms[mechanism_per_heap]; *mechanism = 0; *mechanism |= mechanism_mask; *mechanism |= (1 << value); #ifdef DT_LOG gc_mechanism_descr* descr = &gc_mechanisms_descr[mechanism_per_heap]; dprintf (DT_LOG_0, ("setting %s: %s", descr->name, (descr->descr)[value])); #endif //DT_LOG } void gc_history_per_heap::print() { #if defined(SIMPLE_DPRINTF) && defined(DT_LOG) for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++) { gen_data[i].print (heap_index, i); } dprintf (DT_LOG_0, ("fla %Id flr %Id esa %Id ca %Id pa %Id paa %Id, rfle %d, ec %Id", maxgen_size_info.free_list_allocated, maxgen_size_info.free_list_rejected, maxgen_size_info.end_seg_allocated, maxgen_size_info.condemned_allocated, maxgen_size_info.pinned_allocated, maxgen_size_info.pinned_allocated_advance, maxgen_size_info.running_free_list_efficiency, extra_gen0_committed)); int mechanism = 0; gc_mechanism_descr* descr = 0; for (int i = 0; i < max_mechanism_per_heap; i++) { mechanism = get_mechanism ((gc_mechanism_per_heap)i); if (mechanism >= 0) { descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i]; dprintf (DT_LOG_0, ("[%2d]%s%s", heap_index, descr->name, (descr->descr)[mechanism])); } } #endif //SIMPLE_DPRINTF && DT_LOG } void gc_history_global::print() { #ifdef DT_LOG char str_settings[64]; memset (str_settings, '|', sizeof (char) * 64); str_settings[max_global_mechanisms_count*2] = 0; for (int i = 0; i < max_global_mechanisms_count; i++) { str_settings[i * 2] = (get_mechanism_p ((gc_global_mechanism_p)i) ? 
'Y' : 'N'); } dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|")); dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings)); dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %Id(%d), memload %d", condemned_generation, str_gc_reasons[reason], str_gc_pause_modes[pause_mode], final_youngest_desired, gen0_reduction_count, mem_pressure)); #endif //DT_LOG } uint32_t limit_time_to_uint32 (uint64_t time) { time = min (time, UINT32_MAX); return (uint32_t)time; } void gc_heap::fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per_heap, int heap_num) { maxgen_size_increase* maxgen_size_info = &(current_gc_data_per_heap->maxgen_size_info); FIRE_EVENT(GCPerHeapHistory_V3, (void *)(maxgen_size_info->free_list_allocated), (void *)(maxgen_size_info->free_list_rejected), (void *)(maxgen_size_info->end_seg_allocated), (void *)(maxgen_size_info->condemned_allocated), (void *)(maxgen_size_info->pinned_allocated), (void *)(maxgen_size_info->pinned_allocated_advance), maxgen_size_info->running_free_list_efficiency, current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons0(), current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons1(), current_gc_data_per_heap->mechanisms[gc_heap_compact], current_gc_data_per_heap->mechanisms[gc_heap_expand], current_gc_data_per_heap->heap_index, (void *)(current_gc_data_per_heap->extra_gen0_committed), total_generation_count, (uint32_t)(sizeof (gc_generation_data)), (void *)&(current_gc_data_per_heap->gen_data[0])); current_gc_data_per_heap->print(); current_gc_data_per_heap->gen_to_condemn_reasons.print (heap_num); } void gc_heap::fire_pevents() { gc_history_global* current_gc_data_global = get_gc_data_global(); settings.record (current_gc_data_global); current_gc_data_global->print(); #ifdef FEATURE_EVENT_TRACE if (!informational_event_enabled_p) return; uint32_t count_time_info = (settings.concurrent ? max_bgc_time_type : (settings.compaction ? max_compact_time_type : max_sweep_time_type)); uint64_t* time_info = (settings.concurrent ? bgc_time_info : gc_time_info); // We don't want to have to fire the time info as 64-bit integers as there's no need to // so compress them down to 32-bit ones. uint32_t* time_info_32 = (uint32_t*)time_info; for (uint32_t i = 0; i < count_time_info; i++) { time_info_32[i] = limit_time_to_uint32 (time_info[i]); } FIRE_EVENT(GCGlobalHeapHistory_V4, current_gc_data_global->final_youngest_desired, current_gc_data_global->num_heaps, current_gc_data_global->condemned_generation, current_gc_data_global->gen0_reduction_count, current_gc_data_global->reason, current_gc_data_global->global_mechanisms_p, current_gc_data_global->pause_mode, current_gc_data_global->mem_pressure, current_gc_data_global->gen_to_condemn_reasons.get_reasons0(), current_gc_data_global->gen_to_condemn_reasons.get_reasons1(), count_time_info, (uint32_t)(sizeof (uint32_t)), (void*)time_info_32); #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap(); fire_per_heap_hist_event (current_gc_data_per_heap, hp->heap_number); } #else gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); fire_per_heap_hist_event (current_gc_data_per_heap, heap_number); #endif //MULTIPLE_HEAPS #ifdef FEATURE_LOH_COMPACTION if (!settings.concurrent && settings.loh_compaction) { // Not every heap will compact LOH, the ones that didn't will just have 0s // in its info. 
FIRE_EVENT(GCLOHCompact, get_num_heaps(), (uint32_t)(sizeof (etw_loh_compact_info)), (void *)loh_compact_info); } #endif //FEATURE_LOH_COMPACTION #endif //FEATURE_EVENT_TRACE } inline BOOL gc_heap::dt_low_ephemeral_space_p (gc_tuning_point tp) { BOOL ret = FALSE; switch (tp) { case tuning_deciding_condemned_gen: #ifndef USE_REGIONS case tuning_deciding_compaction: case tuning_deciding_expansion: #endif //USE_REGIONS case tuning_deciding_full_gc: { ret = (!ephemeral_gen_fit_p (tp)); break; } #ifndef USE_REGIONS case tuning_deciding_promote_ephemeral: { size_t new_gen0size = approximate_new_allocation(); ptrdiff_t plan_ephemeral_size = total_ephemeral_size; dprintf (GTC_LOG, ("h%d: plan eph size is %Id, new gen0 is %Id", heap_number, plan_ephemeral_size, new_gen0size)); // If we were in no_gc_region we could have allocated a larger than normal segment, // and the next seg we allocate will be a normal sized seg so if we can't fit the new // ephemeral generations there, do an ephemeral promotion. ret = ((soh_segment_size - segment_info_size) < (plan_ephemeral_size + new_gen0size)); break; } #endif //USE_REGIONS default: { assert (!"invalid tuning reason"); break; } } return ret; } BOOL gc_heap::dt_high_frag_p (gc_tuning_point tp, int gen_number, BOOL elevate_p) { BOOL ret = FALSE; switch (tp) { case tuning_deciding_condemned_gen: { dynamic_data* dd = dynamic_data_of (gen_number); float fragmentation_burden = 0; if (elevate_p) { ret = (dd_fragmentation (dynamic_data_of (max_generation)) >= dd_max_size(dd)); dprintf (GTC_LOG, ("h%d: frag is %Id, max size is %Id", heap_number, dd_fragmentation (dd), dd_max_size(dd))); } else { #ifndef MULTIPLE_HEAPS if (gen_number == max_generation) { float frag_ratio = (float)(dd_fragmentation (dynamic_data_of (max_generation))) / (float)generation_size (max_generation); if (frag_ratio > 0.65) { dprintf (GTC_LOG, ("g2 FR: %d%%", (int)(frag_ratio*100))); return TRUE; } } #endif //!MULTIPLE_HEAPS size_t fr = generation_unusable_fragmentation (generation_of (gen_number)); ret = (fr > dd_fragmentation_limit(dd)); if (ret) { fragmentation_burden = (float)fr / generation_size (gen_number); ret = (fragmentation_burden > dd_v_fragmentation_burden_limit (dd)); } dprintf (GTC_LOG, ("h%d: gen%d, frag is %Id, alloc effi: %d%%, unusable frag is %Id, ratio is %d", heap_number, gen_number, dd_fragmentation (dd), (int)(100*generation_allocator_efficiency (generation_of (gen_number))), fr, (int)(fragmentation_burden*100))); } break; } default: break; } return ret; } inline BOOL gc_heap::dt_estimate_reclaim_space_p (gc_tuning_point tp, int gen_number) { BOOL ret = FALSE; switch (tp) { case tuning_deciding_condemned_gen: { if (gen_number == max_generation) { size_t est_maxgen_free = estimated_reclaim (gen_number); uint32_t num_heaps = 1; #ifdef MULTIPLE_HEAPS num_heaps = gc_heap::n_heaps; #endif //MULTIPLE_HEAPS size_t min_frag_th = min_reclaim_fragmentation_threshold (num_heaps); dprintf (GTC_LOG, ("h%d, min frag is %Id", heap_number, min_frag_th)); ret = (est_maxgen_free >= min_frag_th); } else { assert (0); } break; } default: break; } return ret; } // DTREVIEW: Right now we only estimate gen2 fragmentation. 
// on 64-bit though we should consider gen1 or even gen0 fragmentation as // well inline BOOL gc_heap::dt_estimate_high_frag_p (gc_tuning_point tp, int gen_number, uint64_t available_mem) { BOOL ret = FALSE; switch (tp) { case tuning_deciding_condemned_gen: { if (gen_number == max_generation) { dynamic_data* dd = dynamic_data_of (gen_number); float est_frag_ratio = 0; if (dd_current_size (dd) == 0) { est_frag_ratio = 1; } else if ((dd_fragmentation (dd) == 0) || (dd_fragmentation (dd) + dd_current_size (dd) == 0)) { est_frag_ratio = 0; } else { est_frag_ratio = (float)dd_fragmentation (dd) / (float)(dd_fragmentation (dd) + dd_current_size (dd)); } size_t est_frag = (dd_fragmentation (dd) + (size_t)((dd_desired_allocation (dd) - dd_new_allocation (dd)) * est_frag_ratio)); dprintf (GTC_LOG, ("h%d: gen%d: current_size is %Id, frag is %Id, est_frag_ratio is %d%%, estimated frag is %Id", heap_number, gen_number, dd_current_size (dd), dd_fragmentation (dd), (int)(est_frag_ratio*100), est_frag)); uint32_t num_heaps = 1; #ifdef MULTIPLE_HEAPS num_heaps = gc_heap::n_heaps; #endif //MULTIPLE_HEAPS uint64_t min_frag_th = min_high_fragmentation_threshold(available_mem, num_heaps); //dprintf (GTC_LOG, ("h%d, min frag is %I64d", heap_number, min_frag_th)); ret = (est_frag >= min_frag_th); } else { assert (0); } break; } default: break; } return ret; } inline BOOL gc_heap::dt_low_card_table_efficiency_p (gc_tuning_point tp) { BOOL ret = FALSE; switch (tp) { case tuning_deciding_condemned_gen: { /* promote into max-generation if the card table has too many * generation faults besides the n -> 0 */ ret = (generation_skip_ratio < generation_skip_ratio_threshold); break; } default: break; } return ret; } inline BOOL gc_heap::dt_high_memory_load_p() { return ((settings.entry_memory_load >= high_memory_load_th) || g_low_memory_status); } inline BOOL in_range_for_segment(uint8_t* add, heap_segment* seg) { return ((add >= heap_segment_mem (seg)) && (add < heap_segment_reserved (seg))); } #ifdef FEATURE_BASICFREEZE // The array we allocate is organized as follows: // 0th element is the address of the last array we allocated. // starting from the 1st element are the segment addresses, that's // what buckets() returns. struct bk { uint8_t* add; size_t val; }; class sorted_table { private: ptrdiff_t size; ptrdiff_t count; bk* slots; bk* buckets() { return (slots + 1); } uint8_t*& last_slot (bk* arr) { return arr[0].add; } bk* old_slots; public: static sorted_table* make_sorted_table (); BOOL insert (uint8_t* add, size_t val);; size_t lookup (uint8_t*& add); void remove (uint8_t* add); void clear (); void delete_sorted_table(); void delete_old_slots(); void enqueue_old_slot(bk* sl); BOOL ensure_space_for_insert(); }; sorted_table* sorted_table::make_sorted_table () { size_t size = 400; // allocate one more bk to store the older slot address. 
sorted_table* res = (sorted_table*)new (nothrow) char [sizeof (sorted_table) + (size + 1) * sizeof (bk)]; if (!res) return 0; res->size = size; res->slots = (bk*)(res + 1); res->old_slots = 0; res->clear(); return res; } void sorted_table::delete_sorted_table() { if (slots != (bk*)(this+1)) { delete slots; } delete_old_slots(); delete this; } void sorted_table::delete_old_slots() { uint8_t* sl = (uint8_t*)old_slots; while (sl) { uint8_t* dsl = sl; sl = last_slot ((bk*)sl); delete dsl; } old_slots = 0; } void sorted_table::enqueue_old_slot(bk* sl) { last_slot (sl) = (uint8_t*)old_slots; old_slots = sl; } inline size_t sorted_table::lookup (uint8_t*& add) { ptrdiff_t high = (count-1); ptrdiff_t low = 0; ptrdiff_t ti; ptrdiff_t mid; bk* buck = buckets(); while (low <= high) { mid = ((low + high)/2); ti = mid; if (buck[ti].add > add) { if ((ti > 0) && (buck[ti-1].add <= add)) { add = buck[ti-1].add; return buck[ti - 1].val; } high = mid - 1; } else { if (buck[ti+1].add > add) { add = buck[ti].add; return buck[ti].val; } low = mid + 1; } } add = 0; return 0; } BOOL sorted_table::ensure_space_for_insert() { if (count == size) { size = (size * 3)/2; assert((size * sizeof (bk)) > 0); bk* res = (bk*)new (nothrow) char [(size + 1) * sizeof (bk)]; assert (res); if (!res) return FALSE; last_slot (res) = 0; memcpy (((bk*)res + 1), buckets(), count * sizeof (bk)); bk* last_old_slots = slots; slots = res; if (last_old_slots != (bk*)(this + 1)) enqueue_old_slot (last_old_slots); } return TRUE; } BOOL sorted_table::insert (uint8_t* add, size_t val) { //grow if no more room assert (count < size); //insert sorted ptrdiff_t high = (count-1); ptrdiff_t low = 0; ptrdiff_t ti; ptrdiff_t mid; bk* buck = buckets(); while (low <= high) { mid = ((low + high)/2); ti = mid; if (buck[ti].add > add) { if ((ti == 0) || (buck[ti-1].add <= add)) { // found insertion point for (ptrdiff_t k = count; k > ti;k--) { buck [k] = buck [k-1]; } buck[ti].add = add; buck[ti].val = val; count++; return TRUE; } high = mid - 1; } else { if (buck[ti+1].add > add) { //found the insertion point for (ptrdiff_t k = count; k > ti+1;k--) { buck [k] = buck [k-1]; } buck[ti+1].add = add; buck[ti+1].val = val; count++; return TRUE; } low = mid + 1; } } assert (0); return TRUE; } void sorted_table::remove (uint8_t* add) { ptrdiff_t high = (count-1); ptrdiff_t low = 0; ptrdiff_t ti; ptrdiff_t mid; bk* buck = buckets(); while (low <= high) { mid = ((low + high)/2); ti = mid; if (buck[ti].add > add) { if (buck[ti-1].add <= add) { for (ptrdiff_t k = ti; k < count; k++) buck[k-1] = buck[k]; count--; return; } high = mid - 1; } else { if (buck[ti+1].add > add) { for (ptrdiff_t k = ti+1; k < count; k++) buck[k-1] = buck[k]; count--; return; } low = mid + 1; } } assert (0); } void sorted_table::clear() { count = 1; buckets()[0].add = MAX_PTR; } #endif //FEATURE_BASICFREEZE #ifdef USE_REGIONS inline size_t get_basic_region_index_for_address (uint8_t* address) { size_t basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; return (basic_region_index - ((size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr)); } // Go from a random address to its region info. The random address could be // in one of the basic regions of a larger region so we need to check for that. 
inline heap_segment* get_region_info_for_address (uint8_t* address) { size_t basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; heap_segment* basic_region_info_entry = (heap_segment*)&seg_mapping_table[basic_region_index]; ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated (basic_region_info_entry); if (first_field < 0) { basic_region_index += first_field; } return ((heap_segment*)(&seg_mapping_table[basic_region_index])); } // Go from the physical start of a region to its region info. inline heap_segment* get_region_info (uint8_t* region_start) { size_t region_index = (size_t)region_start >> gc_heap::min_segment_size_shr; heap_segment* region_info_entry = (heap_segment*)&seg_mapping_table[region_index]; dprintf (REGIONS_LOG, ("region info for region %Ix is at %Id, %Ix (alloc: %Ix)", region_start, region_index, (size_t)region_info_entry, heap_segment_allocated (region_info_entry))); return (heap_segment*)&seg_mapping_table[region_index]; } // Go from the actual region info to its region start. inline uint8_t* get_region_start (heap_segment* region_info) { uint8_t* obj_start = heap_segment_mem (region_info); return (obj_start - sizeof (aligned_plug_and_gap)); } inline size_t get_region_size (heap_segment* region_info) { return (size_t)(heap_segment_reserved (region_info) - get_region_start (region_info)); } inline size_t get_region_committed_size (heap_segment* region) { uint8_t* start = get_region_start (region); uint8_t* committed = heap_segment_committed (region); return committed - start; } inline bool is_free_region (heap_segment* region) { return (heap_segment_allocated (region) == nullptr); } bool region_allocator::init (uint8_t* start, uint8_t* end, size_t alignment, uint8_t** lowest, uint8_t** highest) { uint8_t* actual_start = start; region_alignment = alignment; large_region_alignment = LARGE_REGION_FACTOR * alignment; global_region_start = (uint8_t*)align_region_up ((size_t)actual_start); uint8_t* actual_end = end; global_region_end = (uint8_t*)align_region_down ((size_t)actual_end); global_region_left_used = global_region_start; global_region_right_used = global_region_end; // Note: I am allocating a map that covers the whole reserved range. // We can optimize it to only cover the current heap range. 
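// A minimal sketch (not compiled) of what each entry of this map encodes: a run of N contiguous
// units (basic regions) is recorded by writing N into both the first and the last unit of the
// run, with region_alloc_free_bit set when the run is free, which is what make_busy_block and
// make_free_block below maintain. The helper name is hypothetical.
#if 0 // illustration only
static void mark_block_sketch (uint32_t* map, size_t first_unit, uint32_t num_units,
                               bool free_p, uint32_t free_bit)
{
    uint32_t encoded = (free_p ? (free_bit | num_units) : num_units);
    map[first_unit] = encoded;
    map[first_unit + num_units - 1] = encoded;
}
#endif // illustration only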
size_t total_num_units = (global_region_end - global_region_start) / region_alignment; total_free_units = (uint32_t)total_num_units; uint32_t* unit_map = new (nothrow) uint32_t[total_num_units]; if (unit_map) { memset (unit_map, 0, sizeof (uint32_t) * total_num_units); region_map_left_start = unit_map; region_map_left_end = region_map_left_start; region_map_right_start = unit_map + total_num_units; region_map_right_end = region_map_right_start; dprintf (REGIONS_LOG, ("start: %Ix, end: %Ix, total %Idmb(alignment: %Idmb), map units %d", (size_t)start, (size_t)end, (size_t)((end - start) / 1024 / 1024), (alignment / 1024 / 1024), total_num_units)); *lowest = global_region_start; *highest = global_region_end; } return (unit_map != 0); } inline uint8_t* region_allocator::region_address_of (uint32_t* map_index) { return (global_region_start + ((map_index - region_map_left_start) * region_alignment)); } inline uint32_t* region_allocator::region_map_index_of (uint8_t* address) { return (region_map_left_start + ((address - global_region_start) / region_alignment)); } void region_allocator::make_busy_block (uint32_t* index_start, uint32_t num_units) { #ifdef _DEBUG dprintf (REGIONS_LOG, ("MBB[B: %Id] %d->%d", (size_t)num_units, (int)(index_start - region_map_left_start), (int)(index_start - region_map_left_start + num_units))); #endif //_DEBUG ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock); uint32_t* index_end = index_start + (num_units - 1); *index_start = *index_end = num_units; } void region_allocator::make_free_block (uint32_t* index_start, uint32_t num_units) { #ifdef _DEBUG dprintf (REGIONS_LOG, ("MFB[F: %Id] %d->%d", (size_t)num_units, (int)(index_start - region_map_left_start), (int)(index_start - region_map_left_start + num_units))); #endif //_DEBUG ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock); uint32_t* index_end = index_start + (num_units - 1); *index_start = *index_end = region_alloc_free_bit | num_units; } void region_allocator::print_map (const char* msg) { ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock); #ifdef _DEBUG const char* heap_type = "UH"; dprintf (REGIONS_LOG, ("[%s]-----printing----%s", heap_type, msg)); uint32_t* current_index = region_map_left_start; uint32_t* end_index = region_map_left_end; uint32_t count_free_units = 0; for (int i = 0; i < 2; i++) { while (current_index < end_index) { uint32_t current_val = *current_index; uint32_t current_num_units = get_num_units (current_val); bool free_p = is_unit_memory_free (current_val); dprintf (REGIONS_LOG, ("[%s][%s: %Id]%d->%d", heap_type, (free_p ?
"F" : "B"), (size_t)current_num_units, (int)(current_index - region_map_left_start), (int)(current_index - region_map_left_start + current_num_units))); if (free_p) { count_free_units += current_num_units; } current_index += current_num_units; } current_index = region_map_right_start; end_index = region_map_right_end; } count_free_units += (uint32_t)(region_map_right_start - region_map_left_end); assert(count_free_units == total_free_units); uint32_t total_regions = (uint32_t)((global_region_end - global_region_start) / region_alignment); dprintf (REGIONS_LOG, ("[%s]-----end printing----[%d total, left used %d, right used %d]\n", heap_type, total_regions, (region_map_left_end - region_map_left_start), (region_map_right_end - region_map_right_start))); #endif //_DEBUG } uint8_t* region_allocator::allocate_end (uint32_t num_units, allocate_direction direction) { uint8_t* alloc = NULL; ASSERT_HOLDING_SPIN_LOCK (&region_allocator_lock); if (global_region_left_used < global_region_right_used) { size_t end_remaining = global_region_right_used - global_region_left_used; if ((end_remaining / region_alignment) >= num_units) { if (direction == allocate_forward) { make_busy_block (region_map_left_end, num_units); region_map_left_end += num_units; alloc = global_region_left_used; global_region_left_used += num_units * region_alignment; } else { assert(direction == allocate_backward); region_map_right_start -= num_units; make_busy_block (region_map_right_start, num_units); global_region_right_used -= num_units * region_alignment; alloc = global_region_right_used; } } } return alloc; } void region_allocator::enter_spin_lock() { while (true) { if (Interlocked::CompareExchange(&region_allocator_lock.lock, 0, -1) < 0) break; while (region_allocator_lock.lock >= 0) { YieldProcessor(); // indicate to the processor that we are spinning } } #ifdef _DEBUG region_allocator_lock.holding_thread = GCToEEInterface::GetThread(); #endif //_DEBUG } void region_allocator::leave_spin_lock() { #ifdef _DEBUG region_allocator_lock.holding_thread = (Thread*)-1; #endif //_DEBUG region_allocator_lock.lock = -1; } uint8_t* region_allocator::allocate (uint32_t num_units, allocate_direction direction) { enter_spin_lock(); uint32_t* current_index; uint32_t* end_index; if (direction == allocate_forward) { current_index = region_map_left_start; end_index = region_map_left_end; } else { assert(direction == allocate_backward); current_index = region_map_right_end; end_index = region_map_right_start; } dprintf (REGIONS_LOG, ("searching %d->%d", (int)(current_index - region_map_left_start), (int)(end_index - region_map_left_start))); print_map ("before alloc"); while (((direction == allocate_forward) && (current_index < end_index)) || ((direction == allocate_backward) && (current_index > end_index))) { uint32_t current_val = *(current_index - ((direction == -1) ? 1 : 0)); uint32_t current_num_units = get_num_units (current_val); bool free_p = is_unit_memory_free (current_val); dprintf (REGIONS_LOG, ("ALLOC[%s: %Id]%d->%d", (free_p ?
"F" : "B"), (size_t)current_num_units, (int)(current_index - region_map_left_start), (int)(current_index + current_num_units - region_map_left_start))); if (free_p) { if (current_num_units >= num_units) { dprintf (REGIONS_LOG, ("found %Id contiguous free units(%d->%d), sufficient", (size_t)current_num_units, (int)(current_index - region_map_left_start), (int)(current_index - region_map_left_start + current_num_units))); uint32_t* busy_block; uint32_t* free_block; if (direction == 1) { busy_block = current_index; free_block = current_index + num_units; } else { busy_block = current_index - num_units; free_block = current_index - current_num_units; } make_busy_block (busy_block, num_units); if ((current_num_units - num_units) > 0) { make_free_block (free_block, (current_num_units - num_units)); } total_free_units -= num_units; print_map ("alloc: found in free"); leave_spin_lock(); return region_address_of (busy_block); } } if (direction == allocate_forward) { current_index += current_num_units; } else { current_index -= current_num_units; } } uint8_t* alloc = allocate_end (num_units, direction); if (alloc) { total_free_units -= num_units; print_map ("alloc: found at the end"); } else { dprintf (REGIONS_LOG, ("couldn't find memory at the end! only %Id bytes left", (global_region_right_used - global_region_left_used))); } leave_spin_lock(); return alloc; } // ETW TODO: need to fire create seg events for these methods. // FIRE_EVENT(GCCreateSegment_V1 bool region_allocator::allocate_region (size_t size, uint8_t** start, uint8_t** end, allocate_direction direction) { size_t alignment = region_alignment; size_t alloc_size = align_region_up (size); uint32_t num_units = (uint32_t)(alloc_size / alignment); bool ret = false; uint8_t* alloc = NULL; dprintf (REGIONS_LOG, ("----GET %d-----", num_units)); alloc = allocate (num_units, direction); *start = alloc; *end = alloc + alloc_size; ret = (alloc != NULL); return ret; } bool region_allocator::allocate_basic_region (uint8_t** start, uint8_t** end) { return allocate_region (region_alignment, start, end, allocate_forward); } // Large regions are 8x basic region sizes by default. If you need a larger region than that, // call allocate_region with the size. 
bool region_allocator::allocate_large_region (uint8_t** start, uint8_t** end, allocate_direction direction, size_t size) { if (size == 0) size = large_region_alignment; else { // round up size to a multiple of large_region_alignment // for the below computation to work, large_region_alignment must be a power of 2 assert (round_up_power2(large_region_alignment) == large_region_alignment); size = (size + (large_region_alignment - 1)) & ~(large_region_alignment - 1); } return allocate_region (size, start, end, direction); } void region_allocator::delete_region (uint8_t* region_start) { enter_spin_lock(); assert (is_region_aligned (region_start)); print_map ("before delete"); uint32_t* current_index = region_map_index_of (region_start); uint32_t current_val = *current_index; assert (!is_unit_memory_free (current_val)); dprintf (REGIONS_LOG, ("----DEL %d (%u units)-----", (current_index - region_map_left_start), current_val)); uint32_t* region_end_index = current_index + current_val; uint8_t* region_end = region_address_of (region_end_index); int free_block_size = current_val; uint32_t* free_index = current_index; if ((current_index != region_map_left_start) && (current_index != region_map_right_start)) { uint32_t previous_val = *(current_index - 1); if (is_unit_memory_free(previous_val)) { uint32_t previous_size = get_num_units (previous_val); free_index -= previous_size; free_block_size += previous_size; } } if ((region_end != global_region_left_used) && (region_end != global_region_end)) { uint32_t next_val = *region_end_index; if (is_unit_memory_free(next_val)) { uint32_t next_size = get_num_units (next_val); free_block_size += next_size; region_end += next_size; } } if (region_end == global_region_left_used) { region_map_left_end = free_index; dprintf (REGIONS_LOG, ("adjust global left used from %Ix to %Ix", global_region_left_used, region_address_of (free_index))); global_region_left_used = region_address_of (free_index); } else if (region_start == global_region_right_used) { region_map_right_start = free_index + free_block_size; dprintf (REGIONS_LOG, ("adjust global right used from %Ix to %Ix", global_region_right_used, region_address_of (free_index + free_block_size))); global_region_right_used = region_address_of (free_index + free_block_size); } else { make_free_block (free_index, free_block_size); } total_free_units += current_val; print_map ("after delete"); leave_spin_lock(); } void region_allocator::move_highest_free_regions (int64_t n, bool small_region_p, region_free_list to_free_list[count_free_region_kinds]) { assert (n > 0); uint32_t* current_index = region_map_left_end - 1; uint32_t* lowest_index = region_map_left_start; while (current_index >= lowest_index) { uint32_t current_val = *current_index; uint32_t current_num_units = get_num_units (current_val); bool free_p = is_unit_memory_free (current_val); if (!free_p && ((current_num_units == 1) == small_region_p)) { uint32_t* index = current_index - (current_num_units - 1); heap_segment* region = get_region_info (region_address_of (index)); if (is_free_region (region)) { if (n >= current_num_units) { n -= current_num_units; region_free_list::unlink_region (region); region_free_list::add_region (region, to_free_list); } else { break; } } } current_index -= current_num_units; } } #endif //USE_REGIONS inline uint8_t* align_on_segment (uint8_t* add) { return (uint8_t*)((size_t)(add + (((size_t)1 << gc_heap::min_segment_size_shr) - 1)) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); } inline uint8_t* align_lower_segment 
(uint8_t* add) { return (uint8_t*)((size_t)(add) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); } size_t size_seg_mapping_table_of (uint8_t* from, uint8_t* end) { from = align_lower_segment (from); end = align_on_segment (end); dprintf (1, ("from: %Ix, end: %Ix, size: %Ix", from, end, sizeof (seg_mapping)*(((size_t)(end - from) >> gc_heap::min_segment_size_shr)))); return sizeof (seg_mapping)*((size_t)(end - from) >> gc_heap::min_segment_size_shr); } // for seg_mapping_table we want it to start from a pointer sized address. inline size_t align_for_seg_mapping_table (size_t size) { return ((size + (sizeof (uint8_t*) - 1)) &~ (sizeof (uint8_t*) - 1)); } inline size_t seg_mapping_word_of (uint8_t* add) { return (size_t)add >> gc_heap::min_segment_size_shr; } #ifdef FEATURE_BASICFREEZE inline size_t ro_seg_begin_index (heap_segment* seg) { #ifdef USE_REGIONS size_t begin_index = (size_t)heap_segment_mem (seg) >> gc_heap::min_segment_size_shr; #else size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr; #endif //USE_REGIONS begin_index = max (begin_index, (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr); return begin_index; } inline size_t ro_seg_end_index (heap_segment* seg) { size_t end_index = (size_t)(heap_segment_reserved (seg) - 1) >> gc_heap::min_segment_size_shr; end_index = min (end_index, (size_t)g_gc_highest_address >> gc_heap::min_segment_size_shr); return end_index; } void seg_mapping_table_add_ro_segment (heap_segment* seg) { if ((heap_segment_reserved (seg) <= g_gc_lowest_address) || (heap_segment_mem (seg) >= g_gc_highest_address)) return; for (size_t entry_index = ro_seg_begin_index (seg); entry_index <= ro_seg_end_index (seg); entry_index++) { #ifdef USE_REGIONS heap_segment* region = (heap_segment*)&seg_mapping_table[entry_index]; heap_segment_allocated (region) = (uint8_t*)ro_in_entry; #else seg_mapping_table[entry_index].seg1 = (heap_segment*)((size_t)seg_mapping_table[entry_index].seg1 | ro_in_entry); #endif //USE_REGIONS } } void seg_mapping_table_remove_ro_segment (heap_segment* seg) { UNREFERENCED_PARAMETER(seg); #if 0 // POSSIBLE PERF TODO: right now we are not doing anything because we can't simply remove the flag. If it proves // to be a perf problem, we can search in the current ro segs and see if any lands in this range and only // remove the flag if none lands in this range. #endif //0 } heap_segment* ro_segment_lookup (uint8_t* o) { uint8_t* ro_seg_start = o; heap_segment* seg = (heap_segment*)gc_heap::seg_table->lookup (ro_seg_start); if (ro_seg_start && in_range_for_segment (o, seg)) return seg; else return 0; } #endif //FEATURE_BASICFREEZE void gc_heap::seg_mapping_table_add_segment (heap_segment* seg, gc_heap* hp) { #ifndef USE_REGIONS size_t seg_end = (size_t)(heap_segment_reserved (seg) - 1); size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr; seg_mapping* begin_entry = &seg_mapping_table[begin_index]; size_t end_index = seg_end >> gc_heap::min_segment_size_shr; seg_mapping* end_entry = &seg_mapping_table[end_index]; dprintf (2, ("adding seg %Ix(%d)-%Ix(%d)", seg, begin_index, heap_segment_reserved (seg), end_index)); dprintf (2, ("before add: begin entry%d: boundary: %Ix; end entry: %d: boundary: %Ix", begin_index, (seg_mapping_table[begin_index].boundary + 1), end_index, (seg_mapping_table[end_index].boundary + 1))); #ifdef MULTIPLE_HEAPS #ifdef SIMPLE_DPRINTF dprintf (2, ("begin %d: h0: %Ix(%d), h1: %Ix(%d); end %d: h0: %Ix(%d), h1: %Ix(%d)", begin_index, (uint8_t*)(begin_entry->h0), (begin_entry->h0 ? 
begin_entry->h0->heap_number : -1), (uint8_t*)(begin_entry->h1), (begin_entry->h1 ? begin_entry->h1->heap_number : -1), end_index, (uint8_t*)(end_entry->h0), (end_entry->h0 ? end_entry->h0->heap_number : -1), (uint8_t*)(end_entry->h1), (end_entry->h1 ? end_entry->h1->heap_number : -1))); #endif //SIMPLE_DPRINTF assert (end_entry->boundary == 0); assert (end_entry->h0 == 0); end_entry->h0 = hp; assert (begin_entry->h1 == 0); begin_entry->h1 = hp; #else UNREFERENCED_PARAMETER(hp); #endif //MULTIPLE_HEAPS end_entry->boundary = (uint8_t*)seg_end; dprintf (2, ("set entry %d seg1 and %d seg0 to %Ix", begin_index, end_index, seg)); assert ((begin_entry->seg1 == 0) || ((size_t)(begin_entry->seg1) == ro_in_entry)); begin_entry->seg1 = (heap_segment*)((size_t)(begin_entry->seg1) | (size_t)seg); end_entry->seg0 = seg; // for every entry inbetween we need to set its heap too. for (size_t entry_index = (begin_index + 1); entry_index <= (end_index - 1); entry_index++) { assert (seg_mapping_table[entry_index].boundary == 0); #ifdef MULTIPLE_HEAPS assert (seg_mapping_table[entry_index].h0 == 0); seg_mapping_table[entry_index].h1 = hp; #endif //MULTIPLE_HEAPS seg_mapping_table[entry_index].seg1 = seg; } dprintf (2, ("after add: begin entry%d: boundary: %Ix; end entry: %d: boundary: %Ix", begin_index, (seg_mapping_table[begin_index].boundary + 1), end_index, (seg_mapping_table[end_index].boundary + 1))); #if defined(MULTIPLE_HEAPS) && defined(SIMPLE_DPRINTF) dprintf (2, ("begin %d: h0: %Ix(%d), h1: %Ix(%d); end: %d h0: %Ix(%d), h1: %Ix(%d)", begin_index, (uint8_t*)(begin_entry->h0), (begin_entry->h0 ? begin_entry->h0->heap_number : -1), (uint8_t*)(begin_entry->h1), (begin_entry->h1 ? begin_entry->h1->heap_number : -1), end_index, (uint8_t*)(end_entry->h0), (end_entry->h0 ? end_entry->h0->heap_number : -1), (uint8_t*)(end_entry->h1), (end_entry->h1 ? end_entry->h1->heap_number : -1))); #endif //MULTIPLE_HEAPS && SIMPLE_DPRINTF #endif //!USE_REGIONS } void gc_heap::seg_mapping_table_remove_segment (heap_segment* seg) { #ifndef USE_REGIONS size_t seg_end = (size_t)(heap_segment_reserved (seg) - 1); size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr; seg_mapping* begin_entry = &seg_mapping_table[begin_index]; size_t end_index = seg_end >> gc_heap::min_segment_size_shr; seg_mapping* end_entry = &seg_mapping_table[end_index]; dprintf (2, ("removing seg %Ix(%d)-%Ix(%d)", seg, begin_index, heap_segment_reserved (seg), end_index)); assert (end_entry->boundary == (uint8_t*)seg_end); end_entry->boundary = 0; #ifdef MULTIPLE_HEAPS gc_heap* hp = heap_segment_heap (seg); assert (end_entry->h0 == hp); end_entry->h0 = 0; assert (begin_entry->h1 == hp); begin_entry->h1 = 0; #endif //MULTIPLE_HEAPS assert (begin_entry->seg1 != 0); begin_entry->seg1 = (heap_segment*)((size_t)(begin_entry->seg1) & ro_in_entry); end_entry->seg0 = 0; // for every entry inbetween we need to reset its heap too. 
for (size_t entry_index = (begin_index + 1); entry_index <= (end_index - 1); entry_index++) { assert (seg_mapping_table[entry_index].boundary == 0); #ifdef MULTIPLE_HEAPS assert (seg_mapping_table[entry_index].h0 == 0); assert (seg_mapping_table[entry_index].h1 == hp); seg_mapping_table[entry_index].h1 = 0; #endif //MULTIPLE_HEAPS seg_mapping_table[entry_index].seg1 = 0; } dprintf (2, ("after remove: begin entry%d: boundary: %Ix; end entry: %d: boundary: %Ix", begin_index, (seg_mapping_table[begin_index].boundary + 1), end_index, (seg_mapping_table[end_index].boundary + 1))); #ifdef MULTIPLE_HEAPS dprintf (2, ("begin %d: h0: %Ix, h1: %Ix; end: %d h0: %Ix, h1: %Ix", begin_index, (uint8_t*)(begin_entry->h0), (uint8_t*)(begin_entry->h1), end_index, (uint8_t*)(end_entry->h0), (uint8_t*)(end_entry->h1))); #endif //MULTIPLE_HEAPS #endif //!USE_REGIONS } #ifdef MULTIPLE_HEAPS inline gc_heap* seg_mapping_table_heap_of_worker (uint8_t* o) { size_t index = (size_t)o >> gc_heap::min_segment_size_shr; seg_mapping* entry = &seg_mapping_table[index]; #ifdef USE_REGIONS gc_heap* hp = heap_segment_heap ((heap_segment*)entry); #else gc_heap* hp = ((o > entry->boundary) ? entry->h1 : entry->h0); dprintf (2, ("checking obj %Ix, index is %Id, entry: boundary: %Ix, h0: %Ix, seg0: %Ix, h1: %Ix, seg1: %Ix", o, index, (entry->boundary + 1), (uint8_t*)(entry->h0), (uint8_t*)(entry->seg0), (uint8_t*)(entry->h1), (uint8_t*)(entry->seg1))); #ifdef _DEBUG heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); #ifdef FEATURE_BASICFREEZE if ((size_t)seg & ro_in_entry) seg = (heap_segment*)((size_t)seg & ~ro_in_entry); #endif //FEATURE_BASICFREEZE #ifdef TRACE_GC if (seg) { if (in_range_for_segment (o, seg)) { dprintf (2, ("obj %Ix belongs to segment %Ix(-%Ix)", o, seg, (uint8_t*)heap_segment_allocated (seg))); } else { dprintf (2, ("found seg %Ix(-%Ix) for obj %Ix, but it's not on the seg", seg, (uint8_t*)heap_segment_allocated (seg), o)); } } else { dprintf (2, ("could not find obj %Ix in any existing segments", o)); } #endif //TRACE_GC #endif //_DEBUG #endif //USE_REGIONS return hp; } gc_heap* seg_mapping_table_heap_of (uint8_t* o) { if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) return 0; return seg_mapping_table_heap_of_worker (o); } gc_heap* seg_mapping_table_heap_of_gc (uint8_t* o) { #ifdef FEATURE_BASICFREEZE if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) return 0; #endif //FEATURE_BASICFREEZE return seg_mapping_table_heap_of_worker (o); } #endif //MULTIPLE_HEAPS // Only returns a valid seg if we can actually find o on the seg. heap_segment* seg_mapping_table_segment_of (uint8_t* o) { #ifdef FEATURE_BASICFREEZE if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) return ro_segment_lookup (o); #endif //FEATURE_BASICFREEZE size_t index = (size_t)o >> gc_heap::min_segment_size_shr; seg_mapping* entry = &seg_mapping_table[index]; #ifdef USE_REGIONS // REGIONS TODO: I think we could simplify this to having the same info for each // basic entry in a large region so we can get it right away instead of having to go // back some entries. ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated ((heap_segment*)entry); if (first_field == 0) { dprintf (REGIONS_LOG, ("asked for seg for %Ix, in a freed region mem: %Ix, committed %Ix", o, heap_segment_mem ((heap_segment*)entry), heap_segment_committed ((heap_segment*)entry))); return 0; } // Regions are never going to intersect an ro seg, so this can never be ro_in_entry. 
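// A minimal sketch (not compiled) of what ro_in_entry means in the non-regions case referred to
// above: it is a tag OR'ed into the low bits of an entry's seg1 pointer to note that a read-only
// (frozen) segment also maps there, and readers strip it before using the pointer. The helper
// name is hypothetical.
#if 0 // illustration only
static heap_segment* strip_ro_tag_sketch (heap_segment* seg, size_t ro_tag)
{
    return ((heap_segment*)((size_t)seg & ~ro_tag));
}
#endif // illustration only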
assert (first_field != 0); assert (first_field != ro_in_entry); if (first_field < 0) { index += first_field; } heap_segment* seg = (heap_segment*)&seg_mapping_table[index]; #else //USE_REGIONS dprintf (2, ("checking obj %Ix, index is %Id, entry: boundary: %Ix, seg0: %Ix, seg1: %Ix", o, index, (entry->boundary + 1), (uint8_t*)(entry->seg0), (uint8_t*)(entry->seg1))); heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); #ifdef FEATURE_BASICFREEZE if ((size_t)seg & ro_in_entry) seg = (heap_segment*)((size_t)seg & ~ro_in_entry); #endif //FEATURE_BASICFREEZE #endif //USE_REGIONS if (seg) { if (in_range_for_segment (o, seg)) { dprintf (2, ("obj %Ix belongs to segment %Ix(-%Ix)", o, (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg))); } else { dprintf (2, ("found seg %Ix(-%Ix) for obj %Ix, but it's not on the seg, setting it to 0", (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg), o)); seg = 0; } } else { dprintf (2, ("could not find obj %Ix in any existing segments", o)); } #ifdef FEATURE_BASICFREEZE // TODO: This was originally written assuming that the seg_mapping_table would always contain entries for ro // segments whenever the ro segment falls into the [g_gc_lowest_address,g_gc_highest_address) range. I.e., it had an // extra "&& (size_t)(entry->seg1) & ro_in_entry" expression. However, at the moment, grow_brick_card_table does // not correctly go through the ro segments and add them back to the seg_mapping_table when the [lowest,highest) // range changes. We should probably go ahead and modify grow_brick_card_table and put back the // "&& (size_t)(entry->seg1) & ro_in_entry" here. if (!seg) { seg = ro_segment_lookup (o); if (seg && !in_range_for_segment (o, seg)) seg = 0; } #endif //FEATURE_BASICFREEZE return seg; } size_t gcard_of ( uint8_t*); #define GC_MARKED (size_t)0x1 #ifdef DOUBLY_LINKED_FL // This bit indicates that we'll need to set the bgc mark bit for this object during an FGC. // We only do this when we decide to compact. #define BGC_MARKED_BY_FGC (size_t)0x2 #define MAKE_FREE_OBJ_IN_COMPACT (size_t)0x4 #define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED|BGC_MARKED_BY_FGC|MAKE_FREE_OBJ_IN_COMPACT) #else //DOUBLY_LINKED_FL #define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED) #endif //!DOUBLY_LINKED_FL #ifdef HOST_64BIT #define SPECIAL_HEADER_BITS (0x7) #else #define SPECIAL_HEADER_BITS (0x3) #endif #define slot(i, j) ((uint8_t**)(i))[(j)+1] #define free_object_base_size (plug_skew + sizeof(ArrayBase)) #define free_list_slot(x) ((uint8_t**)(x))[2] #define free_list_undo(x) ((uint8_t**)(x))[-1] #define UNDO_EMPTY ((uint8_t*)1) #ifdef DOUBLY_LINKED_FL #define free_list_prev(x) ((uint8_t**)(x))[3] #define PREV_EMPTY ((uint8_t*)1) void check_and_clear_in_free_list (uint8_t* o, size_t size) { if (size >= min_free_list) { free_list_prev (o) = PREV_EMPTY; } } // This is used when we need to clear the prev bit for a free object we made because we know // it's not actually a free obj (it's just a temporary thing during allocation). void clear_prev_bit (uint8_t* o, size_t size) { if (size >= min_free_list) { free_list_prev (o) = 0; } } #endif //DOUBLY_LINKED_FL class CObjectHeader : public Object { public: #if defined(FEATURE_REDHAWK) || defined(BUILD_AS_STANDALONE) // The GC expects the following methods that are provided by the Object class in the CLR but not provided // by Redhawk's version of Object. 
uint32_t GetNumComponents() { return ((ArrayBase *)this)->GetNumComponents(); } void Validate(BOOL bDeep=TRUE) { MethodTable * pMT = GetMethodTable(); _ASSERTE(pMT->SanityCheck()); bool noRangeChecks = (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_NO_RANGE_CHECKS) == GCConfig::HEAPVERIFY_NO_RANGE_CHECKS; BOOL fSmallObjectHeapPtr = FALSE, fLargeObjectHeapPtr = FALSE; if (!noRangeChecks) { fSmallObjectHeapPtr = g_theGCHeap->IsHeapPointer(this, TRUE); if (!fSmallObjectHeapPtr) fLargeObjectHeapPtr = g_theGCHeap->IsHeapPointer(this); _ASSERTE(fSmallObjectHeapPtr || fLargeObjectHeapPtr); } #ifdef FEATURE_STRUCTALIGN _ASSERTE(IsStructAligned((uint8_t *)this, GetMethodTable()->GetBaseAlignment())); #endif // FEATURE_STRUCTALIGN #if defined(FEATURE_64BIT_ALIGNMENT) && !defined(FEATURE_REDHAWK) if (pMT->RequiresAlign8()) { _ASSERTE((((size_t)this) & 0x7) == (pMT->IsValueType() ? 4U : 0U)); } #endif // FEATURE_64BIT_ALIGNMENT #ifdef VERIFY_HEAP if (bDeep && (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)) g_theGCHeap->ValidateObjectMember(this); #endif if (fSmallObjectHeapPtr) { #ifdef FEATURE_BASICFREEZE _ASSERTE(!g_theGCHeap->IsLargeObject(this) || g_theGCHeap->IsInFrozenSegment(this)); #else _ASSERTE(!g_theGCHeap->IsLargeObject(this)); #endif } } void ValidateHeap(BOOL bDeep) { Validate(bDeep); } #endif //FEATURE_REDHAWK || BUILD_AS_STANDALONE ///// // // Header Status Information // MethodTable *GetMethodTable() const { return( (MethodTable *) (((size_t) RawGetMethodTable()) & (~SPECIAL_HEADER_BITS))); } void SetMarked() { _ASSERTE(RawGetMethodTable()); RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | GC_MARKED)); } BOOL IsMarked() const { return !!(((size_t)RawGetMethodTable()) & GC_MARKED); } void SetPinned() { assert (!(gc_heap::settings.concurrent)); GetHeader()->SetGCBit(); } BOOL IsPinned() const { return !!((((CObjectHeader*)this)->GetHeader()->GetBits()) & BIT_SBLK_GC_RESERVE); } // Now we set more bits should actually only clear the mark bit void ClearMarked() { #ifdef DOUBLY_LINKED_FL RawSetMethodTable ((MethodTable *)(((size_t) RawGetMethodTable()) & (~GC_MARKED))); #else RawSetMethodTable (GetMethodTable()); #endif //DOUBLY_LINKED_FL } #ifdef DOUBLY_LINKED_FL void SetBGCMarkBit() { RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | BGC_MARKED_BY_FGC)); } BOOL IsBGCMarkBitSet() const { return !!(((size_t)RawGetMethodTable()) & BGC_MARKED_BY_FGC); } void ClearBGCMarkBit() { RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~BGC_MARKED_BY_FGC))); } void SetFreeObjInCompactBit() { RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | MAKE_FREE_OBJ_IN_COMPACT)); } BOOL IsFreeObjInCompactBitSet() const { return !!(((size_t)RawGetMethodTable()) & MAKE_FREE_OBJ_IN_COMPACT); } void ClearFreeObjInCompactBit() { #ifdef _DEBUG // check this looks like an object, but do NOT validate pointers to other objects // as these may not be valid yet - we are calling this during compact_phase Validate(FALSE); #endif //_DEBUG RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~MAKE_FREE_OBJ_IN_COMPACT))); } #endif //DOUBLY_LINKED_FL size_t ClearSpecialBits() { size_t special_bits = ((size_t)RawGetMethodTable()) & SPECIAL_HEADER_BITS; if (special_bits != 0) { assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) & ~(SPECIAL_HEADER_BITS))); } return special_bits; } void SetSpecialBits (size_t special_bits) { assert ((special_bits & 
(~ALLOWED_SPECIAL_HEADER_BITS)) == 0); if (special_bits != 0) { RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) | special_bits)); } } CGCDesc *GetSlotMap () { assert (GetMethodTable()->ContainsPointers()); return CGCDesc::GetCGCDescFromMT(GetMethodTable()); } void SetFree(size_t size) { assert (size >= free_object_base_size); assert (g_gc_pFreeObjectMethodTable->GetBaseSize() == free_object_base_size); assert (g_gc_pFreeObjectMethodTable->RawGetComponentSize() == 1); RawSetMethodTable( g_gc_pFreeObjectMethodTable ); size_t* numComponentsPtr = (size_t*) &((uint8_t*) this)[ArrayBase::GetOffsetOfNumComponents()]; *numComponentsPtr = size - free_object_base_size; #ifdef VERIFY_HEAP //This introduces a bug in the free list management. //((void**) this)[-1] = 0; // clear the sync block, assert (*numComponentsPtr >= 0); if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) { memset (((uint8_t*)this)+sizeof(ArrayBase), 0xcc, *numComponentsPtr); #ifdef DOUBLY_LINKED_FL // However, in this case we can't leave the Next field uncleared because no one will clear it // so it remains 0xcc and that's not good for verification if (*numComponentsPtr > 0) { free_list_slot (this) = 0; } #endif //DOUBLY_LINKED_FL } #endif //VERIFY_HEAP #ifdef DOUBLY_LINKED_FL // For background GC, we need to distinguish between a free object that's not on the free list // and one that is. So we always set its prev to PREV_EMPTY to indicate that it's a free // object that's not on the free list. If it should be on the free list, it will be set to the // appropriate non zero value. check_and_clear_in_free_list ((uint8_t*)this, size); #endif //DOUBLY_LINKED_FL } void UnsetFree() { size_t size = free_object_base_size - plug_skew; // since we only need to clear 2 ptr size, we do it manually PTR_PTR m = (PTR_PTR) this; for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) *(m++) = 0; } BOOL IsFree () const { return (GetMethodTable() == g_gc_pFreeObjectMethodTable); } #ifdef FEATURE_STRUCTALIGN int GetRequiredAlignment () const { return GetMethodTable()->GetRequiredAlignment(); } #endif // FEATURE_STRUCTALIGN BOOL ContainsPointers() const { return GetMethodTable()->ContainsPointers(); } #ifdef COLLECTIBLE_CLASS BOOL Collectible() const { return GetMethodTable()->Collectible(); } FORCEINLINE BOOL ContainsPointersOrCollectible() const { MethodTable *pMethodTable = GetMethodTable(); return (pMethodTable->ContainsPointers() || pMethodTable->Collectible()); } #endif //COLLECTIBLE_CLASS Object* GetObjectBase() const { return (Object*) this; } }; #define header(i) ((CObjectHeader*)(i)) #ifdef DOUBLY_LINKED_FL inline BOOL is_on_free_list (uint8_t* o, size_t size) { if (size >= min_free_list) { if (header(o)->GetMethodTable() == g_gc_pFreeObjectMethodTable) { return (free_list_prev (o) != PREV_EMPTY); } } return FALSE; } inline void set_plug_bgc_mark_bit (uint8_t* node) { header(node)->SetBGCMarkBit(); } inline BOOL is_plug_bgc_mark_bit_set (uint8_t* node) { return header(node)->IsBGCMarkBitSet(); } inline void clear_plug_bgc_mark_bit (uint8_t* node) { header(node)->ClearBGCMarkBit(); } inline void set_free_obj_in_compact_bit (uint8_t* node) { header(node)->SetFreeObjInCompactBit(); } inline BOOL is_free_obj_in_compact_bit_set (uint8_t* node) { return header(node)->IsFreeObjInCompactBitSet(); } inline void clear_free_obj_in_compact_bit (uint8_t* node) { header(node)->ClearFreeObjInCompactBit(); } #endif //DOUBLY_LINKED_FL #ifdef SHORT_PLUGS inline void set_plug_padded (uint8_t* node) { header(node)->SetMarked(); } inline 
void clear_plug_padded (uint8_t* node) { header(node)->ClearMarked(); } inline BOOL is_plug_padded (uint8_t* node) { return header(node)->IsMarked(); } #else //SHORT_PLUGS inline void set_plug_padded (uint8_t* node){} inline void clear_plug_padded (uint8_t* node){} inline BOOL is_plug_padded (uint8_t* node){return FALSE;} #endif //SHORT_PLUGS inline size_t clear_special_bits (uint8_t* node) { return header(node)->ClearSpecialBits(); } inline void set_special_bits (uint8_t* node, size_t special_bits) { header(node)->SetSpecialBits (special_bits); } inline size_t unused_array_size(uint8_t * p) { assert(((CObjectHeader*)p)->IsFree()); size_t* numComponentsPtr = (size_t*)(p + ArrayBase::GetOffsetOfNumComponents()); return free_object_base_size + *numComponentsPtr; } inline heap_segment* heap_segment_non_sip (heap_segment* ns) { #ifdef USE_REGIONS if ((ns == 0) || !heap_segment_swept_in_plan (ns)) { return ns; } else { do { if (heap_segment_swept_in_plan (ns)) { dprintf (REGIONS_LOG, ("region %Ix->%Ix SIP", heap_segment_mem (ns), heap_segment_allocated (ns))); } ns = heap_segment_next (ns); } while ((ns != 0) && heap_segment_swept_in_plan (ns)); return ns; } #else //USE_REGIONS return ns; #endif //USE_REGIONS } inline heap_segment* heap_segment_next_non_sip (heap_segment* seg) { heap_segment* ns = heap_segment_next (seg); #ifdef USE_REGIONS return heap_segment_non_sip (ns); #else return ns; #endif //USE_REGIONS } heap_segment* heap_segment_rw (heap_segment* ns) { if ((ns == 0) || !heap_segment_read_only_p (ns)) { return ns; } else { do { ns = heap_segment_next (ns); } while ((ns != 0) && heap_segment_read_only_p (ns)); return ns; } } //returns the next non ro segment. heap_segment* heap_segment_next_rw (heap_segment* seg) { heap_segment* ns = heap_segment_next (seg); return heap_segment_rw (ns); } // returns the segment before seg. heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg) { assert (begin != 0); heap_segment* prev = begin; heap_segment* current = heap_segment_next_rw (begin); while (current && current != seg) { prev = current; current = heap_segment_next_rw (current); } if (current == seg) { return prev; } else { return 0; } } // returns the segment before seg. 
heap_segment* heap_segment_prev (heap_segment* begin, heap_segment* seg) { assert (begin != 0); heap_segment* prev = begin; heap_segment* current = heap_segment_next (begin); while (current && current != seg) { prev = current; current = heap_segment_next (current); } if (current == seg) { return prev; } else { return 0; } } heap_segment* heap_segment_in_range (heap_segment* ns) { if ((ns == 0) || heap_segment_in_range_p (ns)) { return ns; } else { do { ns = heap_segment_next (ns); } while ((ns != 0) && !heap_segment_in_range_p (ns)); return ns; } } heap_segment* heap_segment_next_in_range (heap_segment* seg) { heap_segment* ns = heap_segment_next (seg); return heap_segment_in_range (ns); } struct imemory_data { uint8_t* memory_base; }; struct numa_reserved_block { uint8_t* memory_base; size_t block_size; numa_reserved_block() : memory_base(nullptr), block_size(0) { } }; struct initial_memory_details { imemory_data *initial_memory; imemory_data *initial_normal_heap; // points into initial_memory_array imemory_data *initial_large_heap; // points into initial_memory_array imemory_data *initial_pinned_heap; // points into initial_memory_array size_t block_size_normal; size_t block_size_large; size_t block_size_pinned; int block_count; // # of blocks in each int current_block_normal; int current_block_large; int current_block_pinned; enum { ALLATONCE = 1, EACH_GENERATION, EACH_BLOCK, ALLATONCE_SEPARATED_POH, EACH_NUMA_NODE }; size_t allocation_pattern; size_t block_size(int i) { switch (i / block_count) { case 0: return block_size_normal; case 1: return block_size_large; case 2: return block_size_pinned; default: __UNREACHABLE(); } }; void* get_initial_memory (int gen, int h_number) { switch (gen) { case soh_gen0: case soh_gen1: case soh_gen2: return initial_normal_heap[h_number].memory_base; case loh_generation: return initial_large_heap[h_number].memory_base; case poh_generation: return initial_pinned_heap[h_number].memory_base; default: __UNREACHABLE(); } }; size_t get_initial_size (int gen) { switch (gen) { case soh_gen0: case soh_gen1: case soh_gen2: return block_size_normal; case loh_generation: return block_size_large; case poh_generation: return block_size_pinned; default: __UNREACHABLE(); } }; int numa_reserved_block_count; numa_reserved_block* numa_reserved_block_table; }; initial_memory_details memory_details; BOOL gc_heap::reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p, bool separated_poh_p, uint16_t* heap_no_to_numa_node) { BOOL reserve_success = FALSE; // should only be called once assert (memory_details.initial_memory == 0); // soh + loh + poh segments * num_heaps memory_details.initial_memory = new (nothrow) imemory_data[num_heaps * (total_generation_count - ephemeral_generation_count)]; if (memory_details.initial_memory == 0) { dprintf (2, ("failed to reserve %Id bytes for imemory_data", num_heaps * (total_generation_count - ephemeral_generation_count) * sizeof (imemory_data))); return FALSE; } memory_details.initial_normal_heap = memory_details.initial_memory; memory_details.initial_large_heap = memory_details.initial_normal_heap + num_heaps; memory_details.initial_pinned_heap = memory_details.initial_large_heap + num_heaps; memory_details.block_size_normal = normal_size; memory_details.block_size_large = large_size; memory_details.block_size_pinned = pinned_size; memory_details.block_count = num_heaps; memory_details.current_block_normal = 0; memory_details.current_block_large = 0; 
memory_details.current_block_pinned = 0; g_gc_lowest_address = MAX_PTR; g_gc_highest_address = 0; if (((size_t)MAX_PTR - large_size) < normal_size) { // we are already overflowing with just one heap. dprintf (2, ("0x%Ix + 0x%Ix already overflow", normal_size, large_size)); return FALSE; } if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size + pinned_size)) { dprintf (2, ("(0x%Ix + 0x%Ix)*0x%Ix overflow", normal_size, large_size, memory_details.block_count)); return FALSE; } // figure out number of NUMA nodes and allocate additional table for NUMA local reservation memory_details.numa_reserved_block_count = 0; memory_details.numa_reserved_block_table = nullptr; int numa_node_count = 0; if (heap_no_to_numa_node != nullptr) { uint16_t highest_numa_node = 0; // figure out the highest NUMA node for (int heap_no = 0; heap_no < num_heaps; heap_no++) { uint16_t heap_numa_node = heap_no_to_numa_node[heap_no]; highest_numa_node = max (highest_numa_node, heap_numa_node); } assert (highest_numa_node < MAX_SUPPORTED_CPUS); numa_node_count = highest_numa_node + 1; memory_details.numa_reserved_block_count = numa_node_count * (1 + separated_poh_p); memory_details.numa_reserved_block_table = new (nothrow) numa_reserved_block[memory_details.numa_reserved_block_count]; if (memory_details.numa_reserved_block_table == nullptr) { // we couldn't get the memory - continue as if doing the non-NUMA case dprintf(2, ("failed to reserve %Id bytes for numa_reserved_block data", memory_details.numa_reserved_block_count * sizeof(numa_reserved_block))); memory_details.numa_reserved_block_count = 0; } } if (memory_details.numa_reserved_block_table != nullptr) { // figure out how much to reserve on each NUMA node // note this can be very different between NUMA nodes, depending on // which processors our heaps are associated with size_t merged_pinned_size = separated_poh_p ? 
0 : pinned_size; for (int heap_no = 0; heap_no < num_heaps; heap_no++) { uint16_t heap_numa_node = heap_no_to_numa_node[heap_no]; numa_reserved_block * block = &memory_details.numa_reserved_block_table[heap_numa_node]; // add the size required for this heap block->block_size += normal_size + large_size + merged_pinned_size; if (separated_poh_p) { numa_reserved_block* pinned_block = &memory_details.numa_reserved_block_table[numa_node_count + heap_numa_node]; // add the pinned size required for this heap pinned_block->block_size += pinned_size; } } // reserve the appropriate size on each NUMA node bool failure = false; for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++) { numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index]; if (block->block_size == 0) continue; int numa_node = block_index % numa_node_count; bool pinned_block = block_index >= numa_node_count; block->memory_base = (uint8_t*)virtual_alloc (block->block_size, use_large_pages_p && !pinned_block, numa_node); if (block->memory_base == nullptr) { dprintf(2, ("failed to reserve %Id bytes for on NUMA node %u", block->block_size, numa_node)); failure = true; break; } else { g_gc_lowest_address = min(g_gc_lowest_address, block->memory_base); g_gc_highest_address = max(g_gc_highest_address, block->memory_base + block->block_size); } } if (failure) { // if we had any failures, undo the work done so far // we will instead use one of the other allocation patterns // we could try to use what we did succeed to reserve, but that gets complicated for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++) { numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index]; if (block->memory_base != nullptr) { virtual_free(block->memory_base, block->block_size); block->memory_base = nullptr; } } delete [] memory_details.numa_reserved_block_table; memory_details.numa_reserved_block_table = nullptr; memory_details.numa_reserved_block_count = 0; } else { // for each NUMA node, give out the memory to its heaps for (uint16_t numa_node = 0; numa_node < numa_node_count; numa_node++) { numa_reserved_block * block = &memory_details.numa_reserved_block_table[numa_node]; numa_reserved_block* pinned_block = separated_poh_p ? &memory_details.numa_reserved_block_table[numa_node_count + numa_node] : nullptr; // if the block's size is 0, there can be no heaps on this NUMA node if (block->block_size == 0) { assert((pinned_block == nullptr) || (pinned_block->block_size == 0)); continue; } uint8_t* memory_base = block->memory_base; uint8_t* pinned_memory_base = ((pinned_block == nullptr) ? 
nullptr : pinned_block->memory_base); for (int heap_no = 0; heap_no < num_heaps; heap_no++) { uint16_t heap_numa_node = heap_no_to_numa_node[heap_no]; if (heap_numa_node != numa_node) { // this heap is on another NUMA node continue; } memory_details.initial_normal_heap[heap_no].memory_base = memory_base; memory_base += normal_size; memory_details.initial_large_heap[heap_no].memory_base = memory_base; memory_base += large_size; if (separated_poh_p) { memory_details.initial_pinned_heap[heap_no].memory_base = pinned_memory_base; pinned_memory_base += pinned_size; } else { memory_details.initial_pinned_heap[heap_no].memory_base = memory_base; memory_base += pinned_size; } } // sanity check - we should be at the end of the memory block for this NUMA node assert (memory_base == block->memory_base + block->block_size); assert ((pinned_block == nullptr) || (pinned_memory_base == pinned_block->memory_base + pinned_block->block_size)); } memory_details.allocation_pattern = initial_memory_details::EACH_NUMA_NODE; reserve_success = TRUE; } } if (!reserve_success) { size_t temp_pinned_size = (separated_poh_p ? 0 : pinned_size); size_t separate_pinned_size = memory_details.block_count * pinned_size; size_t requestedMemory = memory_details.block_count * (normal_size + large_size + temp_pinned_size); uint8_t* allatonce_block = (uint8_t*)virtual_alloc(requestedMemory, use_large_pages_p); uint8_t* separated_poh_block = nullptr; if (allatonce_block && separated_poh_p) { separated_poh_block = (uint8_t*)virtual_alloc(separate_pinned_size, false); if (!separated_poh_block) { virtual_free(allatonce_block, requestedMemory); allatonce_block = nullptr; } } if (allatonce_block) { if (separated_poh_p) { g_gc_lowest_address = min(allatonce_block, separated_poh_block); g_gc_highest_address = max((allatonce_block + requestedMemory), (separated_poh_block + separate_pinned_size)); memory_details.allocation_pattern = initial_memory_details::ALLATONCE_SEPARATED_POH; } else { g_gc_lowest_address = allatonce_block; g_gc_highest_address = allatonce_block + requestedMemory; memory_details.allocation_pattern = initial_memory_details::ALLATONCE; } for (int i = 0; i < memory_details.block_count; i++) { memory_details.initial_normal_heap[i].memory_base = allatonce_block + (i * normal_size); memory_details.initial_large_heap[i].memory_base = allatonce_block + (memory_details.block_count * normal_size) + (i * large_size); if (separated_poh_p) { memory_details.initial_pinned_heap[i].memory_base = separated_poh_block + (i * pinned_size); } else { memory_details.initial_pinned_heap[i].memory_base = allatonce_block + (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size); } } reserve_success = TRUE; } else { // try to allocate 3 blocks uint8_t* b1 = (uint8_t*)virtual_alloc(memory_details.block_count * normal_size, use_large_pages_p); uint8_t* b2 = (uint8_t*)virtual_alloc(memory_details.block_count * large_size, use_large_pages_p); uint8_t* b3 = (uint8_t*)virtual_alloc(memory_details.block_count * pinned_size, use_large_pages_p && !separated_poh_p); if (b1 && b2 && b3) { memory_details.allocation_pattern = initial_memory_details::EACH_GENERATION; g_gc_lowest_address = min(b1, min(b2, b3)); g_gc_highest_address = max(b1 + memory_details.block_count * normal_size, max(b2 + memory_details.block_count * large_size, b3 + memory_details.block_count * pinned_size)); for (int i = 0; i < memory_details.block_count; i++) { memory_details.initial_normal_heap[i].memory_base = b1 + (i * normal_size); 
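// (Hedged illustration of the EACH_GENERATION carve-up being done in this loop: b1 holds
// block_count normal (SOH) blocks back to back, b2 the LOH blocks and b3 the POH blocks.
// With, say, 4 heaps and a 256mb normal_size, heap i's SOH block starts at b1 + i*256mb,
// and the LOH/POH blocks are laid out the same way from b2 and b3 below.)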
memory_details.initial_large_heap[i].memory_base = b2 + (i * large_size); memory_details.initial_pinned_heap[i].memory_base = b3 + (i * pinned_size); } reserve_success = TRUE; } else { // allocation failed, we'll go on to try allocating each block. // We could preserve the b1 alloc, but code complexity increases if (b1) virtual_free(b1, memory_details.block_count * normal_size); if (b2) virtual_free(b2, memory_details.block_count * large_size); if (b3) virtual_free(b3, memory_details.block_count * pinned_size); } if ((b2 == NULL) && (memory_details.block_count > 1)) { memory_details.allocation_pattern = initial_memory_details::EACH_BLOCK; imemory_data* current_block = memory_details.initial_memory; for (int i = 0; i < (memory_details.block_count * (total_generation_count - ephemeral_generation_count)); i++, current_block++) { size_t block_size = memory_details.block_size(i); uint16_t numa_node = NUMA_NODE_UNDEFINED; if (heap_no_to_numa_node != nullptr) { int heap_no = i % memory_details.block_count; numa_node = heap_no_to_numa_node[heap_no]; } current_block->memory_base = (uint8_t*)virtual_alloc(block_size, use_large_pages_p, numa_node); if (current_block->memory_base == 0) { // Free the blocks that we've allocated so far current_block = memory_details.initial_memory; for (int j = 0; j < i; j++, current_block++) { if (current_block->memory_base != 0) { block_size = memory_details.block_size(i); virtual_free(current_block->memory_base, block_size); } } reserve_success = FALSE; break; } else { if (current_block->memory_base < g_gc_lowest_address) g_gc_lowest_address = current_block->memory_base; if (((uint8_t*)current_block->memory_base + block_size) > g_gc_highest_address) g_gc_highest_address = (current_block->memory_base + block_size); } reserve_success = TRUE; } } } } if (reserve_success && separated_poh_p) { for (int heap_no = 0; (reserve_success && (heap_no < num_heaps)); heap_no++) { if (!GCToOSInterface::VirtualCommit(memory_details.initial_pinned_heap[heap_no].memory_base, pinned_size)) { reserve_success = FALSE; } } } return reserve_success; } void gc_heap::destroy_initial_memory() { if (memory_details.initial_memory != NULL) { switch (memory_details.allocation_pattern) { case initial_memory_details::ALLATONCE: virtual_free (memory_details.initial_memory[0].memory_base, memory_details.block_count*(memory_details.block_size_normal + memory_details.block_size_large + memory_details.block_size_pinned)); break; case initial_memory_details::ALLATONCE_SEPARATED_POH: virtual_free(memory_details.initial_memory[0].memory_base, memory_details.block_count * (memory_details.block_size_normal + memory_details.block_size_large)); virtual_free(memory_details.initial_pinned_heap[0].memory_base, memory_details.block_count * (memory_details.block_size_pinned)); break; case initial_memory_details::EACH_GENERATION: virtual_free (memory_details.initial_normal_heap[0].memory_base, memory_details.block_count*memory_details.block_size_normal); virtual_free (memory_details.initial_large_heap[0].memory_base, memory_details.block_count*memory_details.block_size_large); virtual_free (memory_details.initial_pinned_heap[0].memory_base, memory_details.block_count*memory_details.block_size_pinned); break; case initial_memory_details::EACH_BLOCK: { imemory_data* current_block = memory_details.initial_memory; int total_block_count = memory_details.block_count * (total_generation_count - ephemeral_generation_count); for (int i = 0; i < total_block_count; i++, current_block++) { size_t block_size = 
memory_details.block_size (i); if (current_block->memory_base != NULL) { virtual_free (current_block->memory_base, block_size); } } break; } case initial_memory_details::EACH_NUMA_NODE: for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++) { numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index]; if (block->memory_base != nullptr) { virtual_free (block->memory_base, block->block_size); } } delete [] memory_details.numa_reserved_block_table; break; default: assert (!"unexpected allocation_pattern"); break; } delete [] memory_details.initial_memory; memory_details.initial_memory = NULL; memory_details.initial_normal_heap = NULL; memory_details.initial_large_heap = NULL; memory_details.initial_pinned_heap = NULL; } } heap_segment* make_initial_segment (int gen, int h_number, gc_heap* hp) { void* mem = memory_details.get_initial_memory (gen, h_number); size_t size = memory_details.get_initial_size (gen); heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size, hp, gen); return res; } void* virtual_alloc (size_t size) { return virtual_alloc(size, false); } void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node) { size_t requested_size = size; if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) { gc_heap::reserved_memory_limit = GCScan::AskForMoreReservedMemory (gc_heap::reserved_memory_limit, requested_size); if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) { return 0; } } uint32_t flags = VirtualReserveFlags::None; #ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP if (virtual_alloc_hardware_write_watch) { flags = VirtualReserveFlags::WriteWatch; } #endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP void* prgmem = use_large_pages_p ? GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size, numa_node) : GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags, numa_node); void *aligned_mem = prgmem; // We don't want (prgmem + size) to be right at the end of the address space // because we'd have to worry about that everytime we do (address + size). // We also want to make sure that we leave loh_size_threshold at the end // so we allocate a small object we don't need to worry about overflow there // when we do alloc_ptr+size. 
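// A hedged worked example of the check that follows: if the reservation ends so close to
// the top of the address space that
//
//     (size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC
//
// the memory is released again and the allocation is treated as a failure, so the
// allocator's later (address + size) and alloc_ptr + size arithmetic can never run off
// the end of the address space.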
if (prgmem)
{
    uint8_t* end_mem = (uint8_t*)prgmem + requested_size;

    if ((end_mem == 0) || ((size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC))
    {
        GCToOSInterface::VirtualRelease (prgmem, requested_size);
        dprintf (2, ("Virtual Alloc size %Id returned memory right against 4GB [%Ix, %Ix[ - discarding",
                    requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size)));
        prgmem = 0;
        aligned_mem = 0;
    }
}

if (prgmem)
{
    gc_heap::reserved_memory += requested_size;
}

dprintf (2, ("Virtual Alloc size %Id: [%Ix, %Ix[",
             requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size)));

return aligned_mem;
}

static size_t get_valid_segment_size (BOOL large_seg=FALSE)
{
    size_t seg_size, initial_seg_size;

    if (!large_seg)
    {
        initial_seg_size = INITIAL_ALLOC;
        seg_size = static_cast<size_t>(GCConfig::GetSegmentSize());
    }
    else
    {
        initial_seg_size = LHEAP_ALLOC;
        seg_size = static_cast<size_t>(GCConfig::GetSegmentSize()) / 2;
    }

#ifdef MULTIPLE_HEAPS
#ifdef HOST_64BIT
    if (!large_seg)
#endif // HOST_64BIT
    {
        if (g_num_processors > 4)
            initial_seg_size /= 2;
        if (g_num_processors > 8)
            initial_seg_size /= 2;
    }
#endif //MULTIPLE_HEAPS

    // if seg_size is small but not 0 (0 is default if config not set)
    // then set the segment to the minimum size
    if (!g_theGCHeap->IsValidSegmentSize(seg_size))
    {
        // if requested size is between 1 byte and 4MB, use min
        if ((seg_size >> 1) && !(seg_size >> 22))
            seg_size = 1024*1024*4;
        else
            seg_size = initial_seg_size;
    }

#ifdef HOST_64BIT
    seg_size = round_up_power2 (seg_size);
#else
    seg_size = round_down_power2 (seg_size);
#endif // HOST_64BIT

    return (seg_size);
}

#ifndef USE_REGIONS
void gc_heap::compute_new_ephemeral_size()
{
    int eph_gen_max = max_generation - 1 - (settings.promotion ? 1 : 0);
    size_t padding_size = 0;

    for (int i = 0; i <= eph_gen_max; i++)
    {
        dynamic_data* dd = dynamic_data_of (i);
        total_ephemeral_size += (dd_survived_size (dd) - dd_pinned_survived_size (dd));
#ifdef RESPECT_LARGE_ALIGNMENT
        total_ephemeral_size += dd_num_npinned_plugs (dd) * switch_alignment_size (FALSE);
#endif //RESPECT_LARGE_ALIGNMENT
#ifdef FEATURE_STRUCTALIGN
        total_ephemeral_size += dd_num_npinned_plugs (dd) * MAX_STRUCTALIGN;
#endif //FEATURE_STRUCTALIGN
#ifdef SHORT_PLUGS
        padding_size += dd_padding_size (dd);
#endif //SHORT_PLUGS
    }

    total_ephemeral_size += eph_gen_starts_size;

#ifdef RESPECT_LARGE_ALIGNMENT
    size_t planned_ephemeral_size = heap_segment_plan_allocated (ephemeral_heap_segment) -
                                    generation_plan_allocation_start (generation_of (max_generation-1));
    total_ephemeral_size = min (total_ephemeral_size, planned_ephemeral_size);
#endif //RESPECT_LARGE_ALIGNMENT

#ifdef SHORT_PLUGS
    total_ephemeral_size = Align ((size_t)((double)total_ephemeral_size * short_plugs_pad_ratio) + 1);
    total_ephemeral_size += Align (DESIRED_PLUG_LENGTH);
#endif //SHORT_PLUGS

    dprintf (3, ("total ephemeral size is %Ix, padding %Ix(%Ix)",
        total_ephemeral_size, padding_size, (total_ephemeral_size - padding_size)));
}

#ifdef _MSC_VER
#pragma warning(disable:4706) // "assignment within conditional expression" is intentional in this function.
#endif // _MSC_VER

heap_segment* gc_heap::soh_get_segment_to_expand()
{
    size_t size = soh_segment_size;

    ordered_plug_indices_init = FALSE;
    use_bestfit = FALSE;

    //compute the size of the new ephemeral heap segment.
compute_new_ephemeral_size(); if ((settings.pause_mode != pause_low_latency) && (settings.pause_mode != pause_no_gc) #ifdef BACKGROUND_GC && (!gc_heap::background_running_p()) #endif //BACKGROUND_GC ) { assert (settings.condemned_generation <= max_generation); allocator* gen_alloc = ((settings.condemned_generation == max_generation) ? nullptr : generation_allocator (generation_of (max_generation))); dprintf (2, ("(gen%d)soh_get_segment_to_expand", settings.condemned_generation)); // try to find one in the gen 2 segment list, search backwards because the first segments // tend to be more compact than the later ones. heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); PREFIX_ASSUME(fseg != NULL); #ifdef SEG_REUSE_STATS int try_reuse = 0; #endif //SEG_REUSE_STATS heap_segment* seg = ephemeral_heap_segment; while ((seg = heap_segment_prev_rw (fseg, seg)) && (seg != fseg)) { #ifdef SEG_REUSE_STATS try_reuse++; #endif //SEG_REUSE_STATS if (can_expand_into_p (seg, size/3, total_ephemeral_size, gen_alloc)) { get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (use_bestfit ? expand_reuse_bestfit : expand_reuse_normal)); if (settings.condemned_generation == max_generation) { if (use_bestfit) { build_ordered_free_spaces (seg); dprintf (GTC_LOG, ("can use best fit")); } #ifdef SEG_REUSE_STATS dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse", settings.condemned_generation, try_reuse)); #endif //SEG_REUSE_STATS dprintf (GTC_LOG, ("max_gen: Found existing segment to expand into %Ix", (size_t)seg)); return seg; } else { #ifdef SEG_REUSE_STATS dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse - returning", settings.condemned_generation, try_reuse)); #endif //SEG_REUSE_STATS dprintf (GTC_LOG, ("max_gen-1: Found existing segment to expand into %Ix", (size_t)seg)); // If we return 0 here, the allocator will think since we are short on end // of seg we need to trigger a full compacting GC. So if sustained low latency // is set we should acquire a new seg instead, that way we wouldn't be short. // The real solution, of course, is to actually implement seg reuse in gen1. if (settings.pause_mode != pause_sustained_low_latency) { dprintf (GTC_LOG, ("max_gen-1: SustainedLowLatency is set, acquire a new seg")); get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_next_full_gc); return 0; } } } } } heap_segment* result = get_segment (size, gc_oh_num::soh); if(result) { #ifdef BACKGROUND_GC if (current_c_gc_state == c_gc_state_planning) { // When we expand heap during bgc sweep, we set the seg to be swept so // we'll always look at cards for objects on the new segment. result->flags |= heap_segment_flags_swept; } #endif //BACKGROUND_GC FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(result), (size_t)(heap_segment_reserved (result) - heap_segment_mem(result)), gc_etw_segment_small_object_heap); } get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (result ? 
expand_new_seg : expand_no_memory)); if (result == 0) { dprintf (2, ("h%d: failed to allocate a new segment!", heap_number)); } else { #ifdef MULTIPLE_HEAPS heap_segment_heap (result) = this; #endif //MULTIPLE_HEAPS } dprintf (GTC_LOG, ("(gen%d)creating new segment %Ix", settings.condemned_generation, result)); return result; } #endif //!USE_REGIONS #ifdef _MSC_VER #pragma warning(default:4706) #endif // _MSC_VER //returns 0 in case of allocation failure heap_segment* gc_heap::get_segment (size_t size, gc_oh_num oh) { assert(oh != gc_oh_num::none); BOOL uoh_p = (oh == gc_oh_num::loh) || (oh == gc_oh_num::poh); if (heap_hard_limit) return NULL; heap_segment* result = 0; if (segment_standby_list != 0) { result = segment_standby_list; heap_segment* last = 0; while (result) { size_t hs = (size_t)(heap_segment_reserved (result) - (uint8_t*)result); if ((hs >= size) && ((hs / 2) < size)) { dprintf (2, ("Hoarded segment %Ix found", (size_t) result)); if (last) { heap_segment_next (last) = heap_segment_next (result); } else { segment_standby_list = heap_segment_next (result); } break; } else { last = result; result = heap_segment_next (result); } } } if (result) { init_heap_segment (result, __this #ifdef USE_REGIONS , 0, size, (uoh_p ? max_generation : 0) #endif //USE_REGIONS ); #ifdef BACKGROUND_GC if (is_bgc_in_progress()) { dprintf (GC_TABLE_LOG, ("hoarded seg %Ix, mark_array is %Ix", result, mark_array)); if (!commit_mark_array_new_seg (__this, result)) { dprintf (GC_TABLE_LOG, ("failed to commit mark array for hoarded seg")); // If we can't use it we need to thread it back. if (segment_standby_list != 0) { heap_segment_next (result) = segment_standby_list; segment_standby_list = result; } else { segment_standby_list = result; } result = 0; } } #endif //BACKGROUND_GC if (result) seg_mapping_table_add_segment (result, __this); } if (!result) { void* mem = virtual_alloc (size); if (!mem) { fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p); return 0; } result = make_heap_segment ((uint8_t*)mem, size, __this, (uoh_p ? 
max_generation : 0)); if (result) { uint8_t* start; uint8_t* end; if (mem < g_gc_lowest_address) { start = (uint8_t*)mem; } else { start = (uint8_t*)g_gc_lowest_address; } if (((uint8_t*)mem + size) > g_gc_highest_address) { end = (uint8_t*)mem + size; } else { end = (uint8_t*)g_gc_highest_address; } if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, uoh_p) != 0) { virtual_free (mem, size); return 0; } } else { fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p); virtual_free (mem, size); } if (result) { seg_mapping_table_add_segment (result, __this); } } #ifdef BACKGROUND_GC if (result) { ::record_changed_seg ((uint8_t*)result, heap_segment_reserved (result), settings.gc_index, current_bgc_state, seg_added); bgc_verify_mark_array_cleared (result); } #endif //BACKGROUND_GC dprintf (GC_TABLE_LOG, ("h%d: new seg: %Ix-%Ix (%Id)", heap_number, result, ((uint8_t*)result + size), size)); return result; } void gc_heap::release_segment (heap_segment* sg) { ptrdiff_t delta = 0; FIRE_EVENT(GCFreeSegment_V1, heap_segment_mem(sg)); virtual_free (sg, (uint8_t*)heap_segment_reserved (sg)-(uint8_t*)sg, sg); } heap_segment* gc_heap::get_segment_for_uoh (int gen_number, size_t size #ifdef MULTIPLE_HEAPS , gc_heap* hp #endif //MULTIPLE_HEAPS ) { #ifndef MULTIPLE_HEAPS gc_heap* hp = 0; #endif //MULTIPLE_HEAPS #ifdef USE_REGIONS heap_segment* res = hp->get_new_region (gen_number, size); #else //USE_REGIONS gc_oh_num oh = gen_to_oh (gen_number); heap_segment* res = hp->get_segment (size, oh); #endif //USE_REGIONS if (res != 0) { #ifdef MULTIPLE_HEAPS heap_segment_heap (res) = hp; #endif //MULTIPLE_HEAPS size_t flags = (gen_number == poh_generation) ? heap_segment_flags_poh : heap_segment_flags_loh; #ifdef USE_REGIONS // in the regions case, flags are set by get_new_region assert ((res->flags & (heap_segment_flags_loh | heap_segment_flags_poh)) == flags); #else //USE_REGIONS res->flags |= flags; #endif //USE_REGIONS FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(res), (size_t)(heap_segment_reserved (res) - heap_segment_mem(res)), (gen_number == poh_generation) ? gc_etw_segment_pinned_object_heap : gc_etw_segment_large_object_heap); #ifndef USE_REGIONS #ifdef MULTIPLE_HEAPS hp->thread_uoh_segment (gen_number, res); #else thread_uoh_segment (gen_number, res); #endif //MULTIPLE_HEAPS #endif //!USE_REGIONS GCToEEInterface::DiagAddNewRegion( gen_number, heap_segment_mem (res), heap_segment_allocated (res), heap_segment_reserved (res) ); } return res; } void gc_heap::thread_uoh_segment (int gen_number, heap_segment* new_seg) { heap_segment* seg = generation_allocation_segment (generation_of (gen_number)); while (heap_segment_next_rw (seg)) seg = heap_segment_next_rw (seg); heap_segment_next (seg) = new_seg; } heap_segment* gc_heap::get_uoh_segment (int gen_number, size_t size, BOOL* did_full_compact_gc) { *did_full_compact_gc = FALSE; size_t last_full_compact_gc_count = get_full_compact_gc_count(); //access to get_segment needs to be serialized add_saved_spinlock_info (true, me_release, mt_get_large_seg); leave_spin_lock (&more_space_lock_uoh); enter_spin_lock (&gc_heap::gc_lock); dprintf (SPINLOCK_LOG, ("[%d]Seg: Egc", heap_number)); // if a GC happened between here and before we ask for a segment in // get_uoh_segment, we need to count that GC. 
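// (Hedged illustration: if last_full_compact_gc_count was read as, say, 7 before
// more_space_lock_uoh was released and a full compacting GC slipped in while the locks
// were being re-acquired, get_full_compact_gc_count() now returns 8, so
// *did_full_compact_gc is set and the caller can tell that a full compacting GC already
// happened during this allocation attempt.)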
size_t current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count > last_full_compact_gc_count) { *did_full_compact_gc = TRUE; } heap_segment* res = get_segment_for_uoh (gen_number, size #ifdef MULTIPLE_HEAPS , this #endif //MULTIPLE_HEAPS ); dprintf (SPINLOCK_LOG, ("[%d]Seg: A Lgc", heap_number)); leave_spin_lock (&gc_heap::gc_lock); enter_spin_lock (&more_space_lock_uoh); add_saved_spinlock_info (true, me_acquire, mt_get_large_seg); return res; } #ifdef MULTIPLE_HEAPS #ifdef HOST_X86 #ifdef _MSC_VER #pragma warning(disable:4035) static ptrdiff_t get_cycle_count() { __asm rdtsc } #pragma warning(default:4035) #elif defined(__GNUC__) static ptrdiff_t get_cycle_count() { ptrdiff_t cycles; ptrdiff_t cyclesHi; __asm__ __volatile__ ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); return cycles; } #else //_MSC_VER #error Unknown compiler #endif //_MSC_VER #elif defined(TARGET_AMD64) #ifdef _MSC_VER extern "C" uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) static ptrdiff_t get_cycle_count() { return (ptrdiff_t)__rdtsc(); } #elif defined(__GNUC__) static ptrdiff_t get_cycle_count() { ptrdiff_t cycles; ptrdiff_t cyclesHi; __asm__ __volatile__ ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); return (cyclesHi << 32) | cycles; } #else // _MSC_VER extern "C" ptrdiff_t get_cycle_count(void); #endif // _MSC_VER #else static ptrdiff_t get_cycle_count() { // @ARMTODO, @ARM64TODO, @WASMTODO: cycle counter is not exposed to user mode. For now (until we can show this // makes a difference on the configurations on which we'll run) just return 0. This will result in // all buffer access times being reported as equal in access_time(). return 0; } #endif //TARGET_X86 // We may not be on contiguous numa nodes so need to store // the node index as well. struct node_heap_count { int node_no; int heap_count; }; class heap_select { heap_select() {} public: static uint8_t* sniff_buffer; static unsigned n_sniff_buffers; static unsigned cur_sniff_index; static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; static uint16_t proc_no_to_numa_node[MAX_SUPPORTED_CPUS]; static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; // Note this is the total numa nodes GC heaps are on. There might be // more on the machine if GC threads aren't using all of them. 
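// (Hedged example: on a machine with 4 NUMA nodes where all of the configured GC heaps
// happen to land on nodes 0 and 1, total_numa_nodes ends up as 2 and only the first two
// heaps_on_node entries are meaningful, even though the machine itself has 4 nodes.)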
static uint16_t total_numa_nodes; static node_heap_count heaps_on_node[MAX_SUPPORTED_NODES]; static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers) { ptrdiff_t start_cycles = get_cycle_count(); uint8_t sniff = sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE]; assert (sniff == 0); ptrdiff_t elapsed_cycles = get_cycle_count() - start_cycles; // add sniff here just to defeat the optimizer elapsed_cycles += sniff; return (int) elapsed_cycles; } public: static BOOL init(int n_heaps) { assert (sniff_buffer == NULL && n_sniff_buffers == 0); if (!GCToOSInterface::CanGetCurrentProcessorNumber()) { n_sniff_buffers = n_heaps*2+1; size_t n_cache_lines = 1 + n_heaps * n_sniff_buffers + 1; size_t sniff_buf_size = n_cache_lines * HS_CACHE_LINE_SIZE; if (sniff_buf_size / HS_CACHE_LINE_SIZE != n_cache_lines) // check for overlow { return FALSE; } sniff_buffer = new (nothrow) uint8_t[sniff_buf_size]; if (sniff_buffer == 0) return FALSE; memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t)); } bool do_numa = GCToOSInterface::CanEnableGCNumaAware(); // we want to assign heap indices such that there is a contiguous // range of heap numbers for each numa node // we do this in two passes: // 1. gather processor numbers and numa node numbers for all heaps // 2. assign heap numbers for each numa node // Pass 1: gather processor numbers and numa node numbers uint16_t proc_no[MAX_SUPPORTED_CPUS]; uint16_t node_no[MAX_SUPPORTED_CPUS]; uint16_t max_node_no = 0; for (int i = 0; i < n_heaps; i++) { if (!GCToOSInterface::GetProcessorForHeap (i, &proc_no[i], &node_no[i])) break; if (!do_numa || node_no[i] == NUMA_NODE_UNDEFINED) node_no[i] = 0; max_node_no = max(max_node_no, node_no[i]); } // Pass 2: assign heap numbers by numa node int cur_heap_no = 0; for (uint16_t cur_node_no = 0; cur_node_no <= max_node_no; cur_node_no++) { for (int i = 0; i < n_heaps; i++) { if (node_no[i] != cur_node_no) continue; // we found a heap on cur_node_no heap_no_to_proc_no[cur_heap_no] = proc_no[i]; heap_no_to_numa_node[cur_heap_no] = cur_node_no; proc_no_to_numa_node[proc_no[i]] = cur_node_no; cur_heap_no++; } } return TRUE; } static void init_cpu_mapping(int heap_number) { if (GCToOSInterface::CanGetCurrentProcessorNumber()) { uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); proc_no_to_heap_no[proc_no] = (uint16_t)heap_number; } } static void mark_heap(int heap_number) { if (GCToOSInterface::CanGetCurrentProcessorNumber()) return; for (unsigned sniff_index = 0; sniff_index < n_sniff_buffers; sniff_index++) sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1; } static int select_heap(alloc_context* acontext) { #ifndef TRACE_GC UNREFERENCED_PARAMETER(acontext); // only referenced by dprintf #endif //TRACE_GC if (GCToOSInterface::CanGetCurrentProcessorNumber()) { uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); return proc_no_to_heap_no[proc_no]; } unsigned sniff_index = Interlocked::Increment(&cur_sniff_index); sniff_index %= n_sniff_buffers; int best_heap = 0; int best_access_time = 1000*1000*1000; int second_best_access_time = best_access_time; uint8_t *l_sniff_buffer = sniff_buffer; unsigned l_n_sniff_buffers = n_sniff_buffers; for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++) { int this_access_time = access_time(l_sniff_buffer, heap_number, sniff_index, l_n_sniff_buffers); if (this_access_time < best_access_time) { second_best_access_time = best_access_time; 
best_access_time = this_access_time; best_heap = heap_number; } else if (this_access_time < second_best_access_time) { second_best_access_time = this_access_time; } } if (best_access_time*2 < second_best_access_time) { sniff_buffer[(1 + best_heap*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1; dprintf (3, ("select_heap yields crisp %d for context %p\n", best_heap, (void *)acontext)); } else { dprintf (3, ("select_heap yields vague %d for context %p\n", best_heap, (void *)acontext )); } return best_heap; } static bool can_find_heap_fast() { return GCToOSInterface::CanGetCurrentProcessorNumber(); } static uint16_t find_heap_no_from_proc_no(uint16_t proc_no) { return proc_no_to_heap_no[proc_no]; } static uint16_t find_proc_no_from_heap_no(int heap_number) { return heap_no_to_proc_no[heap_number]; } static void set_proc_no_for_heap(int heap_number, uint16_t proc_no) { heap_no_to_proc_no[heap_number] = proc_no; } static uint16_t find_numa_node_from_heap_no(int heap_number) { return heap_no_to_numa_node[heap_number]; } static uint16_t find_numa_node_from_proc_no (uint16_t proc_no) { return proc_no_to_numa_node[proc_no]; } static void set_numa_node_for_heap_and_proc(int heap_number, uint16_t proc_no, uint16_t numa_node) { heap_no_to_numa_node[heap_number] = numa_node; proc_no_to_numa_node[proc_no] = numa_node; } static void init_numa_node_to_heap_map(int nheaps) { // Called right after GCHeap::Init() for each heap // For each NUMA node used by the heaps, the // numa_node_to_heap_map[numa_node] is set to the first heap number on that node and // numa_node_to_heap_map[numa_node + 1] is set to the first heap number not on that node // Set the start of the heap number range for the first NUMA node numa_node_to_heap_map[heap_no_to_numa_node[0]] = 0; total_numa_nodes = 0; memset (heaps_on_node, 0, sizeof (heaps_on_node)); heaps_on_node[0].node_no = heap_no_to_numa_node[0]; heaps_on_node[0].heap_count = 1; for (int i=1; i < nheaps; i++) { if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1]) { total_numa_nodes++; heaps_on_node[total_numa_nodes].node_no = heap_no_to_numa_node[i]; // Set the end of the heap number range for the previous NUMA node numa_node_to_heap_map[heap_no_to_numa_node[i-1] + 1] = // Set the start of the heap number range for the current NUMA node numa_node_to_heap_map[heap_no_to_numa_node[i]] = (uint16_t)i; } (heaps_on_node[total_numa_nodes].heap_count)++; } // Set the end of the heap range for the last NUMA node numa_node_to_heap_map[heap_no_to_numa_node[nheaps-1] + 1] = (uint16_t)nheaps; //mark the end with nheaps total_numa_nodes++; } // TODO: curently this doesn't work with GCHeapAffinitizeMask/GCHeapAffinitizeRanges // because the heaps may not be on contiguous active procs. // // This is for scenarios where GCHeapCount is specified as something like // (g_num_active_processors - 2) to allow less randomization to the Server GC threads. // In this case we want to assign the right heaps to those procs, ie if they share // the same numa node we want to assign local heaps to those procs. Otherwise we // let the heap balancing mechanism take over for now. 
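// A hedged example of the scenario described above: 32 active procs on 2 NUMA nodes with
// GCHeapCount=30 leaves two procs without a dedicated heap. For each such proc,
// distribute_other_procs looks up the heap range of the proc's NUMA node in
// numa_node_to_heap_map and points proc_no_to_heap_no at the next heap in that range,
// stopping once every heap on the node has been handed one extra proc, so allocating
// threads on those procs still start out with a NUMA-local home heap.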
static void distribute_other_procs() { if (affinity_config_specified_p) return; uint16_t proc_no = 0; uint16_t node_no = 0; bool res = false; int start_heap = -1; int end_heap = -1; int current_node_no = -1; int current_heap_on_node = -1; for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++) { if (!GCToOSInterface::GetProcessorForHeap (i, &proc_no, &node_no)) break; int start_heap = (int)numa_node_to_heap_map[node_no]; int end_heap = (int)(numa_node_to_heap_map[node_no + 1]); if ((end_heap - start_heap) > 0) { if (node_no == current_node_no) { // We already iterated through all heaps on this node, don't add more procs to these // heaps. if (current_heap_on_node >= end_heap) { continue; } } else { current_node_no = node_no; current_heap_on_node = start_heap; } proc_no_to_heap_no[proc_no] = current_heap_on_node; proc_no_to_numa_node[proc_no] = node_no; current_heap_on_node++; } } } static void get_heap_range_for_heap(int hn, int* start, int* end) { uint16_t numa_node = heap_no_to_numa_node[hn]; *start = (int)numa_node_to_heap_map[numa_node]; *end = (int)(numa_node_to_heap_map[numa_node+1]); #ifdef HEAP_BALANCE_INSTRUMENTATION dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end)); #endif //HEAP_BALANCE_INSTRUMENTATION } // This gets the next valid numa node index starting at current_index+1. // It assumes that current_index is a valid node index. // If current_index+1 is at the end this will start at the beginning. So this will // always return a valid node index, along with that node's start/end heaps. static uint16_t get_next_numa_node (uint16_t current_index, int* start, int* end) { int start_index = current_index + 1; int nheaps = gc_heap::n_heaps; bool found_node_with_heaps_p = false; do { int start_heap = (int)numa_node_to_heap_map[start_index]; int end_heap = (int)numa_node_to_heap_map[start_index + 1]; if (start_heap == nheaps) { // This is the last node. start_index = 0; continue; } if ((end_heap - start_heap) == 0) { // This node has no heaps. start_index++; } else { found_node_with_heaps_p = true; *start = start_heap; *end = end_heap; } } while (!found_node_with_heaps_p); return start_index; } }; uint8_t* heap_select::sniff_buffer; unsigned heap_select::n_sniff_buffers; unsigned heap_select::cur_sniff_index; uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; uint16_t heap_select::proc_no_to_numa_node[MAX_SUPPORTED_CPUS]; uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; uint16_t heap_select::total_numa_nodes; node_heap_count heap_select::heaps_on_node[MAX_SUPPORTED_NODES]; #ifdef HEAP_BALANCE_INSTRUMENTATION // This records info we use to look at effect of different strategies // for heap balancing. struct heap_balance_info { uint64_t timestamp; // This also encodes when we detect the thread runs on // different proc during a balance attempt. Sometimes // I observe this happens multiple times during one attempt! // If this happens, I just record the last proc we observe // and set MSB. int tid; // This records the final alloc_heap for the thread. // // This also encodes the reason why we needed to set_home_heap // in balance_heaps. // If we set it because the home heap is not the same as the proc, // we set MSB. // // If we set ideal proc, we set the 2nd MSB. 
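// (Hedged illustration of the packing, assuming 32-bit int fields: "home heap is not the
// proc's heap" is recorded as alloc_heap |= (1 << 31), "ideal proc was set" as
// alloc_heap |= (1 << 30), and a thread seen on multiple procs sets the MSB of tid;
// add_to_hb_numa sets these bits and hb_log_balance_activities masks them back off
// before printing.)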
int alloc_heap; int ideal_proc_no; }; // This means inbetween each GC we can log at most this many entries per proc. // This is usually enough. Most of the time we only need to log something every 128k // of allocations in balance_heaps and gen0 budget is <= 200mb. #define default_max_hb_heap_balance_info 4096 struct heap_balance_info_proc { int count; int index; heap_balance_info hb_info[default_max_hb_heap_balance_info]; }; struct heap_balance_info_numa { heap_balance_info_proc* hb_info_procs; }; uint64_t start_raw_ts = 0; bool cpu_group_enabled_p = false; uint32_t procs_per_numa_node = 0; uint16_t total_numa_nodes_on_machine = 0; uint32_t procs_per_cpu_group = 0; uint16_t total_cpu_groups_on_machine = 0; // Note this is still on one of the numa nodes, so we'll incur a remote access // no matter what. heap_balance_info_numa* hb_info_numa_nodes = NULL; // TODO: This doesn't work for multiple nodes per CPU group yet. int get_proc_index_numa (int proc_no, int* numa_no) { if (total_numa_nodes_on_machine == 1) { *numa_no = 0; return proc_no; } else { if (cpu_group_enabled_p) { // see vm\gcenv.os.cpp GroupProcNo implementation. *numa_no = proc_no >> 6; return (proc_no % 64); } else { *numa_no = proc_no / procs_per_numa_node; return (proc_no % procs_per_numa_node); } } } // We could consider optimizing it so we don't need to get the tid // everytime but it's not very expensive to get. void add_to_hb_numa ( int proc_no, int ideal_proc_no, int alloc_heap, bool multiple_procs_p, bool alloc_count_p, bool set_ideal_p) { int tid = (int)GCToOSInterface::GetCurrentThreadIdForLogging (); uint64_t timestamp = RawGetHighPrecisionTimeStamp (); int saved_proc_no = proc_no; int numa_no = -1; proc_no = get_proc_index_numa (proc_no, &numa_no); heap_balance_info_numa* hb_info_numa_node = &hb_info_numa_nodes[numa_no]; heap_balance_info_proc* hb_info_proc = &(hb_info_numa_node->hb_info_procs[proc_no]); int index = hb_info_proc->index; int count = hb_info_proc->count; if (index == count) { // Too much info inbetween GCs. This can happen if the thread is scheduled on a different // processor very often so it caused us to log many entries due to that reason. You could // increase default_max_hb_heap_balance_info but this usually indicates a problem that // should be investigated. 
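// (For scale: with default_max_hb_heap_balance_info == 4096 entries per proc and
// balance_heaps only logging roughly every 128k of allocation, filling the buffer
// between two GCs generally means the thread was migrating between processors far more
// often than expected, which is exactly the condition worth investigating.)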
dprintf (HEAP_BALANCE_LOG, ("too much info between GCs, already logged %d entries", index)); GCToOSInterface::DebugBreak (); } heap_balance_info* hb_info = &(hb_info_proc->hb_info[index]); dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMP[p%3d->%3d(i:%3d), N%d] #%4d: %I64d, tid %d, ah: %d, m: %d, p: %d, i: %d", saved_proc_no, proc_no, ideal_proc_no, numa_no, index, (timestamp - start_raw_ts) / 1000, tid, alloc_heap, (int)multiple_procs_p, (int)(!alloc_count_p), (int)set_ideal_p)); if (multiple_procs_p) { tid |= (1 << (sizeof (tid) * 8 - 1)); } if (!alloc_count_p) { alloc_heap |= (1 << (sizeof (alloc_heap) * 8 - 1)); } if (set_ideal_p) { alloc_heap |= (1 << (sizeof (alloc_heap) * 8 - 2)); } hb_info->timestamp = timestamp; hb_info->tid = tid; hb_info->alloc_heap = alloc_heap; hb_info->ideal_proc_no = ideal_proc_no; (hb_info_proc->index)++; } const int hb_log_buffer_size = 4096; static char hb_log_buffer[hb_log_buffer_size]; int last_hb_recorded_gc_index = -1; #endif //HEAP_BALANCE_INSTRUMENTATION // This logs what we recorded in balance_heaps // The format for this is // // [ms since last GC end] // [cpu index] // all elements we stored before this GC for this CPU in the format // timestamp,tid, alloc_heap_no // repeat this for each CPU // // the timestamp here is just the result of calling QPC, // it's not converted to ms. The conversion will be done when we process // the log. void gc_heap::hb_log_balance_activities() { #ifdef HEAP_BALANCE_INSTRUMENTATION char* log_buffer = hb_log_buffer; uint64_t now = GetHighPrecisionTimeStamp(); size_t time_since_last_gc_ms = (size_t)((now - last_gc_end_time_us) / 1000); dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMP%Id - %Id = %Id", now, last_gc_end_time_ms, time_since_last_gc_ms)); // We want to get the min and the max timestamp for all procs because it helps with our post processing // to know how big an array to allocate to display the history inbetween the GCs. uint64_t min_timestamp = 0xffffffffffffffff; uint64_t max_timestamp = 0; for (int numa_node_index = 0; numa_node_index < total_numa_nodes_on_machine; numa_node_index++) { heap_balance_info_proc* hb_info_procs = hb_info_numa_nodes[numa_node_index].hb_info_procs; for (int proc_index = 0; proc_index < (int)procs_per_numa_node; proc_index++) { heap_balance_info_proc* hb_info_proc = &hb_info_procs[proc_index]; int total_entries_on_proc = hb_info_proc->index; if (total_entries_on_proc > 0) { min_timestamp = min (min_timestamp, hb_info_proc->hb_info[0].timestamp); max_timestamp = max (max_timestamp, hb_info_proc->hb_info[total_entries_on_proc - 1].timestamp); } } } dprintf (HEAP_BALANCE_LOG, ("[GCA#%Id %Id-%I64d-%I64d]", settings.gc_index, time_since_last_gc_ms, (min_timestamp - start_raw_ts), (max_timestamp - start_raw_ts))); if (last_hb_recorded_gc_index == (int)settings.gc_index) { GCToOSInterface::DebugBreak (); } last_hb_recorded_gc_index = (int)settings.gc_index; // When we print out the proc index we need to convert it to the actual proc index (this is contiguous). // It helps with post processing. 
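// A hedged sketch of that conversion: get_proc_index_numa() splits a raw proc number into
// (numa_no, proc index within the node), and the contiguous index printed below is
// rebuilt as
//
//     proc_index + numa_node_index * procs_per_numa_node
//
// so, for example, local proc 3 on node 1 of a machine with 16 procs per node is logged
// as p19.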
for (int numa_node_index = 0; numa_node_index < total_numa_nodes_on_machine; numa_node_index++) { heap_balance_info_proc* hb_info_procs = hb_info_numa_nodes[numa_node_index].hb_info_procs; for (int proc_index = 0; proc_index < (int)procs_per_numa_node; proc_index++) { heap_balance_info_proc* hb_info_proc = &hb_info_procs[proc_index]; int total_entries_on_proc = hb_info_proc->index; if (total_entries_on_proc > 0) { int total_exec_time_ms = (int)((double)(hb_info_proc->hb_info[total_entries_on_proc - 1].timestamp - hb_info_proc->hb_info[0].timestamp) * qpf_ms); dprintf (HEAP_BALANCE_LOG, ("[p%d]-%d-%dms", (proc_index + numa_node_index * procs_per_numa_node), total_entries_on_proc, total_exec_time_ms)); } for (int i = 0; i < hb_info_proc->index; i++) { heap_balance_info* hb_info = &hb_info_proc->hb_info[i]; bool multiple_procs_p = false; bool alloc_count_p = true; bool set_ideal_p = false; int tid = hb_info->tid; int alloc_heap = hb_info->alloc_heap; if (tid & (1 << (sizeof (tid) * 8 - 1))) { multiple_procs_p = true; tid &= ~(1 << (sizeof (tid) * 8 - 1)); } if (alloc_heap & (1 << (sizeof (alloc_heap) * 8 - 1))) { alloc_count_p = false; alloc_heap &= ~(1 << (sizeof (alloc_heap) * 8 - 1)); } if (alloc_heap & (1 << (sizeof (alloc_heap) * 8 - 2))) { set_ideal_p = true; alloc_heap &= ~(1 << (sizeof (alloc_heap) * 8 - 2)); } // TODO - This assumes ideal proc is in the same cpu group which is not true // when we don't have CPU groups. int ideal_proc_no = hb_info->ideal_proc_no; int ideal_node_no = -1; ideal_proc_no = get_proc_index_numa (ideal_proc_no, &ideal_node_no); ideal_proc_no = ideal_proc_no + ideal_node_no * procs_per_numa_node; dprintf (HEAP_BALANCE_LOG, ("%I64d,%d,%d,%d%s%s%s", (hb_info->timestamp - start_raw_ts), tid, ideal_proc_no, (int)alloc_heap, (multiple_procs_p ? "|m" : ""), (!alloc_count_p ? "|p" : ""), (set_ideal_p ? "|i" : ""))); } } } for (int numa_node_index = 0; numa_node_index < total_numa_nodes_on_machine; numa_node_index++) { heap_balance_info_proc* hb_info_procs = hb_info_numa_nodes[numa_node_index].hb_info_procs; for (int proc_index = 0; proc_index < (int)procs_per_numa_node; proc_index++) { heap_balance_info_proc* hb_info_proc = &hb_info_procs[proc_index]; hb_info_proc->index = 0; } } #endif //HEAP_BALANCE_INSTRUMENTATION } // The format for this is // // [GC_alloc_mb] // h0_new_alloc, h1_new_alloc, ... // void gc_heap::hb_log_new_allocation() { #ifdef HEAP_BALANCE_INSTRUMENTATION char* log_buffer = hb_log_buffer; int desired_alloc_mb = (int)(dd_desired_allocation (g_heaps[0]->dynamic_data_of (0)) / 1024 / 1024); int buffer_pos = sprintf_s (hb_log_buffer, hb_log_buffer_size, "[GC_alloc_mb]\n"); for (int numa_node_index = 0; numa_node_index < heap_select::total_numa_nodes; numa_node_index++) { int node_allocated_mb = 0; // I'm printing out the budget here instead of the numa node index so we know how much // of the budget we consumed. 
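// (Hedged example of the resulting log shape, assuming a 200mb gen0 budget and two heaps
// on node 0 that used 12mb and 15mb since the last GC:
//
//     [GC_alloc_mb]
//     [N#200]12,15,
//
// i.e. the number after N# is the gen0 budget in mb rather than the numa node index.)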
buffer_pos += sprintf_s (hb_log_buffer + buffer_pos, hb_log_buffer_size - buffer_pos, "[N#%3d]", //numa_node_index); desired_alloc_mb); int heaps_on_node = heap_select::heaps_on_node[numa_node_index].heap_count; for (int heap_index = 0; heap_index < heaps_on_node; heap_index++) { int actual_heap_index = heap_index + numa_node_index * heaps_on_node; gc_heap* hp = g_heaps[actual_heap_index]; dynamic_data* dd0 = hp->dynamic_data_of (0); int allocated_mb = (int)((dd_desired_allocation (dd0) - dd_new_allocation (dd0)) / 1024 / 1024); node_allocated_mb += allocated_mb; buffer_pos += sprintf_s (hb_log_buffer + buffer_pos, hb_log_buffer_size - buffer_pos, "%d,", allocated_mb); } dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPN#%d a %dmb(%dmb)", numa_node_index, node_allocated_mb, desired_alloc_mb)); buffer_pos += sprintf_s (hb_log_buffer + buffer_pos, hb_log_buffer_size - buffer_pos, "\n"); } dprintf (HEAP_BALANCE_LOG, ("%s", hb_log_buffer)); #endif //HEAP_BALANCE_INSTRUMENTATION } BOOL gc_heap::create_thread_support (int number_of_heaps) { BOOL ret = FALSE; if (!gc_start_event.CreateOSManualEventNoThrow (FALSE)) { goto cleanup; } if (!ee_suspend_event.CreateOSAutoEventNoThrow (FALSE)) { goto cleanup; } if (!gc_t_join.init (number_of_heaps, join_flavor_server_gc)) { goto cleanup; } ret = TRUE; cleanup: if (!ret) { destroy_thread_support(); } return ret; } void gc_heap::destroy_thread_support () { if (ee_suspend_event.IsValid()) { ee_suspend_event.CloseEvent(); } if (gc_start_event.IsValid()) { gc_start_event.CloseEvent(); } } void set_thread_affinity_for_heap (int heap_number, uint16_t proc_no) { if (!GCToOSInterface::SetThreadAffinity (proc_no)) { dprintf (1, ("Failed to set thread affinity for GC thread %d on proc #%d", heap_number, proc_no)); } } bool gc_heap::create_gc_thread () { dprintf (3, ("Creating gc thread\n")); return GCToEEInterface::CreateThread(gc_thread_stub, this, false, ".NET Server GC"); } #ifdef _MSC_VER #pragma warning(disable:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path #endif //_MSC_VER void gc_heap::gc_thread_function () { assert (gc_done_event.IsValid()); assert (gc_start_event.IsValid()); dprintf (3, ("gc thread started")); heap_select::init_cpu_mapping(heap_number); while (1) { assert (!gc_t_join.joined()); if (heap_number == 0) { uint32_t wait_result = gc_heap::ee_suspend_event.Wait(gradual_decommit_in_progress_p ? 
DECOMMIT_TIME_STEP_MILLISECONDS : INFINITE, FALSE); if (wait_result == WAIT_TIMEOUT) { gradual_decommit_in_progress_p = decommit_step (); continue; } suspended_start_time = GetHighPrecisionTimeStamp(); BEGIN_TIMING(suspend_ee_during_log); GCToEEInterface::SuspendEE(SUSPEND_FOR_GC); END_TIMING(suspend_ee_during_log); proceed_with_gc_p = TRUE; gradual_decommit_in_progress_p = FALSE; if (!should_proceed_with_gc()) { update_collection_counts_for_no_gc(); proceed_with_gc_p = FALSE; } else { settings.init_mechanisms(); gc_start_event.Set(); } dprintf (3, (ThreadStressLog::gcServerThread0StartMsg(), heap_number)); } else { gc_start_event.Wait(INFINITE, FALSE); dprintf (3, (ThreadStressLog::gcServerThreadNStartMsg(), heap_number)); } assert ((heap_number == 0) || proceed_with_gc_p); if (proceed_with_gc_p) { garbage_collect (GCHeap::GcCondemnedGeneration); if (pm_trigger_full_gc) { garbage_collect_pm_full_gc(); } } if (heap_number == 0) { if (proceed_with_gc_p && (!settings.concurrent)) { do_post_gc(); } #ifdef BACKGROUND_GC recover_bgc_settings(); #endif //BACKGROUND_GC #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; hp->add_saved_spinlock_info (false, me_release, mt_block_gc); leave_spin_lock(&hp->more_space_lock_soh); } #endif //MULTIPLE_HEAPS gc_heap::gc_started = FALSE; #ifdef BACKGROUND_GC gc_heap::add_bgc_pause_duration_0(); #endif //BACKGROUND_GC BEGIN_TIMING(restart_ee_during_log); GCToEEInterface::RestartEE(TRUE); END_TIMING(restart_ee_during_log); process_sync_log_stats(); dprintf (SPINLOCK_LOG, ("GC Lgc")); leave_spin_lock (&gc_heap::gc_lock); gc_heap::internal_gc_done = true; if (proceed_with_gc_p) set_gc_done(); else { // If we didn't actually do a GC, it means we didn't wait up the other threads, // we still need to set the gc_done_event for those threads. for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; hp->set_gc_done(); } } // check if we should do some decommitting if (gradual_decommit_in_progress_p) { gradual_decommit_in_progress_p = decommit_step (); } } else { int spin_count = 32 * (gc_heap::n_heaps - 1); // wait until RestartEE has progressed to a stage where we can restart user threads while (!gc_heap::internal_gc_done && !GCHeap::SafeToRestartManagedThreads()) { spin_and_switch (spin_count, (gc_heap::internal_gc_done || GCHeap::SafeToRestartManagedThreads())); } set_gc_done(); } } } #ifdef _MSC_VER #pragma warning(default:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path #endif //_MSC_VER #endif //MULTIPLE_HEAPS bool gc_heap::virtual_alloc_commit_for_heap (void* addr, size_t size, int h_number) { #if defined(MULTIPLE_HEAPS) && !defined(FEATURE_REDHAWK) // Currently there is no way for us to specific the numa node to allocate on via hosting interfaces to // a host. This will need to be added later. 
#if !defined(FEATURE_CORECLR) && !defined(BUILD_AS_STANDALONE) if (!CLRMemoryHosted()) #endif { if (GCToOSInterface::CanEnableGCNumaAware()) { uint16_t numa_node = heap_select::find_numa_node_from_heap_no(h_number); if (GCToOSInterface::VirtualCommit (addr, size, numa_node)) return true; } } #else //MULTIPLE_HEAPS && !FEATURE_REDHAWK UNREFERENCED_PARAMETER(h_number); #endif //MULTIPLE_HEAPS && !FEATURE_REDHAWK //numa aware not enabled, or call failed --> fallback to VirtualCommit() return GCToOSInterface::VirtualCommit(addr, size); } bool gc_heap::virtual_commit (void* address, size_t size, gc_oh_num oh, int h_number, bool* hard_limit_exceeded_p) { #ifndef HOST_64BIT assert (heap_hard_limit == 0); #endif //!HOST_64BIT if (heap_hard_limit) { check_commit_cs.Enter(); bool exceeded_p = false; if (heap_hard_limit_oh[soh] != 0) { if ((oh != gc_oh_num::none) && (committed_by_oh[oh] + size) > heap_hard_limit_oh[oh]) { exceeded_p = true; } } else if ((current_total_committed + size) > heap_hard_limit) { dprintf (1, ("%Id + %Id = %Id > limit %Id ", current_total_committed, size, (current_total_committed + size), heap_hard_limit)); exceeded_p = true; } if (!exceeded_p) { committed_by_oh[oh] += size; current_total_committed += size; if (h_number < 0) current_total_committed_bookkeeping += size; } check_commit_cs.Leave(); if (hard_limit_exceeded_p) *hard_limit_exceeded_p = exceeded_p; if (exceeded_p) { dprintf (1, ("can't commit %Ix for %Id bytes > HARD LIMIT %Id", (size_t)address, size, heap_hard_limit)); return false; } } // If it's a valid heap number it means it's commiting for memory on the GC heap. // In addition if large pages is enabled, we set commit_succeeded_p to true because memory is already committed. bool commit_succeeded_p = ((h_number >= 0) ? (use_large_pages_p ? true : virtual_alloc_commit_for_heap (address, size, h_number)) : GCToOSInterface::VirtualCommit(address, size)); if (!commit_succeeded_p && heap_hard_limit) { check_commit_cs.Enter(); committed_by_oh[oh] -= size; dprintf (1, ("commit failed, updating %Id to %Id", current_total_committed, (current_total_committed - size))); current_total_committed -= size; if (h_number < 0) current_total_committed_bookkeeping -= size; check_commit_cs.Leave(); } return commit_succeeded_p; } bool gc_heap::virtual_decommit (void* address, size_t size, gc_oh_num oh, int h_number) { #ifndef HOST_64BIT assert (heap_hard_limit == 0); #endif //!HOST_64BIT bool decommit_succeeded_p = GCToOSInterface::VirtualDecommit (address, size); if (decommit_succeeded_p && heap_hard_limit) { check_commit_cs.Enter(); committed_by_oh[oh] -= size; current_total_committed -= size; if (h_number < 0) current_total_committed_bookkeeping -= size; check_commit_cs.Leave(); } return decommit_succeeded_p; } void gc_heap::virtual_free (void* add, size_t allocated_size, heap_segment* sg) { bool release_succeeded_p = GCToOSInterface::VirtualRelease (add, allocated_size); if (release_succeeded_p) { reserved_memory -= allocated_size; dprintf (2, ("Virtual Free size %Id: [%Ix, %Ix[", allocated_size, (size_t)add, (size_t)((uint8_t*)add + allocated_size))); } } class mark { public: uint8_t* first; size_t len; // If we want to save space we can have a pool of plug_and_gap's instead of // always having 2 allocated for each pinned plug. gap_reloc_pair saved_pre_plug; // If we decide to not compact, we need to restore the original values. 
gap_reloc_pair saved_pre_plug_reloc; gap_reloc_pair saved_post_plug; // Supposedly Pinned objects cannot have references but we are seeing some from pinvoke // frames. Also if it's an artificially pinned plug created by us, it can certainly // have references. // We know these cases will be rare so we can optimize this to be only allocated on demand. gap_reloc_pair saved_post_plug_reloc; // We need to calculate this after we are done with plan phase and before compact // phase because compact phase will change the bricks so relocate_address will no // longer work. uint8_t* saved_pre_plug_info_reloc_start; // We need to save this because we will have no way to calculate it, unlike the // pre plug info start which is right before this plug. uint8_t* saved_post_plug_info_start; #ifdef SHORT_PLUGS uint8_t* allocation_context_start_region; #endif //SHORT_PLUGS // How the bits in these bytes are organized: // MSB --> LSB // bit to indicate whether it's a short obj | 3 bits for refs in this short obj | 2 unused bits | bit to indicate if it's collectible | last bit // last bit indicates if there's pre or post info associated with this plug. If it's not set all other bits will be 0. BOOL saved_pre_p; BOOL saved_post_p; #ifdef _DEBUG // We are seeing this is getting corrupted for a PP with a NP after. // Save it when we first set it and make sure it doesn't change. gap_reloc_pair saved_post_plug_debug; #endif //_DEBUG size_t get_max_short_bits() { return (sizeof (gap_reloc_pair) / sizeof (uint8_t*)); } // pre bits size_t get_pre_short_start_bit () { return (sizeof (saved_pre_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); } BOOL pre_short_p() { return (saved_pre_p & (1 << (sizeof (saved_pre_p) * 8 - 1))); } void set_pre_short() { saved_pre_p |= (1 << (sizeof (saved_pre_p) * 8 - 1)); } void set_pre_short_bit (size_t bit) { saved_pre_p |= 1 << (get_pre_short_start_bit() + bit); } BOOL pre_short_bit_p (size_t bit) { return (saved_pre_p & (1 << (get_pre_short_start_bit() + bit))); } #ifdef COLLECTIBLE_CLASS void set_pre_short_collectible() { saved_pre_p |= 2; } BOOL pre_short_collectible_p() { return (saved_pre_p & 2); } #endif //COLLECTIBLE_CLASS // post bits size_t get_post_short_start_bit () { return (sizeof (saved_post_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); } BOOL post_short_p() { return (saved_post_p & (1 << (sizeof (saved_post_p) * 8 - 1))); } void set_post_short() { saved_post_p |= (1 << (sizeof (saved_post_p) * 8 - 1)); } void set_post_short_bit (size_t bit) { saved_post_p |= 1 << (get_post_short_start_bit() + bit); } BOOL post_short_bit_p (size_t bit) { return (saved_post_p & (1 << (get_post_short_start_bit() + bit))); } #ifdef COLLECTIBLE_CLASS void set_post_short_collectible() { saved_post_p |= 2; } BOOL post_short_collectible_p() { return (saved_post_p & 2); } #endif //COLLECTIBLE_CLASS uint8_t* get_plug_address() { return first; } BOOL has_pre_plug_info() { return saved_pre_p; } BOOL has_post_plug_info() { return saved_post_p; } gap_reloc_pair* get_pre_plug_reloc_info() { return &saved_pre_plug_reloc; } gap_reloc_pair* get_post_plug_reloc_info() { return &saved_post_plug_reloc; } void set_pre_plug_info_reloc_start (uint8_t* reloc) { saved_pre_plug_info_reloc_start = reloc; } uint8_t* get_post_plug_info_start() { return saved_post_plug_info_start; } // We need to temporarily recover the shortened plugs for compact phase so we can // copy over the whole plug and their related info (mark bits/cards). 
But we will // need to set the artificial gap back so compact phase can keep reading the plug info. // We also need to recover the saved info because we'll need to recover it later. // // So we would call swap_p*_plug_and_saved once to recover the object info; then call // it again to recover the artificial gap. void swap_pre_plug_and_saved() { gap_reloc_pair temp; memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); saved_pre_plug_reloc = temp; } void swap_post_plug_and_saved() { gap_reloc_pair temp; memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); saved_post_plug_reloc = temp; } void swap_pre_plug_and_saved_for_profiler() { gap_reloc_pair temp; memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug)); saved_pre_plug = temp; } void swap_post_plug_and_saved_for_profiler() { gap_reloc_pair temp; memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); saved_post_plug = temp; } // We should think about whether it's really necessary to have to copy back the pre plug // info since it was already copied during compacting plugs. But if a plug doesn't move // by >= 3 ptr size (the size of gap_reloc_pair), it means we'd have to recover pre plug info. size_t recover_plug_info() { // We need to calculate the size for the sweep case in order to correctly record the // free_obj_space - sweep would've made these artificial gaps into free objects and // we would need to deduct the size because now we are writing into those free objects.
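// (In other words, in the sweep case the value returned below is sizeof (gap_reloc_pair)
// for each pre/post gap we restore, i.e. up to 2 * sizeof (gap_reloc_pair) per pinned plug,
// while in the compact case nothing is deducted and we return 0.)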
size_t recovered_sweep_size = 0; if (saved_pre_p) { if (gc_heap::settings.compaction) { dprintf (3, ("%Ix: REC Pre: %Ix-%Ix", first, &saved_pre_plug_reloc, saved_pre_plug_info_reloc_start)); memcpy (saved_pre_plug_info_reloc_start, &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); } else { dprintf (3, ("%Ix: REC Pre: %Ix-%Ix", first, &saved_pre_plug, (first - sizeof (plug_and_gap)))); memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug)); recovered_sweep_size += sizeof (saved_pre_plug); } } if (saved_post_p) { if (gc_heap::settings.compaction) { dprintf (3, ("%Ix: REC Post: %Ix-%Ix", first, &saved_post_plug_reloc, saved_post_plug_info_start)); memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); } else { dprintf (3, ("%Ix: REC Post: %Ix-%Ix", first, &saved_post_plug, saved_post_plug_info_start)); memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); recovered_sweep_size += sizeof (saved_post_plug); } } return recovered_sweep_size; } }; void gc_mechanisms::init_mechanisms() { condemned_generation = 0; promotion = FALSE;//TRUE; compaction = TRUE; #ifdef FEATURE_LOH_COMPACTION loh_compaction = gc_heap::loh_compaction_requested(); #else loh_compaction = FALSE; #endif //FEATURE_LOH_COMPACTION heap_expansion = FALSE; concurrent = FALSE; demotion = FALSE; elevation_reduced = FALSE; found_finalizers = FALSE; #ifdef BACKGROUND_GC background_p = gc_heap::background_running_p() != FALSE; allocations_allowed = TRUE; #endif //BACKGROUND_GC entry_memory_load = 0; entry_available_physical_mem = 0; exit_memory_load = 0; #ifdef STRESS_HEAP stress_induced = FALSE; #endif // STRESS_HEAP } void gc_mechanisms::first_init() { gc_index = 0; gen0_reduction_count = 0; should_lock_elevation = FALSE; elevation_locked_count = 0; reason = reason_empty; #ifdef BACKGROUND_GC pause_mode = gc_heap::gc_can_use_concurrent ? pause_interactive : pause_batch; #ifdef _DEBUG int debug_pause_mode = static_cast<int>(GCConfig::GetLatencyMode()); if (debug_pause_mode >= 0) { assert (debug_pause_mode <= pause_sustained_low_latency); pause_mode = (gc_pause_mode)debug_pause_mode; } #endif //_DEBUG #else //BACKGROUND_GC pause_mode = pause_batch; #endif //BACKGROUND_GC init_mechanisms(); } void gc_mechanisms::record (gc_history_global* history) { #ifdef MULTIPLE_HEAPS history->num_heaps = gc_heap::n_heaps; #else history->num_heaps = 1; #endif //MULTIPLE_HEAPS history->condemned_generation = condemned_generation; history->gen0_reduction_count = gen0_reduction_count; history->reason = reason; history->pause_mode = (int)pause_mode; history->mem_pressure = entry_memory_load; history->global_mechanisms_p = 0; // start setting the boolean values. if (concurrent) history->set_mechanism_p (global_concurrent); if (compaction) history->set_mechanism_p (global_compaction); if (promotion) history->set_mechanism_p (global_promotion); if (demotion) history->set_mechanism_p (global_demotion); if (card_bundles) history->set_mechanism_p (global_card_bundles); if (elevation_reduced) history->set_mechanism_p (global_elevation); } /********************************** called at the beginning of GC to fix the allocated size to what is really allocated, or to turn the free area into an unused object. It needs to be called after all of the other allocation contexts have been fixed since it relies on alloc_allocated.
********************************/ //for_gc_p indicates that the work is being done for GC, //as opposed to concurrent heap verification void gc_heap::fix_youngest_allocation_area() { // The gen 0 alloc context is never used for allocation in the allocator path. It's // still used in the allocation path during GCs. assert (generation_allocation_pointer (youngest_generation) == nullptr); assert (generation_allocation_limit (youngest_generation) == nullptr); heap_segment_allocated (ephemeral_heap_segment) = alloc_allocated; assert (heap_segment_mem (ephemeral_heap_segment) <= heap_segment_allocated (ephemeral_heap_segment)); assert (heap_segment_allocated (ephemeral_heap_segment) <= heap_segment_reserved (ephemeral_heap_segment)); } //for_gc_p indicates that the work is being done for GC, //as opposed to concurrent heap verification void gc_heap::fix_allocation_context (alloc_context* acontext, BOOL for_gc_p, BOOL record_ac_p) { dprintf (3, ("Fixing allocation context %Ix: ptr: %Ix, limit: %Ix", (size_t)acontext, (size_t)acontext->alloc_ptr, (size_t)acontext->alloc_limit)); if (acontext->alloc_ptr == 0) { return; } int align_const = get_alignment_constant (TRUE); #ifdef USE_REGIONS bool is_ephemeral_heap_segment = in_range_for_segment (acontext->alloc_limit, ephemeral_heap_segment); #else // USE_REGIONS bool is_ephemeral_heap_segment = true; #endif // USE_REGIONS if ((!is_ephemeral_heap_segment) || ((size_t)(alloc_allocated - acontext->alloc_limit) > Align (min_obj_size, align_const)) || !for_gc_p) { uint8_t* point = acontext->alloc_ptr; size_t size = (acontext->alloc_limit - acontext->alloc_ptr); // the allocation area was from the free list // it was shortened by Align (min_obj_size) to make room for // at least the shortest unused object size += Align (min_obj_size, align_const); assert ((size >= Align (min_obj_size))); dprintf(3,("Making unused area [%Ix, %Ix[", (size_t)point, (size_t)point + size )); make_unused_array (point, size); if (for_gc_p) { generation_free_obj_space (generation_of (0)) += size; if (record_ac_p) alloc_contexts_used ++; } } else if (for_gc_p) { assert (is_ephemeral_heap_segment); alloc_allocated = acontext->alloc_ptr; assert (heap_segment_allocated (ephemeral_heap_segment) <= heap_segment_committed (ephemeral_heap_segment)); if (record_ac_p) alloc_contexts_used ++; } if (for_gc_p) { // We need to update the alloc_bytes to reflect the portion that we have not used acontext->alloc_bytes -= (acontext->alloc_limit - acontext->alloc_ptr); total_alloc_bytes_soh -= (acontext->alloc_limit - acontext->alloc_ptr); acontext->alloc_ptr = 0; acontext->alloc_limit = acontext->alloc_ptr; } } //used by the heap verification for concurrent gc. //it nulls out the words set by fix_allocation_context for heap_verification void repair_allocation (gc_alloc_context* acontext, void*) { uint8_t* point = acontext->alloc_ptr; if (point != 0) { dprintf (3, ("Clearing [%Ix, %Ix[", (size_t)acontext->alloc_ptr, (size_t)acontext->alloc_limit+Align(min_obj_size))); memclr (acontext->alloc_ptr - plug_skew, (acontext->alloc_limit - acontext->alloc_ptr)+Align (min_obj_size)); } } void void_allocation (gc_alloc_context* acontext, void*) { uint8_t* point = acontext->alloc_ptr; if (point != 0) { dprintf (3, ("Void [%Ix, %Ix[", (size_t)acontext->alloc_ptr, (size_t)acontext->alloc_limit+Align(min_obj_size))); acontext->alloc_ptr = 0; acontext->alloc_limit = acontext->alloc_ptr; } } void gc_heap::repair_allocation_contexts (BOOL repair_p) { GCToEEInterface::GcEnumAllocContexts (repair_p ? 
repair_allocation : void_allocation, NULL); } struct fix_alloc_context_args { BOOL for_gc_p; void* heap; }; void fix_alloc_context (gc_alloc_context* acontext, void* param) { fix_alloc_context_args* args = (fix_alloc_context_args*)param; g_theGCHeap->FixAllocContext(acontext, (void*)(size_t)(args->for_gc_p), args->heap); } void gc_heap::fix_allocation_contexts (BOOL for_gc_p) { fix_alloc_context_args args; args.for_gc_p = for_gc_p; args.heap = __this; GCToEEInterface::GcEnumAllocContexts(fix_alloc_context, &args); fix_youngest_allocation_area(); } void gc_heap::fix_older_allocation_area (generation* older_gen) { heap_segment* older_gen_seg = generation_allocation_segment (older_gen); if (generation_allocation_limit (older_gen) != heap_segment_plan_allocated (older_gen_seg)) { uint8_t* point = generation_allocation_pointer (older_gen); size_t size = (generation_allocation_limit (older_gen) - generation_allocation_pointer (older_gen)); if (size != 0) { assert ((size >= Align (min_obj_size))); dprintf(3,("Making unused area [%Ix, %Ix[", (size_t)point, (size_t)point+size)); make_unused_array (point, size); if (size >= min_free_list) { generation_allocator (older_gen)->thread_item_front (point, size); add_gen_free (older_gen->gen_num, size); generation_free_list_space (older_gen) += size; } else { generation_free_obj_space (older_gen) += size; } } } else { assert (older_gen_seg != ephemeral_heap_segment); heap_segment_plan_allocated (older_gen_seg) = generation_allocation_pointer (older_gen); generation_allocation_limit (older_gen) = generation_allocation_pointer (older_gen); } generation_allocation_pointer (older_gen) = 0; generation_allocation_limit (older_gen) = 0; } void gc_heap::set_allocation_heap_segment (generation* gen) { #ifdef USE_REGIONS heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); dprintf (REGIONS_LOG, ("set gen%d alloc seg to start seg %Ix", gen->gen_num, heap_segment_mem (seg))); #else uint8_t* p = generation_allocation_start (gen); assert (p); heap_segment* seg = generation_allocation_segment (gen); if (in_range_for_segment (p, seg)) return; // try ephemeral heap segment in case of heap expansion seg = ephemeral_heap_segment; if (!in_range_for_segment (p, seg)) { seg = heap_segment_rw (generation_start_segment (gen)); PREFIX_ASSUME(seg != NULL); while (!in_range_for_segment (p, seg)) { seg = heap_segment_next_rw (seg); PREFIX_ASSUME(seg != NULL); } } #endif //USE_REGIONS generation_allocation_segment (gen) = seg; } void gc_heap::reset_allocation_pointers (generation* gen, uint8_t* start) { assert (start); assert (Align ((size_t)start) == (size_t)start); #ifndef USE_REGIONS generation_allocation_start (gen) = start; #endif //!USE_REGIONS generation_allocation_pointer (gen) = 0;//start + Align (min_obj_size); generation_allocation_limit (gen) = 0;//generation_allocation_pointer (gen); set_allocation_heap_segment (gen); } bool gc_heap::new_allocation_allowed (int gen_number) { #ifdef BACKGROUND_GC //TODO BACKGROUND_GC this is for test only if (!settings.allocations_allowed) { dprintf (2, ("new allocation not allowed")); return FALSE; } #endif //BACKGROUND_GC if (dd_new_allocation (dynamic_data_of (gen_number)) < 0) { if (gen_number != 0) { // For UOH we will give it more budget before we try a GC. 
if (settings.concurrent) { dynamic_data* dd2 = dynamic_data_of (gen_number); if (dd_new_allocation (dd2) <= (ptrdiff_t)(-2 * dd_desired_allocation (dd2))) { return TRUE; } } } return FALSE; } #ifndef MULTIPLE_HEAPS else if ((settings.pause_mode != pause_no_gc) && (gen_number == 0)) { dynamic_data* dd0 = dynamic_data_of (0); dprintf (3, ("evaluating, running amount %Id - new %Id = %Id", allocation_running_amount, dd_new_allocation (dd0), (allocation_running_amount - dd_new_allocation (dd0)))); if ((allocation_running_amount - dd_new_allocation (dd0)) > dd_min_size (dd0)) { uint32_t ctime = GCToOSInterface::GetLowPrecisionTimeStamp(); if ((ctime - allocation_running_time) > 1000) { dprintf (2, (">1s since last gen0 gc")); return FALSE; } else { allocation_running_amount = dd_new_allocation (dd0); } } } #endif //MULTIPLE_HEAPS return TRUE; } inline ptrdiff_t gc_heap::get_desired_allocation (int gen_number) { return dd_desired_allocation (dynamic_data_of (gen_number)); } inline ptrdiff_t gc_heap::get_new_allocation (int gen_number) { return dd_new_allocation (dynamic_data_of (gen_number)); } //return the amount allocated so far in gen_number inline ptrdiff_t gc_heap::get_allocation (int gen_number) { dynamic_data* dd = dynamic_data_of (gen_number); return dd_desired_allocation (dd) - dd_new_allocation (dd); } inline BOOL grow_mark_stack (mark*& m, size_t& len, size_t init_len) { size_t new_size = max (init_len, 2*len); mark* tmp = new (nothrow) mark [new_size]; if (tmp) { memcpy (tmp, m, len * sizeof (mark)); delete m; m = tmp; len = new_size; return TRUE; } else { dprintf (1, ("Failed to allocate %Id bytes for mark stack", (len * sizeof (mark)))); return FALSE; } } inline uint8_t* pinned_plug (mark* m) { return m->first; } inline size_t& pinned_len (mark* m) { return m->len; } inline void set_new_pin_info (mark* m, uint8_t* pin_free_space_start) { m->len = pinned_plug (m) - pin_free_space_start; #ifdef SHORT_PLUGS m->allocation_context_start_region = pin_free_space_start; #endif //SHORT_PLUGS } #ifdef SHORT_PLUGS inline uint8_t*& pin_allocation_context_start_region (mark* m) { return m->allocation_context_start_region; } uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry) { uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info()); uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - (pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap))); //dprintf (1, ("detected a very short plug: %Ix before PP %Ix, pad %Ix", // old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); dprintf (1, ("EP: %Ix(%Ix), %Ix", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); return plug_start_in_saved; } inline void set_padding_in_expand (uint8_t* old_loc, BOOL set_padding_on_saved_p, mark* pinned_plug_entry) { if (set_padding_on_saved_p) { set_plug_padded (get_plug_start_in_saved (old_loc, pinned_plug_entry)); } else { set_plug_padded (old_loc); } } inline void clear_padding_in_expand (uint8_t* old_loc, BOOL set_padding_on_saved_p, mark* pinned_plug_entry) { if (set_padding_on_saved_p) { clear_plug_padded (get_plug_start_in_saved (old_loc, pinned_plug_entry)); } else { clear_plug_padded (old_loc); } } #endif //SHORT_PLUGS void gc_heap::reset_pinned_queue() { mark_stack_tos = 0; mark_stack_bos = 0; } void gc_heap::reset_pinned_queue_bos() { mark_stack_bos = 0; } // last_pinned_plug is only for asserting purpose. 
void gc_heap::merge_with_last_pinned_plug (uint8_t* last_pinned_plug, size_t plug_size) { if (last_pinned_plug) { mark& last_m = mark_stack_array[mark_stack_tos - 1]; assert (last_pinned_plug == last_m.first); if (last_m.saved_post_p) { last_m.saved_post_p = FALSE; dprintf (3, ("setting last plug %Ix post to false", last_m.first)); // We need to recover what the gap has overwritten. memcpy ((last_m.first + last_m.len - sizeof (plug_and_gap)), &(last_m.saved_post_plug), sizeof (gap_reloc_pair)); } last_m.len += plug_size; dprintf (3, ("recovered the last part of plug %Ix, setting its plug size to %Ix", last_m.first, last_m.len)); } } void gc_heap::set_allocator_next_pin (generation* gen) { dprintf (3, ("SANP: gen%d, ptr; %Ix, limit: %Ix", gen->gen_num, generation_allocation_pointer (gen), generation_allocation_limit (gen))); if (!(pinned_plug_que_empty_p())) { mark* oldest_entry = oldest_pin(); uint8_t* plug = pinned_plug (oldest_entry); if ((plug >= generation_allocation_pointer (gen)) && (plug < generation_allocation_limit (gen))) { #ifdef USE_REGIONS assert (region_of (generation_allocation_pointer (gen)) == region_of (generation_allocation_limit (gen) - 1)); #endif //USE_REGIONS generation_allocation_limit (gen) = pinned_plug (oldest_entry); dprintf (3, ("SANP: get next pin free space in gen%d for alloc: %Ix->%Ix(%Id)", gen->gen_num, generation_allocation_pointer (gen), generation_allocation_limit (gen), (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); } else assert (!((plug < generation_allocation_pointer (gen)) && (plug >= heap_segment_mem (generation_allocation_segment (gen))))); } } // After we set the info, we increase tos. void gc_heap::set_pinned_info (uint8_t* last_pinned_plug, size_t plug_len, generation* gen) { #ifndef _DEBUG UNREFERENCED_PARAMETER(last_pinned_plug); #endif //_DEBUG mark& m = mark_stack_array[mark_stack_tos]; assert (m.first == last_pinned_plug); m.len = plug_len; mark_stack_tos++; assert (gen != 0); // Why are we checking here? gen is never 0. if (gen != 0) { set_allocator_next_pin (gen); } } size_t gc_heap::deque_pinned_plug () { size_t m = mark_stack_bos; dprintf (3, ("deque: %Id->%Ix", mark_stack_bos, pinned_plug (pinned_plug_of (m)))); mark_stack_bos++; return m; } inline mark* gc_heap::pinned_plug_of (size_t bos) { return &mark_stack_array [ bos ]; } inline mark* gc_heap::oldest_pin () { return pinned_plug_of (mark_stack_bos); } inline BOOL gc_heap::pinned_plug_que_empty_p () { return (mark_stack_bos == mark_stack_tos); } inline mark* gc_heap::before_oldest_pin() { if (mark_stack_bos >= 1) return pinned_plug_of (mark_stack_bos-1); else return 0; } inline BOOL gc_heap::ephemeral_pointer_p (uint8_t* o) { #ifdef USE_REGIONS int gen_num = object_gennum ((uint8_t*)o); assert (gen_num >= 0); return (gen_num < max_generation); #else return ((o >= ephemeral_low) && (o < ephemeral_high)); #endif //USE_REGIONS } #ifdef USE_REGIONS // This assumes o is guaranteed to be in a region. inline bool gc_heap::is_in_condemned_gc (uint8_t* o) { assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)); int condemned_gen = settings.condemned_generation; if (condemned_gen < max_generation) { int gen = get_region_gen_num (o); if (gen > condemned_gen) { return false; } } return true; } // REGIONS TODO - // This method can be called by GCHeap::Promote/Relocate which means // it could be in the heap range but not actually in a valid region. // This would return true but find_object will return 0. 
But this // seems counter-intuitive so we should consider a better implementation. inline bool gc_heap::is_in_condemned (uint8_t* o) { if ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)) return is_in_condemned_gc (o); else return false; } inline bool gc_heap::should_check_brick_for_reloc (uint8_t* o) { assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)); int condemned_gen = settings.condemned_generation; if (condemned_gen < max_generation) { heap_segment* region = region_of (o); int gen = get_region_gen_num (region); if ((gen > condemned_gen) || (heap_segment_swept_in_plan (region))) { if (heap_segment_swept_in_plan (region)) { dprintf (4444, ("-Rsip %Ix", o)); } return false; } } else if (heap_segment_swept_in_plan (region_of (o))) { return false; } return true; } #endif //USE_REGIONS #ifdef MH_SC_MARK inline int& gc_heap::mark_stack_busy() { return g_mark_stack_busy [(heap_number+2)*HS_CACHE_LINE_SIZE/sizeof(int)]; } #endif //MH_SC_MARK void gc_heap::make_mark_stack (mark* arr) { reset_pinned_queue(); mark_stack_array = arr; mark_stack_array_length = MARK_STACK_INITIAL_LENGTH; #ifdef MH_SC_MARK mark_stack_busy() = 0; #endif //MH_SC_MARK } #ifdef BACKGROUND_GC inline size_t& gc_heap::bpromoted_bytes(int thread) { #ifdef MULTIPLE_HEAPS return g_bpromoted [thread*16]; #else //MULTIPLE_HEAPS UNREFERENCED_PARAMETER(thread); return g_bpromoted; #endif //MULTIPLE_HEAPS } void gc_heap::make_background_mark_stack (uint8_t** arr) { background_mark_stack_array = arr; background_mark_stack_array_length = MARK_STACK_INITIAL_LENGTH; background_mark_stack_tos = arr; } void gc_heap::make_c_mark_list (uint8_t** arr) { c_mark_list = arr; c_mark_list_index = 0; c_mark_list_length = 1 + (OS_PAGE_SIZE / MIN_OBJECT_SIZE); } #endif //BACKGROUND_GC #ifdef CARD_BUNDLE // The card bundle keeps track of groups of card words. static const size_t card_bundle_word_width = 32; // How do we express the fact that 32 bits (card_word_width) is one uint32_t? 
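// As a rough worked example (assuming GC_PAGE_SIZE is 4096; the real value comes from
// the GC headers): card_bundle_size = 4096 / (sizeof(uint32_t) * 32) = 32, i.e. each
// card bundle bit summarizes 32 card words, and cardw_card_bundle / card_bundle_cardw
// below simply divide / multiply by that constant.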
static const size_t card_bundle_size = (size_t)(GC_PAGE_SIZE / (sizeof(uint32_t)*card_bundle_word_width)); inline size_t card_bundle_word (size_t cardb) { return cardb / card_bundle_word_width; } inline uint32_t card_bundle_bit (size_t cardb) { return (uint32_t)(cardb % card_bundle_word_width); } size_t align_cardw_on_bundle (size_t cardw) { return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 )); } // Get the card bundle representing a card word size_t cardw_card_bundle (size_t cardw) { return cardw / card_bundle_size; } // Get the first card word in a card bundle size_t card_bundle_cardw (size_t cardb) { return cardb * card_bundle_size; } // Clear the specified card bundle void gc_heap::card_bundle_clear (size_t cardb) { uint32_t bit = (uint32_t)(1 << card_bundle_bit (cardb)); uint32_t* bundle = &card_bundle_table[card_bundle_word (cardb)]; #ifdef MULTIPLE_HEAPS // card bundles may straddle segments and heaps, thus bits may be cleared concurrently if ((*bundle & bit) != 0) { Interlocked::And (bundle, ~bit); } #else *bundle &= ~bit; #endif // check for races assert ((*bundle & bit) == 0); dprintf (2, ("Cleared card bundle %Ix [%Ix, %Ix[", cardb, (size_t)card_bundle_cardw (cardb), (size_t)card_bundle_cardw (cardb+1))); } inline void set_bundle_bits (uint32_t* bundle, uint32_t bits) { #ifdef MULTIPLE_HEAPS // card bundles may straddle segments and heaps, thus bits may be set concurrently if ((*bundle & bits) != bits) { Interlocked::Or (bundle, bits); } #else *bundle |= bits; #endif // check for races assert ((*bundle & bits) == bits); } void gc_heap::card_bundle_set (size_t cardb) { uint32_t bits = (1 << card_bundle_bit (cardb)); set_bundle_bits (&card_bundle_table [card_bundle_word (cardb)], bits); } // Set the card bundle bits between start_cardb and end_cardb void gc_heap::card_bundles_set (size_t start_cardb, size_t end_cardb) { if (start_cardb == end_cardb) { card_bundle_set(start_cardb); return; } size_t start_word = card_bundle_word (start_cardb); size_t end_word = card_bundle_word (end_cardb); if (start_word < end_word) { // Set the partial words uint32_t bits = highbits (~0u, card_bundle_bit (start_cardb)); set_bundle_bits (&card_bundle_table [start_word], bits); if (card_bundle_bit (end_cardb)) { bits = lowbits (~0u, card_bundle_bit (end_cardb)); set_bundle_bits (&card_bundle_table [end_word], bits); } // Set the full words for (size_t i = start_word + 1; i < end_word; i++) { card_bundle_table [i] = ~0u; } } else { uint32_t bits = (highbits (~0u, card_bundle_bit (start_cardb)) & lowbits (~0u, card_bundle_bit (end_cardb))); set_bundle_bits (&card_bundle_table [start_word], bits); } } // Indicates whether the specified bundle is set. 
BOOL gc_heap::card_bundle_set_p (size_t cardb) { return (card_bundle_table[card_bundle_word(cardb)] & (1 << card_bundle_bit (cardb))); } // Returns the size (in bytes) of a card bundle representing the region from 'from' to 'end' size_t size_card_bundle_of (uint8_t* from, uint8_t* end) { // Number of heap bytes represented by a card bundle word size_t cbw_span = card_size * card_word_width * card_bundle_size * card_bundle_word_width; // Align the start of the region down from = (uint8_t*)((size_t)from & ~(cbw_span - 1)); // Align the end of the region up end = (uint8_t*)((size_t)(end + (cbw_span - 1)) & ~(cbw_span - 1)); // Make sure they're really aligned assert (((size_t)from & (cbw_span - 1)) == 0); assert (((size_t)end & (cbw_span - 1)) == 0); return ((end - from) / cbw_span) * sizeof (uint32_t); } // Takes a pointer to a card bundle table and an address, and returns a pointer that represents // where a theoretical card bundle table that represents every address (starting from 0) would // start if the bundle word representing the address were to be located at the pointer passed in. // The returned 'translated' pointer makes it convenient/fast to calculate where the card bundle // for a given address is using a simple shift operation on the address. uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address) { // The number of bytes of heap memory represented by a card bundle word const size_t heap_bytes_for_bundle_word = card_size * card_word_width * card_bundle_size * card_bundle_word_width; // Each card bundle word is 32 bits return (uint32_t*)((uint8_t*)cb - (((size_t)lowest_address / heap_bytes_for_bundle_word) * sizeof (uint32_t))); } void gc_heap::enable_card_bundles () { if (can_use_write_watch_for_card_table() && (!card_bundles_enabled())) { dprintf (1, ("Enabling card bundles")); // We initially set all of the card bundles card_bundles_set (cardw_card_bundle (card_word (card_of (lowest_address))), cardw_card_bundle (align_cardw_on_bundle (card_word (card_of (highest_address))))); settings.card_bundles = TRUE; } } BOOL gc_heap::card_bundles_enabled () { return settings.card_bundles; } #endif // CARD_BUNDLE #if defined (HOST_64BIT) #define brick_size ((size_t)4096) #else #define brick_size ((size_t)2048) #endif //HOST_64BIT inline size_t gc_heap::brick_of (uint8_t* add) { return (size_t)(add - lowest_address) / brick_size; } inline uint8_t* gc_heap::brick_address (size_t brick) { return lowest_address + (brick_size * brick); } void gc_heap::clear_brick_table (uint8_t* from, uint8_t* end) { for (size_t i = brick_of (from);i < brick_of (end); i++) brick_table[i] = 0; } //codes for the brick entries: //entry == 0 -> not assigned //entry >0 offset is entry-1 //entry <0 jump back entry bricks inline void gc_heap::set_brick (size_t index, ptrdiff_t val) { if (val < -32767) { val = -32767; } assert (val < 32767); if (val >= 0) brick_table [index] = (short)val+1; else brick_table [index] = (short)val; } inline int gc_heap::get_brick_entry (size_t index) { #ifdef MULTIPLE_HEAPS return VolatileLoadWithoutBarrier(&brick_table [index]); #else return brick_table[index]; #endif } inline uint8_t* align_on_brick (uint8_t* add) { return (uint8_t*)((size_t)(add + brick_size - 1) & ~(brick_size - 1)); } inline uint8_t* align_lower_brick (uint8_t* add) { return (uint8_t*)(((size_t)add) & ~(brick_size - 1)); } size_t size_brick_of (uint8_t* from, uint8_t* end) { assert (((size_t)from & (brick_size-1)) == 0); assert (((size_t)end & (brick_size-1)) == 0); return ((end - from) / 
brick_size) * sizeof (short); } inline uint8_t* gc_heap::card_address (size_t card) { return (uint8_t*) (card_size * card); } inline size_t gc_heap::card_of ( uint8_t* object) { return (size_t)(object) / card_size; } inline uint8_t* align_on_card (uint8_t* add) { return (uint8_t*)((size_t)(add + card_size - 1) & ~(card_size - 1 )); } inline uint8_t* align_on_card_word (uint8_t* add) { return (uint8_t*) ((size_t)(add + (card_size*card_word_width)-1) & ~(card_size*card_word_width - 1)); } inline uint8_t* align_lower_card (uint8_t* add) { return (uint8_t*)((size_t)add & ~(card_size-1)); } inline void gc_heap::clear_card (size_t card) { card_table [card_word (card)] = (card_table [card_word (card)] & ~(1 << card_bit (card))); dprintf (3,("Cleared card %Ix [%Ix, %Ix[", card, (size_t)card_address (card), (size_t)card_address (card+1))); } inline void gc_heap::set_card (size_t card) { size_t word = card_word (card); card_table[word] = (card_table [word] | (1 << card_bit (card))); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Also set the card bundle that corresponds to the card size_t bundle_to_set = cardw_card_bundle(word); card_bundle_set(bundle_to_set); dprintf (3,("Set card %Ix [%Ix, %Ix[ and bundle %Ix", card, (size_t)card_address (card), (size_t)card_address (card+1), bundle_to_set)); #endif } inline BOOL gc_heap::card_set_p (size_t card) { return ( card_table [ card_word (card) ] & (1 << card_bit (card))); } // Returns the number of DWORDs in the card table that cover the // range of addresses [from, end[. size_t count_card_of (uint8_t* from, uint8_t* end) { return card_word (gcard_of (end - 1)) - card_word (gcard_of (from)) + 1; } // Returns the number of bytes to allocate for a card table // that covers the range of addresses [from, end[. size_t size_card_of (uint8_t* from, uint8_t* end) { return count_card_of (from, end) * sizeof(uint32_t); } // We don't store seg_mapping_table in card_table_info because there's only always one view. class card_table_info { public: unsigned recount; uint8_t* lowest_address; uint8_t* highest_address; short* brick_table; #ifdef CARD_BUNDLE uint32_t* card_bundle_table; #endif //CARD_BUNDLE // mark_array is always at the end of the data structure because we // want to be able to make one commit call for everything before it. 
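// (For reference, the order in which make_card_table lays things out after this header
// is: card table, brick table, card bundle table and software write watch table when
// present, seg mapping table, and finally the mark array; everything before the mark
// array is committed with one virtual_commit call, while the mark array is committed
// separately, per segment.)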
#ifdef BACKGROUND_GC uint32_t* mark_array; #endif //BACKGROUND_GC size_t size; uint32_t* next_card_table; }; //These are accessors on untranslated cardtable inline unsigned& card_table_refcount (uint32_t* c_table) { return *(unsigned*)((char*)c_table - sizeof (card_table_info)); } inline uint8_t*& card_table_lowest_address (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->lowest_address; } uint32_t* translate_card_table (uint32_t* ct) { return (uint32_t*)((uint8_t*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(uint32_t)); } inline uint8_t*& card_table_highest_address (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->highest_address; } inline short*& card_table_brick_table (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->brick_table; } #ifdef CARD_BUNDLE inline uint32_t*& card_table_card_bundle_table (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->card_bundle_table; } #endif //CARD_BUNDLE #ifdef BACKGROUND_GC inline uint32_t*& card_table_mark_array (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->mark_array; } #ifdef HOST_64BIT #define mark_bit_pitch ((size_t)16) #else #define mark_bit_pitch ((size_t)8) #endif // HOST_64BIT #define mark_word_width ((size_t)32) #define mark_word_size (mark_word_width * mark_bit_pitch) inline uint8_t* align_on_mark_bit (uint8_t* add) { return (uint8_t*)((size_t)(add + (mark_bit_pitch - 1)) & ~(mark_bit_pitch - 1)); } inline uint8_t* align_lower_mark_bit (uint8_t* add) { return (uint8_t*)((size_t)(add) & ~(mark_bit_pitch - 1)); } inline BOOL is_aligned_on_mark_word (uint8_t* add) { return ((size_t)add == ((size_t)(add) & ~(mark_word_size - 1))); } inline uint8_t* align_on_mark_word (uint8_t* add) { return (uint8_t*)((size_t)(add + mark_word_size - 1) & ~(mark_word_size - 1)); } inline uint8_t* align_lower_mark_word (uint8_t* add) { return (uint8_t*)((size_t)(add) & ~(mark_word_size - 1)); } inline size_t mark_bit_of (uint8_t* add) { return ((size_t)add / mark_bit_pitch); } inline unsigned int mark_bit_bit (size_t mark_bit) { return (unsigned int)(mark_bit % mark_word_width); } inline size_t mark_bit_word (size_t mark_bit) { return (mark_bit / mark_word_width); } inline size_t mark_word_of (uint8_t* add) { return ((size_t)add) / mark_word_size; } uint8_t* mark_word_address (size_t wd) { return (uint8_t*)(wd*mark_word_size); } uint8_t* mark_bit_address (size_t mark_bit) { return (uint8_t*)(mark_bit*mark_bit_pitch); } inline size_t mark_bit_bit_of (uint8_t* add) { return (((size_t)add / mark_bit_pitch) % mark_word_width); } inline unsigned int gc_heap::mark_array_marked(uint8_t* add) { return mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add)); } inline BOOL gc_heap::is_mark_bit_set (uint8_t* add) { return (mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add))); } inline void gc_heap::mark_array_set_marked (uint8_t* add) { size_t index = mark_word_of (add); uint32_t val = (1 << mark_bit_bit_of (add)); #ifdef MULTIPLE_HEAPS Interlocked::Or (&(mark_array [index]), val); #else mark_array [index] |= val; #endif } inline void gc_heap::mark_array_clear_marked (uint8_t* add) { mark_array [mark_word_of (add)] &= ~(1 << mark_bit_bit_of (add)); } size_t size_mark_array_of (uint8_t* from, uint8_t* end) { assert (((size_t)from & ((mark_word_size)-1)) == 0); assert (((size_t)end & 
((mark_word_size)-1)) == 0); return sizeof (uint32_t)*(((end - from) / mark_word_size)); } //In order to eliminate the lowest_address in the mark array //computations (mark_word_of, etc) mark_array is offset // according to the lowest_address. uint32_t* translate_mark_array (uint32_t* ma) { return (uint32_t*)((uint8_t*)ma - size_mark_array_of (0, g_gc_lowest_address)); } // from and end must be page aligned addresses. void gc_heap::clear_mark_array (uint8_t* from, uint8_t* end, BOOL check_only/*=TRUE*/ #ifdef FEATURE_BASICFREEZE , BOOL read_only/*=FALSE*/ #endif // FEATURE_BASICFREEZE ) { if(!gc_can_use_concurrent) return; #ifdef FEATURE_BASICFREEZE if (!read_only) #endif // FEATURE_BASICFREEZE { assert (from == align_on_mark_word (from)); } assert (end == align_on_mark_word (end)); #ifdef BACKGROUND_GC uint8_t* current_lowest_address = background_saved_lowest_address; uint8_t* current_highest_address = background_saved_highest_address; #else uint8_t* current_lowest_address = lowest_address; uint8_t* current_highest_address = highest_address; #endif //BACKGROUND_GC //there is a possibility of the addresses to be //outside of the covered range because of a newly allocated //large object segment if ((end <= current_highest_address) && (from >= current_lowest_address)) { size_t beg_word = mark_word_of (align_on_mark_word (from)); //align end word to make sure to cover the address size_t end_word = mark_word_of (align_on_mark_word (end)); dprintf (3, ("Calling clearing mark array [%Ix, %Ix[ for addresses [%Ix, %Ix[(%s)", (size_t)mark_word_address (beg_word), (size_t)mark_word_address (end_word), (size_t)from, (size_t)end, (check_only ? "check_only" : "clear"))); if (!check_only) { uint8_t* op = from; while (op < mark_word_address (beg_word)) { mark_array_clear_marked (op); op += mark_bit_pitch; } memset (&mark_array[beg_word], 0, (end_word - beg_word)*sizeof (uint32_t)); } #ifdef _DEBUG else { //Beware, it is assumed that the mark array word straddling //start has been cleared before //verify that the array is empty. 
size_t markw = mark_word_of (align_on_mark_word (from)); size_t markw_end = mark_word_of (align_on_mark_word (end)); while (markw < markw_end) { assert (!(mark_array [markw])); markw++; } uint8_t* p = mark_word_address (markw_end); while (p < end) { assert (!(mark_array_marked (p))); p++; } } #endif //_DEBUG } } #endif //BACKGROUND_GC //These work on untranslated card tables inline uint32_t*& card_table_next (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->next_card_table; } inline size_t& card_table_size (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->size; } void own_card_table (uint32_t* c_table) { card_table_refcount (c_table) += 1; } void destroy_card_table (uint32_t* c_table); void delete_next_card_table (uint32_t* c_table) { uint32_t* n_table = card_table_next (c_table); if (n_table) { if (card_table_next (n_table)) { delete_next_card_table (n_table); } if (card_table_refcount (n_table) == 0) { destroy_card_table (n_table); card_table_next (c_table) = 0; } } } void release_card_table (uint32_t* c_table) { assert (card_table_refcount (c_table) >0); card_table_refcount (c_table) -= 1; if (card_table_refcount (c_table) == 0) { delete_next_card_table (c_table); if (card_table_next (c_table) == 0) { destroy_card_table (c_table); // sever the link from the parent if (&g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))] == c_table) { g_gc_card_table = 0; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES g_gc_card_bundle_table = 0; #endif #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP SoftwareWriteWatch::StaticClose(); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } else { uint32_t* p_table = &g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))]; if (p_table) { while (p_table && (card_table_next (p_table) != c_table)) p_table = card_table_next (p_table); card_table_next (p_table) = 0; } } } } } void destroy_card_table (uint32_t* c_table) { // delete (uint32_t*)&card_table_refcount(c_table); GCToOSInterface::VirtualRelease (&card_table_refcount(c_table), card_table_size(c_table)); dprintf (2, ("Table Virtual Free : %Ix", (size_t)&card_table_refcount(c_table))); } uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) { assert (g_gc_lowest_address == start); assert (g_gc_highest_address == end); uint32_t virtual_reserve_flags = VirtualReserveFlags::None; size_t bs = size_brick_of (start, end); size_t cs = size_card_of (start, end); #ifdef BACKGROUND_GC size_t ms = (gc_can_use_concurrent ? size_mark_array_of (start, end) : 0); #else size_t ms = 0; #endif //BACKGROUND_GC size_t cb = 0; #ifdef CARD_BUNDLE if (can_use_write_watch_for_card_table()) { cb = size_card_bundle_of (g_gc_lowest_address, g_gc_highest_address); #ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // If we're not manually managing the card bundles, we will need to use OS write // watch APIs over this region to track changes. 
virtual_reserve_flags |= VirtualReserveFlags::WriteWatch; #endif } #endif //CARD_BUNDLE size_t wws = 0; #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP size_t sw_ww_table_offset = 0; if (gc_can_use_concurrent) { size_t sw_ww_size_before_table = sizeof(card_table_info) + cs + bs + cb; sw_ww_table_offset = SoftwareWriteWatch::GetTableStartByteOffset(sw_ww_size_before_table); wws = sw_ww_table_offset - sw_ww_size_before_table + SoftwareWriteWatch::GetTableByteSize(start, end); } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP size_t st = size_seg_mapping_table_of (g_gc_lowest_address, g_gc_highest_address); size_t st_table_offset = sizeof(card_table_info) + cs + bs + cb + wws; size_t st_table_offset_aligned = align_for_seg_mapping_table (st_table_offset); st += (st_table_offset_aligned - st_table_offset); // it is impossible for alloc_size to overflow due bounds on each of // its components. size_t alloc_size = sizeof (uint8_t)*(sizeof(card_table_info) + cs + bs + cb + wws + st + ms); uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (alloc_size, 0, virtual_reserve_flags); if (!mem) return 0; dprintf (2, ("Init - Card table alloc for %Id bytes: [%Ix, %Ix[", alloc_size, (size_t)mem, (size_t)(mem+alloc_size))); // mark array will be committed separately (per segment). size_t commit_size = alloc_size - ms; if (!virtual_commit (mem, commit_size, gc_oh_num::none)) { dprintf (1, ("Card table commit failed")); GCToOSInterface::VirtualRelease (mem, alloc_size); return 0; } // initialize the ref count uint32_t* ct = (uint32_t*)(mem+sizeof (card_table_info)); card_table_refcount (ct) = 0; card_table_lowest_address (ct) = start; card_table_highest_address (ct) = end; card_table_brick_table (ct) = (short*)((uint8_t*)ct + cs); card_table_size (ct) = alloc_size; card_table_next (ct) = 0; #ifdef CARD_BUNDLE card_table_card_bundle_table (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), g_gc_lowest_address); #endif #endif //CARD_BUNDLE #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP if (gc_can_use_concurrent) { SoftwareWriteWatch::InitializeUntranslatedTable(mem + sw_ww_table_offset, start); } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP seg_mapping_table = (seg_mapping*)(mem + st_table_offset_aligned); seg_mapping_table = (seg_mapping*)((uint8_t*)seg_mapping_table - size_seg_mapping_table_of (0, (align_lower_segment (g_gc_lowest_address)))); #ifdef BACKGROUND_GC if (gc_can_use_concurrent) card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws + st); else card_table_mark_array (ct) = NULL; #endif //BACKGROUND_GC return translate_card_table(ct); } void gc_heap::set_fgm_result (failure_get_memory f, size_t s, BOOL loh_p) { #ifdef MULTIPLE_HEAPS for (int hn = 0; hn < gc_heap::n_heaps; hn++) { gc_heap* hp = gc_heap::g_heaps [hn]; hp->fgm_result.set_fgm (f, s, loh_p); } #else //MULTIPLE_HEAPS fgm_result.set_fgm (f, s, loh_p); #endif //MULTIPLE_HEAPS } //returns 0 for success, -1 otherwise // We are doing all the decommitting here because we want to make sure we have // enough memory to do so - if we do this during copy_brick_card_table and // and fail to decommit it would make the failure case very complicated to // handle. This way we can waste some decommit if we call this multiple // times before the next FGC but it's easier to handle the failure case. 
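// Note on how the new range is picked (see the code below): the covered span is roughly
// doubled on each grow (on 64-bit, grown by 100GB once it already exceeds 200GB) and
// clamped to the OS virtual memory limit, presumably so that repeated small segment
// allocations don't keep forcing the tables to be regrown.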
int gc_heap::grow_brick_card_tables (uint8_t* start, uint8_t* end, size_t size, heap_segment* new_seg, gc_heap* hp, BOOL uoh_p) { uint8_t* la = g_gc_lowest_address; uint8_t* ha = g_gc_highest_address; uint8_t* saved_g_lowest_address = min (start, g_gc_lowest_address); uint8_t* saved_g_highest_address = max (end, g_gc_highest_address); seg_mapping* new_seg_mapping_table = nullptr; #ifdef BACKGROUND_GC // This value is only for logging purpose - it's not necessarily exactly what we // would commit for mark array but close enough for diagnostics purpose. size_t logging_ma_commit_size = size_mark_array_of (0, (uint8_t*)size); #endif //BACKGROUND_GC // See if the address is already covered if ((la != saved_g_lowest_address ) || (ha != saved_g_highest_address)) { { //modify the highest address so the span covered //is twice the previous one. uint8_t* top = (uint8_t*)0 + Align (GCToOSInterface::GetVirtualMemoryLimit()); // On non-Windows systems, we get only an approximate value that can possibly be // slightly lower than the saved_g_highest_address. // In such case, we set the top to the saved_g_highest_address so that the // card and brick tables always cover the whole new range. if (top < saved_g_highest_address) { top = saved_g_highest_address; } size_t ps = ha-la; #ifdef HOST_64BIT if (ps > (uint64_t)200*1024*1024*1024) ps += (uint64_t)100*1024*1024*1024; else #endif // HOST_64BIT ps *= 2; if (saved_g_lowest_address < g_gc_lowest_address) { if (ps > (size_t)g_gc_lowest_address) saved_g_lowest_address = (uint8_t*)(size_t)OS_PAGE_SIZE; else { assert (((size_t)g_gc_lowest_address - ps) >= OS_PAGE_SIZE); saved_g_lowest_address = min (saved_g_lowest_address, (g_gc_lowest_address - ps)); } } if (saved_g_highest_address > g_gc_highest_address) { saved_g_highest_address = max ((saved_g_lowest_address + ps), saved_g_highest_address); if (saved_g_highest_address > top) saved_g_highest_address = top; } } dprintf (GC_TABLE_LOG, ("Growing card table [%Ix, %Ix[", (size_t)saved_g_lowest_address, (size_t)saved_g_highest_address)); bool write_barrier_updated = false; uint32_t virtual_reserve_flags = VirtualReserveFlags::None; uint32_t* saved_g_card_table = g_gc_card_table; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES uint32_t* saved_g_card_bundle_table = g_gc_card_bundle_table; #endif uint32_t* ct = 0; uint32_t* translated_ct = 0; short* bt = 0; size_t cs = size_card_of (saved_g_lowest_address, saved_g_highest_address); size_t bs = size_brick_of (saved_g_lowest_address, saved_g_highest_address); #ifdef BACKGROUND_GC size_t ms = (gc_heap::gc_can_use_concurrent ? size_mark_array_of (saved_g_lowest_address, saved_g_highest_address) : 0); #else size_t ms = 0; #endif //BACKGROUND_GC size_t cb = 0; #ifdef CARD_BUNDLE if (can_use_write_watch_for_card_table()) { cb = size_card_bundle_of (saved_g_lowest_address, saved_g_highest_address); #ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // If we're not manually managing the card bundles, we will need to use OS write // watch APIs over this region to track changes. 
virtual_reserve_flags |= VirtualReserveFlags::WriteWatch; #endif } #endif //CARD_BUNDLE size_t wws = 0; #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP size_t sw_ww_table_offset = 0; if (gc_can_use_concurrent) { size_t sw_ww_size_before_table = sizeof(card_table_info) + cs + bs + cb; sw_ww_table_offset = SoftwareWriteWatch::GetTableStartByteOffset(sw_ww_size_before_table); wws = sw_ww_table_offset - sw_ww_size_before_table + SoftwareWriteWatch::GetTableByteSize(saved_g_lowest_address, saved_g_highest_address); } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP size_t st = size_seg_mapping_table_of (saved_g_lowest_address, saved_g_highest_address); size_t st_table_offset = sizeof(card_table_info) + cs + bs + cb + wws; size_t st_table_offset_aligned = align_for_seg_mapping_table (st_table_offset); st += (st_table_offset_aligned - st_table_offset); // it is impossible for alloc_size to overflow due bounds on each of // its components. size_t alloc_size = sizeof (uint8_t)*(sizeof(card_table_info) + cs + bs + cb + wws + st + ms); dprintf (GC_TABLE_LOG, ("card table: %Id; brick table: %Id; card bundle: %Id; sw ww table: %Id; seg table: %Id; mark array: %Id", cs, bs, cb, wws, st, ms)); uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (alloc_size, 0, virtual_reserve_flags); if (!mem) { set_fgm_result (fgm_grow_table, alloc_size, uoh_p); goto fail; } dprintf (GC_TABLE_LOG, ("Table alloc for %Id bytes: [%Ix, %Ix[", alloc_size, (size_t)mem, (size_t)((uint8_t*)mem+alloc_size))); { // mark array will be committed separately (per segment). size_t commit_size = alloc_size - ms; if (!virtual_commit (mem, commit_size, gc_oh_num::none)) { dprintf (GC_TABLE_LOG, ("Table commit failed")); set_fgm_result (fgm_commit_table, commit_size, uoh_p); goto fail; } } ct = (uint32_t*)(mem + sizeof (card_table_info)); card_table_refcount (ct) = 0; card_table_lowest_address (ct) = saved_g_lowest_address; card_table_highest_address (ct) = saved_g_highest_address; card_table_next (ct) = &g_gc_card_table[card_word (gcard_of (la))]; //clear the card table /* memclr ((uint8_t*)ct, (((saved_g_highest_address - saved_g_lowest_address)*sizeof (uint32_t) / (card_size * card_word_width)) + sizeof (uint32_t))); */ bt = (short*)((uint8_t*)ct + cs); // No initialization needed, will be done in copy_brick_card card_table_brick_table (ct) = bt; #ifdef CARD_BUNDLE card_table_card_bundle_table (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs); //set all bundle to look at all of the cards memset(card_table_card_bundle_table (ct), 0xFF, cb); #endif //CARD_BUNDLE new_seg_mapping_table = (seg_mapping*)(mem + st_table_offset_aligned); new_seg_mapping_table = (seg_mapping*)((uint8_t*)new_seg_mapping_table - size_seg_mapping_table_of (0, (align_lower_segment (saved_g_lowest_address)))); memcpy(&new_seg_mapping_table[seg_mapping_word_of(g_gc_lowest_address)], &seg_mapping_table[seg_mapping_word_of(g_gc_lowest_address)], size_seg_mapping_table_of(g_gc_lowest_address, g_gc_highest_address)); // new_seg_mapping_table gets assigned to seg_mapping_table at the bottom of this function, // not here. The reason for this is that, if we fail at mark array committing (OOM) and we've // already switched seg_mapping_table to point to the new mapping table, we'll decommit it and // run into trouble. By not assigning here, we're making sure that we will not change seg_mapping_table // if an OOM occurs. 
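// Summary of the failure handling below: none of the globals (g_gc_card_table,
// g_gc_card_bundle_table, seg_mapping_table, g_gc_lowest_address, g_gc_highest_address) are
// switched to the new blob until every step that can fail - committing the tables and, for
// background GC, committing the needed mark array ranges - has succeeded. The fail: path just
// releases the new reservation and asserts that the globals still reference the old tables.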
#ifdef BACKGROUND_GC if(gc_can_use_concurrent) card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws + st); else card_table_mark_array (ct) = NULL; #endif //BACKGROUND_GC translated_ct = translate_card_table (ct); dprintf (GC_TABLE_LOG, ("card table: %Ix(translated: %Ix), seg map: %Ix, mark array: %Ix", (size_t)ct, (size_t)translated_ct, (size_t)new_seg_mapping_table, (size_t)card_table_mark_array (ct))); #ifdef BACKGROUND_GC if (hp->is_bgc_in_progress()) { dprintf (GC_TABLE_LOG, ("new low: %Ix, new high: %Ix, latest mark array is %Ix(translate: %Ix)", saved_g_lowest_address, saved_g_highest_address, card_table_mark_array (ct), translate_mark_array (card_table_mark_array (ct)))); uint32_t* new_mark_array = (uint32_t*)((uint8_t*)card_table_mark_array (ct) - size_mark_array_of (0, saved_g_lowest_address)); if (!commit_new_mark_array_global (new_mark_array)) { dprintf (GC_TABLE_LOG, ("failed to commit portions in the mark array for existing segments")); set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p); goto fail; } if (!commit_mark_array_new_seg (hp, new_seg, translated_ct, saved_g_lowest_address)) { dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new seg")); set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p); goto fail; } } else { clear_commit_flag_global(); } #endif //BACKGROUND_GC #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP if (gc_can_use_concurrent) { // The current design of software write watch requires that the runtime is suspended during resize. Suspending // on resize is preferred because it is a far less frequent operation than GetWriteWatch() / ResetWriteWatch(). // Suspending here allows copying dirty state from the old table into the new table, and not have to merge old // table info lazily as done for card tables. // Either this thread was the thread that did the suspension which means we are suspended; or this is called // from a GC thread which means we are in a blocking GC and also suspended. bool is_runtime_suspended = GCToEEInterface::IsGCThread(); if (!is_runtime_suspended) { // Note on points where the runtime is suspended anywhere in this function. Upon an attempt to suspend the // runtime, a different thread may suspend first, causing this thread to block at the point of the suspend call. // So, at any suspend point, externally visible state needs to be consistent, as code that depends on that state // may run while this thread is blocked. This includes updates to g_gc_card_table, g_gc_lowest_address, and // g_gc_highest_address. suspend_EE(); } g_gc_card_table = translated_ct; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), saved_g_lowest_address); #endif SoftwareWriteWatch::SetResizedUntranslatedTable( mem + sw_ww_table_offset, saved_g_lowest_address, saved_g_highest_address); seg_mapping_table = new_seg_mapping_table; // Since the runtime is already suspended, update the write barrier here as well. // This passes a bool telling whether we need to switch to the post // grow version of the write barrier. This test tells us if the new // segment was allocated at a lower address than the old, requiring // that we start doing an upper bounds check in the write barrier. 
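// Concretely: la captured the old g_gc_lowest_address at the top of this function, so
// (la != saved_g_lowest_address) is true exactly when the covered range grew downward -
// the case described above that requires the upper bounds check.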
g_gc_lowest_address = saved_g_lowest_address; g_gc_highest_address = saved_g_highest_address; stomp_write_barrier_resize(true, la != saved_g_lowest_address); write_barrier_updated = true; if (!is_runtime_suspended) { restart_EE(); } } else #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP { g_gc_card_table = translated_ct; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), saved_g_lowest_address); #endif } if (!write_barrier_updated) { seg_mapping_table = new_seg_mapping_table; GCToOSInterface::FlushProcessWriteBuffers(); g_gc_lowest_address = saved_g_lowest_address; g_gc_highest_address = saved_g_highest_address; // This passes a bool telling whether we need to switch to the post // grow version of the write barrier. This test tells us if the new // segment was allocated at a lower address than the old, requiring // that we start doing an upper bounds check in the write barrier. // This will also suspend the runtime if the write barrier type needs // to be changed, so we are doing this after all global state has // been updated. See the comment above suspend_EE() above for more // info. stomp_write_barrier_resize(GCToEEInterface::IsGCThread(), la != saved_g_lowest_address); } return 0; fail: if (mem) { assert(g_gc_card_table == saved_g_card_table); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES assert(g_gc_card_bundle_table == saved_g_card_bundle_table); #endif if (!GCToOSInterface::VirtualRelease (mem, alloc_size)) { dprintf (GC_TABLE_LOG, ("GCToOSInterface::VirtualRelease failed")); assert (!"release failed"); } } return -1; } else { #ifdef BACKGROUND_GC if (hp->is_bgc_in_progress()) { dprintf (GC_TABLE_LOG, ("in range new seg %Ix, mark_array is %Ix", new_seg, hp->mark_array)); if (!commit_mark_array_new_seg (hp, new_seg)) { dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new seg in range")); set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p); return -1; } } #endif //BACKGROUND_GC } return 0; } //copy all of the arrays managed by the card table for a page aligned range void gc_heap::copy_brick_card_range (uint8_t* la, uint32_t* old_card_table, short* old_brick_table, uint8_t* start, uint8_t* end) { ptrdiff_t brick_offset = brick_of (start) - brick_of (la); dprintf (2, ("copying tables for range [%Ix %Ix[", (size_t)start, (size_t)end)); // copy brick table short* brick_start = &brick_table [brick_of (start)]; if (old_brick_table) { // segments are always on page boundaries memcpy (brick_start, &old_brick_table[brick_offset], size_brick_of (start, end)); } uint32_t* old_ct = &old_card_table[card_word (card_of (la))]; if (gc_heap::background_running_p()) { uint32_t* old_mark_array = card_table_mark_array (old_ct); // We don't need to go through all the card tables here because // we only need to copy from the GC version of the mark array - when we // mark (even in allocate_uoh_object) we always use that mark array. 
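// The copy below is clamped to the intersection of [start, end) with
// [background_saved_lowest_address, background_saved_highest_address) - only mark words
// that the background GC could actually have set need to be carried over.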
if ((card_table_highest_address (old_ct) >= start) && (card_table_lowest_address (old_ct) <= end)) { if ((background_saved_highest_address >= start) && (background_saved_lowest_address <= end)) { //copy the mark bits // segments are always on page boundaries uint8_t* m_start = max (background_saved_lowest_address, start); uint8_t* m_end = min (background_saved_highest_address, end); memcpy (&mark_array[mark_word_of (m_start)], &old_mark_array[mark_word_of (m_start) - mark_word_of (la)], size_mark_array_of (m_start, m_end)); } } else { //only large segments can be out of range assert (old_brick_table == 0); } } // n way merge with all of the card table ever used in between uint32_t* ct = card_table_next (&card_table[card_word (card_of(lowest_address))]); assert (ct); while (card_table_next (old_ct) != ct) { //copy if old card table contained [start, end[ if ((card_table_highest_address (ct) >= end) && (card_table_lowest_address (ct) <= start)) { // or the card_tables size_t start_word = card_word (card_of (start)); uint32_t* dest = &card_table[start_word]; uint32_t* src = &((translate_card_table (ct))[start_word]); ptrdiff_t count = count_card_of (start, end); for (int x = 0; x < count; x++) { *dest |= *src; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES if (*src != 0) { card_bundle_set(cardw_card_bundle(start_word+x)); } #endif dest++; src++; } } ct = card_table_next (ct); } } void gc_heap::copy_brick_card_table() { uint32_t* old_card_table = card_table; short* old_brick_table = brick_table; uint8_t* la = lowest_address; #ifdef _DEBUG uint8_t* ha = highest_address; assert (la == card_table_lowest_address (&old_card_table[card_word (card_of (la))])); assert (ha == card_table_highest_address (&old_card_table[card_word (card_of (la))])); #endif //_DEBUG /* todo: Need a global lock for this */ uint32_t* ct = &g_gc_card_table[card_word (gcard_of (g_gc_lowest_address))]; own_card_table (ct); card_table = translate_card_table (ct); /* End of global lock */ highest_address = card_table_highest_address (ct); lowest_address = card_table_lowest_address (ct); brick_table = card_table_brick_table (ct); #ifdef BACKGROUND_GC if (gc_can_use_concurrent) { mark_array = translate_mark_array (card_table_mark_array (ct)); assert (mark_word_of (g_gc_highest_address) == mark_word_of (align_on_mark_word (g_gc_highest_address))); } else mark_array = NULL; #endif //BACKGROUND_GC #ifdef CARD_BUNDLE card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct), g_gc_lowest_address); // Ensure that the word that represents g_gc_lowest_address in the translated table is located at the // start of the untranslated table. assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_gc_lowest_address))))] == card_table_card_bundle_table (ct)); //set the card table if we are in a heap growth scenario if (card_bundles_enabled()) { card_bundles_set (cardw_card_bundle (card_word (card_of (lowest_address))), cardw_card_bundle (align_cardw_on_bundle (card_word (card_of (highest_address))))); } //check if we need to turn on card_bundles. 
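// The threshold below turns card bundles on only when the total reserved memory is large:
// MH_TH_CARD_BUNDLE per heap for server GC (e.g. 8 heaps -> 8 * MH_TH_CARD_BUNDLE) and
// SH_TH_CARD_BUNDLE for workstation. The comparison is done in 64-bit arithmetic to avoid
// overflow on 32-bit hosts, as the comments below note.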
#ifdef MULTIPLE_HEAPS // use INT64 arithmetic here because of possible overflow on 32p uint64_t th = (uint64_t)MH_TH_CARD_BUNDLE*gc_heap::n_heaps; #else // use INT64 arithmetic here because of possible overflow on 32p uint64_t th = (uint64_t)SH_TH_CARD_BUNDLE; #endif //MULTIPLE_HEAPS if (reserved_memory >= th) { enable_card_bundles(); } #endif //CARD_BUNDLE // for each of the segments and heaps, copy the brick table and // or the card table for (int i = get_start_generation_index(); i < total_generation_count; i++) { heap_segment* seg = generation_start_segment (generation_of (i)); while (seg) { if (heap_segment_read_only_p (seg) && !heap_segment_in_range_p (seg)) { //check if it became in range if ((heap_segment_reserved (seg) > lowest_address) && (heap_segment_mem (seg) < highest_address)) { set_ro_segment_in_range (seg); } } else { uint8_t* end = align_on_page (heap_segment_allocated (seg)); copy_brick_card_range (la, old_card_table, (i < uoh_start_generation) ? old_brick_table : NULL, align_lower_page (heap_segment_mem (seg)), end); } seg = heap_segment_next (seg); } } release_card_table (&old_card_table[card_word (card_of(la))]); } #ifdef FEATURE_BASICFREEZE BOOL gc_heap::insert_ro_segment (heap_segment* seg) { #ifdef FEATURE_EVENT_TRACE if (!use_frozen_segments_p) use_frozen_segments_p = true; #endif //FEATURE_EVENT_TRACE enter_spin_lock (&gc_heap::gc_lock); if (!gc_heap::seg_table->ensure_space_for_insert () || (is_bgc_in_progress() && !commit_mark_array_new_seg(__this, seg))) { leave_spin_lock(&gc_heap::gc_lock); return FALSE; } //insert at the head of the segment list generation* gen2 = generation_of (max_generation); heap_segment* oldhead = generation_start_segment (gen2); heap_segment_next (seg) = oldhead; generation_start_segment (gen2) = seg; #ifdef USE_REGIONS dprintf (REGIONS_LOG, ("setting gen2 start seg to %Ix(%Ix)->%Ix", (size_t)seg, heap_segment_mem (seg), heap_segment_mem (oldhead))); if (generation_tail_ro_region (gen2) == 0) { dprintf (REGIONS_LOG, ("setting gen2 tail ro -> %Ix", heap_segment_mem (seg))); generation_tail_ro_region (gen2) = seg; } #endif //USE_REGIONS seg_table->insert (heap_segment_mem(seg), (size_t)seg); seg_mapping_table_add_ro_segment (seg); if ((heap_segment_reserved (seg) > lowest_address) && (heap_segment_mem (seg) < highest_address)) { set_ro_segment_in_range (seg); } FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(seg), (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)), gc_etw_segment_read_only_heap); leave_spin_lock (&gc_heap::gc_lock); return TRUE; } // No one is calling this function right now. If this is getting called we need // to take care of decommitting the mark array for it - we will need to remember // which portion of the mark array was committed and only decommit that. void gc_heap::remove_ro_segment (heap_segment* seg) { //clear the mark bits so a new segment allocated in its place will have a clear mark bits #ifdef BACKGROUND_GC if (gc_can_use_concurrent) { clear_mark_array (align_lower_mark_word (max (heap_segment_mem (seg), lowest_address)), align_on_card_word (min (heap_segment_allocated (seg), highest_address)), false); // read_only segments need the mark clear } #endif //BACKGROUND_GC enter_spin_lock (&gc_heap::gc_lock); seg_table->remove ((uint8_t*)seg); seg_mapping_table_remove_ro_segment (seg); // Locate segment (and previous segment) in the list. 
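// (What follows is a standard singly linked list removal: walk until curr_seg == seg while
// remembering prev_seg, then patch either prev_seg's next pointer or, if seg was the head,
// the generation's start segment.)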
generation* gen2 = generation_of (max_generation); #ifdef USE_REGIONS if (generation_tail_ro_region (gen2) == seg) { generation_tail_ro_region (gen2) = 0; } #endif //USE_REGIONS heap_segment* curr_seg = generation_start_segment (gen2); heap_segment* prev_seg = NULL; while (curr_seg && curr_seg != seg) { prev_seg = curr_seg; curr_seg = heap_segment_next (curr_seg); } assert (curr_seg == seg); // Patch previous segment (or list head if there is none) to skip the removed segment. if (prev_seg) heap_segment_next (prev_seg) = heap_segment_next (curr_seg); else generation_start_segment (gen2) = heap_segment_next (curr_seg); leave_spin_lock (&gc_heap::gc_lock); } #endif //FEATURE_BASICFREEZE BOOL gc_heap::set_ro_segment_in_range (heap_segment* seg) { seg->flags |= heap_segment_flags_inrange; ro_segments_in_range = TRUE; return TRUE; } uint8_t** make_mark_list (size_t size) { uint8_t** mark_list = new (nothrow) uint8_t* [size]; return mark_list; } #define swap(a,b){uint8_t* t; t = a; a = b; b = t;} void verify_qsort_array (uint8_t* *low, uint8_t* *high) { uint8_t **i = 0; for (i = low+1; i <= high; i++) { if (*i < *(i-1)) { FATAL_GC_ERROR(); } } } #ifndef USE_INTROSORT void qsort1( uint8_t* *low, uint8_t* *high, unsigned int depth) { if (((low + 16) >= high) || (depth > 100)) { //insertion sort uint8_t **i, **j; for (i = low+1; i <= high; i++) { uint8_t* val = *i; for (j=i;j >low && val<*(j-1);j--) { *j=*(j-1); } *j=val; } } else { uint8_t *pivot, **left, **right; //sort low middle and high if (*(low+((high-low)/2)) < *low) swap (*(low+((high-low)/2)), *low); if (*high < *low) swap (*low, *high); if (*high < *(low+((high-low)/2))) swap (*(low+((high-low)/2)), *high); swap (*(low+((high-low)/2)), *(high-1)); pivot = *(high-1); left = low; right = high-1; while (1) { while (*(--right) > pivot); while (*(++left) < pivot); if (left < right) { swap(*left, *right); } else break; } swap (*left, *(high-1)); qsort1(low, left-1, depth+1); qsort1(left+1, high, depth+1); } } #endif //USE_INTROSORT void rqsort1( uint8_t* *low, uint8_t* *high) { if ((low + 16) >= high) { //insertion sort uint8_t **i, **j; for (i = low+1; i <= high; i++) { uint8_t* val = *i; for (j=i;j >low && val>*(j-1);j--) { *j=*(j-1); } *j=val; } } else { uint8_t *pivot, **left, **right; //sort low middle and high if (*(low+((high-low)/2)) > *low) swap (*(low+((high-low)/2)), *low); if (*high > *low) swap (*low, *high); if (*high > *(low+((high-low)/2))) swap (*(low+((high-low)/2)), *high); swap (*(low+((high-low)/2)), *(high-1)); pivot = *(high-1); left = low; right = high-1; while (1) { while (*(--right) < pivot); while (*(++left) > pivot); if (left < right) { swap(*left, *right); } else break; } swap (*left, *(high-1)); rqsort1(low, left-1); rqsort1(left+1, high); } } // vxsort uses introsort as a fallback if the AVX2 instruction set is not supported #if defined(USE_INTROSORT) || defined(USE_VXSORT) class introsort { private: static const int size_threshold = 64; static const int max_depth = 100; inline static void swap_elements(uint8_t** i,uint8_t** j) { uint8_t* t=*i; *i=*j; *j=t; } public: static void sort (uint8_t** begin, uint8_t** end, int ignored) { ignored = 0; introsort_loop (begin, end, max_depth); insertionsort (begin, end); } private: static void introsort_loop (uint8_t** lo, uint8_t** hi, int depth_limit) { while (hi-lo >= size_threshold) { if (depth_limit == 0) { heapsort (lo, hi); return; } uint8_t** p=median_partition (lo, hi); depth_limit=depth_limit-1; introsort_loop (p, hi, depth_limit); hi=p-1; } } static uint8_t** 
median_partition (uint8_t** low, uint8_t** high) { uint8_t *pivot, **left, **right; //sort low middle and high if (*(low+((high-low)/2)) < *low) swap_elements ((low+((high-low)/2)), low); if (*high < *low) swap_elements (low, high); if (*high < *(low+((high-low)/2))) swap_elements ((low+((high-low)/2)), high); swap_elements ((low+((high-low)/2)), (high-1)); pivot = *(high-1); left = low; right = high-1; while (1) { while (*(--right) > pivot); while (*(++left) < pivot); if (left < right) { swap_elements(left, right); } else break; } swap_elements (left, (high-1)); return left; } static void insertionsort (uint8_t** lo, uint8_t** hi) { for (uint8_t** i=lo+1; i <= hi; i++) { uint8_t** j = i; uint8_t* t = *i; while((j > lo) && (t <*(j-1))) { *j = *(j-1); j--; } *j = t; } } static void heapsort (uint8_t** lo, uint8_t** hi) { size_t n = hi - lo + 1; for (size_t i=n / 2; i >= 1; i--) { downheap (i,n,lo); } for (size_t i = n; i > 1; i--) { swap_elements (lo, lo + i - 1); downheap(1, i - 1, lo); } } static void downheap (size_t i, size_t n, uint8_t** lo) { uint8_t* d = *(lo + i - 1); size_t child; while (i <= n / 2) { child = 2*i; if (child < n && *(lo + child - 1)<(*(lo + child))) { child++; } if (!(d<*(lo + child - 1))) { break; } *(lo + i - 1) = *(lo + child - 1); i = child; } *(lo + i - 1) = d; } }; #endif //defined(USE_INTROSORT) || defined(USE_VXSORT) #ifdef USE_VXSORT static void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high) { // above this threshold, using AVX2 for sorting will likely pay off // despite possible downclocking on some devices const size_t AVX2_THRESHOLD_SIZE = 8 * 1024; // above this threshold, using AVX51F for sorting will likely pay off // despite possible downclocking on current devices const size_t AVX512F_THRESHOLD_SIZE = 128 * 1024; if (item_count <= 1) return; if (IsSupportedInstructionSet (InstructionSet::AVX2) && (item_count > AVX2_THRESHOLD_SIZE)) { dprintf(3, ("Sorting mark lists")); // use AVX512F only if the list is large enough to pay for downclocking impact if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) { do_vxsort_avx512 (item_array, &item_array[item_count - 1], range_low, range_high); } else { do_vxsort_avx2 (item_array, &item_array[item_count - 1], range_low, range_high); } } else { dprintf (3, ("Sorting mark lists")); introsort::sort (item_array, &item_array[item_count - 1], 0); } #ifdef _DEBUG // check the array is sorted for (ptrdiff_t i = 0; i < item_count - 1; i++) { assert (item_array[i] <= item_array[i + 1]); } // check that the ends of the array are indeed in range // together with the above this implies all elements are in range assert ((range_low <= item_array[0]) && (item_array[item_count - 1] <= range_high)); #endif } #endif //USE_VXSORT #ifdef MULTIPLE_HEAPS static size_t target_mark_count_for_heap (size_t total_mark_count, int heap_count, int heap_number) { // compute the average (rounded down) size_t average_mark_count = total_mark_count / heap_count; // compute the remainder size_t remaining_mark_count = total_mark_count - (average_mark_count * heap_count); // compute the target count for this heap - last heap has the remainder if (heap_number == (heap_count - 1)) return (average_mark_count + remaining_mark_count); else return average_mark_count; } NOINLINE uint8_t** gc_heap::equalize_mark_lists (size_t total_mark_list_size) { size_t local_mark_count[MAX_SUPPORTED_CPUS]; size_t total_mark_count = 0; // compute mark count per heap into a local 
array // compute the total for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; size_t mark_count = hp->mark_list_index - hp->mark_list; local_mark_count[i] = mark_count; total_mark_count += mark_count; } // this should agree with our input parameter assert(total_mark_count == total_mark_list_size); // compute the target count for this heap size_t this_target_mark_count = target_mark_count_for_heap (total_mark_count, n_heaps, heap_number); // if our heap has sufficient entries, we can exit early if (local_mark_count[heap_number] >= this_target_mark_count) return (mark_list + this_target_mark_count); // In the following, we try to fill the deficit in heap "deficit_heap_index" with // surplus from "surplus_heap_index". // If there is no deficit or surplus (anymore), the indices are advanced. int surplus_heap_index = 0; for (int deficit_heap_index = 0; deficit_heap_index <= heap_number; deficit_heap_index++) { // compute the target count for this heap - last heap has the remainder size_t deficit_target_mark_count = target_mark_count_for_heap (total_mark_count, n_heaps, deficit_heap_index); // if this heap has the target or larger count, skip it if (local_mark_count[deficit_heap_index] >= deficit_target_mark_count) continue; // while this heap is lower than average, fill it up while ((surplus_heap_index < n_heaps) && (local_mark_count[deficit_heap_index] < deficit_target_mark_count)) { size_t deficit = deficit_target_mark_count - local_mark_count[deficit_heap_index]; size_t surplus_target_mark_count = target_mark_count_for_heap(total_mark_count, n_heaps, surplus_heap_index); if (local_mark_count[surplus_heap_index] > surplus_target_mark_count) { size_t surplus = local_mark_count[surplus_heap_index] - surplus_target_mark_count; size_t amount_to_transfer = min(deficit, surplus); local_mark_count[surplus_heap_index] -= amount_to_transfer; if (deficit_heap_index == heap_number) { // copy amount_to_transfer mark list items memcpy(&g_heaps[deficit_heap_index]->mark_list[local_mark_count[deficit_heap_index]], &g_heaps[surplus_heap_index]->mark_list[local_mark_count[surplus_heap_index]], (amount_to_transfer*sizeof(mark_list[0]))); } local_mark_count[deficit_heap_index] += amount_to_transfer; } else { surplus_heap_index++; } } } return (mark_list + local_mark_count[heap_number]); } NOINLINE size_t gc_heap::sort_mark_list() { if ((settings.condemned_generation >= max_generation) #ifdef USE_REGIONS || (g_mark_list_piece == nullptr) #endif //USE_REGIONS ) { // fake a mark list overflow so merge_mark_lists knows to quit early mark_list_index = mark_list_end + 1; return 0; } // if this heap had a mark list overflow, we don't do anything if (mark_list_index > mark_list_end) { dprintf (2, ("h%d sort_mark_list overflow", heap_number)); mark_list_overflow = true; return 0; } // if any other heap had a mark list overflow, we fake one too, // so we don't use an incomplete mark list by mistake for (int i = 0; i < n_heaps; i++) { if (g_heaps[i]->mark_list_index > g_heaps[i]->mark_list_end) { mark_list_index = mark_list_end + 1; dprintf (2, ("h%d sort_mark_list: detected overflow on heap %d", heap_number, i)); return 0; } } // compute total mark list size and total ephemeral size size_t total_mark_list_size = 0; size_t total_ephemeral_size = 0; uint8_t* low = (uint8_t*)~0; uint8_t* high = 0; for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; total_mark_list_size += (hp->mark_list_index - hp->mark_list); #ifdef USE_REGIONS // iterate through the ephemeral regions to get a tighter bound for (int 
gen_num = settings.condemned_generation; gen_num >= 0; gen_num--) { generation* gen = hp->generation_of (gen_num); for (heap_segment* seg = generation_start_segment (gen); seg != nullptr; seg = heap_segment_next (seg)) { size_t ephemeral_size = heap_segment_allocated (seg) - heap_segment_mem (seg); total_ephemeral_size += ephemeral_size; low = min (low, heap_segment_mem (seg)); high = max (high, heap_segment_allocated (seg)); } } #else //USE_REGIONS size_t ephemeral_size = heap_segment_allocated (hp->ephemeral_heap_segment) - hp->gc_low; total_ephemeral_size += ephemeral_size; low = min (low, hp->gc_low); high = max (high, heap_segment_allocated (hp->ephemeral_heap_segment)); #endif //USE_REGIONS } // give up if the mark list size is unreasonably large if (total_mark_list_size > (total_ephemeral_size / 256)) { mark_list_index = mark_list_end + 1; // let's not count this as a mark list overflow dprintf (2, ("h%d total mark list %Id is too large > (%Id / 256), don't use", heap_number, total_mark_list_size, total_ephemeral_size)); mark_list_overflow = false; return 0; } uint8_t **local_mark_list_index = equalize_mark_lists (total_mark_list_size); #ifdef USE_VXSORT ptrdiff_t item_count = local_mark_list_index - mark_list; //#define WRITE_SORT_DATA #if defined(_DEBUG) || defined(WRITE_SORT_DATA) // in debug, make a copy of the mark list // for checking and debugging purposes uint8_t** mark_list_copy = &g_mark_list_copy[heap_number * mark_list_size]; uint8_t** mark_list_copy_index = &mark_list_copy[item_count]; for (ptrdiff_t i = 0; i < item_count; i++) { uint8_t* item = mark_list[i]; assert ((low <= item) && (item < high)); mark_list_copy[i] = item; } #endif // _DEBUG || WRITE_SORT_DATA do_vxsort (mark_list, item_count, low, high); #ifdef WRITE_SORT_DATA char file_name[256]; sprintf_s (file_name, _countof(file_name), "sort_data_gc%d_heap%d", settings.gc_index, heap_number); FILE* f; errno_t err = fopen_s (&f, file_name, "wb"); if (err == 0) { size_t magic = 'SDAT'; if (fwrite (&magic, sizeof(magic), 1, f) != 1) dprintf (3, ("fwrite failed\n")); if (fwrite (&elapsed_cycles, sizeof(elapsed_cycles), 1, f) != 1) dprintf (3, ("fwrite failed\n")); if (fwrite (&low, sizeof(low), 1, f) != 1) dprintf (3, ("fwrite failed\n")); if (fwrite (&item_count, sizeof(item_count), 1, f) != 1) dprintf (3, ("fwrite failed\n")); if (fwrite (mark_list_copy, sizeof(mark_list_copy[0]), item_count, f) != item_count) dprintf (3, ("fwrite failed\n")); if (fwrite (&magic, sizeof(magic), 1, f) != 1) dprintf (3, ("fwrite failed\n")); if (fclose (f) != 0) dprintf (3, ("fclose failed\n")); } #endif #ifdef _DEBUG // in debug, sort the copy as well using the proven sort, so we can check we got the right result if (mark_list_copy_index > mark_list_copy) { introsort::sort (mark_list_copy, mark_list_copy_index - 1, 0); } for (ptrdiff_t i = 0; i < item_count; i++) { uint8_t* item = mark_list[i]; assert (mark_list_copy[i] == item); } #endif //_DEBUG #else //USE_VXSORT dprintf (3, ("Sorting mark lists")); if (local_mark_list_index > mark_list) { introsort::sort (mark_list, local_mark_list_index - 1, 0); } #endif //USE_VXSORT uint8_t** x = mark_list; #ifdef USE_REGIONS // first set the pieces for all regions to empty assert (g_mark_list_piece_size >= region_count); for (size_t region_index = 0; region_index < region_count; region_index++) { mark_list_piece_start[region_index] = NULL; mark_list_piece_end[region_index] = NULL; } // predicate means: x is still within the mark list, and within the bounds of this region #define predicate(x) 
(((x) < local_mark_list_index) && (*(x) < region_limit)) while (x < local_mark_list_index) { heap_segment* region = get_region_info_for_address (*x); // sanity check - the object on the mark list should be within the region assert ((heap_segment_mem (region) <= *x) && (*x < heap_segment_allocated (region))); size_t region_index = get_basic_region_index_for_address (heap_segment_mem (region)); uint8_t* region_limit = heap_segment_allocated (region); uint8_t*** mark_list_piece_start_ptr = &mark_list_piece_start[region_index]; uint8_t*** mark_list_piece_end_ptr = &mark_list_piece_end[region_index]; #else // USE_REGIONS // predicate means: x is still within the mark list, and within the bounds of this heap #define predicate(x) (((x) < local_mark_list_index) && (*(x) < heap->ephemeral_high)) // first set the pieces for all heaps to empty int heap_num; for (heap_num = 0; heap_num < n_heaps; heap_num++) { mark_list_piece_start[heap_num] = NULL; mark_list_piece_end[heap_num] = NULL; } heap_num = -1; while (x < local_mark_list_index) { gc_heap* heap; // find the heap x points into - searching cyclically from the last heap, // because in many cases the right heap is the next one or comes soon after #ifdef _DEBUG int last_heap_num = heap_num; #endif //_DEBUG do { heap_num++; if (heap_num >= n_heaps) heap_num = 0; assert(heap_num != last_heap_num); // we should always find the heap - infinite loop if not! heap = g_heaps[heap_num]; } while (!(*x >= heap->ephemeral_low && *x < heap->ephemeral_high)); uint8_t*** mark_list_piece_start_ptr = &mark_list_piece_start[heap_num]; uint8_t*** mark_list_piece_end_ptr = &mark_list_piece_end[heap_num]; #endif // USE_REGIONS // x is the start of the mark list piece for this heap/region *mark_list_piece_start_ptr = x; // to find the end of the mark list piece for this heap/region, find the first x // that has !predicate(x), i.e. that is either not in this heap, or beyond the end of the list if (predicate(x)) { // let's see if we get lucky and the whole rest belongs to this piece if (predicate(local_mark_list_index -1)) { x = local_mark_list_index; *mark_list_piece_end_ptr = x; break; } // we play a variant of binary search to find the point sooner. // the first loop advances by increasing steps until the predicate turns false. // then we retreat the last step, and the second loop advances by decreasing steps, keeping the predicate true. 
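// A minimal sketch of the same search over a plain array, for illustration only (hypothetical
// helper, not part of the GC); it mirrors the two loops below with the predicate "index is in
// range and the element is below limit". The pointer-overflow guard in the real code is omitted.
#if 0
static size_t first_index_not_below (int* a, size_t len, int limit)
{
    // precondition: a[0] < limit, i.e. the predicate holds at the starting position
    size_t x = 0;
    size_t inc = 1;
    // growing phase: advance by doubling steps until the predicate fails (or we run off the end)
    do
    {
        inc *= 2;
        x += inc;
    } while ((x < len) && (a[x] < limit));
    // only the last step was wrong, so undo it; the predicate holds at x again
    x -= inc;
    // shrinking phase: halve the step each time, advancing only while the predicate stays true
    do
    {
        inc /= 2;
        if (((x + inc) < len) && (a[x + inc] < limit))
            x += inc;
    } while (inc > 1);
    // the predicate holds at x but not at x + 1, so x + 1 is the first index we were looking for
    return x + 1;
}
#endif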
unsigned inc = 1; do { inc *= 2; uint8_t** temp_x = x; x += inc; if (temp_x > x) { break; } } while (predicate(x)); // we know that only the last step was wrong, so we undo it x -= inc; do { // loop invariant - predicate holds at x, but not x + inc assert (predicate(x) && !(((x + inc) > x) && predicate(x + inc))); inc /= 2; if (((x + inc) > x) && predicate(x + inc)) { x += inc; } } while (inc > 1); // the termination condition and the loop invariant together imply this: assert(predicate(x) && !predicate(x + inc) && (inc == 1)); // so the spot we're looking for is one further x += 1; } *mark_list_piece_end_ptr = x; } #undef predicate return total_mark_list_size; } void gc_heap::append_to_mark_list (uint8_t **start, uint8_t **end) { size_t slots_needed = end - start; size_t slots_available = mark_list_end + 1 - mark_list_index; size_t slots_to_copy = min(slots_needed, slots_available); memcpy(mark_list_index, start, slots_to_copy*sizeof(*start)); mark_list_index += slots_to_copy; dprintf (3, ("h%d: appended %Id slots to mark_list\n", heap_number, slots_to_copy)); } #ifdef _DEBUG #if !defined(_MSC_VER) #if !defined(__cdecl) #if defined(__i386__) #define __cdecl __attribute__((cdecl)) #else #define __cdecl #endif #endif #endif static int __cdecl cmp_mark_list_item (const void* vkey, const void* vdatum) { uint8_t** key = (uint8_t**)vkey; uint8_t** datum = (uint8_t**)vdatum; if (*key < *datum) return -1; else if (*key > *datum) return 1; else return 0; } #endif // _DEBUG #ifdef USE_REGIONS uint8_t** gc_heap::get_region_mark_list (uint8_t* start, uint8_t* end, uint8_t*** mark_list_end_ptr) { size_t region_number = get_basic_region_index_for_address (start); size_t source_number = region_number; #else //USE_REGIONS void gc_heap::merge_mark_lists (size_t total_mark_list_size) { // in case of mark list overflow, don't bother if (total_mark_list_size == 0) { return; } #ifdef _DEBUG // if we had more than the average number of mark list items, // make sure these got copied to another heap, i.e. 
didn't get lost size_t this_mark_list_size = target_mark_count_for_heap (total_mark_list_size, n_heaps, heap_number); for (uint8_t** p = mark_list + this_mark_list_size; p < mark_list_index; p++) { uint8_t* item = *p; uint8_t** found_slot = nullptr; for (int i = 0; i < n_heaps; i++) { uint8_t** heap_mark_list = &g_mark_list[i * mark_list_size]; size_t heap_mark_list_size = target_mark_count_for_heap (total_mark_list_size, n_heaps, i); found_slot = (uint8_t**)bsearch (&item, heap_mark_list, heap_mark_list_size, sizeof(item), cmp_mark_list_item); if (found_slot != nullptr) break; } assert ((found_slot != nullptr) && (*found_slot == item)); } #endif dprintf(3, ("merge_mark_lists: heap_number = %d starts out with %Id entries", heap_number, (mark_list_index - mark_list))); int source_number = heap_number; #endif //USE_REGIONS uint8_t** source[MAX_SUPPORTED_CPUS]; uint8_t** source_end[MAX_SUPPORTED_CPUS]; int source_heap[MAX_SUPPORTED_CPUS]; int source_count = 0; for (int i = 0; i < n_heaps; i++) { gc_heap* heap = g_heaps[i]; if (heap->mark_list_piece_start[source_number] < heap->mark_list_piece_end[source_number]) { source[source_count] = heap->mark_list_piece_start[source_number]; source_end[source_count] = heap->mark_list_piece_end[source_number]; source_heap[source_count] = i; if (source_count < MAX_SUPPORTED_CPUS) source_count++; } } dprintf(3, ("source_number = %d has %d sources\n", source_number, source_count)); #if defined(_DEBUG) || defined(TRACE_GC) for (int j = 0; j < source_count; j++) { dprintf(3, ("source_number = %d ", source_number)); dprintf(3, (" source from heap %d = %Ix .. %Ix (%Id entries)", (size_t)(source_heap[j]), (size_t)(source[j][0]), (size_t)(source_end[j][-1]), (size_t)(source_end[j] - source[j]))); // the sources should all be sorted for (uint8_t **x = source[j]; x < source_end[j] - 1; x++) { if (x[0] > x[1]) { dprintf(3, ("oops, mark_list from source %d for heap %d isn't sorted\n", j, source_number)); assert (0); } } } #endif //_DEBUG || TRACE_GC mark_list = &g_mark_list_copy [heap_number*mark_list_size]; mark_list_index = mark_list; mark_list_end = &mark_list [mark_list_size-1]; int piece_count = 0; if (source_count == 0) { ; // nothing to do } else if (source_count == 1) { mark_list = source[0]; mark_list_index = source_end[0]; mark_list_end = mark_list_index; piece_count++; } else { while (source_count > 1) { // find the lowest and second lowest value in the sources we're merging from int lowest_source = 0; uint8_t *lowest = *source[0]; uint8_t *second_lowest = *source[1]; for (int i = 1; i < source_count; i++) { if (lowest > *source[i]) { second_lowest = lowest; lowest = *source[i]; lowest_source = i; } else if (second_lowest > *source[i]) { second_lowest = *source[i]; } } // find the point in the lowest source where it either runs out or is not <= second_lowest anymore // let's first try to get lucky and see if the whole source is <= second_lowest -- this is actually quite common uint8_t **x; if (source_end[lowest_source][-1] <= second_lowest) x = source_end[lowest_source]; else { // use linear search to find the end -- could also use binary search as in sort_mark_list, // but saw no improvement doing that for (x = source[lowest_source]; x < source_end[lowest_source] && *x <= second_lowest; x++) ; } // blast this piece to the mark list append_to_mark_list(source[lowest_source], x); piece_count++; source[lowest_source] = x; // check whether this source is now exhausted if (x >= source_end[lowest_source]) { // if it's not the source with the highest index, copy 
the source with the highest index // over it so the non-empty sources are always at the beginning if (lowest_source < source_count-1) { source[lowest_source] = source[source_count-1]; source_end[lowest_source] = source_end[source_count-1]; } source_count--; } } // we're left with just one source that we copy append_to_mark_list(source[0], source_end[0]); piece_count++; } #if defined(_DEBUG) || defined(TRACE_GC) // the final mark list must be sorted for (uint8_t **x = mark_list; x < mark_list_index - 1; x++) { if (x[0] > x[1]) { dprintf(3, ("oops, mark_list for heap %d isn't sorted at the end of merge_mark_lists", heap_number)); assert (0); } } #endif //_DEBUG || TRACE_GC #ifdef USE_REGIONS *mark_list_end_ptr = mark_list_index; return mark_list; #endif // USE_REGIONS } #else #ifdef USE_REGIONS // a variant of binary search that doesn't look for an exact match, // but finds the first element >= e static uint8_t** binary_search (uint8_t** left, uint8_t** right, uint8_t* e) { if (left == right) return left; assert (left < right); uint8_t** a = left; size_t l = 0; size_t r = (size_t)(right - left); while ((r - l) >= 2) { size_t m = l + (r - l) / 2; // loop condition says that r - l is at least 2 // so l, m, r are all different assert ((l < m) && (m < r)); if (a[m] < e) { l = m; } else { r = m; } } if (a[l] < e) return a + l + 1; else return a + l; } uint8_t** gc_heap::get_region_mark_list (uint8_t* start, uint8_t* end, uint8_t*** mark_list_end_ptr) { // do a binary search over the sorted marked list to find start and end of the // mark list for this region *mark_list_end_ptr = binary_search (mark_list, mark_list_index, end); return binary_search (mark_list, *mark_list_end_ptr, start); } #endif //USE_REGIONS #endif //MULTIPLE_HEAPS void gc_heap::grow_mark_list () { // with vectorized sorting, we can use bigger mark lists #ifdef USE_VXSORT #ifdef MULTIPLE_HEAPS const size_t MAX_MARK_LIST_SIZE = IsSupportedInstructionSet (InstructionSet::AVX2) ? (1000 * 1024) : (200 * 1024); #else //MULTIPLE_HEAPS const size_t MAX_MARK_LIST_SIZE = IsSupportedInstructionSet (InstructionSet::AVX2) ? (32 * 1024) : (16 * 1024); #endif //MULTIPLE_HEAPS #else //USE_VXSORT #ifdef MULTIPLE_HEAPS const size_t MAX_MARK_LIST_SIZE = 200 * 1024; #else //MULTIPLE_HEAPS const size_t MAX_MARK_LIST_SIZE = 16 * 1024; #endif //MULTIPLE_HEAPS #endif //USE_VXSORT size_t new_mark_list_size = min (mark_list_size * 2, MAX_MARK_LIST_SIZE); if (new_mark_list_size == mark_list_size) return; #ifdef MULTIPLE_HEAPS uint8_t** new_mark_list = make_mark_list (new_mark_list_size * n_heaps); uint8_t** new_mark_list_copy = make_mark_list (new_mark_list_size * n_heaps); if ((new_mark_list != nullptr) && (new_mark_list_copy != nullptr)) { delete[] g_mark_list; g_mark_list = new_mark_list; delete[] g_mark_list_copy; g_mark_list_copy = new_mark_list_copy; mark_list_size = new_mark_list_size; } else { delete[] new_mark_list; delete[] new_mark_list_copy; } #else //MULTIPLE_HEAPS uint8_t** new_mark_list = make_mark_list (new_mark_list_size); if (new_mark_list != nullptr) { delete[] mark_list; g_mark_list = new_mark_list; mark_list_size = new_mark_list_size; } #endif //MULTIPLE_HEAPS } class seg_free_spaces { struct seg_free_space { BOOL is_plug; void* start; }; struct free_space_bucket { seg_free_space* free_space; ptrdiff_t count_add; // Assigned when we first construct the array. ptrdiff_t count_fit; // How many items left when we are fitting plugs. 
}; void move_bucket (int old_power2, int new_power2) { // PREFAST warning 22015: old_power2 could be negative assert (old_power2 >= 0); assert (old_power2 >= new_power2); if (old_power2 == new_power2) { return; } seg_free_space* src_index = free_space_buckets[old_power2].free_space; for (int i = old_power2; i > new_power2; i--) { seg_free_space** dest = &(free_space_buckets[i].free_space); (*dest)++; seg_free_space* dest_index = free_space_buckets[i - 1].free_space; if (i > (new_power2 + 1)) { seg_free_space temp = *src_index; *src_index = *dest_index; *dest_index = temp; } src_index = dest_index; } free_space_buckets[old_power2].count_fit--; free_space_buckets[new_power2].count_fit++; } #ifdef _DEBUG void dump_free_space (seg_free_space* item) { uint8_t* addr = 0; size_t len = 0; if (item->is_plug) { mark* m = (mark*)(item->start); len = pinned_len (m); addr = pinned_plug (m) - len; } else { heap_segment* seg = (heap_segment*)(item->start); addr = heap_segment_plan_allocated (seg); len = heap_segment_committed (seg) - addr; } dprintf (SEG_REUSE_LOG_1, ("[%d]0x%Ix %Id", heap_num, addr, len)); } void dump() { seg_free_space* item = NULL; int i = 0; dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------\nnow the free spaces look like:", heap_num)); for (i = 0; i < (free_space_bucket_count - 1); i++) { dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); item = free_space_buckets[i].free_space; while (item < free_space_buckets[i + 1].free_space) { dump_free_space (item); item++; } dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); } dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); item = free_space_buckets[i].free_space; while (item <= &seg_free_space_array[free_space_item_count - 1]) { dump_free_space (item); item++; } dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); } #endif //_DEBUG free_space_bucket* free_space_buckets; seg_free_space* seg_free_space_array; ptrdiff_t free_space_bucket_count; ptrdiff_t free_space_item_count; int base_power2; int heap_num; #ifdef _DEBUG BOOL has_end_of_seg; #endif //_DEBUG public: seg_free_spaces (int h_number) { heap_num = h_number; } BOOL alloc () { size_t total_prealloc_size = MAX_NUM_BUCKETS * sizeof (free_space_bucket) + MAX_NUM_FREE_SPACES * sizeof (seg_free_space); free_space_buckets = (free_space_bucket*) new (nothrow) uint8_t[total_prealloc_size]; return (!!free_space_buckets); } // We take the ordered free space array we got from the 1st pass, // and feed the portion that we decided to use to this method, ie, // the largest item_count free spaces. 
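// For illustration (hypothetical numbers): with base == 10, bucket_count == 3 and
// ordered_free_spaces == {3, 1, 2}, add_buckets reserves slots 0..2 of seg_free_space_array
// for the 2^10 bucket, slot 3 for the 2^11 bucket and slots 4..5 for the 2^12 bucket; a free
// space of 5000 bytes (highest set bit 12) is then recorded by add() in the 2^12 bucket,
// whose slots are filled from the back as count_add counts down.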
void add_buckets (int base, size_t* ordered_free_spaces, int bucket_count, size_t item_count) { assert (free_space_buckets); assert (item_count <= (size_t)MAX_PTR); free_space_bucket_count = bucket_count; free_space_item_count = item_count; base_power2 = base; #ifdef _DEBUG has_end_of_seg = FALSE; #endif //_DEBUG ptrdiff_t total_item_count = 0; ptrdiff_t i = 0; seg_free_space_array = (seg_free_space*)(free_space_buckets + free_space_bucket_count); for (i = 0; i < (ptrdiff_t)item_count; i++) { seg_free_space_array[i].start = 0; seg_free_space_array[i].is_plug = FALSE; } for (i = 0; i < bucket_count; i++) { free_space_buckets[i].count_add = ordered_free_spaces[i]; free_space_buckets[i].count_fit = ordered_free_spaces[i]; free_space_buckets[i].free_space = &seg_free_space_array[total_item_count]; total_item_count += free_space_buckets[i].count_add; } assert (total_item_count == (ptrdiff_t)item_count); } // If we are adding a free space before a plug we pass the // mark stack position so we can update the length; we could // also be adding the free space after the last plug in which // case start is the segment which we'll need to update the // heap_segment_plan_allocated. void add (void* start, BOOL plug_p, BOOL first_p) { size_t size = (plug_p ? pinned_len ((mark*)start) : (heap_segment_committed ((heap_segment*)start) - heap_segment_plan_allocated ((heap_segment*)start))); if (plug_p) { dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space before plug: %Id", heap_num, size)); } else { dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space at end of seg: %Id", heap_num, size)); #ifdef _DEBUG has_end_of_seg = TRUE; #endif //_DEBUG } if (first_p) { size_t eph_gen_starts = gc_heap::eph_gen_starts_size; size -= eph_gen_starts; if (plug_p) { mark* m = (mark*)(start); pinned_len (m) -= eph_gen_starts; } else { heap_segment* seg = (heap_segment*)start; heap_segment_plan_allocated (seg) += eph_gen_starts; } } int bucket_power2 = index_of_highest_set_bit (size); if (bucket_power2 < base_power2) { return; } free_space_bucket* bucket = &free_space_buckets[bucket_power2 - base_power2]; seg_free_space* bucket_free_space = bucket->free_space; assert (plug_p || (!plug_p && bucket->count_add)); if (bucket->count_add == 0) { dprintf (SEG_REUSE_LOG_1, ("[%d]Already have enough of 2^%d", heap_num, bucket_power2)); return; } ptrdiff_t index = bucket->count_add - 1; dprintf (SEG_REUSE_LOG_1, ("[%d]Building free spaces: adding %Ix; len: %Id (2^%d)", heap_num, (plug_p ? (pinned_plug ((mark*)start) - pinned_len ((mark*)start)) : heap_segment_plan_allocated ((heap_segment*)start)), size, bucket_power2)); if (plug_p) { bucket_free_space[index].is_plug = TRUE; } bucket_free_space[index].start = start; bucket->count_add--; } #ifdef _DEBUG // Do a consistency check after all free spaces are added. 
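// It asserts that every reserved slot was filled (start != 0), that an end-of-segment free
// space was recorded exactly once when one was added (and never otherwise), and that each
// bucket's count_add has counted down to zero.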
void check() { ptrdiff_t i = 0; int end_of_seg_count = 0; for (i = 0; i < free_space_item_count; i++) { assert (seg_free_space_array[i].start); if (!(seg_free_space_array[i].is_plug)) { end_of_seg_count++; } } if (has_end_of_seg) { assert (end_of_seg_count == 1); } else { assert (end_of_seg_count == 0); } for (i = 0; i < free_space_bucket_count; i++) { assert (free_space_buckets[i].count_add == 0); } } #endif //_DEBUG uint8_t* fit (uint8_t* old_loc, size_t plug_size REQD_ALIGN_AND_OFFSET_DCL) { if (old_loc) { #ifdef SHORT_PLUGS assert (!is_plug_padded (old_loc)); #endif //SHORT_PLUGS assert (!node_realigned (old_loc)); } size_t saved_plug_size = plug_size; #ifdef FEATURE_STRUCTALIGN // BARTOKTODO (4841): this code path is disabled (see can_fit_all_blocks_p) until we take alignment requirements into account _ASSERTE(requiredAlignment == DATA_ALIGNMENT && false); #endif // FEATURE_STRUCTALIGN size_t plug_size_to_fit = plug_size; // best fit is only done for gen1 to gen2 and we do not pad in gen2. // however we must account for requirements of large alignment. // which may result in realignment padding. #ifdef RESPECT_LARGE_ALIGNMENT plug_size_to_fit += switch_alignment_size(FALSE); #endif //RESPECT_LARGE_ALIGNMENT int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size))); ptrdiff_t i; uint8_t* new_address = 0; if (plug_power2 < base_power2) { plug_power2 = base_power2; } int chosen_power2 = plug_power2 - base_power2; retry: for (i = chosen_power2; i < free_space_bucket_count; i++) { if (free_space_buckets[i].count_fit != 0) { break; } chosen_power2++; } dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting plug len %Id (2^%d) using 2^%d free space", heap_num, plug_size, plug_power2, (chosen_power2 + base_power2))); assert (i < free_space_bucket_count); seg_free_space* bucket_free_space = free_space_buckets[chosen_power2].free_space; ptrdiff_t free_space_count = free_space_buckets[chosen_power2].count_fit; size_t new_free_space_size = 0; BOOL can_fit = FALSE; size_t pad = 0; for (i = 0; i < free_space_count; i++) { size_t free_space_size = 0; pad = 0; if (bucket_free_space[i].is_plug) { mark* m = (mark*)(bucket_free_space[i].start); uint8_t* plug_free_space_start = pinned_plug (m) - pinned_len (m); if (!((old_loc == 0) || same_large_alignment_p (old_loc, plug_free_space_start))) { pad = switch_alignment_size (FALSE); } plug_size = saved_plug_size + pad; free_space_size = pinned_len (m); new_address = pinned_plug (m) - pinned_len (m); if (free_space_size >= (plug_size + Align (min_obj_size)) || free_space_size == plug_size) { new_free_space_size = free_space_size - plug_size; pinned_len (m) = new_free_space_size; #ifdef SIMPLE_DPRINTF dprintf (SEG_REUSE_LOG_0, ("[%d]FP: 0x%Ix->0x%Ix(%Ix)(%Ix), [0x%Ix (2^%d) -> [0x%Ix (2^%d)", heap_num, old_loc, new_address, (plug_size - pad), pad, pinned_plug (m), index_of_highest_set_bit (free_space_size), (pinned_plug (m) - pinned_len (m)), index_of_highest_set_bit (new_free_space_size))); #endif //SIMPLE_DPRINTF if (pad != 0) { set_node_realigned (old_loc); } can_fit = TRUE; } } else { heap_segment* seg = (heap_segment*)(bucket_free_space[i].start); free_space_size = heap_segment_committed (seg) - heap_segment_plan_allocated (seg); if (!((old_loc == 0) || same_large_alignment_p (old_loc, heap_segment_plan_allocated (seg)))) { pad = switch_alignment_size (FALSE); } plug_size = saved_plug_size + pad; if (free_space_size >= (plug_size + Align (min_obj_size)) || free_space_size == plug_size) { new_address = 
heap_segment_plan_allocated (seg); new_free_space_size = free_space_size - plug_size; heap_segment_plan_allocated (seg) = new_address + plug_size; #ifdef SIMPLE_DPRINTF dprintf (SEG_REUSE_LOG_0, ("[%d]FS: 0x%Ix-> 0x%Ix(%Ix) (2^%d) -> 0x%Ix (2^%d)", heap_num, old_loc, new_address, (plug_size - pad), index_of_highest_set_bit (free_space_size), heap_segment_plan_allocated (seg), index_of_highest_set_bit (new_free_space_size))); #endif //SIMPLE_DPRINTF if (pad != 0) set_node_realigned (old_loc); can_fit = TRUE; } } if (can_fit) { break; } } if (!can_fit) { assert (chosen_power2 == 0); chosen_power2 = 1; goto retry; } new_address += pad; assert ((chosen_power2 && (i == 0)) || ((!chosen_power2) && (i < free_space_count))); int new_bucket_power2 = index_of_highest_set_bit (new_free_space_size); if (new_bucket_power2 < base_power2) { new_bucket_power2 = base_power2; } move_bucket (chosen_power2, new_bucket_power2 - base_power2); //dump(); return new_address; } void cleanup () { if (free_space_buckets) { delete [] free_space_buckets; } if (seg_free_space_array) { delete [] seg_free_space_array; } } }; #define marked(i) header(i)->IsMarked() #define set_marked(i) header(i)->SetMarked() #define clear_marked(i) header(i)->ClearMarked() #define pinned(i) header(i)->IsPinned() #define set_pinned(i) header(i)->SetPinned() #define clear_pinned(i) header(i)->GetHeader()->ClrGCBit(); inline size_t my_get_size (Object* ob) { MethodTable* mT = header(ob)->GetMethodTable(); return (mT->GetBaseSize() + (mT->HasComponentSize() ? ((size_t)((CObjectHeader*)ob)->GetNumComponents() * mT->RawGetComponentSize()) : 0)); } //#define size(i) header(i)->GetSize() #define size(i) my_get_size (header(i)) #define contain_pointers(i) header(i)->ContainsPointers() #ifdef COLLECTIBLE_CLASS #define contain_pointers_or_collectible(i) header(i)->ContainsPointersOrCollectible() #define get_class_object(i) GCToEEInterface::GetLoaderAllocatorObjectForGC((Object *)i) #define is_collectible(i) method_table(i)->Collectible() #else //COLLECTIBLE_CLASS #define contain_pointers_or_collectible(i) header(i)->ContainsPointers() #endif //COLLECTIBLE_CLASS #ifdef BACKGROUND_GC inline void gc_heap::seg_clear_mark_array_bits_soh (heap_segment* seg) { uint8_t* range_beg = 0; uint8_t* range_end = 0; if (bgc_mark_array_range (seg, FALSE, &range_beg, &range_end)) { clear_mark_array (range_beg, align_on_mark_word (range_end), FALSE #ifdef FEATURE_BASICFREEZE , TRUE #endif // FEATURE_BASICFREEZE ); } } void gc_heap::clear_batch_mark_array_bits (uint8_t* start, uint8_t* end) { if ((start < background_saved_highest_address) && (end > background_saved_lowest_address)) { start = max (start, background_saved_lowest_address); end = min (end, background_saved_highest_address); size_t start_mark_bit = mark_bit_of (start); size_t end_mark_bit = mark_bit_of (end); unsigned int startbit = mark_bit_bit (start_mark_bit); unsigned int endbit = mark_bit_bit (end_mark_bit); size_t startwrd = mark_bit_word (start_mark_bit); size_t endwrd = mark_bit_word (end_mark_bit); dprintf (3, ("Clearing all mark array bits between [%Ix:%Ix-[%Ix:%Ix", (size_t)start, (size_t)start_mark_bit, (size_t)end, (size_t)end_mark_bit)); unsigned int firstwrd = lowbits (~0, startbit); unsigned int lastwrd = highbits (~0, endbit); if (startwrd == endwrd) { unsigned int wrd = firstwrd | lastwrd; mark_array[startwrd] &= wrd; return; } // clear the first mark word. 
        if (startbit)
        {
            mark_array[startwrd] &= firstwrd;
            startwrd++;
        }
        for (size_t wrdtmp = startwrd; wrdtmp < endwrd; wrdtmp++)
        {
            mark_array[wrdtmp] = 0;
        }
        // clear the last mark word.
        if (endbit)
        {
            mark_array[endwrd] &= lastwrd;
        }
    }
}

void gc_heap::bgc_clear_batch_mark_array_bits (uint8_t* start, uint8_t* end)
{
    if ((start < background_saved_highest_address) && (end > background_saved_lowest_address))
    {
        start = max (start, background_saved_lowest_address);
        end = min (end, background_saved_highest_address);
        clear_batch_mark_array_bits (start, end);
    }
}
#endif //BACKGROUND_GC

inline BOOL gc_heap::is_mark_set (uint8_t* o)
{
    return marked (o);
}

#if defined (_MSC_VER) && defined (TARGET_X86)
#pragma optimize("y", on)        // Small critical routines, don't put in EBP frame
#endif //_MSC_VER && TARGET_X86

// return the generation number of an object.
// It is assumed that the object is valid.
// Note that this will return max_generation for UOH objects
int gc_heap::object_gennum (uint8_t* o)
{
#ifdef USE_REGIONS
    return get_region_gen_num (o);
#else
    if (in_range_for_segment (o, ephemeral_heap_segment) &&
        (o >= generation_allocation_start (generation_of (max_generation - 1))))
    {
        // in an ephemeral generation.
        for ( int i = 0; i < max_generation-1; i++)
        {
            if ((o >= generation_allocation_start (generation_of (i))))
                return i;
        }
        return max_generation-1;
    }
    else
    {
        return max_generation;
    }
#endif //USE_REGIONS
}

int gc_heap::object_gennum_plan (uint8_t* o)
{
#ifdef USE_REGIONS
    return get_region_plan_gen_num (o);
#else
    if (in_range_for_segment (o, ephemeral_heap_segment))
    {
        for (int i = 0; i < ephemeral_generation_count; i++)
        {
            uint8_t* plan_start = generation_plan_allocation_start (generation_of (i));
            if (plan_start && (o >= plan_start))
            {
                return i;
            }
        }
    }
    return max_generation;
#endif //USE_REGIONS
}

#if defined(_MSC_VER) && defined(TARGET_X86)
#pragma optimize("", on)        // Go back to command line default optimizations
#endif //_MSC_VER && TARGET_X86

#ifdef USE_REGIONS
void get_initial_region(int gen, int hn, uint8_t** region_start, uint8_t** region_end)
{
    *region_start = initial_regions[hn][gen][0];
    *region_end = initial_regions[hn][gen][1];
}

bool gc_heap::initial_make_soh_regions (gc_heap* hp)
{
    uint8_t* region_start;
    uint8_t* region_end;
    uint32_t hn = 0;
#ifdef MULTIPLE_HEAPS
    hn = hp->heap_number;
#endif //MULTIPLE_HEAPS

    for (int i = max_generation; i >= 0; i--)
    {
        get_initial_region(i, hn, &region_start, &region_end);
        size_t region_size = region_end - region_start;
        heap_segment* current_region = make_heap_segment (region_start, region_size, hp, i);
        if (current_region == nullptr)
        {
            return false;
        }
        uint8_t* gen_start = heap_segment_mem (current_region);
        make_generation (i, current_region, gen_start);

        if (i == 0)
        {
            ephemeral_heap_segment = current_region;
            alloc_allocated = heap_segment_allocated (current_region);
        }
    }

    for (int i = max_generation; i >= 0; i--)
    {
        dprintf (REGIONS_LOG, ("h%d gen%d alloc seg is %Ix, start seg is %Ix (%Ix-%Ix)",
            heap_number, i,
            generation_allocation_segment (generation_of (i)),
            generation_start_segment (generation_of (i)),
            heap_segment_mem (generation_start_segment (generation_of (i))),
            heap_segment_allocated (generation_start_segment (generation_of (i)))));
    }

    return true;
}

bool gc_heap::initial_make_uoh_regions (int gen, gc_heap* hp)
{
    uint8_t* region_start;
    uint8_t* region_end;
    uint32_t hn = 0;
#ifdef MULTIPLE_HEAPS
    hn = hp->heap_number;
#endif //MULTIPLE_HEAPS

    get_initial_region(gen, hn, &region_start, &region_end);

    size_t region_size = region_end - region_start;
    heap_segment* uoh_region =
make_heap_segment (region_start, region_size, hp, gen); if (uoh_region == nullptr) { return false; } uoh_region->flags |= (gen == loh_generation) ? heap_segment_flags_loh : heap_segment_flags_poh; uint8_t* gen_start = heap_segment_mem (uoh_region); make_generation (gen, uoh_region, gen_start); return true; } void gc_heap::clear_region_info (heap_segment* region) { if (!heap_segment_uoh_p (region)) { //cleanup the brick table back to the empty value clear_brick_table (heap_segment_mem (region), heap_segment_reserved (region)); } // we should really clear cards as well!! #ifdef BACKGROUND_GC ::record_changed_seg ((uint8_t*)region, heap_segment_reserved (region), settings.gc_index, current_bgc_state, seg_deleted); if (dt_high_memory_load_p()) { decommit_mark_array_by_seg (region); } #endif //BACKGROUND_GC } // Note that returning a region to free does not decommit. // REGIONS PERF TODO: should decommit if needed. void gc_heap::return_free_region (heap_segment* region) { clear_region_info (region); region_free_list::add_region (region, free_regions); uint8_t* region_start = get_region_start (region); uint8_t* region_end = heap_segment_reserved (region); int num_basic_regions = (int)((region_end - region_start) >> min_segment_size_shr); dprintf (REGIONS_LOG, ("RETURNING region %Ix (%d basic regions) to free", heap_segment_mem (region), num_basic_regions)); for (int i = 0; i < num_basic_regions; i++) { uint8_t* basic_region_start = region_start + ((size_t)i << min_segment_size_shr); heap_segment* basic_region = get_region_info (basic_region_start); heap_segment_allocated (basic_region) = 0; #ifdef MULTIPLE_HEAPS heap_segment_heap (basic_region) = 0; #endif //MULTIPLE_HEAPS // I'm intentionally not resetting gen_num/plan_gen_num which will show us // which gen/plan gen this region was and that's useful for debugging. } } // USE_REGIONS TODO: SOH should be able to get a large region and split it up into basic regions // if needed. // USE_REGIONS TODO: In Server GC we should allow getting a free region from another heap. heap_segment* gc_heap::get_free_region (int gen_number, size_t size) { heap_segment* region = 0; // TODO: the update to committed_in_free is incorrect - we'd need synchronization 'cause a thread // could be getting a small and another one could be getting a large region at the same time. // This is only used for recording.
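// Illustrative mapping of a request onto the free lists used below (the sizes are
// assumptions for the example only - say 4 MB basic regions and a 32 MB large-region
// alignment):
//
//   gen0/gen1/gen2 request, size == 0            -> free_regions[basic_free_region]
//   loh/poh request,  size == 32 MB              -> free_regions[large_free_region]
//   loh/poh request,  size  > 32 MB (e.g. 96 MB) -> free_regions[huge_free_region],
//                                                   then the global huge list (under
//                                                   the gc lock), then allocate_new_region.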
if (gen_number <= max_generation) { assert (size == 0); region = free_regions[basic_free_region].unlink_region_front(); } else { const size_t LARGE_REGION_SIZE = global_region_allocator.get_large_region_alignment(); assert (size >= LARGE_REGION_SIZE); if (size == LARGE_REGION_SIZE) { // get it from the local list of large free regions if possible region = free_regions[large_free_region].unlink_region_front(); } else { // get it from the local list of huge free regions if possible region = free_regions[huge_free_region].unlink_smallest_region (size); if (region == nullptr) { ASSERT_HOLDING_SPIN_LOCK(&gc_lock); // get it from the global list of huge free regions region = global_free_huge_regions.unlink_smallest_region (size); } } } if (region) { uint8_t* region_start = get_region_start (region); uint8_t* region_end = heap_segment_reserved (region); init_heap_segment (region, __this, region_start, (region_end - region_start), gen_number); dprintf (REGIONS_LOG, ("h%d GFR get region %Ix (%Ix-%Ix) for gen%d", heap_number, (size_t)region, region_start, region_end, gen_number)); } else { // TODO: We should keep enough reserve in the free regions so we don't get OOM when // this is called within GC when we sweep. region = allocate_new_region (__this, gen_number, (gen_number > max_generation), size); } if (region) { if (!init_table_for_region (gen_number, region)) { region = 0; } } return region; } // Note that this gets the basic region index for obj. If the obj is in a large region, // this region may not be the start of it. heap_segment* gc_heap::region_of (uint8_t* obj) { size_t index = (size_t)obj >> gc_heap::min_segment_size_shr; seg_mapping* entry = &seg_mapping_table[index]; return (heap_segment*)entry; } heap_segment* gc_heap::get_region_at_index (size_t index) { index += (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr; return (heap_segment*)(&seg_mapping_table[index]); } // For debugging purposes to check that a region looks sane and // do some logging. This was useful to sprinkle in various places // where we were threading regions. 
void gc_heap::check_seg_gen_num (heap_segment* seg) { #ifdef _DEBUG uint8_t* mem = heap_segment_mem (seg); if ((mem < g_gc_lowest_address) || (mem >= g_gc_highest_address)) { GCToOSInterface::DebugBreak(); } int alloc_seg_gen_num = get_region_gen_num (mem); int alloc_seg_plan_gen_num = get_region_plan_gen_num (mem); dprintf (3, ("seg %Ix->%Ix, num %d, %d", (size_t)seg, mem, alloc_seg_gen_num, alloc_seg_plan_gen_num)); #endif //_DEBUG } int gc_heap::get_region_gen_num (heap_segment* region) { return heap_segment_gen_num (region); } int gc_heap::get_region_gen_num (uint8_t* obj) { return heap_segment_gen_num (region_of (obj)); } int gc_heap::get_region_plan_gen_num (uint8_t* obj) { return heap_segment_plan_gen_num (region_of (obj)); } bool gc_heap::is_region_demoted (uint8_t* obj) { return heap_segment_demoted_p (region_of (obj)); } inline void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) { assert (gen_num < (1 << (sizeof (uint8_t) * 8))); assert (gen_num >= 0); heap_segment_gen_num (region) = (uint8_t)gen_num; } inline void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num) { int gen_num = heap_segment_gen_num (region); int supposed_plan_gen_num = get_plan_gen_num (gen_num); dprintf (REGIONS_LOG, ("h%d setting plan gen on %Ix->%Ix(was gen%d) to %d(should be: %d) %s", heap_number, (size_t)region, heap_segment_mem (region), gen_num, plan_gen_num, supposed_plan_gen_num, ((plan_gen_num < supposed_plan_gen_num) ? "DEMOTED" : "ND"))); if (plan_gen_num < supposed_plan_gen_num) { if (!settings.demotion) { settings.demotion = TRUE; } get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit); region->flags |= heap_segment_flags_demoted; } else { region->flags &= ~heap_segment_flags_demoted; } heap_segment_plan_gen_num (region) = plan_gen_num; } inline void gc_heap::set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_num) { if (!heap_segment_swept_in_plan (region)) { set_region_plan_gen_num (region, plan_gen_num); } } #endif //USE_REGIONS int gc_heap::get_plan_gen_num (int gen_number) { return ((settings.promotion) ? min ((gen_number + 1), max_generation) : gen_number); } uint8_t* gc_heap::get_uoh_start_object (heap_segment* region, generation* gen) { #ifdef USE_REGIONS uint8_t* o = heap_segment_mem (region); #else uint8_t* o = generation_allocation_start (gen); assert(((CObjectHeader*)o)->IsFree()); size_t s = Align (size (o), get_alignment_constant (FALSE)); assert (s == AlignQword (min_obj_size)); //Skip the generation gap object o += s; #endif //USE_REGIONS return o; } uint8_t* gc_heap::get_soh_start_object (heap_segment* region, generation* gen) { #ifdef USE_REGIONS uint8_t* o = heap_segment_mem (region); #else uint8_t* o = generation_allocation_start (gen); #endif //USE_REGIONS return o; } size_t gc_heap::get_soh_start_obj_len (uint8_t* start_obj) { #ifdef USE_REGIONS return 0; #else return Align (size (start_obj)); #endif //USE_REGIONS } void gc_heap::clear_gen1_cards() { #if defined(_DEBUG) && !defined(USE_REGIONS) for (int x = 0; x <= max_generation; x++) { assert (generation_allocation_start (generation_of (x))); } #endif //_DEBUG && !USE_REGIONS if (!settings.demotion && settings.promotion) { //clear card for generation 1. 
generation 0 is empty #ifdef USE_REGIONS heap_segment* region = generation_start_segment (generation_of (1)); while (region) { clear_card_for_addresses (heap_segment_mem (region), heap_segment_allocated (region)); region = heap_segment_next (region); } #else //USE_REGIONS clear_card_for_addresses ( generation_allocation_start (generation_of (1)), generation_allocation_start (generation_of (0))); #endif //USE_REGIONS #ifdef _DEBUG uint8_t* start = get_soh_start_object (ephemeral_heap_segment, youngest_generation); assert (heap_segment_allocated (ephemeral_heap_segment) == (start + get_soh_start_obj_len (start))); #endif //_DEBUG } } heap_segment* gc_heap::make_heap_segment (uint8_t* new_pages, size_t size, gc_heap* hp, int gen_num) { gc_oh_num oh = gen_to_oh (gen_num); size_t initial_commit = SEGMENT_INITIAL_COMMIT; int h_number = #ifdef MULTIPLE_HEAPS hp->heap_number; #else 0; #endif //MULTIPLE_HEAPS if (!virtual_commit (new_pages, initial_commit, oh, h_number)) { return 0; } #ifdef USE_REGIONS dprintf (REGIONS_LOG, ("Making region %Ix->%Ix(%Idmb)", new_pages, (new_pages + size), (size / 1024 / 1024))); heap_segment* new_segment = get_region_info (new_pages); uint8_t* start = new_pages + sizeof (aligned_plug_and_gap); #else heap_segment* new_segment = (heap_segment*)new_pages; uint8_t* start = new_pages + segment_info_size; #endif //USE_REGIONS heap_segment_mem (new_segment) = start; heap_segment_used (new_segment) = start; heap_segment_reserved (new_segment) = new_pages + size; heap_segment_committed (new_segment) = (use_large_pages_p ? heap_segment_reserved(new_segment) : (new_pages + initial_commit)); init_heap_segment (new_segment, hp #ifdef USE_REGIONS , new_pages, size, gen_num #endif //USE_REGIONS ); dprintf (2, ("Creating heap segment %Ix", (size_t)new_segment)); return new_segment; } void gc_heap::init_heap_segment (heap_segment* seg, gc_heap* hp #ifdef USE_REGIONS , uint8_t* start, size_t size, int gen_num #endif //USE_REGIONS ) { seg->flags = 0; heap_segment_next (seg) = 0; heap_segment_plan_allocated (seg) = heap_segment_mem (seg); heap_segment_allocated (seg) = heap_segment_mem (seg); heap_segment_saved_allocated (seg) = heap_segment_mem (seg); #ifdef BACKGROUND_GC heap_segment_background_allocated (seg) = 0; heap_segment_saved_bg_allocated (seg) = 0; #endif //BACKGROUND_GC #ifdef MULTIPLE_HEAPS heap_segment_heap (seg) = hp; #endif //MULTIPLE_HEAPS #ifdef USE_REGIONS int gen_num_for_region = min (gen_num, max_generation); heap_segment_gen_num (seg) = (uint8_t)gen_num_for_region; heap_segment_plan_gen_num (seg) = gen_num_for_region; heap_segment_swept_in_plan (seg) = false; #endif //USE_REGIONS #ifdef USE_REGIONS int num_basic_regions = (int)(size >> min_segment_size_shr); size_t basic_region_size = (size_t)1 << min_segment_size_shr; dprintf (REGIONS_LOG, ("this region contains %d basic regions", num_basic_regions)); if (num_basic_regions > 1) { for (int i = 1; i < num_basic_regions; i++) { uint8_t* basic_region_start = start + (i * basic_region_size); heap_segment* basic_region = get_region_info (basic_region_start); heap_segment_allocated (basic_region) = (uint8_t*)(ptrdiff_t)-i; dprintf (REGIONS_LOG, ("Initing basic region %Ix->%Ix(%Idmb) alloc to %Ix", basic_region_start, (basic_region_start + basic_region_size), (size_t)(basic_region_size / 1024 / 1024), heap_segment_allocated (basic_region))); heap_segment_gen_num (basic_region) = (uint8_t)gen_num_for_region; heap_segment_plan_gen_num (basic_region) = gen_num_for_region; #ifdef MULTIPLE_HEAPS heap_segment_heap 
(basic_region) = hp; #endif //MULTIPLE_HEAPS } } #endif //USE_REGIONS } //Releases the segment to the OS. // this is always called on one thread only so calling seg_table->remove is fine. void gc_heap::delete_heap_segment (heap_segment* seg, BOOL consider_hoarding) { if (!heap_segment_uoh_p (seg)) { //cleanup the brick table back to the empty value clear_brick_table (heap_segment_mem (seg), heap_segment_reserved (seg)); } #ifdef USE_REGIONS return_free_region (seg); #else // USE_REGIONS if (consider_hoarding) { assert ((heap_segment_mem (seg) - (uint8_t*)seg) <= ptrdiff_t(2*OS_PAGE_SIZE)); size_t ss = (size_t) (heap_segment_reserved (seg) - (uint8_t*)seg); //Don't keep the big ones. if (ss <= INITIAL_ALLOC) { dprintf (2, ("Hoarding segment %Ix", (size_t)seg)); #ifdef BACKGROUND_GC // We don't need to clear the decommitted flag because when this segment is used // for a new segment the flags will be cleared. if (!heap_segment_decommitted_p (seg)) #endif //BACKGROUND_GC { decommit_heap_segment (seg); } seg_mapping_table_remove_segment (seg); heap_segment_next (seg) = segment_standby_list; segment_standby_list = seg; seg = 0; } } if (seg != 0) { dprintf (2, ("h%d: del seg: [%Ix, %Ix[", heap_number, (size_t)seg, (size_t)(heap_segment_reserved (seg)))); #ifdef BACKGROUND_GC ::record_changed_seg ((uint8_t*)seg, heap_segment_reserved (seg), settings.gc_index, current_bgc_state, seg_deleted); decommit_mark_array_by_seg (seg); #endif //BACKGROUND_GC seg_mapping_table_remove_segment (seg); release_segment (seg); } #endif //USE_REGIONS } //resets the pages beyond the allocated size so they won't be swapped out and back in void gc_heap::reset_heap_segment_pages (heap_segment* seg) { size_t page_start = align_on_page ((size_t)heap_segment_allocated (seg)); size_t size = (size_t)heap_segment_committed (seg) - page_start; if (size != 0) GCToOSInterface::VirtualReset((void*)page_start, size, false /* unlock */); } void gc_heap::decommit_heap_segment_pages (heap_segment* seg, size_t extra_space) { if (use_large_pages_p) return; uint8_t* page_start = align_on_page (heap_segment_allocated(seg)); size_t size = heap_segment_committed (seg) - page_start; extra_space = align_on_page (extra_space); if (size >= max ((extra_space + 2*OS_PAGE_SIZE), MIN_DECOMMIT_SIZE)) { page_start += max(extra_space, 32*OS_PAGE_SIZE); decommit_heap_segment_pages_worker (seg, page_start); } } size_t gc_heap::decommit_heap_segment_pages_worker (heap_segment* seg, uint8_t* new_committed) { #ifdef USE_REGIONS if (!dt_high_memory_load_p()) { return 0; } #endif assert (!use_large_pages_p); uint8_t* page_start = align_on_page (new_committed); size_t size = heap_segment_committed (seg) - page_start; if (size > 0) { bool decommit_succeeded_p = virtual_decommit (page_start, size, heap_segment_oh (seg), heap_number); if (decommit_succeeded_p) { dprintf (3, ("Decommitting heap segment [%Ix, %Ix[(%d)", (size_t)page_start, (size_t)(page_start + size), size)); heap_segment_committed (seg) = page_start; if (heap_segment_used (seg) > heap_segment_committed (seg)) { heap_segment_used (seg) = heap_segment_committed (seg); } } else { dprintf (3, ("Decommitting heap segment failed")); } } return size; } //decommit all pages except one or 2 void gc_heap::decommit_heap_segment (heap_segment* seg) { #ifdef USE_REGIONS if (!dt_high_memory_load_p()) { return; } #endif uint8_t* page_start = align_on_page (heap_segment_mem (seg)); dprintf (3, ("Decommitting heap segment %Ix(%Ix)", (size_t)seg, heap_segment_mem (seg))); #ifdef BACKGROUND_GC page_start +=
OS_PAGE_SIZE; #endif //BACKGROUND_GC size_t size = heap_segment_committed (seg) - page_start; bool decommit_succeeded_p = virtual_decommit (page_start, size, heap_segment_oh (seg), heap_number); if (decommit_succeeded_p) { //re-init the segment object heap_segment_committed (seg) = page_start; if (heap_segment_used (seg) > heap_segment_committed (seg)) { heap_segment_used (seg) = heap_segment_committed (seg); } } } void gc_heap::clear_gen0_bricks() { if (!gen0_bricks_cleared) { gen0_bricks_cleared = TRUE; //initialize brick table for gen 0 #ifdef USE_REGIONS heap_segment* gen0_region = generation_start_segment (generation_of (0)); while (gen0_region) { uint8_t* clear_start = heap_segment_mem (gen0_region); #else heap_segment* gen0_region = ephemeral_heap_segment; uint8_t* clear_start = generation_allocation_start (generation_of (0)); { #endif //USE_REGIONS for (size_t b = brick_of (clear_start); b < brick_of (align_on_brick (heap_segment_allocated (gen0_region))); b++) { set_brick (b, -1); } #ifdef USE_REGIONS gen0_region = heap_segment_next (gen0_region); #endif //USE_REGIONS } } } #ifdef BACKGROUND_GC void gc_heap::rearrange_small_heap_segments() { heap_segment* seg = freeable_soh_segment; while (seg) { heap_segment* next_seg = heap_segment_next (seg); // TODO: we need to consider hoarding here. delete_heap_segment (seg, FALSE); seg = next_seg; } freeable_soh_segment = 0; } #endif //BACKGROUND_GC void gc_heap::rearrange_uoh_segments() { dprintf (2, ("deleting empty large segments")); heap_segment* seg = freeable_uoh_segment; while (seg) { heap_segment* next_seg = heap_segment_next (seg); delete_heap_segment (seg, GCConfig::GetRetainVM()); seg = next_seg; } freeable_uoh_segment = 0; } #ifndef USE_REGIONS void gc_heap::rearrange_heap_segments(BOOL compacting) { heap_segment* seg = generation_start_segment (generation_of (max_generation)); heap_segment* prev_seg = 0; heap_segment* next_seg = 0; while (seg) { next_seg = heap_segment_next (seg); //link ephemeral segment when expanding if ((next_seg == 0) && (seg != ephemeral_heap_segment)) { seg->next = ephemeral_heap_segment; next_seg = heap_segment_next (seg); } //re-used expanded heap segment if ((seg == ephemeral_heap_segment) && next_seg) { heap_segment_next (prev_seg) = next_seg; heap_segment_next (seg) = 0; } else { uint8_t* end_segment = (compacting ? heap_segment_plan_allocated (seg) : heap_segment_allocated (seg)); // check if the segment was reached by allocation if ((end_segment == heap_segment_mem (seg))&& !heap_segment_read_only_p (seg)) { //if not, unthread and delete assert (prev_seg); assert (seg != ephemeral_heap_segment); heap_segment_next (prev_seg) = next_seg; delete_heap_segment (seg, GCConfig::GetRetainVM()); dprintf (2, ("Deleting heap segment %Ix", (size_t)seg)); } else { if (!heap_segment_read_only_p (seg)) { if (compacting) { heap_segment_allocated (seg) = heap_segment_plan_allocated (seg); } // reset the pages between allocated and committed. 
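// An illustrative walk-through of the arithmetic decommit_heap_segment_pages uses
// (values are assumptions for the example: 4 KB OS pages, extra_space == 0, and a
// MIN_DECOMMIT_SIZE no larger than the slack shown):
//
//   committed - align_on_page (allocated) = 256 KB of committed-but-unused space
//   256 KB >= max (extra_space + 2 pages, MIN_DECOMMIT_SIZE)  -> worth decommitting
//   page_start += max (extra_space, 32 * OS_PAGE_SIZE)        -> keep 128 KB of slack
//                                                                committed for upcoming
//                                                                allocations
//   decommit_heap_segment_pages_worker then decommits [page_start, committed)
//   and pulls committed (and used, if needed) back to page_start.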
if (seg != ephemeral_heap_segment) { decommit_heap_segment_pages (seg, 0); } } prev_seg = seg; } } seg = next_seg; } } #endif //!USE_REGIONS #if defined(USE_REGIONS) // trim down the list of free regions pointed at by free_list down to target_count, moving the extra ones to surplus_list static void remove_surplus_regions (region_free_list* free_list, region_free_list* surplus_list, size_t target_count) { while (free_list->get_num_free_regions() > target_count) { // remove one region from the heap's free list heap_segment* region = free_list->unlink_region_front(); // and put it on the surplus list surplus_list->add_region_front (region); } } // add regions from surplus_list to free_list, trying to reach target_count static int64_t add_regions (region_free_list* free_list, region_free_list* surplus_list, size_t target_count) { int64_t added_count = 0; while (free_list->get_num_free_regions() < target_count) { if (surplus_list->get_num_free_regions() == 0) break; added_count++; // remove one region from the surplus list heap_segment* region = surplus_list->unlink_region_front(); // and put it on the heap's free list free_list->add_region_front (region); } return added_count; } region_free_list::region_free_list() : num_free_regions (0), size_free_regions (0), size_committed_in_free_regions (0), num_free_regions_added (0), num_free_regions_removed (0), head_free_region (nullptr), tail_free_region (nullptr) { } void region_free_list::verify (bool empty_p) { #ifdef _DEBUG assert ((num_free_regions == 0) == empty_p); assert ((size_free_regions == 0) == empty_p); assert ((size_committed_in_free_regions == 0) == empty_p); assert ((head_free_region == nullptr) == empty_p); assert ((tail_free_region == nullptr) == empty_p); assert (num_free_regions == (num_free_regions_added - num_free_regions_removed)); if (!empty_p) { assert (heap_segment_next (tail_free_region) == nullptr); assert (heap_segment_prev_free_region (head_free_region) == nullptr); size_t actual_count = 0; heap_segment* last_region = nullptr; for (heap_segment* region = head_free_region; region != nullptr; region = heap_segment_next(region)) { last_region = region; actual_count++; } assert (num_free_regions == actual_count); assert (last_region == tail_free_region); heap_segment* first_region = nullptr; for (heap_segment* region = tail_free_region; region != nullptr; region = heap_segment_prev_free_region(region)) { first_region = region; actual_count--; } assert (actual_count == 0); assert (head_free_region == first_region); } #endif } void region_free_list::reset() { num_free_regions = 0; size_free_regions = 0; size_committed_in_free_regions = 0; head_free_region = nullptr; tail_free_region = nullptr; } void region_free_list::add_region_front (heap_segment* region) { assert (heap_segment_containing_free_list (region) == nullptr); heap_segment_containing_free_list(region) = this; if (head_free_region != nullptr) { heap_segment_prev_free_region(head_free_region) = region; assert (tail_free_region != nullptr); } else { tail_free_region = region; } heap_segment_next (region) = head_free_region; head_free_region = region; heap_segment_prev_free_region (region) = nullptr; num_free_regions++; num_free_regions_added++; size_t region_size = get_region_size (region); size_free_regions += region_size; size_t region_committed_size = get_region_committed_size (region); size_committed_in_free_regions += region_committed_size; verify (false); } heap_segment* region_free_list::unlink_region_front() { heap_segment* region = head_free_region; if 
(region != nullptr) { assert (heap_segment_containing_free_list (region) == this); unlink_region (region); } return region; } void region_free_list::unlink_region (heap_segment* region) { region_free_list* rfl = heap_segment_containing_free_list (region); rfl->verify (false); heap_segment* prev = heap_segment_prev_free_region (region); heap_segment* next = heap_segment_next (region); if (prev != nullptr) { assert (region != rfl->head_free_region); assert (heap_segment_next (prev) == region); heap_segment_next (prev) = next; } else { assert (region == rfl->head_free_region); rfl->head_free_region = next; } if (next != nullptr) { assert (region != rfl->tail_free_region); assert (heap_segment_prev_free_region (next) == region); heap_segment_prev_free_region (next) = prev; } else { assert (region == rfl->tail_free_region); rfl->tail_free_region = prev; } heap_segment_containing_free_list (region) = nullptr; rfl->num_free_regions--; rfl->num_free_regions_removed++; size_t region_size = get_region_size (region); assert (rfl->size_free_regions >= region_size); rfl->size_free_regions -= region_size; size_t region_committed_size = get_region_committed_size (region); assert (rfl->size_committed_in_free_regions >= region_committed_size); rfl->size_committed_in_free_regions -= region_committed_size; } free_region_kind region_free_list::get_region_kind (heap_segment* region) { const size_t BASIC_REGION_SIZE = global_region_allocator.get_region_alignment(); const size_t LARGE_REGION_SIZE = global_region_allocator.get_large_region_alignment(); size_t region_size = get_region_size (region); if (region_size == BASIC_REGION_SIZE) return basic_free_region; else if (region_size == LARGE_REGION_SIZE) return large_free_region; else { assert(region_size > LARGE_REGION_SIZE); return huge_free_region; } } heap_segment* region_free_list::unlink_smallest_region (size_t minimum_size) { verify (num_free_regions == 0); // look for the smallest region that is large enough heap_segment* smallest_region = nullptr; size_t smallest_size = (size_t)-1; for (heap_segment* region = head_free_region; region != nullptr; region = heap_segment_next (region)) { uint8_t* region_start = get_region_start(region); uint8_t* region_end = heap_segment_reserved(region); size_t region_size = get_region_size (region); const size_t LARGE_REGION_SIZE = global_region_allocator.get_large_region_alignment(); assert (region_size >= LARGE_REGION_SIZE * 2); if (region_size >= minimum_size) { // found a region that is large enough - see if it's smaller than the smallest so far if (smallest_size > region_size) { smallest_size = region_size; smallest_region = region; } // is the region's size equal to the minimum on this list? 
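// Illustrative example (sizes are assumptions, not the actual configuration): if the
// large-region alignment were 32 MB, every region on the huge list would be at least
// 64 MB. For a 48 MB request, a 64 MB region is then the best fit this list can ever
// offer, so the scan can stop at the first one it finds instead of walking the rest
// of the list - which is what the check below does.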
if (region_size == LARGE_REGION_SIZE * 2) { // we won't find a smaller one on this list assert (region == smallest_region); break; } } } if (smallest_region != nullptr) { unlink_region (smallest_region); dprintf(REGIONS_LOG, ("get %Ix-%Ix-%Ix", heap_segment_mem(smallest_region), heap_segment_committed(smallest_region), heap_segment_used(smallest_region))); } return smallest_region; } void region_free_list::transfer_regions (region_free_list* from) { this->verify (this->num_free_regions == 0); from->verify (from->num_free_regions == 0); if (from->num_free_regions == 0) { // the from list is empty return; } if (num_free_regions == 0) { // this list is empty head_free_region = from->head_free_region; tail_free_region = from->tail_free_region; } else { // both free lists are non-empty // attach the from list at the tail heap_segment* this_tail = tail_free_region; heap_segment* from_head = from->head_free_region; heap_segment_next (this_tail) = from_head; heap_segment_prev_free_region (from_head) = this_tail; tail_free_region = from->tail_free_region; } for (heap_segment* region = from->head_free_region; region != nullptr; region = heap_segment_next (region)) { heap_segment_containing_free_list (region) = this; } num_free_regions += from->num_free_regions; num_free_regions_added += from->num_free_regions; size_free_regions += from->size_free_regions; size_committed_in_free_regions += from->size_committed_in_free_regions; from->num_free_regions_removed += from->num_free_regions; from->reset(); verify (false); } size_t region_free_list::get_num_free_regions() { #ifdef _DEBUG verify (num_free_regions == 0); #endif //_DEBUG return num_free_regions; } void region_free_list::add_region (heap_segment* region, region_free_list to_free_list[count_free_region_kinds]) { free_region_kind kind = get_region_kind (region); to_free_list[kind].add_region_front (region); } #endif //USE_REGIONS void gc_heap::distribute_free_regions() { #ifdef USE_REGIONS const int kind_count = large_free_region + 1; // first step: accumulate the number of free regions and the budget over all heaps // and move huge regions to global free list size_t total_num_free_regions[kind_count] = { 0, 0 }; size_t total_budget[kind_count] = { 0, 0 }; #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; // just to reduce the number of #ifdefs in the code below const int i = 0; #endif //MULTIPLE_HEAPS for (int kind = basic_free_region; kind < kind_count; kind++) { total_num_free_regions[kind] += hp->free_regions[kind].get_num_free_regions(); } global_free_huge_regions.transfer_regions (&hp->free_regions[huge_free_region]); for (int gen = soh_gen0; gen < total_generation_count; gen++) { ptrdiff_t budget_gen = hp->estimate_gen_growth (gen); assert (budget_gen >= 0); total_budget[gen >= loh_generation] += budget_gen; } } global_free_huge_regions.transfer_regions (&global_regions_to_decommit[huge_free_region]); size_t free_space_in_huge_regions = global_free_huge_regions.get_size_free_regions(); size_t region_size[kind_count] = { global_region_allocator.get_region_alignment(), global_region_allocator.get_large_region_alignment() }; region_free_list surplus_regions[kind_count]; ptrdiff_t num_regions_to_decommit[kind_count]; size_t total_budget_in_region_units[kind_count]; size_t target_num_regions[kind_count]; int region_factor[kind_count] = { 1, LARGE_REGION_FACTOR }; #ifdef TRACE_GC const char* kind_name[count_free_region_kinds] = { "basic", "large", "huge"}; #endif 
// TRACE_GC #ifndef MULTIPLE_HEAPS // just to reduce the number of #ifdefs in the code below const int n_heaps = 1; #endif //!MULTIPLE_HEAPS size_t num_huge_region_units_to_consider[kind_count] = { 0, free_space_in_huge_regions / region_size[large_free_region] }; for (int kind = basic_free_region; kind < kind_count; kind++) { // we may still have regions left on the regions_to_decommit list - // use these to fill the budget as well surplus_regions[kind].transfer_regions (&global_regions_to_decommit[kind]); num_regions_to_decommit[kind] = surplus_regions[kind].get_num_free_regions(); total_budget_in_region_units[kind] = (total_budget[kind] + (region_size[kind] - 1)) / region_size[kind]; dprintf(REGIONS_LOG, ("%Id %s free regions, %Id regions budget, %Id regions on decommit list, %Id huge regions to consider", total_num_free_regions[kind], kind_name[kind], total_budget_in_region_units[kind], num_regions_to_decommit[kind], num_huge_region_units_to_consider[kind])); // check if the free regions exceed the budget // if so, put the highest free regions on the decommit list total_num_free_regions[kind] += num_regions_to_decommit[kind]; if (background_running_p() || ((total_num_free_regions[kind] + num_huge_region_units_to_consider[kind]) < total_budget_in_region_units[kind])) { dprintf (REGIONS_LOG, ("distributing the %Id %s regions deficit", total_budget_in_region_units[kind] - total_num_free_regions[kind], kind_name[kind])); target_num_regions[kind] = (total_num_free_regions[kind] + (n_heaps - 1)) / n_heaps; } else { target_num_regions[kind] = (total_budget_in_region_units[kind] + (n_heaps - 1)) / n_heaps; total_budget_in_region_units[kind] = target_num_regions[kind] * n_heaps; num_regions_to_decommit[kind] = total_num_free_regions[kind] + num_huge_region_units_to_consider[kind] - total_budget_in_region_units[kind]; dprintf(REGIONS_LOG, ("distributing the %Id %s regions, removing %Id regions", total_budget_in_region_units[kind], kind_name[kind], num_regions_to_decommit[kind])); if (num_regions_to_decommit[kind] > 0) { // put the highest regions on the decommit list global_region_allocator.move_highest_free_regions (num_regions_to_decommit[kind]*region_factor[kind], kind == basic_free_region, global_regions_to_decommit); dprintf (REGIONS_LOG, ("Moved %Id %s regions to decommit list", global_regions_to_decommit[kind].get_num_free_regions(), kind_name[kind])); if (kind == basic_free_region) { assert (global_regions_to_decommit[kind].get_num_free_regions() == (size_t)num_regions_to_decommit[kind]); } else { dprintf (REGIONS_LOG, ("Moved %Id %s regions to decommit list", global_regions_to_decommit[huge_free_region].get_num_free_regions(), kind_name[huge_free_region])); // cannot assert we moved any regions because there may be a single huge region with more than we want to decommit } } } } for (int kind = basic_free_region; kind < kind_count; kind++) { #ifdef MULTIPLE_HEAPS // now go through all the heaps and remove any free regions above the target count for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; if (hp->free_regions[kind].get_num_free_regions() > target_num_regions[kind]) { dprintf (REGIONS_LOG, ("removing %Id %s regions from heap %d", hp->free_regions[kind].get_num_free_regions() - target_num_regions[kind], kind_name[kind], i)); remove_surplus_regions (&hp->free_regions[kind], &surplus_regions[kind], target_num_regions[kind]); } } // finally go through all the heaps and distribute any surplus regions to heaps having too few free regions for (int i = 0; i < n_heaps; i++) { 
gc_heap* hp = g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; const int i = 0; #endif //MULTIPLE_HEAPS if (hp->free_regions[kind].get_num_free_regions() < target_num_regions[kind]) { int64_t num_added_regions = add_regions(&hp->free_regions[kind], &surplus_regions[kind], target_num_regions[kind]); dprintf(REGIONS_LOG, ("added %Id %s regions to heap %d", num_added_regions, kind_name[kind], i)); } } // should have exhausted the surplus_regions assert(surplus_regions[kind].get_num_free_regions() == 0); } #ifdef MULTIPLE_HEAPS gradual_decommit_in_progress_p = FALSE; for (int kind = basic_free_region; kind < count_free_region_kinds; kind++) { if (global_regions_to_decommit[kind].get_num_free_regions() != 0) { gradual_decommit_in_progress_p = TRUE; break; } } #else //MULTIPLE_HEAPS while (decommit_step()) { } #endif //MULTIPLE_HEAPS #endif //USE_REGIONS } #ifdef WRITE_WATCH uint8_t* g_addresses [array_size+2]; // to get around the bug in GetWriteWatch #ifdef CARD_BUNDLE inline void gc_heap::verify_card_bundle_bits_set(size_t first_card_word, size_t last_card_word) { #ifdef _DEBUG for (size_t x = cardw_card_bundle (first_card_word); x < cardw_card_bundle (last_card_word); x++) { if (!card_bundle_set_p (x)) { assert (!"Card bundle not set"); dprintf (3, ("Card bundle %Ix not set", x)); } } #else UNREFERENCED_PARAMETER(first_card_word); UNREFERENCED_PARAMETER(last_card_word); #endif } // Verifies that any bundles that are not set represent only cards that are not set. inline void gc_heap::verify_card_bundles() { #ifdef _DEBUG size_t lowest_card = card_word (card_of (lowest_address)); size_t highest_card = card_word (card_of (highest_address)); size_t cardb = cardw_card_bundle (lowest_card); size_t end_cardb = cardw_card_bundle (align_cardw_on_bundle (highest_card)); while (cardb < end_cardb) { uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb), lowest_card)]; uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1), highest_card)]; if (card_bundle_set_p (cardb) == 0) { // Verify that no card is set while (card_word < card_word_end) { if (*card_word != 0) { dprintf (3, ("gc: %d, Card word %Ix for address %Ix set, card_bundle %Ix clear", dd_collection_count (dynamic_data_of (0)), (size_t)(card_word-&card_table[0]), (size_t)(card_address ((size_t)(card_word-&card_table[0]) * card_word_width)), cardb)); } assert((*card_word)==0); card_word++; } } cardb++; } #endif } // If card bundles are enabled, use write watch to find pages in the card table that have // been dirtied, and set the corresponding card bundle bits.
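// As an illustrative example (the constants are assumptions, not the real values):
// with 4 KB OS pages and 4-byte card words, one dirtied card-table page covers 1024
// card words. That card word range is translated into a bundle range and every bundle
// bit in it is set, roughly:
//
//   size_t bcardw = /* first card word covered by the dirty page */;
//   size_t ecardw = bcardw + 1024;   // one 4 KB page worth of card words
//   card_bundles_set (cardw_card_bundle (bcardw),
//                     cardw_card_bundle (align_cardw_on_bundle (ecardw)));
//
// so that later card scans can skip whole bundles whose bit is still clear.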
void gc_heap::update_card_table_bundle() { if (card_bundles_enabled()) { // The address of the card word containing the card representing the lowest heap address uint8_t* base_address = (uint8_t*)(&card_table[card_word (card_of (lowest_address))]); // The address of the card word containing the card representing the highest heap address uint8_t* high_address = (uint8_t*)(&card_table[card_word (card_of (highest_address))]); uint8_t* saved_base_address = base_address; uintptr_t bcount = array_size; size_t saved_region_size = align_on_page (high_address) - saved_base_address; do { size_t region_size = align_on_page (high_address) - base_address; dprintf (3,("Probing card table pages [%Ix, %Ix[", (size_t)base_address, (size_t)(base_address + region_size))); bool success = GCToOSInterface::GetWriteWatch(false /* resetState */, base_address, region_size, (void**)g_addresses, &bcount); assert (success && "GetWriteWatch failed!"); dprintf (3,("Found %d pages written", bcount)); for (unsigned i = 0; i < bcount; i++) { // Offset of the dirty page from the start of the card table (clamped to base_address) size_t bcardw = (uint32_t*)(max(g_addresses[i],base_address)) - &card_table[0]; // Offset of the end of the page from the start of the card table (clamped to high addr) size_t ecardw = (uint32_t*)(min(g_addresses[i]+OS_PAGE_SIZE, high_address)) - &card_table[0]; assert (bcardw >= card_word (card_of (g_gc_lowest_address))); // Set the card bundle bits representing the dirty card table page card_bundles_set (cardw_card_bundle (bcardw), cardw_card_bundle (align_cardw_on_bundle (ecardw))); dprintf (3,("Set Card bundle [%Ix, %Ix[", cardw_card_bundle (bcardw), cardw_card_bundle (align_cardw_on_bundle (ecardw)))); verify_card_bundle_bits_set(bcardw, ecardw); } if (bcount >= array_size) { base_address = g_addresses [array_size-1] + OS_PAGE_SIZE; bcount = array_size; } } while ((bcount >= array_size) && (base_address < high_address)); // Now that we've updated the card bundle bits, reset the write-tracking state. 
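// (GetWriteWatch above is called with resetState == false and can report at most
//  array_size pages per call; when g_addresses comes back full, the probe simply
//  resumes past the last reported page. The single reset below, done once the whole
//  range has been processed, is what arms the next update: only card-table pages
//  written after this point will show up in the next probe.)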
GCToOSInterface::ResetWriteWatch (saved_base_address, saved_region_size); } } #endif //CARD_BUNDLE // static void gc_heap::reset_write_watch_for_gc_heap(void* base_address, size_t region_size) { #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP SoftwareWriteWatch::ClearDirty(base_address, region_size); #else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP GCToOSInterface::ResetWriteWatch(base_address, region_size); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } // static void gc_heap::get_write_watch_for_gc_heap(bool reset, void *base_address, size_t region_size, void** dirty_pages, uintptr_t* dirty_page_count_ref, bool is_runtime_suspended) { #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP SoftwareWriteWatch::GetDirty(base_address, region_size, dirty_pages, dirty_page_count_ref, reset, is_runtime_suspended); #else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP UNREFERENCED_PARAMETER(is_runtime_suspended); bool success = GCToOSInterface::GetWriteWatch(reset, base_address, region_size, dirty_pages, dirty_page_count_ref); assert(success); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } const size_t ww_reset_quantum = 128*1024*1024; inline void gc_heap::switch_one_quantum() { enable_preemptive (); GCToOSInterface::Sleep (1); disable_preemptive (true); } void gc_heap::reset_ww_by_chunk (uint8_t* start_address, size_t total_reset_size) { size_t reset_size = 0; size_t remaining_reset_size = 0; size_t next_reset_size = 0; while (reset_size != total_reset_size) { remaining_reset_size = total_reset_size - reset_size; next_reset_size = ((remaining_reset_size >= ww_reset_quantum) ? ww_reset_quantum : remaining_reset_size); if (next_reset_size) { reset_write_watch_for_gc_heap(start_address, next_reset_size); reset_size += next_reset_size; switch_one_quantum(); } } assert (reset_size == total_reset_size); } // This does a Sleep(1) for every reset ww_reset_quantum bytes of reset // we do concurrently. void gc_heap::switch_on_reset (BOOL concurrent_p, size_t* current_total_reset_size, size_t last_reset_size) { if (concurrent_p) { *current_total_reset_size += last_reset_size; dprintf (2, ("reset %Id bytes so far", *current_total_reset_size)); if (*current_total_reset_size > ww_reset_quantum) { switch_one_quantum(); *current_total_reset_size = 0; } } } void gc_heap::reset_write_watch (BOOL concurrent_p) { #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP // Software write watch currently requires the runtime to be suspended during reset. // See SoftwareWriteWatch::ClearDirty(). assert(!concurrent_p); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP dprintf (2, ("bgc lowest: %Ix, bgc highest: %Ix", background_saved_lowest_address, background_saved_highest_address)); size_t reset_size = 0; for (int i = get_start_generation_index(); i < total_generation_count; i++) { heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i))); while (seg) { uint8_t* base_address = align_lower_page (heap_segment_mem (seg)); base_address = max (base_address, background_saved_lowest_address); uint8_t* high_address = ((seg == ephemeral_heap_segment) ? 
alloc_allocated : heap_segment_allocated (seg)); high_address = min (high_address, background_saved_highest_address); if (base_address < high_address) { size_t reset_size = 0; size_t region_size = high_address - base_address; dprintf (3, ("h%d, gen: %Ix, ww: [%Ix(%Id)", heap_number, i, (size_t)base_address, region_size)); //reset_ww_by_chunk (base_address, region_size); reset_write_watch_for_gc_heap(base_address, region_size); switch_on_reset (concurrent_p, &reset_size, region_size); } seg = heap_segment_next_rw (seg); concurrent_print_time_delta (i == max_generation ? "CRWW soh": "CRWW uoh"); } } } #endif //WRITE_WATCH #ifdef BACKGROUND_GC void gc_heap::restart_vm() { //assert (generation_allocation_pointer (youngest_generation) == 0); dprintf (3, ("Restarting EE")); STRESS_LOG0(LF_GC, LL_INFO10000, "Concurrent GC: Restarting EE\n"); ee_proceed_event.Set(); } inline void fire_alloc_wait_event (alloc_wait_reason awr, BOOL begin_p) { if (awr != awr_ignored) { if (begin_p) { FIRE_EVENT(BGCAllocWaitBegin, awr); } else { FIRE_EVENT(BGCAllocWaitEnd, awr); } } } void gc_heap::fire_alloc_wait_event_begin (alloc_wait_reason awr) { fire_alloc_wait_event (awr, TRUE); } void gc_heap::fire_alloc_wait_event_end (alloc_wait_reason awr) { fire_alloc_wait_event (awr, FALSE); } #endif //BACKGROUND_GC void gc_heap::make_generation (int gen_num, heap_segment* seg, uint8_t* start) { generation* gen = generation_of (gen_num); gen->gen_num = gen_num; #ifndef USE_REGIONS gen->allocation_start = start; gen->plan_allocation_start = 0; #endif //USE_REGIONS gen->allocation_context.alloc_ptr = 0; gen->allocation_context.alloc_limit = 0; gen->allocation_context.alloc_bytes = 0; gen->allocation_context.alloc_bytes_uoh = 0; gen->allocation_context_start_region = 0; gen->start_segment = seg; #ifdef USE_REGIONS dprintf (REGIONS_LOG, ("g%d start seg is %Ix-%Ix", gen_num, (size_t)seg, heap_segment_mem (seg))); gen->tail_region = seg; gen->plan_start_segment = 0; gen->tail_ro_region = 0; #endif //USE_REGIONS gen->allocation_segment = seg; gen->free_list_space = 0; gen->pinned_allocated = 0; gen->free_list_allocated = 0; gen->end_seg_allocated = 0; gen->condemned_allocated = 0; gen->sweep_allocated = 0; gen->free_obj_space = 0; gen->allocation_size = 0; gen->pinned_allocation_sweep_size = 0; gen->pinned_allocation_compact_size = 0; gen->allocate_end_seg_p = FALSE; gen->free_list_allocator.clear(); #ifdef DOUBLY_LINKED_FL gen->set_bgc_mark_bit_p = FALSE; #endif //DOUBLY_LINKED_FL #ifdef FREE_USAGE_STATS memset (gen->gen_free_spaces, 0, sizeof (gen->gen_free_spaces)); memset (gen->gen_current_pinned_free_spaces, 0, sizeof (gen->gen_current_pinned_free_spaces)); memset (gen->gen_plugs, 0, sizeof (gen->gen_plugs)); #endif //FREE_USAGE_STATS } void gc_heap::adjust_ephemeral_limits () { #ifndef USE_REGIONS ephemeral_low = generation_allocation_start (generation_of (max_generation - 1)); ephemeral_high = heap_segment_reserved (ephemeral_heap_segment); dprintf (3, ("new ephemeral low: %Ix new ephemeral high: %Ix", (size_t)ephemeral_low, (size_t)ephemeral_high)) #ifndef MULTIPLE_HEAPS // This updates the write barrier helpers with the new info. 
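// An illustrative sketch (not the actual barrier, which lives on the EE side) of how
// these bounds are typically consumed - a write barrier only needs to dirty a card
// when the stored reference points into the ephemeral range:
//
//   void write_barrier_sketch (uint8_t** slot, uint8_t* ref)
//   {
//       *slot = ref;
//       if ((ref >= ephemeral_low) && (ref < ephemeral_high))
//           dirty_card_for (slot);   // hypothetical helper for the example
//   }
//
// stomp_write_barrier_ephemeral below patches the updated low/high bounds into the
// checks the real barrier performs.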
stomp_write_barrier_ephemeral(ephemeral_low, ephemeral_high); #endif // MULTIPLE_HEAPS #endif //USE_REGIONS } #if defined(TRACE_GC) || defined(GC_CONFIG_DRIVEN) FILE* CreateLogFile(const GCConfigStringHolder& temp_logfile_name, bool is_config) { FILE* logFile; if (!temp_logfile_name.Get()) { return nullptr; } char logfile_name[MAX_LONGPATH+1]; //uint32_t pid = GCToOSInterface::GetCurrentProcessId(); const char* suffix = is_config ? ".config.log" : ".log"; //_snprintf_s(logfile_name, MAX_LONGPATH+1, _TRUNCATE, "%s.%d%s", temp_logfile_name.Get(), pid, suffix); _snprintf_s(logfile_name, MAX_LONGPATH+1, _TRUNCATE, "%s%s", temp_logfile_name.Get(), suffix); logFile = fopen(logfile_name, "wb"); return logFile; } #endif //TRACE_GC || GC_CONFIG_DRIVEN size_t gc_heap::get_segment_size_hard_limit (uint32_t* num_heaps, bool should_adjust_num_heaps) { assert (heap_hard_limit); size_t aligned_hard_limit = align_on_segment_hard_limit (heap_hard_limit); if (should_adjust_num_heaps) { uint32_t max_num_heaps = (uint32_t)(aligned_hard_limit / min_segment_size_hard_limit); if (*num_heaps > max_num_heaps) { *num_heaps = max_num_heaps; } } size_t seg_size = aligned_hard_limit / *num_heaps; size_t aligned_seg_size = (use_large_pages_p ? align_on_segment_hard_limit (seg_size) : round_up_power2 (seg_size)); assert (g_theGCHeap->IsValidSegmentSize (aligned_seg_size)); size_t seg_size_from_config = (size_t)GCConfig::GetSegmentSize(); if (seg_size_from_config) { size_t aligned_seg_size_config = (use_large_pages_p ? align_on_segment_hard_limit (seg_size) : round_up_power2 (seg_size_from_config)); aligned_seg_size = max (aligned_seg_size, aligned_seg_size_config); } //printf ("limit: %Idmb, aligned: %Idmb, %d heaps, seg size from config: %Idmb, seg size %Idmb", // (heap_hard_limit / 1024 / 1024), // (aligned_hard_limit / 1024 / 1024), // *num_heaps, // (seg_size_from_config / 1024 / 1024), // (aligned_seg_size / 1024 / 1024)); return aligned_seg_size; } #ifdef USE_REGIONS bool allocate_initial_regions(int number_of_heaps) { initial_regions = new (nothrow) uint8_t*[number_of_heaps][total_generation_count][2]; if (initial_regions == nullptr) { return false; } for (int i = 0; i < number_of_heaps; i++) { bool succeed = global_region_allocator.allocate_large_region( &initial_regions[i][poh_generation][0], &initial_regions[i][poh_generation][1], allocate_forward); assert(succeed); } for (int i = 0; i < number_of_heaps; i++) { for (int gen = max_generation; gen >= 0; gen--) { bool succeed = global_region_allocator.allocate_basic_region( &initial_regions[i][gen][0], &initial_regions[i][gen][1]); assert(succeed); } } for (int i = 0; i < number_of_heaps; i++) { bool succeed = global_region_allocator.allocate_large_region( &initial_regions[i][loh_generation][0], &initial_regions[i][loh_generation][1], allocate_forward); assert(succeed); } return true; } #endif HRESULT gc_heap::initialize_gc (size_t soh_segment_size, size_t loh_segment_size, size_t poh_segment_size #ifdef MULTIPLE_HEAPS ,int number_of_heaps #endif //MULTIPLE_HEAPS ) { #ifdef TRACE_GC if (GCConfig::GetLogEnabled()) { gc_log = CreateLogFile(GCConfig::GetLogFile(), false); if (gc_log == NULL) return E_FAIL; // GCLogFileSize in MBs. 
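// For example (the buffer size is an assumption for illustration only): with
// GCLogFileSize set to 100 and a 4 KB gc_log_buffer_size, the computation below
// yields max_gc_buffers = 100 * 1024 * 1024 / 4096 = 25600, i.e. the log file holds
// that many buffer-sized chunks; sizes outside 1..500 MB are rejected.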
gc_log_file_size = static_cast<size_t>(GCConfig::GetLogFileSize()); if (gc_log_file_size <= 0 || gc_log_file_size > 500) { fclose (gc_log); return E_FAIL; } gc_log_lock.Initialize(); gc_log_buffer = new (nothrow) uint8_t [gc_log_buffer_size]; if (!gc_log_buffer) { fclose(gc_log); return E_FAIL; } memset (gc_log_buffer, '*', gc_log_buffer_size); max_gc_buffers = gc_log_file_size * 1024 * 1024 / gc_log_buffer_size; } #endif // TRACE_GC #ifdef GC_CONFIG_DRIVEN if (GCConfig::GetConfigLogEnabled()) { gc_config_log = CreateLogFile(GCConfig::GetConfigLogFile(), true); if (gc_config_log == NULL) return E_FAIL; gc_config_log_buffer = new (nothrow) uint8_t [gc_config_log_buffer_size]; if (!gc_config_log_buffer) { fclose(gc_config_log); return E_FAIL; } compact_ratio = static_cast<int>(GCConfig::GetCompactRatio()); // h# | GC | gen | C | EX | NF | BF | ML | DM || PreS | PostS | Merge | Conv | Pre | Post | PrPo | PreP | PostP | cprintf (("%2s | %6s | %1s | %1s | %2s | %2s | %2s | %2s | %2s || %5s | %5s | %5s | %5s | %5s | %5s | %5s | %5s | %5s |", "h#", // heap index "GC", // GC index "g", // generation "C", // compaction (empty means sweeping), 'M' means it was mandatory, 'W' means it was not "EX", // heap expansion "NF", // normal fit "BF", // best fit (if it indicates neither NF nor BF it means it had to acquire a new seg. "ML", // mark list "DM", // demotion "PreS", // short object before pinned plug "PostS", // short object after pinned plug "Merge", // merged pinned plugs "Conv", // converted to pinned plug "Pre", // plug before pinned plug but not after "Post", // plug after pinned plug but not before "PrPo", // plug both before and after pinned plug "PreP", // pre short object padded "PostP" // post short object padded )); } #endif //GC_CONFIG_DRIVEN HRESULT hres = S_OK; #ifdef WRITE_WATCH hardware_write_watch_api_supported(); #ifdef BACKGROUND_GC if (can_use_write_watch_for_gc_heap() && GCConfig::GetConcurrentGC()) { gc_can_use_concurrent = true; #ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP virtual_alloc_hardware_write_watch = true; #endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } else { gc_can_use_concurrent = false; } #endif //BACKGROUND_GC #endif //WRITE_WATCH #ifdef BACKGROUND_GC // leave the first page to contain only segment info // because otherwise we could need to revisit the first page frequently in // background GC. segment_info_size = OS_PAGE_SIZE; #else segment_info_size = Align (sizeof (heap_segment), get_alignment_constant (FALSE)); #endif //BACKGROUND_GC reserved_memory = 0; size_t initial_heap_size = soh_segment_size + loh_segment_size + poh_segment_size; uint16_t* heap_no_to_numa_node = nullptr; #ifdef MULTIPLE_HEAPS reserved_memory_limit = initial_heap_size * number_of_heaps; if (!heap_select::init(number_of_heaps)) return E_OUTOFMEMORY; if (GCToOSInterface::CanEnableGCNumaAware()) heap_no_to_numa_node = heap_select::heap_no_to_numa_node; #else //MULTIPLE_HEAPS reserved_memory_limit = initial_heap_size; int number_of_heaps = 1; #endif //MULTIPLE_HEAPS if (heap_hard_limit) { check_commit_cs.Initialize(); } #ifdef USE_REGIONS if (regions_range) { // REGIONS TODO: we should reserve enough space at the end of what we reserved that's // big enough to accommodate if we were to materialize all the GC bookkeeping datastructures. // We only need to commit what we use and just need to commit more instead of having to // relocate the existing table and then calling copy_brick_card_table.
// Right now all the non mark array portions are committed since I'm calling mark_card_table // on the whole range. This can be committed as needed. size_t reserve_size = regions_range; uint8_t* reserve_range = (uint8_t*)virtual_alloc (reserve_size, use_large_pages_p); if (!reserve_range) return E_OUTOFMEMORY; if (!global_region_allocator.init (reserve_range, (reserve_range + reserve_size), ((size_t)1 << min_segment_size_shr), &g_gc_lowest_address, &g_gc_highest_address)) return E_OUTOFMEMORY; if (!allocate_initial_regions(number_of_heaps)) return E_OUTOFMEMORY; } else { assert (!"cannot use regions without specifying the range!!!"); return E_FAIL; } #else //USE_REGIONS bool separated_poh_p = use_large_pages_p && heap_hard_limit_oh[soh] && (GCConfig::GetGCHeapHardLimitPOH() == 0) && (GCConfig::GetGCHeapHardLimitPOHPercent() == 0); if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps, use_large_pages_p, separated_poh_p, heap_no_to_numa_node)) return E_OUTOFMEMORY; if (separated_poh_p) { heap_hard_limit_oh[poh] = min_segment_size_hard_limit * number_of_heaps; heap_hard_limit += heap_hard_limit_oh[poh]; } #endif //USE_REGIONS #ifdef CARD_BUNDLE //check if we need to turn on card_bundles. #ifdef MULTIPLE_HEAPS // use INT64 arithmetic here because of possible overflow on 32p uint64_t th = (uint64_t)MH_TH_CARD_BUNDLE*number_of_heaps; #else // use INT64 arithmetic here because of possible overflow on 32p uint64_t th = (uint64_t)SH_TH_CARD_BUNDLE; #endif //MULTIPLE_HEAPS if (can_use_write_watch_for_card_table() && reserved_memory >= th) { settings.card_bundles = TRUE; } else { settings.card_bundles = FALSE; } #endif //CARD_BUNDLE settings.first_init(); int latency_level_from_config = static_cast<int>(GCConfig::GetLatencyLevel()); if (latency_level_from_config >= latency_level_first && latency_level_from_config <= latency_level_last) { gc_heap::latency_level = static_cast<gc_latency_level>(latency_level_from_config); } init_static_data(); g_gc_card_table = make_card_table (g_gc_lowest_address, g_gc_highest_address); if (!g_gc_card_table) return E_OUTOFMEMORY; gc_started = FALSE; #ifdef MULTIPLE_HEAPS g_heaps = new (nothrow) gc_heap* [number_of_heaps]; if (!g_heaps) return E_OUTOFMEMORY; #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable:22011) // Suppress PREFast warning about integer underflow/overflow #endif // _PREFAST_ #if !defined(USE_REGIONS) || defined(_DEBUG) g_promoted = new (nothrow) size_t [number_of_heaps*16]; if (!g_promoted) return E_OUTOFMEMORY; #endif //!USE_REGIONS || _DEBUG g_bpromoted = new (nothrow) size_t [number_of_heaps*16]; if (!g_bpromoted) return E_OUTOFMEMORY; #ifdef MH_SC_MARK g_mark_stack_busy = new (nothrow) int[(number_of_heaps+2)*HS_CACHE_LINE_SIZE/sizeof(int)]; #endif //MH_SC_MARK #ifdef _PREFAST_ #pragma warning(pop) #endif // _PREFAST_ #ifdef MH_SC_MARK if (!g_mark_stack_busy) return E_OUTOFMEMORY; #endif //MH_SC_MARK if (!create_thread_support (number_of_heaps)) return E_OUTOFMEMORY; #endif //MULTIPLE_HEAPS #ifdef MULTIPLE_HEAPS yp_spin_count_unit = 32 * number_of_heaps; #else yp_spin_count_unit = 32 * g_num_processors; #endif //MULTIPLE_HEAPS #if defined(__linux__) GCToEEInterface::UpdateGCEventStatus(static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Default)), static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Default)), static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Private)), static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Private))); #endif // __linux__ #ifdef USE_VXSORT
InitSupportedInstructionSet ((int32_t)GCConfig::GetGCEnabledInstructionSets()); #endif if (!init_semi_shared()) { hres = E_FAIL; } return hres; } //Initializes PER_HEAP_ISOLATED data members. int gc_heap::init_semi_shared() { int ret = 0; #ifdef BGC_SERVO_TUNING uint32_t current_memory_load = 0; uint32_t sweep_flr_goal = 0; uint32_t sweep_flr_goal_loh = 0; #endif //BGC_SERVO_TUNING // This is used for heap expansion - it's to fix exactly the start for gen 0 // through (max_generation-1). When we expand the heap we allocate all these // gen starts at the beginning of the new ephemeral seg. eph_gen_starts_size = (Align (min_obj_size)) * max_generation; #ifdef MULTIPLE_HEAPS mark_list_size = min (100*1024, max (8192, soh_segment_size/(2*10*32))); g_mark_list = make_mark_list (mark_list_size*n_heaps); min_balance_threshold = alloc_quantum_balance_units * CLR_SIZE * 2; g_mark_list_copy = make_mark_list (mark_list_size*n_heaps); if (!g_mark_list_copy) { goto cleanup; } #else //MULTIPLE_HEAPS mark_list_size = max (8192, soh_segment_size/(64*32)); g_mark_list = make_mark_list (mark_list_size); #endif //MULTIPLE_HEAPS dprintf (3, ("mark_list_size: %d", mark_list_size)); if (!g_mark_list) { goto cleanup; } #ifdef MULTIPLE_HEAPS // gradual decommit: set size to some reasonable value per time interval max_decommit_step_size = ((DECOMMIT_SIZE_PER_MILLISECOND * DECOMMIT_TIME_STEP_MILLISECONDS) / n_heaps); // but do at least MIN_DECOMMIT_SIZE per step to make the OS call worthwhile max_decommit_step_size = max (max_decommit_step_size, MIN_DECOMMIT_SIZE); #endif //MULTIPLE_HEAPS #ifdef FEATURE_BASICFREEZE seg_table = sorted_table::make_sorted_table(); if (!seg_table) goto cleanup; #endif //FEATURE_BASICFREEZE segment_standby_list = 0; if (!full_gc_approach_event.CreateManualEventNoThrow(FALSE)) { goto cleanup; } if (!full_gc_end_event.CreateManualEventNoThrow(FALSE)) { goto cleanup; } fgn_loh_percent = 0; full_gc_approach_event_set = false; memset (full_gc_counts, 0, sizeof (full_gc_counts)); memset (&last_ephemeral_gc_info, 0, sizeof (last_ephemeral_gc_info)); memset (&last_full_blocking_gc_info, 0, sizeof (last_full_blocking_gc_info)); #ifdef BACKGROUND_GC memset (&last_bgc_info, 0, sizeof (last_bgc_info)); #endif //BACKGROUND_GC should_expand_in_full_gc = FALSE; #ifdef FEATURE_LOH_COMPACTION loh_compaction_always_p = GCConfig::GetLOHCompactionMode() != 0; loh_compaction_mode = loh_compaction_default; #endif //FEATURE_LOH_COMPACTION loh_size_threshold = (size_t)GCConfig::GetLOHThreshold(); assert (loh_size_threshold >= LARGE_OBJECT_SIZE); #ifdef BGC_SERVO_TUNING memset (bgc_tuning::gen_calc, 0, sizeof (bgc_tuning::gen_calc)); memset (bgc_tuning::gen_stats, 0, sizeof (bgc_tuning::gen_stats)); memset (bgc_tuning::current_bgc_end_data, 0, sizeof (bgc_tuning::current_bgc_end_data)); // for the outer loop - the ML (memory load) loop bgc_tuning::enable_fl_tuning = (GCConfig::GetBGCFLTuningEnabled() != 0); bgc_tuning::memory_load_goal = (uint32_t)GCConfig::GetBGCMemGoal(); bgc_tuning::memory_load_goal_slack = (uint32_t)GCConfig::GetBGCMemGoalSlack(); bgc_tuning::ml_kp = (double)GCConfig::GetBGCMLkp() / 1000.0; bgc_tuning::ml_ki = (double)GCConfig::GetBGCMLki() / 1000.0; bgc_tuning::ratio_correction_step = (double)GCConfig::GetBGCG2RatioStep() / 100.0; // for the inner loop - the alloc loop which calculates the allocated bytes in gen2 before // triggering the next BGC. 
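// The kp/ki/kd values read below are gains for a PID-style feedback loop. A minimal
// sketch of how such gains are conventionally combined (illustrative only - the real
// servo logic in bgc_tuning is considerably more involved):
//
//   double pid_sketch (double error, double prev_error, double* accu_error,
//                      double kp, double ki, double kd)
//   {
//       *accu_error += error;                     // integral term, scaled by ki
//       double derivative = error - prev_error;   // derivative term, scaled by kd
//       return (kp * error) + (ki * *accu_error) + (kd * derivative);
//   }
//
// where the error here would be something like the distance from the sweep free-list
// ratio (flr) goal.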
bgc_tuning::above_goal_kp = (double)GCConfig::GetBGCFLkp() / 1000000.0; bgc_tuning::enable_ki = (GCConfig::GetBGCFLEnableKi() != 0); bgc_tuning::above_goal_ki = (double)GCConfig::GetBGCFLki() / 1000000.0; bgc_tuning::enable_kd = (GCConfig::GetBGCFLEnableKd() != 0); bgc_tuning::above_goal_kd = (double)GCConfig::GetBGCFLkd() / 100.0; bgc_tuning::enable_smooth = (GCConfig::GetBGCFLEnableSmooth() != 0); bgc_tuning::num_gen1s_smooth_factor = (double)GCConfig::GetBGCFLSmoothFactor() / 100.0; bgc_tuning::enable_tbh = (GCConfig::GetBGCFLEnableTBH() != 0); bgc_tuning::enable_ff = (GCConfig::GetBGCFLEnableFF() != 0); bgc_tuning::above_goal_ff = (double)GCConfig::GetBGCFLff() / 100.0; bgc_tuning::enable_gradual_d = (GCConfig::GetBGCFLGradualD() != 0); sweep_flr_goal = (uint32_t)GCConfig::GetBGCFLSweepGoal(); sweep_flr_goal_loh = (uint32_t)GCConfig::GetBGCFLSweepGoalLOH(); bgc_tuning::gen_calc[0].sweep_flr_goal = ((sweep_flr_goal == 0) ? 20.0 : (double)sweep_flr_goal); bgc_tuning::gen_calc[1].sweep_flr_goal = ((sweep_flr_goal_loh == 0) ? 20.0 : (double)sweep_flr_goal_loh); bgc_tuning::available_memory_goal = (uint64_t)((double)gc_heap::total_physical_mem * (double)(100 - bgc_tuning::memory_load_goal) / 100); get_memory_info (&current_memory_load); dprintf (BGC_TUNING_LOG, ("BTL tuning %s!!!", (bgc_tuning::enable_fl_tuning ? "enabled" : "disabled"))); #ifdef SIMPLE_DPRINTF dprintf (BGC_TUNING_LOG, ("BTL tuning parameters: mem goal: %d%%(%I64d), +/-%d%%, gen2 correction factor: %.2f, sweep flr goal: %d%%, smooth factor: %.3f(%s), TBH: %s, FF: %.3f(%s), ml: kp %.5f, ki %.10f", bgc_tuning::memory_load_goal, bgc_tuning::available_memory_goal, bgc_tuning::memory_load_goal_slack, bgc_tuning::ratio_correction_step, (int)bgc_tuning::gen_calc[0].sweep_flr_goal, bgc_tuning::num_gen1s_smooth_factor, (bgc_tuning::enable_smooth ? "enabled" : "disabled"), (bgc_tuning::enable_tbh ? "enabled" : "disabled"), bgc_tuning::above_goal_ff, (bgc_tuning::enable_ff ? "enabled" : "disabled"), bgc_tuning::ml_kp, bgc_tuning::ml_ki)); dprintf (BGC_TUNING_LOG, ("BTL tuning parameters: kp: %.5f, ki: %.5f (%s), kd: %.3f (kd-%s, gd-%s), ff: %.3f", bgc_tuning::above_goal_kp, bgc_tuning::above_goal_ki, (bgc_tuning::enable_ki ? "enabled" : "disabled"), bgc_tuning::above_goal_kd, (bgc_tuning::enable_kd ? "enabled" : "disabled"), (bgc_tuning::enable_gradual_d ?
"enabled" : "disabled"), bgc_tuning::above_goal_ff)); #endif //SIMPLE_DPRINTF if (bgc_tuning::enable_fl_tuning && (current_memory_load < bgc_tuning::memory_load_goal)) { uint32_t distance_to_goal = bgc_tuning::memory_load_goal - current_memory_load; bgc_tuning::stepping_interval = max (distance_to_goal / 10, 1); bgc_tuning::last_stepping_mem_load = current_memory_load; bgc_tuning::last_stepping_bgc_count = 0; dprintf (BGC_TUNING_LOG, ("current ml: %d, %d to goal, interval: %d", current_memory_load, distance_to_goal, bgc_tuning::stepping_interval)); } else { dprintf (BGC_TUNING_LOG, ("current ml: %d, >= goal: %d, disable stepping", current_memory_load, bgc_tuning::memory_load_goal)); bgc_tuning::use_stepping_trigger_p = false; } #endif //BGC_SERVO_TUNING #ifdef BACKGROUND_GC memset (ephemeral_fgc_counts, 0, sizeof (ephemeral_fgc_counts)); bgc_alloc_spin_count = static_cast<uint32_t>(GCConfig::GetBGCSpinCount()); bgc_alloc_spin = static_cast<uint32_t>(GCConfig::GetBGCSpin()); { int number_bgc_threads = get_num_heaps(); if (!create_bgc_threads_support (number_bgc_threads)) { goto cleanup; } } #endif //BACKGROUND_GC memset (&current_no_gc_region_info, 0, sizeof (current_no_gc_region_info)); #ifdef GC_CONFIG_DRIVEN compact_or_sweep_gcs[0] = 0; compact_or_sweep_gcs[1] = 0; #endif //GC_CONFIG_DRIVEN #ifdef SHORT_PLUGS short_plugs_pad_ratio = (double)DESIRED_PLUG_LENGTH / (double)(DESIRED_PLUG_LENGTH - Align (min_obj_size)); #endif //SHORT_PLUGS generation_skip_ratio_threshold = (int)GCConfig::GetGCLowSkipRatio(); #ifdef FEATURE_EVENT_TRACE gc_time_info = new (nothrow) uint64_t[max_compact_time_type]; if (!gc_time_info) { goto cleanup; } #ifdef BACKGROUND_GC bgc_time_info = new (nothrow) uint64_t[max_bgc_time_type]; if (!bgc_time_info) { goto cleanup; } #endif //BACKGROUND_GC #ifdef FEATURE_LOH_COMPACTION loh_compact_info = new (nothrow) etw_loh_compact_info [get_num_heaps()]; if (!loh_compact_info) { goto cleanup; } #endif //FEATURE_LOH_COMPACTION #endif //FEATURE_EVENT_TRACE conserve_mem_setting = (int)GCConfig::GetGCConserveMem(); if (conserve_mem_setting < 0) conserve_mem_setting = 0; if (conserve_mem_setting > 9) conserve_mem_setting = 9; dprintf (1, ("conserve_mem_setting = %d", conserve_mem_setting)); ret = 1; cleanup: if (!ret) { if (full_gc_approach_event.IsValid()) { full_gc_approach_event.CloseEvent(); } if (full_gc_end_event.IsValid()) { full_gc_end_event.CloseEvent(); } } return ret; } gc_heap* gc_heap::make_gc_heap ( #ifdef MULTIPLE_HEAPS GCHeap* vm_hp, int heap_number #endif //MULTIPLE_HEAPS ) { gc_heap* res = 0; #ifdef MULTIPLE_HEAPS res = new (nothrow) gc_heap; if (!res) return 0; res->vm_heap = vm_hp; res->alloc_context_count = 0; #ifndef USE_REGIONS res->mark_list_piece_start = new (nothrow) uint8_t**[n_heaps]; if (!res->mark_list_piece_start) return 0; #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable:22011) // Suppress PREFast warning about integer underflow/overflow #endif // _PREFAST_ res->mark_list_piece_end = new (nothrow) uint8_t**[n_heaps + 32]; // +32 is padding to reduce false sharing #ifdef _PREFAST_ #pragma warning(pop) #endif // _PREFAST_ if (!res->mark_list_piece_end) return 0; #endif //!USE_REGIONS #endif //MULTIPLE_HEAPS if (res->init_gc_heap ( #ifdef MULTIPLE_HEAPS heap_number #else //MULTIPLE_HEAPS 0 #endif //MULTIPLE_HEAPS )==0) { return 0; } #ifdef MULTIPLE_HEAPS return res; #else return (gc_heap*)1; #endif //MULTIPLE_HEAPS } uint32_t gc_heap::wait_for_gc_done(int32_t timeOut) { bool cooperative_mode = enable_preemptive (); uint32_t dwWaitResult = NOERROR; gc_heap*
wait_heap = NULL; while (gc_heap::gc_started) { #ifdef MULTIPLE_HEAPS wait_heap = GCHeap::GetHeap(heap_select::select_heap(NULL))->pGenGCHeap; dprintf(2, ("waiting for the gc_done_event on heap %d", wait_heap->heap_number)); #endif // MULTIPLE_HEAPS #ifdef _PREFAST_ PREFIX_ASSUME(wait_heap != NULL); #endif // _PREFAST_ dwWaitResult = wait_heap->gc_done_event.Wait(timeOut, FALSE); } disable_preemptive (cooperative_mode); return dwWaitResult; } void gc_heap::set_gc_done() { enter_gc_done_event_lock(); if (!gc_done_event_set) { gc_done_event_set = true; dprintf (2, ("heap %d: setting gc_done_event", heap_number)); gc_done_event.Set(); } exit_gc_done_event_lock(); } void gc_heap::reset_gc_done() { enter_gc_done_event_lock(); if (gc_done_event_set) { gc_done_event_set = false; dprintf (2, ("heap %d: resetting gc_done_event", heap_number)); gc_done_event.Reset(); } exit_gc_done_event_lock(); } void gc_heap::enter_gc_done_event_lock() { uint32_t dwSwitchCount = 0; retry: if (Interlocked::CompareExchange(&gc_done_event_lock, 0, -1) >= 0) { while (gc_done_event_lock >= 0) { if (g_num_processors > 1) { int spin_count = yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (gc_done_event_lock < 0) break; YieldProcessor(); // indicate to the processor that we are spinning } if (gc_done_event_lock >= 0) GCToOSInterface::YieldThread(++dwSwitchCount); } else GCToOSInterface::YieldThread(++dwSwitchCount); } goto retry; } } void gc_heap::exit_gc_done_event_lock() { gc_done_event_lock = -1; } #ifndef MULTIPLE_HEAPS #ifdef RECORD_LOH_STATE int gc_heap::loh_state_index = 0; gc_heap::loh_state_info gc_heap::last_loh_states[max_saved_loh_states]; #endif //RECORD_LOH_STATE VOLATILE(int32_t) gc_heap::gc_done_event_lock; VOLATILE(bool) gc_heap::gc_done_event_set; GCEvent gc_heap::gc_done_event; #endif //!MULTIPLE_HEAPS VOLATILE(bool) gc_heap::internal_gc_done; void gc_heap::add_saved_spinlock_info ( bool loh_p, msl_enter_state enter_state, msl_take_state take_state) { #ifdef SPINLOCK_HISTORY spinlock_info* current = &last_spinlock_info[spinlock_info_index]; current->enter_state = enter_state; current->take_state = take_state; current->thread_id.SetToCurrentThread(); current->loh_p = loh_p; dprintf (SPINLOCK_LOG, ("[%d]%s %s %s", heap_number, (loh_p ? "loh" : "soh"), ((enter_state == me_acquire) ? "E" : "L"), msl_take_state_str[take_state])); spinlock_info_index++; assert (spinlock_info_index <= max_saved_spinlock_info); if (spinlock_info_index >= max_saved_spinlock_info) { spinlock_info_index = 0; } #else UNREFERENCED_PARAMETER(enter_state); UNREFERENCED_PARAMETER(take_state); #endif //SPINLOCK_HISTORY } int gc_heap::init_gc_heap (int h_number) { #ifdef MULTIPLE_HEAPS time_bgc_last = 0; for (int oh_index = 0; oh_index < (gc_oh_num::total_oh_count - 1); oh_index++) allocated_since_last_gc[oh_index] = 0; #ifdef SPINLOCK_HISTORY spinlock_info_index = 0; memset (last_spinlock_info, 0, sizeof(last_spinlock_info)); #endif //SPINLOCK_HISTORY // initialize per heap members. 
#ifndef USE_REGIONS ephemeral_low = (uint8_t*)1; ephemeral_high = MAX_PTR; #endif //!USE_REGIONS gc_low = 0; gc_high = 0; ephemeral_heap_segment = 0; oomhist_index_per_heap = 0; freeable_uoh_segment = 0; condemned_generation_num = 0; blocking_collection = FALSE; generation_skip_ratio = 100; #ifdef FEATURE_CARD_MARKING_STEALING n_eph_soh = 0; n_gen_soh = 0; n_eph_loh = 0; n_gen_loh = 0; #endif //FEATURE_CARD_MARKING_STEALING mark_stack_tos = 0; mark_stack_bos = 0; mark_stack_array_length = 0; mark_stack_array = 0; #if defined (_DEBUG) && defined (VERIFY_HEAP) verify_pinned_queue_p = FALSE; #endif // _DEBUG && VERIFY_HEAP #ifdef FEATURE_LOH_COMPACTION loh_pinned_queue_tos = 0; loh_pinned_queue_bos = 0; loh_pinned_queue_length = 0; loh_pinned_queue_decay = LOH_PIN_DECAY; loh_pinned_queue = 0; #endif //FEATURE_LOH_COMPACTION min_overflow_address = MAX_PTR; max_overflow_address = 0; gen0_bricks_cleared = FALSE; gen0_must_clear_bricks = 0; allocation_quantum = CLR_SIZE; more_space_lock_soh = gc_lock; more_space_lock_uoh = gc_lock; ro_segments_in_range = FALSE; loh_alloc_since_cg = 0; new_heap_segment = NULL; gen0_allocated_after_gc_p = false; #ifdef RECORD_LOH_STATE loh_state_index = 0; #endif //RECORD_LOH_STATE #endif //MULTIPLE_HEAPS #ifdef MULTIPLE_HEAPS if (h_number > n_heaps) { assert (!"Number of heaps exceeded"); return 0; } heap_number = h_number; #endif //MULTIPLE_HEAPS memset (&oom_info, 0, sizeof (oom_info)); memset (&fgm_result, 0, sizeof (fgm_result)); memset (oomhist_per_heap, 0, sizeof (oomhist_per_heap)); if (!gc_done_event.CreateManualEventNoThrow(FALSE)) { return 0; } gc_done_event_lock = -1; gc_done_event_set = false; if (!init_dynamic_data()) { return 0; } uint32_t* ct = &g_gc_card_table [card_word (card_of (g_gc_lowest_address))]; own_card_table (ct); card_table = translate_card_table (ct); brick_table = card_table_brick_table (ct); highest_address = card_table_highest_address (ct); lowest_address = card_table_lowest_address (ct); #ifdef CARD_BUNDLE card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct), g_gc_lowest_address); assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_gc_lowest_address))))] == card_table_card_bundle_table (ct)); #endif //CARD_BUNDLE #ifdef BACKGROUND_GC if (gc_can_use_concurrent) mark_array = translate_mark_array (card_table_mark_array (&g_gc_card_table[card_word (card_of (g_gc_lowest_address))])); else mark_array = NULL; #endif //BACKGROUND_GC #ifdef USE_REGIONS #ifdef STRESS_REGIONS // Handle table APIs expect coop so we temporarily switch to coop. disable_preemptive (true); pinning_handles_for_alloc = new (nothrow) (OBJECTHANDLE[PINNING_HANDLE_INITIAL_LENGTH]); for (int i = 0; i < PINNING_HANDLE_INITIAL_LENGTH; i++) { pinning_handles_for_alloc[i] = g_gcGlobalHandleStore->CreateHandleOfType (0, HNDTYPE_PINNED); } enable_preemptive(); ph_index_per_heap = 0; pinning_seg_interval = 2; num_gen0_regions = 0; sip_seg_interval = 2; sip_seg_maxgen_interval = 3; num_condemned_regions = 0; #endif //STRESS_REGIONS committed_in_free = 0; end_gen0_region_space = 0; gen0_pinned_free_space = 0; gen0_large_chunk_found = false; // REGIONS PERF TODO: we should really allocate the POH regions together just so that // they wouldn't prevent us from coalescing free regions to form a large virtual address // range. 
if (!initial_make_soh_regions (__this) || !initial_make_uoh_regions (loh_generation, __this) || !initial_make_uoh_regions (poh_generation, __this)) { return 0; } #else //USE_REGIONS heap_segment* seg = make_initial_segment (soh_gen0, h_number, __this); if (!seg) return 0; FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(seg), (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)), gc_etw_segment_small_object_heap); seg_mapping_table_add_segment (seg, __this); #ifdef MULTIPLE_HEAPS assert (heap_segment_heap (seg) == __this); #endif //MULTIPLE_HEAPS uint8_t* start = heap_segment_mem (seg); for (int i = max_generation; i >= 0; i--) { make_generation (i, seg, start); start += Align (min_obj_size); } heap_segment_allocated (seg) = start; alloc_allocated = start; heap_segment_used (seg) = start - plug_skew; ephemeral_heap_segment = seg; // Create segments for the large and pinned generations heap_segment* lseg = make_initial_segment(loh_generation, h_number, __this); if (!lseg) return 0; lseg->flags |= heap_segment_flags_loh; FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(lseg), (size_t)(heap_segment_reserved (lseg) - heap_segment_mem(lseg)), gc_etw_segment_large_object_heap); heap_segment* pseg = make_initial_segment (poh_generation, h_number, __this); if (!pseg) return 0; pseg->flags |= heap_segment_flags_poh; FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(pseg), (size_t)(heap_segment_reserved (pseg) - heap_segment_mem(pseg)), gc_etw_segment_pinned_object_heap); seg_mapping_table_add_segment (lseg, __this); seg_mapping_table_add_segment (pseg, __this); make_generation (loh_generation, lseg, heap_segment_mem (lseg)); make_generation (poh_generation, pseg, heap_segment_mem (pseg)); heap_segment_allocated (lseg) = heap_segment_mem (lseg) + Align (min_obj_size, get_alignment_constant (FALSE)); heap_segment_used (lseg) = heap_segment_allocated (lseg) - plug_skew; heap_segment_allocated (pseg) = heap_segment_mem (pseg) + Align (min_obj_size, get_alignment_constant (FALSE)); heap_segment_used (pseg) = heap_segment_allocated (pseg) - plug_skew; for (int gen_num = 0; gen_num < total_generation_count; gen_num++) { generation* gen = generation_of (gen_num); make_unused_array (generation_allocation_start (gen), Align (min_obj_size)); } #ifdef MULTIPLE_HEAPS assert (heap_segment_heap (lseg) == __this); assert (heap_segment_heap (pseg) == __this); #endif //MULTIPLE_HEAPS #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS //initialize the alloc context heap generation_alloc_context (generation_of (soh_gen0))->set_alloc_heap(vm_heap); generation_alloc_context (generation_of (loh_generation))->set_alloc_heap(vm_heap); generation_alloc_context (generation_of (poh_generation))->set_alloc_heap(vm_heap); #endif //MULTIPLE_HEAPS generation_of (max_generation)->free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST_BITS, gen2_alloc_list, max_generation); generation_of (loh_generation)->free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST_BITS, loh_alloc_list); generation_of (poh_generation)->free_list_allocator = allocator(NUM_POH_ALIST, BASE_POH_ALIST_BITS, poh_alloc_list); for (int oh_index = 0; oh_index < (gc_oh_num::total_oh_count - 1); oh_index++) etw_allocation_running_amount[oh_index] = 0; total_alloc_bytes_soh = 0; total_alloc_bytes_uoh = 0; //needs to be done after the dynamic data has been initialized #ifndef MULTIPLE_HEAPS allocation_running_amount = dd_min_size (dynamic_data_of (0)); #endif //!MULTIPLE_HEAPS fgn_maxgen_percent = 0; fgn_last_alloc = dd_min_size (dynamic_data_of (0)); 
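// Rough sketch of how a request size picks a bucket in the free_list_allocator instances
// constructed above (the exact mapping lives in allocator::first_suitable_bucket; the
// power-of-two bucket boundaries here are an assumption for illustration only):
//
//     unsigned int bucket_of (size_t size, int first_bucket_bits, unsigned int num_buckets)
//     {
//         // bucket 0 takes everything below (1 << first_bucket_bits); each later bucket
//         // covers the next power of two and the last bucket is open ended.
//         unsigned int bn = 0;
//         size_t limit = (size_t)1 << first_bucket_bits;
//         while ((bn < (num_buckets - 1)) && (size >= limit))
//         {
//             bn++;
//             limit <<= 1;
//         }
//         return bn;
//     }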
mark* arr = new (nothrow) (mark [MARK_STACK_INITIAL_LENGTH]); if (!arr) return 0; make_mark_stack(arr); #ifdef BACKGROUND_GC #ifdef BGC_SERVO_TUNING loh_a_no_bgc = 0; loh_a_bgc_marking = 0; loh_a_bgc_planning = 0; bgc_maxgen_end_fl_size = 0; #endif //BGC_SERVO_TUNING freeable_soh_segment = 0; gchist_index_per_heap = 0; if (gc_can_use_concurrent) { uint8_t** b_arr = new (nothrow) (uint8_t * [MARK_STACK_INITIAL_LENGTH]); if (!b_arr) return 0; make_background_mark_stack(b_arr); } #endif //BACKGROUND_GC #ifndef USE_REGIONS ephemeral_low = generation_allocation_start(generation_of(max_generation - 1)); ephemeral_high = heap_segment_reserved(ephemeral_heap_segment); #endif //!USE_REGIONS if (heap_number == 0) { stomp_write_barrier_initialize( #if defined(MULTIPLE_HEAPS) || defined(USE_REGIONS) reinterpret_cast<uint8_t*>(1), reinterpret_cast<uint8_t*>(~0) #else ephemeral_low, ephemeral_high #endif //!MULTIPLE_HEAPS || USE_REGIONS ); } #ifdef MULTIPLE_HEAPS if (!create_gc_thread ()) return 0; g_heaps [heap_number] = this; #endif //MULTIPLE_HEAPS #ifdef FEATURE_PREMORTEM_FINALIZATION HRESULT hr = AllocateCFinalize(&finalize_queue); if (FAILED(hr)) return 0; #endif // FEATURE_PREMORTEM_FINALIZATION max_free_space_items = MAX_NUM_FREE_SPACES; bestfit_seg = new (nothrow) seg_free_spaces (heap_number); if (!bestfit_seg) { return 0; } if (!bestfit_seg->alloc()) { return 0; } last_gc_before_oom = FALSE; sufficient_gen0_space_p = FALSE; #ifdef MULTIPLE_HEAPS #ifdef HEAP_ANALYZE heap_analyze_success = TRUE; internal_root_array = 0; internal_root_array_index = 0; internal_root_array_length = initial_internal_roots; current_obj = 0; current_obj_size = 0; #endif //HEAP_ANALYZE #endif // MULTIPLE_HEAPS #ifdef BACKGROUND_GC bgc_thread_id.Clear(); if (!create_bgc_thread_support()) { return 0; } bgc_alloc_lock = new (nothrow) exclusive_sync; if (!bgc_alloc_lock) { return 0; } bgc_alloc_lock->init(); bgc_thread_running = 0; bgc_thread = 0; bgc_threads_timeout_cs.Initialize(); current_bgc_state = bgc_not_in_process; background_soh_alloc_count = 0; background_uoh_alloc_count = 0; bgc_overflow_count = 0; end_loh_size = dd_min_size (dynamic_data_of (loh_generation)); end_poh_size = dd_min_size (dynamic_data_of (poh_generation)); current_sweep_pos = 0; #ifdef DOUBLY_LINKED_FL current_sweep_seg = 0; #endif //DOUBLY_LINKED_FL #endif //BACKGROUND_GC #ifdef GC_CONFIG_DRIVEN memset(interesting_data_per_heap, 0, sizeof (interesting_data_per_heap)); memset(compact_reasons_per_heap, 0, sizeof (compact_reasons_per_heap)); memset(expand_mechanisms_per_heap, 0, sizeof (expand_mechanisms_per_heap)); memset(interesting_mechanism_bits_per_heap, 0, sizeof (interesting_mechanism_bits_per_heap)); #endif //GC_CONFIG_DRIVEN return 1; } void gc_heap::destroy_semi_shared() { //TODO: will need to move this to per heap //#ifdef BACKGROUND_GC // if (c_mark_list) // delete c_mark_list; //#endif //BACKGROUND_GC if (g_mark_list) delete g_mark_list; if (seg_mapping_table) delete seg_mapping_table; #ifdef FEATURE_BASICFREEZE //destroy the segment map seg_table->delete_sorted_table(); #endif //FEATURE_BASICFREEZE } void gc_heap::self_destroy() { #ifdef BACKGROUND_GC kill_gc_thread(); #endif //BACKGROUND_GC if (gc_done_event.IsValid()) { gc_done_event.CloseEvent(); } // destroy every segment for (int i = get_start_generation_index(); i < total_generation_count; i++) { heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i))); PREFIX_ASSUME(seg != NULL); while (seg) { heap_segment* next_seg = heap_segment_next_rw (seg); delete_heap_segment
(seg); seg = next_seg; } } // get rid of the card table release_card_table (card_table); // destroy the mark stack delete mark_stack_array; #ifdef FEATURE_PREMORTEM_FINALIZATION if (finalize_queue) delete finalize_queue; #endif // FEATURE_PREMORTEM_FINALIZATION } void gc_heap::destroy_gc_heap(gc_heap* heap) { heap->self_destroy(); delete heap; } // Destroys resources owned by gc. It is assumed that a last GC has been performed and that // the finalizer queue has been drained. void gc_heap::shutdown_gc() { destroy_semi_shared(); #ifdef MULTIPLE_HEAPS //delete the heaps array delete g_heaps; destroy_thread_support(); n_heaps = 0; #endif //MULTIPLE_HEAPS //destroy seg_manager destroy_initial_memory(); GCToOSInterface::Shutdown(); } inline BOOL gc_heap::size_fit_p (size_t size REQD_ALIGN_AND_OFFSET_DCL, uint8_t* alloc_pointer, uint8_t* alloc_limit, uint8_t* old_loc, int use_padding) { BOOL already_padded = FALSE; #ifdef SHORT_PLUGS if ((old_loc != 0) && (use_padding & USE_PADDING_FRONT)) { alloc_pointer = alloc_pointer + Align (min_obj_size); already_padded = TRUE; } #endif //SHORT_PLUGS if (!((old_loc == 0) || same_large_alignment_p (old_loc, alloc_pointer))) size = size + switch_alignment_size (already_padded); #ifdef FEATURE_STRUCTALIGN alloc_pointer = StructAlign(alloc_pointer, requiredAlignment, alignmentOffset); #endif // FEATURE_STRUCTALIGN // in allocate_in_condemned_generation we can have this when we // set the alloc_limit to plan_allocated which could be less than // alloc_ptr if (alloc_limit < alloc_pointer) { return FALSE; } if (old_loc != 0) { return (((size_t)(alloc_limit - alloc_pointer) >= (size + ((use_padding & USE_PADDING_TAIL)? Align(min_obj_size) : 0))) #ifdef SHORT_PLUGS ||((!(use_padding & USE_PADDING_FRONT)) && ((alloc_pointer + size) == alloc_limit)) #else //SHORT_PLUGS ||((alloc_pointer + size) == alloc_limit) #endif //SHORT_PLUGS ); } else { assert (size == Align (min_obj_size)); return ((size_t)(alloc_limit - alloc_pointer) >= size); } } inline BOOL gc_heap::a_size_fit_p (size_t size, uint8_t* alloc_pointer, uint8_t* alloc_limit, int align_const) { // We could have run into cases where this is true when alloc_allocated is the // the same as the seg committed. if (alloc_limit < alloc_pointer) { return FALSE; } return ((size_t)(alloc_limit - alloc_pointer) >= (size + Align(min_obj_size, align_const))); } // Grow by committing more pages BOOL gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* high_address, bool* hard_limit_exceeded_p) { assert (high_address <= heap_segment_reserved (seg)); if (hard_limit_exceeded_p) *hard_limit_exceeded_p = false; //return 0 if we are at the end of the segment. 
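// Illustrative numbers only: once the two quick exits below are taken care of (nothing left
// past the reserved end, or high_address already committed), the commit size is the
// page-rounded distance from heap_segment_committed to high_address, raised to at least
// commit_min_th so the OS call stays worthwhile, and finally capped at whatever is still
// reserved but uncommitted. For example, with hypothetical 4 KB pages, asking for 10 KB past
// the committed boundary rounds up to 12 KB, is then bumped to commit_min_th, and is clamped
// to (heap_segment_reserved (seg) - heap_segment_committed (seg)).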
if (align_on_page (high_address) > heap_segment_reserved (seg)) return FALSE; if (high_address <= heap_segment_committed (seg)) return TRUE; size_t c_size = align_on_page ((size_t)(high_address - heap_segment_committed (seg))); c_size = max (c_size, commit_min_th); c_size = min (c_size, (size_t)(heap_segment_reserved (seg) - heap_segment_committed (seg))); if (c_size == 0) return FALSE; STRESS_LOG2(LF_GC, LL_INFO10000, "Growing heap_segment: %Ix high address: %Ix\n", (size_t)seg, (size_t)high_address); bool ret = virtual_commit (heap_segment_committed (seg), c_size, heap_segment_oh (seg), heap_number, hard_limit_exceeded_p); if (ret) { heap_segment_committed (seg) += c_size; STRESS_LOG1(LF_GC, LL_INFO10000, "New commit: %Ix\n", (size_t)heap_segment_committed (seg)); assert (heap_segment_committed (seg) <= heap_segment_reserved (seg)); assert (high_address <= heap_segment_committed (seg)); #if defined(MULTIPLE_HEAPS) && !defined(USE_REGIONS) // we should never increase committed beyond decommit target when gradual // decommit is in progress - if we do, this means commit and decommit are // going on at the same time. assert (!gradual_decommit_in_progress_p || (seg != ephemeral_heap_segment) || (heap_segment_committed (seg) <= heap_segment_decommit_target (seg))); #endif //MULTIPLE_HEAPS && !USE_REGIONS } return !!ret; } inline int gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* allocated, uint8_t* old_loc, size_t size, BOOL pad_front_p REQD_ALIGN_AND_OFFSET_DCL) { BOOL already_padded = FALSE; #ifdef SHORT_PLUGS if ((old_loc != 0) && pad_front_p) { allocated = allocated + Align (min_obj_size); already_padded = TRUE; } #endif //SHORT_PLUGS if (!((old_loc == 0) || same_large_alignment_p (old_loc, allocated))) size += switch_alignment_size (already_padded); #ifdef FEATURE_STRUCTALIGN size_t pad = ComputeStructAlignPad(allocated, requiredAlignment, alignmentOffset); return grow_heap_segment (seg, allocated + pad + size); #else // FEATURE_STRUCTALIGN return grow_heap_segment (seg, allocated + size); #endif // FEATURE_STRUCTALIGN } // thread this object to the front of gen's free list and update stats. void gc_heap::thread_free_item_front (generation* gen, uint8_t* free_start, size_t free_size) { make_unused_array (free_start, free_size); generation_free_list_space (gen) += free_size; generation_allocator(gen)->thread_item_front (free_start, free_size); add_gen_free (gen->gen_num, free_size); if (gen->gen_num == max_generation) { dprintf (2, ("AO h%d: gen2F+: %Ix(%Id)->%Id, FO: %Id", heap_number, free_start, free_size, generation_free_list_space (gen), generation_free_obj_space (gen))); } } #ifdef DOUBLY_LINKED_FL void gc_heap::thread_item_front_added (generation* gen, uint8_t* free_start, size_t free_size) { make_unused_array (free_start, free_size); generation_free_list_space (gen) += free_size; int bucket_index = generation_allocator(gen)->thread_item_front_added (free_start, free_size); if (gen->gen_num == max_generation) { dprintf (2, ("AO [h%d] gen2FL+: %Ix(%Id)->%Id", heap_number, free_start, free_size, generation_free_list_space (gen))); } add_gen_free (gen->gen_num, free_size); } #endif //DOUBLY_LINKED_FL // this is for free objects that are not on the free list; also update stats. 
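// The two helpers around here differ only in bookkeeping and reuse: thread_free_item_front
// (above) threads the range onto the generation's allocator and grows
// generation_free_list_space, so later allocations can be satisfied from it, while
// make_free_obj (below) merely formats the range as a free object and grows
// generation_free_obj_space - that space is not handed out again by the allocator. The
// callers in adjust_limit pick between them by size, roughly:
//     if (size >= Align (min_free_list))
//         thread_free_item_front (gen, hole, size);   // big enough to be a free list item
//     else
//         make_free_obj (gen, hole, size);            // too small to thread; just park it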
void gc_heap::make_free_obj (generation* gen, uint8_t* free_start, size_t free_size) { make_unused_array (free_start, free_size); generation_free_obj_space (gen) += free_size; if (gen->gen_num == max_generation) { dprintf (2, ("AO [h%d] gen2FO+: %Ix(%Id)->%Id", heap_number, free_start, free_size, generation_free_obj_space (gen))); } } //used only in older generation allocation (i.e during gc). void gc_heap::adjust_limit (uint8_t* start, size_t limit_size, generation* gen) { dprintf (3, ("gc Expanding segment allocation")); heap_segment* seg = generation_allocation_segment (gen); if ((generation_allocation_limit (gen) != start) || (start != heap_segment_plan_allocated (seg))) { if (generation_allocation_limit (gen) == heap_segment_plan_allocated (seg)) { assert (generation_allocation_pointer (gen) >= heap_segment_mem (seg)); assert (generation_allocation_pointer (gen) <= heap_segment_committed (seg)); heap_segment_plan_allocated (generation_allocation_segment (gen)) = generation_allocation_pointer (gen); } else { uint8_t* hole = generation_allocation_pointer (gen); size_t size = (generation_allocation_limit (gen) - generation_allocation_pointer (gen)); if (size != 0) { dprintf (3, ("filling up hole: %Ix, size %Ix", hole, size)); size_t allocated_size = generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen); #ifdef DOUBLY_LINKED_FL if (gen->gen_num == max_generation) { // For BGC since we need to thread the max_gen's free list as a doubly linked list we need to // preserve 5 ptr-sized words: SB | MT | Len | Next | Prev // This means we cannot simply make a filler free object right after what's allocated in this // alloc context if that's < 5-ptr sized. // if (allocated_size <= min_free_item_no_prev) { // We can't make the free object just yet. Need to record the size. 
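// Worked size example (assuming 64-bit pointers and that min_free_item_no_prev corresponds
// to those five words): SB | MT | Len | Next | Prev takes 5 * 8 = 40 bytes, so an alloc
// context that consumed less than that cannot be immediately followed by a doubly linked
// filler free object. Instead the code below stashes the would-be filler size at
// (allocation context start + min_free_item_no_prev) and defers materializing the free
// object.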
size_t* filler_free_obj_size_location = (size_t*)(generation_allocation_context_start_region (gen) + min_free_item_no_prev); size_t filler_free_obj_size = 0; if (size >= (Align (min_free_list) + Align (min_obj_size))) { filler_free_obj_size = Align (min_obj_size); size_t fl_size = size - filler_free_obj_size; thread_item_front_added (gen, (hole + filler_free_obj_size), fl_size); } else { filler_free_obj_size = size; } generation_free_obj_space (gen) += filler_free_obj_size; *filler_free_obj_size_location = filler_free_obj_size; uint8_t* old_loc = generation_last_free_list_allocated (gen); // check if old_loc happens to be in a saved plug_and_gap with a pinned plug after it uint8_t* saved_plug_and_gap = pinned_plug (pinned_plug_of (saved_pinned_plug_index)) - sizeof(plug_and_gap); size_t offset = old_loc - saved_plug_and_gap; if (offset < sizeof(gap_reloc_pair)) { // the object at old_loc must be at least min_obj_size assert (offset <= sizeof(plug_and_gap) - min_obj_size); // if so, set the bit in the saved info instead set_free_obj_in_compact_bit ((uint8_t*)(&pinned_plug_of (saved_pinned_plug_index)->saved_pre_plug_reloc) + offset); } else { #ifdef _DEBUG // check this looks like an object header(old_loc)->Validate(); #endif //_DEBUG set_free_obj_in_compact_bit (old_loc); } dprintf (3333, ("[h%d] ac: %Ix->%Ix((%Id < %Id), Pset %Ix s->%Id", heap_number, generation_allocation_context_start_region (gen), generation_allocation_pointer (gen), allocated_size, min_free_item_no_prev, filler_free_obj_size_location, filler_free_obj_size)); } else { if (size >= Align (min_free_list)) { thread_item_front_added (gen, hole, size); } else { make_free_obj (gen, hole, size); } } } else #endif //DOUBLY_LINKED_FL { // TODO: this should be written the same way as the above, ie, it should check // allocated_size first, but it doesn't need to do MAKE_FREE_OBJ_IN_COMPACT // related things. 
if (size >= Align (min_free_list)) { if (allocated_size < min_free_item_no_prev) { if (size >= (Align (min_free_list) + Align (min_obj_size))) { //split hole into min obj + threadable free item make_free_obj (gen, hole, min_obj_size); thread_free_item_front (gen, (hole + Align (min_obj_size)), (size - Align (min_obj_size))); } else { dprintf (3, ("allocated size too small, can't put back rest on free list %Ix", allocated_size)); make_free_obj (gen, hole, size); } } else { dprintf (3, ("threading hole in front of free list")); thread_free_item_front (gen, hole, size); } } else { make_free_obj (gen, hole, size); } } } } generation_allocation_pointer (gen) = start; generation_allocation_context_start_region (gen) = start; } generation_allocation_limit (gen) = (start + limit_size); } void verify_mem_cleared (uint8_t* start, size_t size) { if (!Aligned (size)) { FATAL_GC_ERROR(); } PTR_PTR curr_ptr = (PTR_PTR) start; for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) { if (*(curr_ptr++) != 0) { FATAL_GC_ERROR(); } } } #if defined (VERIFY_HEAP) && defined (BACKGROUND_GC) void gc_heap::set_batch_mark_array_bits (uint8_t* start, uint8_t* end) { size_t start_mark_bit = mark_bit_of (start); size_t end_mark_bit = mark_bit_of (end); unsigned int startbit = mark_bit_bit (start_mark_bit); unsigned int endbit = mark_bit_bit (end_mark_bit); size_t startwrd = mark_bit_word (start_mark_bit); size_t endwrd = mark_bit_word (end_mark_bit); dprintf (3, ("Setting all mark array bits between [%Ix:%Ix-[%Ix:%Ix", (size_t)start, (size_t)start_mark_bit, (size_t)end, (size_t)end_mark_bit)); unsigned int firstwrd = ~(lowbits (~0, startbit)); unsigned int lastwrd = ~(highbits (~0, endbit)); if (startwrd == endwrd) { unsigned int wrd = firstwrd & lastwrd; mark_array[startwrd] |= wrd; return; } // set the first mark word. if (startbit) { mark_array[startwrd] |= firstwrd; startwrd++; } for (size_t wrdtmp = startwrd; wrdtmp < endwrd; wrdtmp++) { mark_array[wrdtmp] = ~(unsigned int)0; } // set the last mark word. if (endbit) { mark_array[endwrd] |= lastwrd; } } // makes sure that the mark array bits between start and end are 0. void gc_heap::check_batch_mark_array_bits (uint8_t* start, uint8_t* end) { size_t start_mark_bit = mark_bit_of (start); size_t end_mark_bit = mark_bit_of (end); unsigned int startbit = mark_bit_bit (start_mark_bit); unsigned int endbit = mark_bit_bit (end_mark_bit); size_t startwrd = mark_bit_word (start_mark_bit); size_t endwrd = mark_bit_word (end_mark_bit); //dprintf (3, ("Setting all mark array bits between [%Ix:%Ix-[%Ix:%Ix", // (size_t)start, (size_t)start_mark_bit, // (size_t)end, (size_t)end_mark_bit)); unsigned int firstwrd = ~(lowbits (~0, startbit)); unsigned int lastwrd = ~(highbits (~0, endbit)); if (startwrd == endwrd) { unsigned int wrd = firstwrd & lastwrd; if (mark_array[startwrd] & wrd) { dprintf (1, ("The %Ix portion of mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", wrd, startwrd, mark_array [startwrd], mark_word_address (startwrd))); FATAL_GC_ERROR(); } return; } // set the first mark word. 
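// Mask example (assuming lowbits/highbits keep the low/high 'bits' of a word and mark words
// are 32 bits wide): with startbit == 3 and endbit == 5,
//     firstwrd = ~(lowbits (~0, 3))  == 0xFFFFFFF8    // bits 3..31 of the first word
//     lastwrd  = ~(highbits (~0, 5)) == 0x0000001F    // bits 0..4 of the last word
// so for a range that lives in a single word, (firstwrd & lastwrd) == 0x00000018 selects
// exactly bits 3 and 4, i.e. the half-open bit range [3, 5).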
if (startbit) { if (mark_array[startwrd] & firstwrd) { dprintf (1, ("The %Ix portion of mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", firstwrd, startwrd, mark_array [startwrd], mark_word_address (startwrd))); FATAL_GC_ERROR(); } startwrd++; } for (size_t wrdtmp = startwrd; wrdtmp < endwrd; wrdtmp++) { if (mark_array[wrdtmp]) { dprintf (1, ("The mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", wrdtmp, mark_array [wrdtmp], mark_word_address (wrdtmp))); FATAL_GC_ERROR(); } } // set the last mark word. if (endbit) { if (mark_array[endwrd] & lastwrd) { dprintf (1, ("The %Ix portion of mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", lastwrd, lastwrd, mark_array [lastwrd], mark_word_address (lastwrd))); FATAL_GC_ERROR(); } } } #endif //VERIFY_HEAP && BACKGROUND_GC allocator::allocator (unsigned int num_b, int fbb, alloc_list* b, int gen) { assert (num_b < MAX_BUCKET_COUNT); num_buckets = num_b; first_bucket_bits = fbb; buckets = b; gen_number = gen; } alloc_list& allocator::alloc_list_of (unsigned int bn) { assert (bn < num_buckets); if (bn == 0) return first_bucket; else return buckets [bn-1]; } size_t& allocator::alloc_list_damage_count_of (unsigned int bn) { assert (bn < num_buckets); if (bn == 0) return first_bucket.alloc_list_damage_count(); else return buckets [bn-1].alloc_list_damage_count(); } void allocator::unlink_item (unsigned int bn, uint8_t* item, uint8_t* prev_item, BOOL use_undo_p) { alloc_list* al = &alloc_list_of (bn); uint8_t* next_item = free_list_slot(item); #ifdef DOUBLY_LINKED_FL // if repair_list is TRUE yet use_undo_p is FALSE, it means we do need to make sure // this item does not look like it's on the free list as we will not have a chance to // do that later. BOOL repair_list = !discard_if_no_fit_p (); #endif //DOUBLY_LINKED_FL if (prev_item) { if (use_undo_p && (free_list_undo (prev_item) == UNDO_EMPTY)) { assert (item == free_list_slot (prev_item)); free_list_undo (prev_item) = item; alloc_list_damage_count_of (bn)++; } free_list_slot (prev_item) = next_item; } else { al->alloc_list_head() = next_item; } if (al->alloc_list_tail() == item) { al->alloc_list_tail() = prev_item; } #ifdef DOUBLY_LINKED_FL if (repair_list) { if (!use_undo_p) { free_list_prev (item) = PREV_EMPTY; } } if (gen_number == max_generation) { dprintf (3, ("[g%2d, b%2d]UL: %Ix->%Ix->%Ix (h: %Ix, t: %Ix)", gen_number, bn, free_list_prev (item), item, free_list_slot (item), al->alloc_list_head(), al->alloc_list_tail())); dprintf (3, ("[g%2d, b%2d]UL: exit, h->N: %Ix, h->P: %Ix, t->N: %Ix, t->P: %Ix", gen_number, bn, (al->alloc_list_head() ? free_list_slot (al->alloc_list_head()) : 0), (al->alloc_list_head() ? free_list_prev (al->alloc_list_head()) : 0), (al->alloc_list_tail() ? free_list_slot (al->alloc_list_tail()) : 0), (al->alloc_list_tail() ? 
free_list_prev (al->alloc_list_tail()) : 0))); } #endif //DOUBLY_LINKED_FL if (al->alloc_list_head() == 0) { assert (al->alloc_list_tail() == 0); } } #ifdef DOUBLY_LINKED_FL void allocator::unlink_item_no_undo (unsigned int bn, uint8_t* item, size_t size) { alloc_list* al = &alloc_list_of (bn); uint8_t* next_item = free_list_slot (item); uint8_t* prev_item = free_list_prev (item); #ifdef FL_VERIFICATION { uint8_t* start = al->alloc_list_head(); BOOL found_p = FALSE; while (start) { if (start == item) { found_p = TRUE; break; } start = free_list_slot (start); } if (!found_p) { dprintf (1, ("could not find %Ix in b%d!!!", item, a_l_number)); FATAL_GC_ERROR(); } } #endif //FL_VERIFICATION if (prev_item) { free_list_slot (prev_item) = next_item; } else { al->alloc_list_head() = next_item; } if (next_item) { free_list_prev (next_item) = prev_item; } if (al->alloc_list_tail() == item) { al->alloc_list_tail() = prev_item; } free_list_prev (item) = PREV_EMPTY; if (gen_number == max_generation) { dprintf (3, ("[g%2d, b%2d]ULN: %Ix->%Ix->%Ix (h: %Ix, t: %Ix)", gen_number, bn, free_list_prev (item), item, free_list_slot (item), al->alloc_list_head(), al->alloc_list_tail())); dprintf (3, ("[g%2d, b%2d]ULN: exit: h->N: %Ix, h->P: %Ix, t->N: %Ix, t->P: %Ix", gen_number, bn, (al->alloc_list_head() ? free_list_slot (al->alloc_list_head()) : 0), (al->alloc_list_head() ? free_list_prev (al->alloc_list_head()) : 0), (al->alloc_list_tail() ? free_list_slot (al->alloc_list_tail()) : 0), (al->alloc_list_tail() ? free_list_prev (al->alloc_list_tail()) : 0))); } } void allocator::unlink_item_no_undo (uint8_t* item, size_t size) { unsigned int bn = first_suitable_bucket (size); unlink_item_no_undo (bn, item, size); } void allocator::unlink_item_no_undo_added (unsigned int bn, uint8_t* item, uint8_t* previous_item) { alloc_list* al = &alloc_list_of (bn); uint8_t* next_item = free_list_slot (item); uint8_t* prev_item = free_list_prev (item); assert (prev_item == previous_item); if (prev_item) { free_list_slot (prev_item) = next_item; } else { al->added_alloc_list_head() = next_item; } if (next_item) { free_list_prev (next_item) = prev_item; } if (al->added_alloc_list_tail() == item) { al->added_alloc_list_tail() = prev_item; } free_list_prev (item) = PREV_EMPTY; if (gen_number == max_generation) { dprintf (3333, ("[g%2d, b%2d]ULNA: %Ix->%Ix->%Ix (h: %Ix, t: %Ix)", gen_number, bn, free_list_prev (item), item, free_list_slot (item), al->added_alloc_list_head(), al->added_alloc_list_tail())); dprintf (3333, ("[g%2d, b%2d]ULNA: exit: h->N: %Ix, h->P: %Ix, t->N: %Ix, t->P: %Ix", gen_number, bn, (al->added_alloc_list_head() ? free_list_slot (al->added_alloc_list_head()) : 0), (al->added_alloc_list_head() ? free_list_prev (al->added_alloc_list_head()) : 0), (al->added_alloc_list_tail() ? free_list_slot (al->added_alloc_list_tail()) : 0), (al->added_alloc_list_tail() ? free_list_prev (al->added_alloc_list_tail()) : 0))); } } int allocator::thread_item_front_added (uint8_t* item, size_t size) { unsigned int a_l_number = first_suitable_bucket (size); alloc_list* al = &alloc_list_of (a_l_number); free_list_slot (item) = al->added_alloc_list_head(); free_list_prev (item) = 0; // this list's UNDO is not useful. 
free_list_undo (item) = UNDO_EMPTY; if (al->added_alloc_list_head() != 0) { free_list_prev (al->added_alloc_list_head()) = item; } al->added_alloc_list_head() = item; if (al->added_alloc_list_tail() == 0) { al->added_alloc_list_tail() = item; } if (gen_number == max_generation) { dprintf (3333, ("[g%2d, b%2d]TFFA: exit: %Ix->%Ix->%Ix (h: %Ix, t: %Ix)", gen_number, a_l_number, free_list_prev (item), item, free_list_slot (item), al->added_alloc_list_head(), al->added_alloc_list_tail())); dprintf (3333, ("[g%2d, b%2d]TFFA: h->N: %Ix, h->P: %Ix, t->N: %Ix, t->P: %Ix", gen_number, a_l_number, (al->added_alloc_list_head() ? free_list_slot (al->added_alloc_list_head()) : 0), (al->added_alloc_list_head() ? free_list_prev (al->added_alloc_list_head()) : 0), (al->added_alloc_list_tail() ? free_list_slot (al->added_alloc_list_tail()) : 0), (al->added_alloc_list_tail() ? free_list_prev (al->added_alloc_list_tail()) : 0))); } return a_l_number; } #endif //DOUBLY_LINKED_FL void allocator::clear() { for (unsigned int i = 0; i < num_buckets; i++) { alloc_list_head_of (i) = 0; alloc_list_tail_of (i) = 0; } } //always thread to the end. void allocator::thread_item (uint8_t* item, size_t size) { unsigned int a_l_number = first_suitable_bucket (size); alloc_list* al = &alloc_list_of (a_l_number); uint8_t*& head = al->alloc_list_head(); uint8_t*& tail = al->alloc_list_tail(); if (al->alloc_list_head() == 0) { assert (al->alloc_list_tail() == 0); } free_list_slot (item) = 0; free_list_undo (item) = UNDO_EMPTY; assert (item != head); #ifdef DOUBLY_LINKED_FL if (gen_number == max_generation) { free_list_prev (item) = tail; } #endif //DOUBLY_LINKED_FL if (head == 0) { head = item; } else { assert ((free_list_slot(head) != 0) || (tail == head)); assert (item != tail); assert (free_list_slot(tail) == 0); free_list_slot (tail) = item; } tail = item; #ifdef DOUBLY_LINKED_FL if (gen_number == max_generation) { dprintf (3333, ("[g%2d, b%2d]TFE: %Ix->%Ix->%Ix (h: %Ix, t: %Ix)", gen_number, a_l_number, free_list_prev (item), item, free_list_slot (item), al->alloc_list_head(), al->alloc_list_tail())); dprintf (3333, ("[g%2d, b%2d]TFE: exit: h->N: %Ix, h->P: %Ix, t->N: %Ix, t->P: %Ix", gen_number, a_l_number, (al->alloc_list_head() ? free_list_slot (al->alloc_list_head()) : 0), (al->alloc_list_head() ? free_list_prev (al->alloc_list_head()) : 0), (al->alloc_list_tail() ? free_list_slot (al->alloc_list_tail()) : 0), (al->alloc_list_tail() ? 
free_list_prev (al->alloc_list_tail()) : 0))); } #endif //DOUBLY_LINKED_FL } void allocator::thread_item_front (uint8_t* item, size_t size) { unsigned int a_l_number = first_suitable_bucket (size); alloc_list* al = &alloc_list_of (a_l_number); if (al->alloc_list_head() == 0) { assert (al->alloc_list_tail() == 0); } free_list_slot (item) = al->alloc_list_head(); free_list_undo (item) = UNDO_EMPTY; if (al->alloc_list_tail() == 0) { assert (al->alloc_list_head() == 0); al->alloc_list_tail() = al->alloc_list_head(); } #ifdef DOUBLY_LINKED_FL if (gen_number == max_generation) { if (al->alloc_list_head() != 0) { free_list_prev (al->alloc_list_head()) = item; } } #endif //DOUBLY_LINKED_FL al->alloc_list_head() = item; if (al->alloc_list_tail() == 0) { al->alloc_list_tail() = item; } #ifdef DOUBLY_LINKED_FL if (gen_number == max_generation) { free_list_prev (item) = 0; dprintf (3333, ("[g%2d, b%2d]TFF: exit: %Ix->%Ix->%Ix (h: %Ix, t: %Ix)", gen_number, a_l_number, free_list_prev (item), item, free_list_slot (item), al->alloc_list_head(), al->alloc_list_tail())); dprintf (3333, ("[g%2d, b%2d]TFF: h->N: %Ix, h->P: %Ix, t->N: %Ix, t->P: %Ix", gen_number, a_l_number, (al->alloc_list_head() ? free_list_slot (al->alloc_list_head()) : 0), (al->alloc_list_head() ? free_list_prev (al->alloc_list_head()) : 0), (al->alloc_list_tail() ? free_list_slot (al->alloc_list_tail()) : 0), (al->alloc_list_tail() ? free_list_prev (al->alloc_list_tail()) : 0))); } #endif //DOUBLY_LINKED_FL } void allocator::copy_to_alloc_list (alloc_list* toalist) { for (unsigned int i = 0; i < num_buckets; i++) { toalist [i] = alloc_list_of (i); #ifdef FL_VERIFICATION size_t damage_count = alloc_list_damage_count_of (i); // We are only calling this method to copy to an empty list // so damage count is always 0 assert (damage_count == 0); uint8_t* free_item = alloc_list_head_of (i); size_t count = 0; while (free_item) { count++; free_item = free_list_slot (free_item); } toalist[i].item_count = count; #endif //FL_VERIFICATION } } void allocator::copy_from_alloc_list (alloc_list* fromalist) { BOOL repair_list = !discard_if_no_fit_p (); #ifdef DOUBLY_LINKED_FL BOOL bgc_repair_p = FALSE; if (gen_number == max_generation) { bgc_repair_p = TRUE; if (alloc_list_damage_count_of (0) != 0) { GCToOSInterface::DebugBreak(); } uint8_t* b0_head = alloc_list_head_of (0); if (b0_head) { free_list_prev (b0_head) = 0; } added_alloc_list_head_of (0) = 0; added_alloc_list_tail_of (0) = 0; } unsigned int start_index = (bgc_repair_p ? 1 : 0); #else unsigned int start_index = 0; #endif //DOUBLY_LINKED_FL for (unsigned int i = start_index; i < num_buckets; i++) { size_t count = alloc_list_damage_count_of (i); alloc_list_of (i) = fromalist [i]; assert (alloc_list_damage_count_of (i) == 0); if (repair_list) { //repair the the list //new items may have been added during the plan phase //items may have been unlinked. 
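// Shape of the undo scheme being repaired here (see unlink_item): removing an item during
// the plan phase stores it in the predecessor's free_list_undo slot and bumps the bucket's
// damage count, while the removed item's own next pointer is left pointing at the rest of
// the list:
//     prev -> item -> next      // before the unlink
//     prev ---------> next      // after it: free_list_undo (prev) == item
// So the repair below is just free_list_slot (prev) = free_list_undo (prev), applied until
// damage-count items have been restored, whereas commit_alloc_list_changes keeps the unlink
// and only resets the undo slots back to UNDO_EMPTY.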
uint8_t* free_item = alloc_list_head_of (i); while (free_item && count) { assert (((CObjectHeader*)free_item)->IsFree()); if ((free_list_undo (free_item) != UNDO_EMPTY)) { count--; free_list_slot (free_item) = free_list_undo (free_item); free_list_undo (free_item) = UNDO_EMPTY; } free_item = free_list_slot (free_item); } #ifdef DOUBLY_LINKED_FL if (bgc_repair_p) { added_alloc_list_head_of (i) = 0; added_alloc_list_tail_of (i) = 0; } #endif //DOUBLY_LINKED_FL #ifdef FL_VERIFICATION free_item = alloc_list_head_of (i); size_t item_count = 0; while (free_item) { item_count++; free_item = free_list_slot (free_item); } assert (item_count == alloc_list_of (i).item_count); #endif //FL_VERIFICATION } #ifdef DEBUG uint8_t* tail_item = alloc_list_tail_of (i); assert ((tail_item == 0) || (free_list_slot (tail_item) == 0)); #endif } } void allocator::commit_alloc_list_changes() { BOOL repair_list = !discard_if_no_fit_p (); #ifdef DOUBLY_LINKED_FL BOOL bgc_repair_p = FALSE; if (gen_number == max_generation) { bgc_repair_p = TRUE; } #endif //DOUBLY_LINKED_FL if (repair_list) { for (unsigned int i = 0; i < num_buckets; i++) { //remove the undo info from list. uint8_t* free_item = alloc_list_head_of (i); #ifdef DOUBLY_LINKED_FL if (bgc_repair_p) { dprintf (3, ("C[b%2d] ENTRY: h: %Ix t: %Ix", i, alloc_list_head_of (i), alloc_list_tail_of (i))); } if (free_item && bgc_repair_p) { if (free_list_prev (free_item) != 0) free_list_prev (free_item) = 0; } #endif //DOUBLY_LINKED_FL size_t count = alloc_list_damage_count_of (i); while (free_item && count) { assert (((CObjectHeader*)free_item)->IsFree()); if (free_list_undo (free_item) != UNDO_EMPTY) { free_list_undo (free_item) = UNDO_EMPTY; #ifdef DOUBLY_LINKED_FL if (bgc_repair_p) { uint8_t* next_item = free_list_slot (free_item); if (next_item && (free_list_prev (next_item) != free_item)) free_list_prev (next_item) = free_item; } #endif //DOUBLY_LINKED_FL count--; } free_item = free_list_slot (free_item); } alloc_list_damage_count_of (i) = 0; #ifdef DOUBLY_LINKED_FL if (bgc_repair_p) { uint8_t* head = alloc_list_head_of (i); uint8_t* tail_added = added_alloc_list_tail_of (i); if (tail_added) { assert (free_list_slot (tail_added) == 0); if (head) { free_list_slot (tail_added) = head; free_list_prev (head) = tail_added; } } uint8_t* head_added = added_alloc_list_head_of (i); if (head_added) { alloc_list_head_of (i) = head_added; uint8_t* final_head = alloc_list_head_of (i); if (alloc_list_tail_of (i) == 0) { alloc_list_tail_of (i) = tail_added; } } added_alloc_list_head_of (i) = 0; added_alloc_list_tail_of (i) = 0; } #endif //DOUBLY_LINKED_FL } } } #ifdef USE_REGIONS void allocator::thread_sip_fl (heap_segment* region) { uint8_t* region_fl_head = region->free_list_head; uint8_t* region_fl_tail = region->free_list_tail; if (!region_fl_head) { assert (!region_fl_tail); assert (region->free_list_size == 0); return; } if (num_buckets == 1) { dprintf (REGIONS_LOG, ("threading gen%d region %Ix onto gen%d FL", heap_segment_gen_num (region), heap_segment_mem (region), gen_number)); alloc_list* al = &alloc_list_of (0); uint8_t*& head = al->alloc_list_head(); uint8_t*& tail = al->alloc_list_tail(); if (tail == 0) { assert (head == 0); head = region_fl_head; } else { free_list_slot (tail) = region_fl_head; } tail = region_fl_tail; } else { dprintf (REGIONS_LOG, ("threading gen%d region %Ix onto gen%d bucketed FL", heap_segment_gen_num (region), heap_segment_mem (region), gen_number)); // If we have a bucketed free list we'd need to go through the region's free list. 
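// Cost note: the single bucket case above splices the region's whole free list in O(1) by
// hanging region_fl_head behind the existing tail, whereas the bucketed case below has to
// visit every item so each one lands in the bucket that matches its size; the running total
// is only used to assert that it matches the region's recorded free_list_size.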
uint8_t* region_fl_item = region_fl_head; size_t total_free_size = 0; while (region_fl_item) { uint8_t* next_fl_item = free_list_slot (region_fl_item); size_t size_item = size (region_fl_item); thread_item (region_fl_item, size_item); total_free_size += size_item; region_fl_item = next_fl_item; } assert (total_free_size == region->free_list_size); } } #endif //USE_REGIONS #ifdef FEATURE_EVENT_TRACE uint16_t allocator::count_largest_items (etw_bucket_info* bucket_info, size_t max_size, size_t max_item_count, size_t* recorded_fl_info_size) { assert (gen_number == max_generation); size_t size_counted_total = 0; size_t items_counted_total = 0; uint16_t bucket_info_index = 0; for (int i = (num_buckets - 1); i >= 0; i--) { uint32_t items_counted = 0; size_t size_counted = 0; uint8_t* free_item = alloc_list_head_of ((unsigned int)i); while (free_item) { assert (((CObjectHeader*)free_item)->IsFree()); size_t free_item_size = Align (size (free_item)); size_counted_total += free_item_size; size_counted += free_item_size; items_counted_total++; items_counted++; if ((size_counted_total > max_size) || (items_counted > max_item_count)) { bucket_info[bucket_info_index++].set ((uint16_t)i, items_counted, size_counted); *recorded_fl_info_size = size_counted_total; return bucket_info_index; } free_item = free_list_slot (free_item); } if (items_counted) { bucket_info[bucket_info_index++].set ((uint16_t)i, items_counted, size_counted); } } *recorded_fl_info_size = size_counted_total; return bucket_info_index; } #endif //FEATURE_EVENT_TRACE void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, size_t size, alloc_context* acontext, uint32_t flags, heap_segment* seg, int align_const, int gen_number) { bool uoh_p = (gen_number > 0); GCSpinLock* msl = uoh_p ? &more_space_lock_uoh : &more_space_lock_soh; uint64_t& total_alloc_bytes = uoh_p ? 
total_alloc_bytes_uoh : total_alloc_bytes_soh; size_t aligned_min_obj_size = Align(min_obj_size, align_const); if (seg) { assert (heap_segment_used (seg) <= heap_segment_committed (seg)); } #ifdef MULTIPLE_HEAPS if (gen_number == 0) { if (!gen0_allocated_after_gc_p) { gen0_allocated_after_gc_p = true; } } #endif //MULTIPLE_HEAPS dprintf (3, ("Expanding segment allocation [%Ix, %Ix[", (size_t)start, (size_t)start + limit_size - aligned_min_obj_size)); if ((acontext->alloc_limit != start) && (acontext->alloc_limit + aligned_min_obj_size)!= start) { uint8_t* hole = acontext->alloc_ptr; if (hole != 0) { size_t ac_size = (acontext->alloc_limit - acontext->alloc_ptr); dprintf (3, ("filling up hole [%Ix, %Ix[", (size_t)hole, (size_t)hole + ac_size + aligned_min_obj_size)); // when we are finishing an allocation from a free list // we know that the free area was Align(min_obj_size) larger acontext->alloc_bytes -= ac_size; total_alloc_bytes -= ac_size; size_t free_obj_size = ac_size + aligned_min_obj_size; make_unused_array (hole, free_obj_size); generation_free_obj_space (generation_of (gen_number)) += free_obj_size; } acontext->alloc_ptr = start; } else { if (gen_number == 0) { #ifdef USE_REGIONS if (acontext->alloc_ptr == 0) { acontext->alloc_ptr = start; } else #endif //USE_REGIONS { size_t pad_size = aligned_min_obj_size; dprintf (3, ("contigous ac: making min obj gap %Ix->%Ix(%Id)", acontext->alloc_ptr, (acontext->alloc_ptr + pad_size), pad_size)); make_unused_array (acontext->alloc_ptr, pad_size); acontext->alloc_ptr += pad_size; } } } acontext->alloc_limit = (start + limit_size - aligned_min_obj_size); size_t added_bytes = limit_size - ((gen_number <= max_generation) ? aligned_min_obj_size : 0); acontext->alloc_bytes += added_bytes; total_alloc_bytes += added_bytes; size_t etw_allocation_amount = 0; bool fire_event_p = update_alloc_info (gen_number, added_bytes, &etw_allocation_amount); uint8_t* saved_used = 0; if (seg) { saved_used = heap_segment_used (seg); } if (seg == ephemeral_heap_segment) { //Sometimes the allocated size is advanced without clearing the //memory. Let's catch up here if (heap_segment_used (seg) < (alloc_allocated - plug_skew)) { heap_segment_used (seg) = alloc_allocated - plug_skew; assert (heap_segment_mem (seg) <= heap_segment_used (seg)); assert (heap_segment_used (seg) <= heap_segment_reserved (seg)); } } #ifdef BACKGROUND_GC else if (seg) { uint8_t* old_allocated = heap_segment_allocated (seg) - plug_skew - limit_size; #ifdef FEATURE_LOH_COMPACTION if (gen_number == loh_generation) { old_allocated -= Align (loh_padding_obj_size, align_const); } #endif //FEATURE_LOH_COMPACTION assert (heap_segment_used (seg) >= old_allocated); } #endif //BACKGROUND_GC // we are going to clear a right-edge exclusive span [clear_start, clear_limit) // but will adjust for cases when object is ok to stay dirty or the space has not seen any use yet // NB: the size and limit_size include syncblock, which is to the -1 of the object start // that effectively shifts the allocation by `plug_skew` uint8_t* clear_start = start - plug_skew; uint8_t* clear_limit = start + limit_size - plug_skew; if (flags & GC_ALLOC_ZEROING_OPTIONAL) { uint8_t* obj_start = acontext->alloc_ptr; assert(start >= obj_start); uint8_t* obj_end = obj_start + size - plug_skew; assert(obj_end >= clear_start); // if clearing at the object start, clear the syncblock. 
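// Layout reminder (offsets only, no new behavior): with plug_skew bytes of sync block in
// front of every object, the span handed out at [start, start + limit_size) is cleared as
//     clear_start = start - plug_skew
//     clear_limit = start + limit_size - plug_skew
// and when zeroing is optional the range up to obj_end = alloc_ptr + size - plug_skew is
// skipped, except that the single pointer-sized sync block slot at clear_start is still
// zeroed below when the new object starts exactly at 'start'.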
if(obj_start == start) { *(PTR_PTR)clear_start = 0; } // skip the rest of the object dprintf(3, ("zeroing optional: skipping object at %Ix->%Ix(%Id)", clear_start, obj_end, obj_end - clear_start)); clear_start = obj_end; } // check if space to clear is all dirty from prior use or only partially if ((seg == 0) || (clear_limit <= heap_segment_used (seg))) { add_saved_spinlock_info (uoh_p, me_release, mt_clr_mem); leave_spin_lock (msl); if (clear_start < clear_limit) { dprintf(3, ("clearing memory at %Ix for %d bytes", clear_start, clear_limit - clear_start)); memclr(clear_start, clear_limit - clear_start); } } else { // we only need to clear [clear_start, used) and only if clear_start < used uint8_t* used = heap_segment_used (seg); heap_segment_used (seg) = clear_limit; add_saved_spinlock_info (uoh_p, me_release, mt_clr_mem); leave_spin_lock (msl); if (clear_start < used) { if (used != saved_used) { FATAL_GC_ERROR(); } dprintf (2, ("clearing memory before used at %Ix for %Id bytes", clear_start, used - clear_start)); memclr (clear_start, used - clear_start); } } #ifdef FEATURE_EVENT_TRACE if (fire_event_p) { fire_etw_allocation_event (etw_allocation_amount, gen_number, acontext->alloc_ptr, size); } #endif //FEATURE_EVENT_TRACE //this portion can be done after we release the lock if (seg == ephemeral_heap_segment || ((seg == nullptr) && (gen_number == 0) && (limit_size >= CLR_SIZE / 2))) { if (gen0_must_clear_bricks > 0) { //set the brick table to speed up find_object size_t b = brick_of (acontext->alloc_ptr); set_brick (b, acontext->alloc_ptr - brick_address (b)); b++; dprintf (3, ("Allocation Clearing bricks [%Ix, %Ix[", b, brick_of (align_on_brick (start + limit_size)))); volatile short* x = &brick_table [b]; short* end_x = &brick_table [brick_of (align_on_brick (start + limit_size))]; for (;x < end_x;x++) *x = -1; } else { gen0_bricks_cleared = FALSE; } } // verifying the memory is completely cleared. //if (!(flags & GC_ALLOC_ZEROING_OPTIONAL)) //{ // verify_mem_cleared(start - plug_skew, limit_size); //} } size_t gc_heap::new_allocation_limit (size_t size, size_t physical_limit, int gen_number) { dynamic_data* dd = dynamic_data_of (gen_number); ptrdiff_t new_alloc = dd_new_allocation (dd); assert (new_alloc == (ptrdiff_t)Align (new_alloc, get_alignment_constant (gen_number < uoh_start_generation))); ptrdiff_t logical_limit = max (new_alloc, (ptrdiff_t)size); size_t limit = min (logical_limit, (ptrdiff_t)physical_limit); assert (limit == Align (limit, get_alignment_constant (gen_number <= max_generation))); return limit; } size_t gc_heap::limit_from_size (size_t size, uint32_t flags, size_t physical_limit, int gen_number, int align_const) { size_t padded_size = size + Align (min_obj_size, align_const); // for LOH this is not true...we could select a physical_limit that's exactly the same // as size. assert ((gen_number != 0) || (physical_limit >= padded_size)); // For SOH if the size asked for is very small, we want to allocate more than just what's asked for if possible. // Unless we were told not to clean, then we will not force it. size_t min_size_to_allocate = ((gen_number == 0 && !(flags & GC_ALLOC_ZEROING_OPTIONAL)) ? 
allocation_quantum : 0); size_t desired_size_to_allocate = max (padded_size, min_size_to_allocate); size_t new_physical_limit = min (physical_limit, desired_size_to_allocate); size_t new_limit = new_allocation_limit (padded_size, new_physical_limit, gen_number); assert (new_limit >= (size + Align (min_obj_size, align_const))); dprintf (3, ("h%d requested to allocate %Id bytes, actual size is %Id, phy limit: %Id", heap_number, size, new_limit, physical_limit)); return new_limit; } void gc_heap::add_to_oom_history_per_heap() { oom_history* current_hist = &oomhist_per_heap[oomhist_index_per_heap]; memcpy (current_hist, &oom_info, sizeof (oom_info)); oomhist_index_per_heap++; if (oomhist_index_per_heap == max_oom_history_count) { oomhist_index_per_heap = 0; } } void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, uint8_t* allocated, uint8_t* reserved) { if (reason == oom_budget) { alloc_size = dd_min_size (dynamic_data_of (0)) / 2; } if ((reason == oom_budget) && ((!fgm_result.loh_p) && (fgm_result.fgm != fgm_no_failure))) { // This means during the last GC we needed to reserve and/or commit more memory // but we couldn't. We proceeded with the GC and ended up not having enough // memory at the end. This is a legitimate OOM situtation. Otherwise we // probably made a mistake and didn't expand the heap when we should have. reason = oom_low_mem; } oom_info.reason = reason; oom_info.allocated = allocated; oom_info.reserved = reserved; oom_info.alloc_size = alloc_size; oom_info.gc_index = settings.gc_index; oom_info.fgm = fgm_result.fgm; oom_info.size = fgm_result.size; oom_info.available_pagefile_mb = fgm_result.available_pagefile_mb; oom_info.loh_p = fgm_result.loh_p; add_to_oom_history_per_heap(); fgm_result.fgm = fgm_no_failure; // Break early - before the more_space_lock is release so no other threads // could have allocated on the same heap when OOM happened. if (GCConfig::GetBreakOnOOM()) { GCToOSInterface::DebugBreak(); } } #ifdef BACKGROUND_GC BOOL gc_heap::background_allowed_p() { return ( gc_can_use_concurrent && ((settings.pause_mode == pause_interactive) || (settings.pause_mode == pause_sustained_low_latency)) ); } #endif //BACKGROUND_GC void gc_heap::check_for_full_gc (int gen_num, size_t size) { BOOL should_notify = FALSE; // if we detect full gc because of the allocation budget specified this is TRUE; // it's FALSE if it's due to other factors. BOOL alloc_factor = TRUE; int n_initial = gen_num; BOOL local_blocking_collection = FALSE; BOOL local_elevation_requested = FALSE; int new_alloc_remain_percent = 0; if (full_gc_approach_event_set) { return; } if (gen_num < max_generation) { gen_num = max_generation; } dynamic_data* dd_full = dynamic_data_of (gen_num); ptrdiff_t new_alloc_remain = 0; uint32_t pct = (gen_num >= uoh_start_generation) ? fgn_loh_percent : fgn_maxgen_percent; for (int gen_index = 0; gen_index < total_generation_count; gen_index++) { dprintf (2, ("FGN: h#%d: gen%d: %Id(%Id)", heap_number, gen_index, dd_new_allocation (dynamic_data_of (gen_index)), dd_desired_allocation (dynamic_data_of (gen_index)))); } // For small object allocations we only check every fgn_check_quantum bytes. 
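// dd_new_allocation for gen0 counts down as bytes are allocated, so
// (fgn_last_alloc - dd_new_allocation (dd_0)) below is how much has been allocated since
// this check last ran; the notification math is skipped until that delta reaches
// fgn_check_quantum, which keeps the hot allocation path from re-evaluating budgets on every
// small allocation. Illustrative trace: if fgn_check_quantum were 64 KB, a thread allocating
// 1 KB objects would run the full check roughly once per 64 allocations.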
if (n_initial == 0) { dprintf (2, ("FGN: gen0 last recorded alloc: %Id", fgn_last_alloc)); dynamic_data* dd_0 = dynamic_data_of (n_initial); if (((fgn_last_alloc - dd_new_allocation (dd_0)) < fgn_check_quantum) && (dd_new_allocation (dd_0) >= 0)) { return; } else { fgn_last_alloc = dd_new_allocation (dd_0); dprintf (2, ("FGN: gen0 last recorded alloc is now: %Id", fgn_last_alloc)); } // We don't consider the size that came from soh 'cause it doesn't contribute to the // gen2 budget. size = 0; } int n = 0; for (int i = 1; i <= max_generation; i++) { if (get_new_allocation (i) <= 0) { n = i; } else break; } dprintf (2, ("FGN: h#%d: gen%d budget exceeded", heap_number, n)); if (gen_num == max_generation) { // If it's small object heap we should first see if we will even be looking at gen2 budget // in the next GC or not. If not we should go directly to checking other factors. if (n < (max_generation - 1)) { goto check_other_factors; } } new_alloc_remain = dd_new_allocation (dd_full) - size; new_alloc_remain_percent = (int)(((float)(new_alloc_remain) / (float)dd_desired_allocation (dd_full)) * 100); dprintf (2, ("FGN: alloc threshold for gen%d is %d%%, current threshold is %d%%", gen_num, pct, new_alloc_remain_percent)); if (new_alloc_remain_percent <= (int)pct) { #ifdef BACKGROUND_GC // If background GC is enabled, we still want to check whether this will // be a blocking GC or not because we only want to notify when it's a // blocking full GC. if (background_allowed_p()) { goto check_other_factors; } #endif //BACKGROUND_GC should_notify = TRUE; goto done; } check_other_factors: dprintf (2, ("FGC: checking other factors")); n = generation_to_condemn (n, &local_blocking_collection, &local_elevation_requested, TRUE); if (local_elevation_requested && (n == max_generation)) { if (settings.should_lock_elevation) { int local_elevation_locked_count = settings.elevation_locked_count + 1; if (local_elevation_locked_count != 6) { dprintf (2, ("FGN: lock count is %d - Condemning max_generation-1", local_elevation_locked_count)); n = max_generation - 1; } } } dprintf (2, ("FGN: we estimate gen%d will be collected", n)); #ifdef BACKGROUND_GC // When background GC is enabled it decreases the accuracy of our predictability - // by the time the GC happens, we may not be under BGC anymore. If we try to // predict often enough it should be ok. if ((n == max_generation) && (gc_heap::background_running_p())) { n = max_generation - 1; dprintf (2, ("FGN: bgc - 1 instead of 2")); } if ((n == max_generation) && !local_blocking_collection) { if (!background_allowed_p()) { local_blocking_collection = TRUE; } } #endif //BACKGROUND_GC dprintf (2, ("FGN: we estimate gen%d will be collected: %s", n, (local_blocking_collection ? "blocking" : "background"))); if ((n == max_generation) && local_blocking_collection) { alloc_factor = FALSE; should_notify = TRUE; goto done; } done: if (should_notify) { dprintf (2, ("FGN: gen%d detecting full GC approaching(%s) (GC#%d) (%Id%% left in gen%d)", n_initial, (alloc_factor ? 
"alloc" : "other"), dd_collection_count (dynamic_data_of (0)), new_alloc_remain_percent, gen_num)); send_full_gc_notification (n_initial, alloc_factor); } } void gc_heap::send_full_gc_notification (int gen_num, BOOL due_to_alloc_p) { if (!full_gc_approach_event_set) { assert (full_gc_approach_event.IsValid()); FIRE_EVENT(GCFullNotify_V1, gen_num, due_to_alloc_p); full_gc_end_event.Reset(); full_gc_approach_event.Set(); full_gc_approach_event_set = true; } } wait_full_gc_status gc_heap::full_gc_wait (GCEvent *event, int time_out_ms) { #ifdef MULTIPLE_HEAPS gc_heap* hp = gc_heap::g_heaps[0]; #else gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS if (hp->fgn_maxgen_percent == 0) { return wait_full_gc_na; } uint32_t wait_result = user_thread_wait(event, FALSE, time_out_ms); if ((wait_result == WAIT_OBJECT_0) || (wait_result == WAIT_TIMEOUT)) { if (hp->fgn_maxgen_percent == 0) { return wait_full_gc_cancelled; } if (wait_result == WAIT_OBJECT_0) { #ifdef BACKGROUND_GC if (fgn_last_gc_was_concurrent) { fgn_last_gc_was_concurrent = FALSE; return wait_full_gc_na; } else #endif //BACKGROUND_GC { return wait_full_gc_success; } } else { return wait_full_gc_timeout; } } else { return wait_full_gc_failed; } } size_t gc_heap::get_full_compact_gc_count() { return full_gc_counts[gc_type_compacting]; } // DTREVIEW - we should check this in dt_low_ephemeral_space_p // as well. inline BOOL gc_heap::short_on_end_of_seg (heap_segment* seg) { uint8_t* allocated = heap_segment_allocated (seg); #ifdef USE_REGIONS BOOL sufficient_p = sufficient_space_regions (end_gen0_region_space, end_space_after_gc()); #else BOOL sufficient_p = sufficient_space_end_seg (allocated, heap_segment_committed (seg), heap_segment_reserved (seg), end_space_after_gc()); #endif //USE_REGIONS if (!sufficient_p) { if (sufficient_gen0_space_p) { dprintf (GTC_LOG, ("gen0 has enough free space")); } sufficient_p = sufficient_gen0_space_p; } return !sufficient_p; } #ifdef _MSC_VER #pragma warning(disable:4706) // "assignment within conditional expression" is intentional in this function. 
#endif // _MSC_VER inline BOOL gc_heap::a_fit_free_list_p (int gen_number, size_t size, alloc_context* acontext, uint32_t flags, int align_const) { BOOL can_fit = FALSE; generation* gen = generation_of (gen_number); allocator* gen_allocator = generation_allocator (gen); for (unsigned int a_l_idx = gen_allocator->first_suitable_bucket(size); a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++) { uint8_t* free_list = gen_allocator->alloc_list_head_of (a_l_idx); uint8_t* prev_free_item = 0; while (free_list != 0) { dprintf (3, ("considering free list %Ix", (size_t)free_list)); size_t free_list_size = unused_array_size (free_list); if ((size + Align (min_obj_size, align_const)) <= free_list_size) { dprintf (3, ("Found adequate unused area: [%Ix, size: %Id", (size_t)free_list, free_list_size)); gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE); // We ask for more Align (min_obj_size) // to make sure that we can insert a free object // in adjust_limit will set the limit lower size_t limit = limit_from_size (size, flags, free_list_size, gen_number, align_const); dd_new_allocation (dynamic_data_of (gen_number)) -= limit; uint8_t* remain = (free_list + limit); size_t remain_size = (free_list_size - limit); if (remain_size >= Align(min_free_list, align_const)) { make_unused_array (remain, remain_size); gen_allocator->thread_item_front (remain, remain_size); assert (remain_size >= Align (min_obj_size, align_const)); } else { //absorb the entire free list limit += remain_size; } generation_free_list_space (gen) -= limit; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); adjust_limit_clr (free_list, limit, size, acontext, flags, 0, align_const, gen_number); can_fit = TRUE; goto end; } else if (gen_allocator->discard_if_no_fit_p()) { assert (prev_free_item == 0); dprintf (3, ("couldn't use this free area, discarding")); generation_free_obj_space (gen) += free_list_size; gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE); generation_free_list_space (gen) -= free_list_size; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); } else { prev_free_item = free_list; } free_list = free_list_slot (free_list); } } end: return can_fit; } #ifdef BACKGROUND_GC void gc_heap::bgc_uoh_alloc_clr (uint8_t* alloc_start, size_t size, alloc_context* acontext, uint32_t flags, int gen_number, int align_const, int lock_index, BOOL check_used_p, heap_segment* seg) { make_unused_array (alloc_start, size); #ifdef DOUBLY_LINKED_FL clear_prev_bit (alloc_start, size); #endif //DOUBLY_LINKED_FL size_t size_of_array_base = sizeof(ArrayBase); bgc_alloc_lock->uoh_alloc_done_with_index (lock_index); // clear memory while not holding the lock. size_t size_to_skip = size_of_array_base; size_t size_to_clear = size - size_to_skip - plug_skew; size_t saved_size_to_clear = size_to_clear; if (check_used_p) { uint8_t* end = alloc_start + size - plug_skew; uint8_t* used = heap_segment_used (seg); if (used < end) { if ((alloc_start + size_to_skip) < used) { size_to_clear = used - (alloc_start + size_to_skip); } else { size_to_clear = 0; } dprintf (2, ("bgc uoh: setting used to %Ix", end)); heap_segment_used (seg) = end; } dprintf (2, ("bgc uoh: used: %Ix, alloc: %Ix, end of alloc: %Ix, clear %Id bytes", used, alloc_start, end, size_to_clear)); } else { dprintf (2, ("bgc uoh: [%Ix-[%Ix(%Id)", alloc_start, alloc_start+size, size)); } #ifdef VERIFY_HEAP // since we filled in 0xcc for free object when we verify heap, // we need to make sure we clear those bytes. 
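// When heap verification (HEAPVERIFY_GC) is on, the originally computed clear size
// (saved_size_to_clear) is restored below so the 0xcc fill pattern used for free
// objects cannot survive into the object we are about to hand out.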
if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) { if (size_to_clear < saved_size_to_clear) { size_to_clear = saved_size_to_clear; } } #endif //VERIFY_HEAP size_t allocated_size = size - Align (min_obj_size, align_const); total_alloc_bytes_uoh += allocated_size; size_t etw_allocation_amount = 0; bool fire_event_p = update_alloc_info (gen_number, allocated_size, &etw_allocation_amount); dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear uoh obj", heap_number)); add_saved_spinlock_info (true, me_release, mt_clr_large_mem); leave_spin_lock (&more_space_lock_uoh); #ifdef FEATURE_EVENT_TRACE if (fire_event_p) { fire_etw_allocation_event (etw_allocation_amount, gen_number, alloc_start, size); } #endif //FEATURE_EVENT_TRACE ((void**) alloc_start)[-1] = 0; //clear the sync block if (!(flags & GC_ALLOC_ZEROING_OPTIONAL)) { memclr(alloc_start + size_to_skip, size_to_clear); } bgc_alloc_lock->uoh_alloc_set (alloc_start); acontext->alloc_ptr = alloc_start; acontext->alloc_limit = (alloc_start + size - Align (min_obj_size, align_const)); // need to clear the rest of the object before we hand it out. clear_unused_array(alloc_start, size); } #endif //BACKGROUND_GC BOOL gc_heap::a_fit_free_list_uoh_p (size_t size, alloc_context* acontext, uint32_t flags, int align_const, int gen_number) { BOOL can_fit = FALSE; generation* gen = generation_of (gen_number); allocator* allocator = generation_allocator (gen); #ifdef FEATURE_LOH_COMPACTION size_t loh_pad = gen_number == loh_generation ? Align (loh_padding_obj_size, align_const) : 0; #endif //FEATURE_LOH_COMPACTION #ifdef BACKGROUND_GC int cookie = -1; #endif //BACKGROUND_GC for (unsigned int a_l_idx = allocator->first_suitable_bucket(size); a_l_idx < allocator->number_of_buckets(); a_l_idx++) { uint8_t* free_list = allocator->alloc_list_head_of (a_l_idx); uint8_t* prev_free_item = 0; while (free_list != 0) { dprintf (3, ("considering free list %Ix", (size_t)free_list)); size_t free_list_size = unused_array_size(free_list); ptrdiff_t diff = free_list_size - size; #ifdef FEATURE_LOH_COMPACTION diff -= loh_pad; #endif //FEATURE_LOH_COMPACTION // must fit exactly or leave formattable space if ((diff == 0) || (diff >= (ptrdiff_t)Align (min_obj_size, align_const))) { #ifdef BACKGROUND_GC cookie = bgc_alloc_lock->uoh_alloc_set (free_list); bgc_track_uoh_alloc(); #endif //BACKGROUND_GC allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE); remove_gen_free (gen_number, free_list_size); // Substract min obj size because limit_from_size adds it. 
Not needed for LOH size_t limit = limit_from_size (size - Align(min_obj_size, align_const), flags, free_list_size, gen_number, align_const); dd_new_allocation (dynamic_data_of (gen_number)) -= limit; #ifdef FEATURE_LOH_COMPACTION if (loh_pad) { make_unused_array (free_list, loh_pad); generation_free_obj_space (gen) += loh_pad; limit -= loh_pad; free_list += loh_pad; free_list_size -= loh_pad; } #endif //FEATURE_LOH_COMPACTION uint8_t* remain = (free_list + limit); size_t remain_size = (free_list_size - limit); if (remain_size != 0) { assert (remain_size >= Align (min_obj_size, align_const)); make_unused_array (remain, remain_size); } if (remain_size >= Align(min_free_list, align_const)) { uoh_thread_gap_front (remain, remain_size, gen); add_gen_free (gen_number, remain_size); assert (remain_size >= Align (min_obj_size, align_const)); } else { generation_free_obj_space (gen) += remain_size; } generation_free_list_space (gen) -= free_list_size; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); generation_free_list_allocated (gen) += limit; dprintf (3, ("found fit on loh at %Ix", free_list)); #ifdef BACKGROUND_GC if (cookie != -1) { bgc_uoh_alloc_clr (free_list, limit, acontext, flags, gen_number, align_const, cookie, FALSE, 0); } else #endif //BACKGROUND_GC { adjust_limit_clr (free_list, limit, size, acontext, flags, 0, align_const, gen_number); } //fix the limit to compensate for adjust_limit_clr making it too short acontext->alloc_limit += Align (min_obj_size, align_const); can_fit = TRUE; goto exit; } prev_free_item = free_list; free_list = free_list_slot (free_list); } } exit: return can_fit; } #ifdef _MSC_VER #pragma warning(default:4706) #endif // _MSC_VER BOOL gc_heap::a_fit_segment_end_p (int gen_number, heap_segment* seg, size_t size, alloc_context* acontext, uint32_t flags, int align_const, BOOL* commit_failed_p) { *commit_failed_p = FALSE; size_t limit = 0; bool hard_limit_short_seg_end_p = false; #ifdef BACKGROUND_GC int cookie = -1; #endif //BACKGROUND_GC uint8_t*& allocated = ((gen_number == 0) ? 
alloc_allocated : heap_segment_allocated(seg)); size_t pad = Align (min_obj_size, align_const); #ifdef FEATURE_LOH_COMPACTION size_t loh_pad = Align (loh_padding_obj_size, align_const); if (gen_number == loh_generation) { pad += loh_pad; } #endif //FEATURE_LOH_COMPACTION uint8_t* end = heap_segment_committed (seg) - pad; if (a_size_fit_p (size, allocated, end, align_const)) { limit = limit_from_size (size, flags, (end - allocated), gen_number, align_const); goto found_fit; } end = heap_segment_reserved (seg) - pad; if ((heap_segment_reserved (seg) != heap_segment_committed (seg)) && (a_size_fit_p (size, allocated, end, align_const))) { limit = limit_from_size (size, flags, (end - allocated), gen_number, align_const); if (grow_heap_segment (seg, (allocated + limit), &hard_limit_short_seg_end_p)) { goto found_fit; } else { if (!hard_limit_short_seg_end_p) { dprintf (2, ("can't grow segment, doing a full gc")); *commit_failed_p = TRUE; } else { assert (heap_hard_limit); } } } goto found_no_fit; found_fit: dd_new_allocation (dynamic_data_of (gen_number)) -= limit; #ifdef BACKGROUND_GC if (gen_number != 0) { cookie = bgc_alloc_lock->uoh_alloc_set (allocated); bgc_track_uoh_alloc(); } #endif //BACKGROUND_GC #ifdef FEATURE_LOH_COMPACTION if (gen_number == loh_generation) { make_unused_array (allocated, loh_pad); generation_free_obj_space (generation_of (gen_number)) += loh_pad; allocated += loh_pad; limit -= loh_pad; } #endif //FEATURE_LOH_COMPACTION #if defined (VERIFY_HEAP) && defined (_DEBUG) // we are responsible for cleaning the syncblock and we will do it later // as a part of cleanup routine and when not holding the heap lock. // However, once we move "allocated" forward and if another thread initiate verification of // the previous object, it may consider the syncblock in the "next" eligible for validation. // (see also: object.cpp/Object::ValidateInner) // Make sure it will see cleaned up state to prevent triggering occasional verification failures. // And make sure the write happens before updating "allocated" VolatileStore(((void**)allocated - 1), (void*)0); //clear the sync block #endif //VERIFY_HEAP && _DEBUG uint8_t* old_alloc; old_alloc = allocated; dprintf (3, ("found fit at end of seg: %Ix", old_alloc)); #ifdef BACKGROUND_GC if (cookie != -1) { allocated += limit; bgc_uoh_alloc_clr (old_alloc, limit, acontext, flags, gen_number, align_const, cookie, TRUE, seg); } else #endif //BACKGROUND_GC { // In a contiguous AC case with GC_ALLOC_ZEROING_OPTIONAL, deduct unspent space from the limit to // clear only what is necessary. if ((flags & GC_ALLOC_ZEROING_OPTIONAL) && ((allocated == acontext->alloc_limit) || (allocated == (acontext->alloc_limit + Align (min_obj_size, align_const))))) { assert(gen_number == 0); assert(allocated > acontext->alloc_ptr); size_t extra = allocated - acontext->alloc_ptr; limit -= extra; // Since we are not consuming all the memory we already deducted from the budget, // we should put the extra back. 
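// Illustrative example (hypothetical sizes): if 2KB of the context's previous limit is
// still unspent, extra == 2KB was subtracted from the new limit above, is credited back
// to the gen0 budget below, and one Align (min_obj_size) is re-added to leave room for
// the allocation context continuity divider.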
dynamic_data* dd = dynamic_data_of (0); dd_new_allocation (dd) += extra; // add space for an AC continuity divider limit += Align(min_obj_size, align_const); } allocated += limit; adjust_limit_clr (old_alloc, limit, size, acontext, flags, seg, align_const, gen_number); } return TRUE; found_no_fit: return FALSE; } BOOL gc_heap::uoh_a_fit_segment_end_p (int gen_number, size_t size, alloc_context* acontext, uint32_t flags, int align_const, BOOL* commit_failed_p, oom_reason* oom_r) { *commit_failed_p = FALSE; generation* gen = generation_of (gen_number); heap_segment* seg = generation_allocation_segment (gen); BOOL can_allocate_p = FALSE; while (seg) { #ifdef BACKGROUND_GC if (seg->flags & heap_segment_flags_uoh_delete) { dprintf (3, ("h%d skipping seg %Ix to be deleted", heap_number, (size_t)seg)); } else #endif //BACKGROUND_GC { if (a_fit_segment_end_p (gen_number, seg, (size - Align (min_obj_size, align_const)), acontext, flags, align_const, commit_failed_p)) { acontext->alloc_limit += Align (min_obj_size, align_const); can_allocate_p = TRUE; break; } if (*commit_failed_p) { *oom_r = oom_cant_commit; break; } } seg = heap_segment_next_rw (seg); } if (can_allocate_p) { generation_end_seg_allocated (gen) += size; } return can_allocate_p; } #ifdef BACKGROUND_GC inline void gc_heap::wait_for_background (alloc_wait_reason awr, bool loh_p) { GCSpinLock* msl = loh_p ? &more_space_lock_uoh : &more_space_lock_soh; dprintf (2, ("BGC is already in progress, waiting for it to finish")); add_saved_spinlock_info (loh_p, me_release, mt_wait_bgc); leave_spin_lock (msl); background_gc_wait (awr); enter_spin_lock (msl); add_saved_spinlock_info (loh_p, me_acquire, mt_wait_bgc); } void gc_heap::wait_for_bgc_high_memory (alloc_wait_reason awr, bool loh_p) { if (gc_heap::background_running_p()) { uint32_t memory_load; get_memory_info (&memory_load); if (memory_load >= m_high_memory_load_th) { dprintf (GTC_LOG, ("high mem - wait for BGC to finish, wait reason: %d", awr)); wait_for_background (awr, loh_p); } } } #endif //BACKGROUND_GC // We request to trigger an ephemeral GC but we may get a full compacting GC. // return TRUE if that's the case. BOOL gc_heap::trigger_ephemeral_gc (gc_reason gr) { #ifdef BACKGROUND_GC wait_for_bgc_high_memory (awr_loh_oos_bgc, false); #endif //BACKGROUND_GC BOOL did_full_compact_gc = FALSE; dprintf (1, ("h%d triggering a gen1 GC", heap_number)); size_t last_full_compact_gc_count = get_full_compact_gc_count(); vm_heap->GarbageCollectGeneration(max_generation - 1, gr); #ifdef MULTIPLE_HEAPS enter_spin_lock (&more_space_lock_soh); add_saved_spinlock_info (false, me_acquire, mt_t_eph_gc); #endif //MULTIPLE_HEAPS size_t current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count > last_full_compact_gc_count) { dprintf (2, ("attempted to trigger an ephemeral GC and got a full compacting GC")); did_full_compact_gc = TRUE; } return did_full_compact_gc; } BOOL gc_heap::soh_try_fit (int gen_number, size_t size, alloc_context* acontext, uint32_t flags, int align_const, BOOL* commit_failed_p, BOOL* short_seg_end_p) { BOOL can_allocate = TRUE; if (short_seg_end_p) { *short_seg_end_p = FALSE; } can_allocate = a_fit_free_list_p (gen_number, size, acontext, flags, align_const); if (!can_allocate) { if (short_seg_end_p) { *short_seg_end_p = short_on_end_of_seg (ephemeral_heap_segment); } // If the caller doesn't care, we always try to fit at the end of seg; // otherwise we would only try if we are actually not short at end of seg. 
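// Allocation order in soh_try_fit: the gen free list was already tried above; what
// follows tries to fit at the end of the current ephemeral segment, and under
// USE_REGIONS keeps advancing to (or acquiring) new ephemeral regions until one can
// commit the requested space or we fail to get a region.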
if (!short_seg_end_p || !(*short_seg_end_p)) { #ifdef USE_REGIONS while (ephemeral_heap_segment) #endif //USE_REGIONS { can_allocate = a_fit_segment_end_p (gen_number, ephemeral_heap_segment, size, acontext, flags, align_const, commit_failed_p); #ifdef USE_REGIONS if (can_allocate) { break; } dprintf (REGIONS_LOG, ("h%d fixing region %Ix end to alloc ptr: %Ix, alloc_allocated %Ix", heap_number, heap_segment_mem (ephemeral_heap_segment), acontext->alloc_ptr, alloc_allocated)); fix_allocation_context (acontext, TRUE, FALSE); fix_youngest_allocation_area(); heap_segment* next_seg = heap_segment_next (ephemeral_heap_segment); bool new_seg = false; if (!next_seg) { assert (ephemeral_heap_segment == generation_tail_region (generation_of (gen_number))); next_seg = get_new_region (gen_number); new_seg = true; } if (next_seg) { dprintf (REGIONS_LOG, ("eph seg %Ix -> next %Ix", heap_segment_mem (ephemeral_heap_segment), heap_segment_mem (next_seg))); ephemeral_heap_segment = next_seg; if (new_seg) { GCToEEInterface::DiagAddNewRegion( heap_segment_gen_num (next_seg), heap_segment_mem (next_seg), heap_segment_allocated (next_seg), heap_segment_reserved (next_seg) ); } } else { *commit_failed_p = TRUE; dprintf (REGIONS_LOG, ("couldn't get a new ephemeral region")); return FALSE; } alloc_allocated = heap_segment_allocated (ephemeral_heap_segment); dprintf (REGIONS_LOG, ("h%d alloc_allocated is now %Ix", heap_number, alloc_allocated)); #endif //USE_REGIONS } } } return can_allocate; } allocation_state gc_heap::allocate_soh (int gen_number, size_t size, alloc_context* acontext, uint32_t flags, int align_const) { #if defined (BACKGROUND_GC) && !defined (MULTIPLE_HEAPS) if (gc_heap::background_running_p()) { background_soh_alloc_count++; if ((background_soh_alloc_count % bgc_alloc_spin_count) == 0) { add_saved_spinlock_info (false, me_release, mt_alloc_small); leave_spin_lock (&more_space_lock_soh); bool cooperative_mode = enable_preemptive(); GCToOSInterface::Sleep (bgc_alloc_spin); disable_preemptive (cooperative_mode); enter_spin_lock (&more_space_lock_soh); add_saved_spinlock_info (false, me_acquire, mt_alloc_small); } else { //GCToOSInterface::YieldThread (0); } } #endif //BACKGROUND_GC && !MULTIPLE_HEAPS gc_reason gr = reason_oos_soh; oom_reason oom_r = oom_no_failure; // No variable values should be "carried over" from one state to the other. // That's why there are local variable for each state allocation_state soh_alloc_state = a_state_start; // If we can get a new seg it means allocation will succeed. while (1) { dprintf (3, ("[h%d]soh state is %s", heap_number, allocation_state_str[soh_alloc_state])); switch (soh_alloc_state) { case a_state_can_allocate: case a_state_cant_allocate: { goto exit; } case a_state_start: { soh_alloc_state = a_state_try_fit; break; } case a_state_try_fit: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, NULL); soh_alloc_state = (can_use_existing_p ? a_state_can_allocate : (commit_failed_p ? a_state_trigger_full_compact_gc : a_state_trigger_ephemeral_gc)); break; } case a_state_try_fit_after_bgc: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; BOOL short_seg_end_p = FALSE; can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, &short_seg_end_p); soh_alloc_state = (can_use_existing_p ? a_state_can_allocate : (short_seg_end_p ? 
a_state_trigger_2nd_ephemeral_gc : a_state_trigger_full_compact_gc)); break; } case a_state_try_fit_after_cg: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; BOOL short_seg_end_p = FALSE; can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, &short_seg_end_p); if (can_use_existing_p) { soh_alloc_state = a_state_can_allocate; } #ifdef MULTIPLE_HEAPS else if (gen0_allocated_after_gc_p) { // some other threads already grabbed the more space lock and allocated // so we should attempt an ephemeral GC again. soh_alloc_state = a_state_trigger_ephemeral_gc; } #endif //MULTIPLE_HEAPS else if (short_seg_end_p) { soh_alloc_state = a_state_cant_allocate; oom_r = oom_budget; } else { assert (commit_failed_p || heap_hard_limit); soh_alloc_state = a_state_cant_allocate; oom_r = oom_cant_commit; } break; } case a_state_check_and_wait_for_bgc: { BOOL bgc_in_progress_p = FALSE; BOOL did_full_compacting_gc = FALSE; bgc_in_progress_p = check_and_wait_for_bgc (awr_gen0_oos_bgc, &did_full_compacting_gc, false); soh_alloc_state = (did_full_compacting_gc ? a_state_try_fit_after_cg : a_state_try_fit_after_bgc); break; } case a_state_trigger_ephemeral_gc: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; BOOL short_seg_end_p = FALSE; BOOL bgc_in_progress_p = FALSE; BOOL did_full_compacting_gc = FALSE; did_full_compacting_gc = trigger_ephemeral_gc (gr); if (did_full_compacting_gc) { soh_alloc_state = a_state_try_fit_after_cg; } else { can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, &short_seg_end_p); #ifdef BACKGROUND_GC bgc_in_progress_p = gc_heap::background_running_p(); #endif //BACKGROUND_GC if (can_use_existing_p) { soh_alloc_state = a_state_can_allocate; } else { if (short_seg_end_p) { if (should_expand_in_full_gc) { dprintf (2, ("gen1 GC wanted to expand!")); soh_alloc_state = a_state_trigger_full_compact_gc; } else { soh_alloc_state = (bgc_in_progress_p ? a_state_check_and_wait_for_bgc : a_state_trigger_full_compact_gc); } } else if (commit_failed_p) { soh_alloc_state = a_state_trigger_full_compact_gc; } else { #ifdef MULTIPLE_HEAPS // some other threads already grabbed the more space lock and allocated // so we should attempt an ephemeral GC again. assert (gen0_allocated_after_gc_p); soh_alloc_state = a_state_trigger_ephemeral_gc; #else //MULTIPLE_HEAPS assert (!"shouldn't get here"); #endif //MULTIPLE_HEAPS } } } break; } case a_state_trigger_2nd_ephemeral_gc: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; BOOL short_seg_end_p = FALSE; BOOL did_full_compacting_gc = FALSE; did_full_compacting_gc = trigger_ephemeral_gc (gr); if (did_full_compacting_gc) { soh_alloc_state = a_state_try_fit_after_cg; } else { can_use_existing_p = soh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, &short_seg_end_p); if (short_seg_end_p || commit_failed_p) { soh_alloc_state = a_state_trigger_full_compact_gc; } else { assert (can_use_existing_p); soh_alloc_state = a_state_can_allocate; } } break; } case a_state_trigger_full_compact_gc: { if (fgn_maxgen_percent) { dprintf (2, ("FGN: SOH doing last GC before we throw OOM")); send_full_gc_notification (max_generation, FALSE); } BOOL got_full_compacting_gc = FALSE; got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r, false); soh_alloc_state = (got_full_compacting_gc ? 
a_state_try_fit_after_cg : a_state_cant_allocate); break; } default: { assert (!"Invalid state!"); break; } } } exit: if (soh_alloc_state == a_state_cant_allocate) { assert (oom_r != oom_no_failure); handle_oom (oom_r, size, heap_segment_allocated (ephemeral_heap_segment), heap_segment_reserved (ephemeral_heap_segment)); add_saved_spinlock_info (false, me_release, mt_alloc_small_cant); leave_spin_lock (&more_space_lock_soh); } assert ((soh_alloc_state == a_state_can_allocate) || (soh_alloc_state == a_state_cant_allocate) || (soh_alloc_state == a_state_retry_allocate)); return soh_alloc_state; } #ifdef BACKGROUND_GC inline void gc_heap::bgc_track_uoh_alloc() { if (current_c_gc_state == c_gc_state_planning) { Interlocked::Increment (&uoh_alloc_thread_count); dprintf (3, ("h%d: inc lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); } } inline void gc_heap::bgc_untrack_uoh_alloc() { if (current_c_gc_state == c_gc_state_planning) { Interlocked::Decrement (&uoh_alloc_thread_count); dprintf (3, ("h%d: dec lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); } } int bgc_allocate_spin(size_t min_gc_size, size_t bgc_begin_size, size_t bgc_size_increased, size_t end_size) { if ((bgc_begin_size + bgc_size_increased) < (min_gc_size * 10)) { // just do it, no spinning return 0; } if ((bgc_begin_size >= (2 * end_size)) || (bgc_size_increased >= bgc_begin_size)) { if (bgc_begin_size >= (2 * end_size)) { dprintf (3, ("alloc-ed too much before bgc started")); } else { dprintf (3, ("alloc-ed too much after bgc started")); } // -1 means wait for bgc return -1; } else { return (int)(((float)bgc_size_increased / (float)bgc_begin_size) * 10); } } int gc_heap::bgc_loh_allocate_spin() { size_t min_gc_size = dd_min_size (dynamic_data_of (loh_generation)); size_t bgc_begin_size = bgc_begin_loh_size; size_t bgc_size_increased = bgc_loh_size_increased; size_t end_size = end_loh_size; return bgc_allocate_spin(min_gc_size, bgc_begin_size, bgc_size_increased, end_size); } int gc_heap::bgc_poh_allocate_spin() { size_t min_gc_size = dd_min_size (dynamic_data_of (poh_generation)); size_t bgc_begin_size = bgc_begin_poh_size; size_t bgc_size_increased = bgc_poh_size_increased; size_t end_size = end_poh_size; return bgc_allocate_spin(min_gc_size, bgc_begin_size, bgc_size_increased, end_size); } #endif //BACKGROUND_GC size_t gc_heap::get_uoh_seg_size (size_t size) { size_t default_seg_size = #ifdef USE_REGIONS global_region_allocator.get_large_region_alignment(); #else min_uoh_segment_size; #endif //USE_REGIONS size_t align_size = default_seg_size; int align_const = get_alignment_constant (FALSE); size_t large_seg_size = align_on_page ( max (default_seg_size, ((size + 2 * Align(min_obj_size, align_const) + OS_PAGE_SIZE + align_size) / align_size * align_size))); return large_seg_size; } BOOL gc_heap::uoh_get_new_seg (int gen_number, size_t size, BOOL* did_full_compact_gc, oom_reason* oom_r) { *did_full_compact_gc = FALSE; size_t seg_size = get_uoh_seg_size (size); heap_segment* new_seg = get_uoh_segment (gen_number, seg_size, did_full_compact_gc); if (new_seg && (gen_number == loh_generation)) { loh_alloc_since_cg += seg_size; } else { *oom_r = oom_loh; } return (new_seg != 0); } // PERF TODO: this is too aggressive; and in hard limit we should // count the actual allocated bytes instead of only updating it during // getting a new seg. 
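// retry_full_compact_gc below treats another full compacting GC as worth retrying once
// the LOH allocated since the last compacting GC (on this heap, or summed over all heaps
// under MULTIPLE_HEAPS) reaches twice the segment size this request would need.
// Illustrative, hypothetical numbers: if the computed segment size is 64MB, the retry
// kicks in once 128MB has been allocated on LOH since the last compacting GC.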
BOOL gc_heap::retry_full_compact_gc (size_t size) { size_t seg_size = get_uoh_seg_size (size); if (loh_alloc_since_cg >= (2 * (uint64_t)seg_size)) { return TRUE; } #ifdef MULTIPLE_HEAPS uint64_t total_alloc_size = 0; for (int i = 0; i < n_heaps; i++) { total_alloc_size += g_heaps[i]->loh_alloc_since_cg; } if (total_alloc_size >= (2 * (uint64_t)seg_size)) { return TRUE; } #endif //MULTIPLE_HEAPS return FALSE; } BOOL gc_heap::check_and_wait_for_bgc (alloc_wait_reason awr, BOOL* did_full_compact_gc, bool loh_p) { BOOL bgc_in_progress = FALSE; *did_full_compact_gc = FALSE; #ifdef BACKGROUND_GC if (gc_heap::background_running_p()) { bgc_in_progress = TRUE; size_t last_full_compact_gc_count = get_full_compact_gc_count(); wait_for_background (awr, loh_p); size_t current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count > last_full_compact_gc_count) { *did_full_compact_gc = TRUE; } } #endif //BACKGROUND_GC return bgc_in_progress; } BOOL gc_heap::uoh_try_fit (int gen_number, size_t size, alloc_context* acontext, uint32_t flags, int align_const, BOOL* commit_failed_p, oom_reason* oom_r) { BOOL can_allocate = TRUE; if (!a_fit_free_list_uoh_p (size, acontext, flags, align_const, gen_number)) { can_allocate = uoh_a_fit_segment_end_p (gen_number, size, acontext, flags, align_const, commit_failed_p, oom_r); #ifdef BACKGROUND_GC if (can_allocate && gc_heap::background_running_p()) { if (gen_number == poh_generation) { bgc_poh_size_increased += size; } else { bgc_loh_size_increased += size; } } #endif //BACKGROUND_GC } return can_allocate; } BOOL gc_heap::trigger_full_compact_gc (gc_reason gr, oom_reason* oom_r, bool loh_p) { BOOL did_full_compact_gc = FALSE; size_t last_full_compact_gc_count = get_full_compact_gc_count(); // Set this so the next GC will be a full compacting GC. if (!last_gc_before_oom) { last_gc_before_oom = TRUE; } #ifdef BACKGROUND_GC if (gc_heap::background_running_p()) { wait_for_background (((gr == reason_oos_soh) ? awr_gen0_oos_bgc : awr_loh_oos_bgc), loh_p); dprintf (2, ("waited for BGC - done")); } #endif //BACKGROUND_GC GCSpinLock* msl = loh_p ? &more_space_lock_uoh : &more_space_lock_soh; size_t current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count > last_full_compact_gc_count) { dprintf (3, ("a full compacting GC triggered while waiting for BGC (%d->%d)", last_full_compact_gc_count, current_full_compact_gc_count)); assert (current_full_compact_gc_count > last_full_compact_gc_count); did_full_compact_gc = TRUE; goto exit; } dprintf (3, ("h%d full GC", heap_number)); trigger_gc_for_alloc (max_generation, gr, msl, loh_p, mt_t_full_gc); current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count == last_full_compact_gc_count) { dprintf (2, ("attempted to trigger a full compacting GC but didn't get it")); // We requested a full GC but didn't get because of the elevation logic // which means we should fail. *oom_r = oom_unproductive_full_gc; } else { dprintf (3, ("h%d: T full compacting GC (%d->%d)", heap_number, last_full_compact_gc_count, current_full_compact_gc_count)); assert (current_full_compact_gc_count > last_full_compact_gc_count); did_full_compact_gc = TRUE; } exit: return did_full_compact_gc; } #ifdef RECORD_LOH_STATE void gc_heap::add_saved_loh_state (allocation_state loh_state_to_save, EEThreadId thread_id) { // When the state is can_allocate we already have released the more // space lock. 
So we are not logging states here since this code // is not thread safe. if (loh_state_to_save != a_state_can_allocate) { last_loh_states[loh_state_index].alloc_state = loh_state_to_save; last_loh_states[loh_state_index].thread_id = thread_id; loh_state_index++; if (loh_state_index == max_saved_loh_states) { loh_state_index = 0; } assert (loh_state_index < max_saved_loh_states); } } #endif //RECORD_LOH_STATE bool gc_heap::should_retry_other_heap (int gen_number, size_t size) { #ifdef MULTIPLE_HEAPS if (heap_hard_limit) { size_t min_size = dd_min_size (g_heaps[0]->dynamic_data_of (gen_number)); size_t slack_space = max (commit_min_th, min_size); bool retry_p = ((current_total_committed + size) < (heap_hard_limit - slack_space)); dprintf (1, ("%Id - %Id - total committed %Id - size %Id = %Id, %s", heap_hard_limit, slack_space, current_total_committed, size, (heap_hard_limit - slack_space - current_total_committed - size), (retry_p ? "retry" : "no retry"))); return retry_p; } else #endif //MULTIPLE_HEAPS { return false; } } allocation_state gc_heap::allocate_uoh (int gen_number, size_t size, alloc_context* acontext, uint32_t flags, int align_const) { #ifdef BACKGROUND_GC if (gc_heap::background_running_p()) { #ifdef BGC_SERVO_TUNING bool planning_p = (current_c_gc_state == c_gc_state_planning); #endif //BGC_SERVO_TUNING background_uoh_alloc_count++; //if ((background_loh_alloc_count % bgc_alloc_spin_count_loh) == 0) { #ifdef BGC_SERVO_TUNING if (planning_p) { loh_a_bgc_planning += size; } else { loh_a_bgc_marking += size; } #endif //BGC_SERVO_TUNING int spin_for_allocation = (gen_number == loh_generation) ? bgc_loh_allocate_spin() : bgc_poh_allocate_spin(); if (spin_for_allocation > 0) { add_saved_spinlock_info (true, me_release, mt_alloc_large); leave_spin_lock (&more_space_lock_uoh); bool cooperative_mode = enable_preemptive(); GCToOSInterface::YieldThread (spin_for_allocation); disable_preemptive (cooperative_mode); enter_spin_lock (&more_space_lock_uoh); add_saved_spinlock_info (true, me_acquire, mt_alloc_large); dprintf (SPINLOCK_LOG, ("[%d]spin Emsl uoh", heap_number)); } else if (spin_for_allocation < 0) { wait_for_background (awr_uoh_alloc_during_bgc, true); } } } #ifdef BGC_SERVO_TUNING else { loh_a_no_bgc += size; } #endif //BGC_SERVO_TUNING #endif //BACKGROUND_GC gc_reason gr = reason_oos_loh; generation* gen = generation_of (gen_number); oom_reason oom_r = oom_no_failure; size_t current_full_compact_gc_count = 0; // No variable values should be "carried over" from one state to the other. // That's why there are local variable for each state allocation_state uoh_alloc_state = a_state_start; #ifdef RECORD_LOH_STATE EEThreadId current_thread_id; current_thread_id.SetToCurrentThread(); #endif //RECORD_LOH_STATE // If we can get a new seg it means allocation will succeed. while (1) { dprintf (3, ("[h%d]loh state is %s", heap_number, allocation_state_str[uoh_alloc_state])); #ifdef RECORD_LOH_STATE add_saved_loh_state (loh_uoh_alloc_state, current_thread_id); #endif //RECORD_LOH_STATE switch (uoh_alloc_state) { case a_state_can_allocate: case a_state_cant_allocate: { goto exit; } case a_state_start: { uoh_alloc_state = a_state_try_fit; break; } case a_state_try_fit: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; can_use_existing_p = uoh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, &oom_r); uoh_alloc_state = (can_use_existing_p ? a_state_can_allocate : (commit_failed_p ? 
a_state_trigger_full_compact_gc : a_state_acquire_seg)); assert ((uoh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); break; } case a_state_try_fit_new_seg: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; can_use_existing_p = uoh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, &oom_r); // Even after we got a new seg it doesn't necessarily mean we can allocate, // another LOH allocating thread could have beat us to acquire the msl so // we need to try again. uoh_alloc_state = (can_use_existing_p ? a_state_can_allocate : a_state_try_fit); assert ((uoh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); break; } case a_state_try_fit_after_cg: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; can_use_existing_p = uoh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, &oom_r); // If we failed to commit, we bail right away 'cause we already did a // full compacting GC. uoh_alloc_state = (can_use_existing_p ? a_state_can_allocate : (commit_failed_p ? a_state_cant_allocate : a_state_acquire_seg_after_cg)); assert ((uoh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); break; } case a_state_try_fit_after_bgc: { BOOL commit_failed_p = FALSE; BOOL can_use_existing_p = FALSE; can_use_existing_p = uoh_try_fit (gen_number, size, acontext, flags, align_const, &commit_failed_p, &oom_r); uoh_alloc_state = (can_use_existing_p ? a_state_can_allocate : (commit_failed_p ? a_state_trigger_full_compact_gc : a_state_acquire_seg_after_bgc)); assert ((uoh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); break; } case a_state_acquire_seg: { BOOL can_get_new_seg_p = FALSE; BOOL did_full_compacting_gc = FALSE; current_full_compact_gc_count = get_full_compact_gc_count(); can_get_new_seg_p = uoh_get_new_seg (gen_number, size, &did_full_compacting_gc, &oom_r); uoh_alloc_state = (can_get_new_seg_p ? a_state_try_fit_new_seg : (did_full_compacting_gc ? a_state_check_retry_seg : a_state_check_and_wait_for_bgc)); break; } case a_state_acquire_seg_after_cg: { BOOL can_get_new_seg_p = FALSE; BOOL did_full_compacting_gc = FALSE; current_full_compact_gc_count = get_full_compact_gc_count(); can_get_new_seg_p = uoh_get_new_seg (gen_number, size, &did_full_compacting_gc, &oom_r); // Since we release the msl before we try to allocate a seg, other // threads could have allocated a bunch of segments before us so // we might need to retry. uoh_alloc_state = (can_get_new_seg_p ? a_state_try_fit_after_cg : a_state_check_retry_seg); break; } case a_state_acquire_seg_after_bgc: { BOOL can_get_new_seg_p = FALSE; BOOL did_full_compacting_gc = FALSE; current_full_compact_gc_count = get_full_compact_gc_count(); can_get_new_seg_p = uoh_get_new_seg (gen_number, size, &did_full_compacting_gc, &oom_r); uoh_alloc_state = (can_get_new_seg_p ? a_state_try_fit_new_seg : (did_full_compacting_gc ? a_state_check_retry_seg : a_state_trigger_full_compact_gc)); assert ((uoh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure)); break; } case a_state_check_and_wait_for_bgc: { BOOL bgc_in_progress_p = FALSE; BOOL did_full_compacting_gc = FALSE; bgc_in_progress_p = check_and_wait_for_bgc (awr_loh_oos_bgc, &did_full_compacting_gc, true); uoh_alloc_state = (!bgc_in_progress_p ? a_state_trigger_full_compact_gc : (did_full_compacting_gc ? 
a_state_try_fit_after_cg : a_state_try_fit_after_bgc)); break; } case a_state_trigger_full_compact_gc: { if (fgn_maxgen_percent) { dprintf (2, ("FGN: LOH doing last GC before we throw OOM")); send_full_gc_notification (max_generation, FALSE); } BOOL got_full_compacting_gc = FALSE; got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r, true); uoh_alloc_state = (got_full_compacting_gc ? a_state_try_fit_after_cg : a_state_cant_allocate); assert ((uoh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure)); break; } case a_state_check_retry_seg: { BOOL should_retry_gc = retry_full_compact_gc (size); BOOL should_retry_get_seg = FALSE; if (!should_retry_gc) { size_t last_full_compact_gc_count = current_full_compact_gc_count; current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count > last_full_compact_gc_count) { should_retry_get_seg = TRUE; } } uoh_alloc_state = (should_retry_gc ? a_state_trigger_full_compact_gc : (should_retry_get_seg ? a_state_try_fit_after_cg : a_state_cant_allocate)); assert ((uoh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure)); break; } default: { assert (!"Invalid state!"); break; } } } exit: if (uoh_alloc_state == a_state_cant_allocate) { assert (oom_r != oom_no_failure); if ((oom_r != oom_cant_commit) && should_retry_other_heap (gen_number, size)) { uoh_alloc_state = a_state_retry_allocate; } else { handle_oom (oom_r, size, 0, 0); } add_saved_spinlock_info (true, me_release, mt_alloc_large_cant); leave_spin_lock (&more_space_lock_uoh); } assert ((uoh_alloc_state == a_state_can_allocate) || (uoh_alloc_state == a_state_cant_allocate) || (uoh_alloc_state == a_state_retry_allocate)); return uoh_alloc_state; } // BGC's final mark phase will acquire the msl, so release it here and re-acquire. void gc_heap::trigger_gc_for_alloc (int gen_number, gc_reason gr, GCSpinLock* msl, bool loh_p, msl_take_state take_state) { #ifdef BACKGROUND_GC if (loh_p) { add_saved_spinlock_info (loh_p, me_release, take_state); leave_spin_lock (msl); } #endif //BACKGROUND_GC vm_heap->GarbageCollectGeneration (gen_number, gr); #ifdef MULTIPLE_HEAPS if (!loh_p) { enter_spin_lock (msl); add_saved_spinlock_info (loh_p, me_acquire, take_state); } #endif //MULTIPLE_HEAPS #ifdef BACKGROUND_GC if (loh_p) { enter_spin_lock (msl); add_saved_spinlock_info (loh_p, me_acquire, take_state); } #endif //BACKGROUND_GC } inline bool gc_heap::update_alloc_info (int gen_number, size_t allocated_size, size_t* etw_allocation_amount) { bool exceeded_p = false; int oh_index = gen_to_oh (gen_number); allocated_since_last_gc[oh_index] += allocated_size; size_t& etw_allocated = etw_allocation_running_amount[oh_index]; etw_allocated += allocated_size; if (etw_allocated > etw_allocation_tick) { *etw_allocation_amount = etw_allocated; exceeded_p = true; etw_allocated = 0; } return exceeded_p; } allocation_state gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size, uint32_t flags, int gen_number) { if (gc_heap::gc_started) { wait_for_gc_done(); return a_state_retry_allocate; } bool loh_p = (gen_number > 0); GCSpinLock* msl = loh_p ? 
&more_space_lock_uoh : &more_space_lock_soh; #ifdef SYNCHRONIZATION_STATS int64_t msl_acquire_start = GCToOSInterface::QueryPerformanceCounter(); #endif //SYNCHRONIZATION_STATS enter_spin_lock (msl); add_saved_spinlock_info (loh_p, me_acquire, mt_try_alloc); dprintf (SPINLOCK_LOG, ("[%d]Emsl for alloc", heap_number)); #ifdef SYNCHRONIZATION_STATS int64_t msl_acquire = GCToOSInterface::QueryPerformanceCounter() - msl_acquire_start; total_msl_acquire += msl_acquire; num_msl_acquired++; if (msl_acquire > 200) { num_high_msl_acquire++; } else { num_low_msl_acquire++; } #endif //SYNCHRONIZATION_STATS /* // We are commenting this out 'cause we don't see the point - we already // have checked gc_started when we were acquiring the msl - no need to check // again. This complicates the logic in bgc_suspend_EE 'cause that one would // need to release msl which causes all sorts of trouble. if (gc_heap::gc_started) { #ifdef SYNCHRONIZATION_STATS good_suspension++; #endif //SYNCHRONIZATION_STATS BOOL fStress = (g_pConfig->GetGCStressLevel() & GCConfig::GCSTRESS_TRANSITION) != 0; if (!fStress) { //Rendez vous early (MP scaling issue) //dprintf (1, ("[%d]waiting for gc", heap_number)); wait_for_gc_done(); #ifdef MULTIPLE_HEAPS return -1; #endif //MULTIPLE_HEAPS } } */ dprintf (3, ("requested to allocate %d bytes on gen%d", size, gen_number)); int align_const = get_alignment_constant (gen_number <= max_generation); if (fgn_maxgen_percent) { check_for_full_gc (gen_number, size); } #ifdef BGC_SERVO_TUNING if ((gen_number != 0) && bgc_tuning::should_trigger_bgc_loh()) { trigger_gc_for_alloc (max_generation, reason_bgc_tuning_loh, msl, loh_p, mt_try_servo_budget); } else #endif //BGC_SERVO_TUNING { bool trigger_on_budget_loh_p = #ifdef BGC_SERVO_TUNING !bgc_tuning::enable_fl_tuning; #else true; #endif //BGC_SERVO_TUNING bool check_budget_p = true; if (gen_number != 0) { check_budget_p = trigger_on_budget_loh_p; } if (check_budget_p && !(new_allocation_allowed (gen_number))) { if (fgn_maxgen_percent && (gen_number == 0)) { // We only check gen0 every so often, so take this opportunity to check again. check_for_full_gc (gen_number, size); } #ifdef BACKGROUND_GC wait_for_bgc_high_memory (awr_gen0_alloc, loh_p); #endif //BACKGROUND_GC #ifdef SYNCHRONIZATION_STATS bad_suspension++; #endif //SYNCHRONIZATION_STATS dprintf (2, ("h%d running out of budget on gen%d, gc", heap_number, gen_number)); if (!settings.concurrent || (gen_number == 0)) { trigger_gc_for_alloc (0, ((gen_number == 0) ? reason_alloc_soh : reason_alloc_loh), msl, loh_p, mt_try_budget); } } } allocation_state can_allocate = ((gen_number == 0) ? 
allocate_soh (gen_number, size, acontext, flags, align_const) : allocate_uoh (gen_number, size, acontext, flags, align_const)); return can_allocate; } #ifdef MULTIPLE_HEAPS void gc_heap::balance_heaps (alloc_context* acontext) { if (acontext->alloc_count < 4) { if (acontext->alloc_count == 0) { int home_hp_num = heap_select::select_heap (acontext); acontext->set_home_heap (GCHeap::GetHeap (home_hp_num)); gc_heap* hp = acontext->get_home_heap ()->pGenGCHeap; acontext->set_alloc_heap (acontext->get_home_heap ()); hp->alloc_context_count++; #ifdef HEAP_BALANCE_INSTRUMENTATION uint16_t ideal_proc_no = 0; GCToOSInterface::GetCurrentThreadIdealProc (&ideal_proc_no); uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber (); add_to_hb_numa (proc_no, ideal_proc_no, home_hp_num, false, true, false); dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPafter GC: 1st alloc on p%3d, h%d, ip: %d", proc_no, home_hp_num, ideal_proc_no)); #endif //HEAP_BALANCE_INSTRUMENTATION } } else { BOOL set_home_heap = FALSE; gc_heap* home_hp = NULL; int proc_hp_num = 0; #ifdef HEAP_BALANCE_INSTRUMENTATION bool alloc_count_p = true; bool multiple_procs_p = false; bool set_ideal_p = false; uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber (); uint32_t last_proc_no = proc_no; #endif //HEAP_BALANCE_INSTRUMENTATION if (heap_select::can_find_heap_fast ()) { assert (acontext->get_home_heap () != NULL); home_hp = acontext->get_home_heap ()->pGenGCHeap; proc_hp_num = heap_select::select_heap (acontext); if (home_hp != gc_heap::g_heaps[proc_hp_num]) { #ifdef HEAP_BALANCE_INSTRUMENTATION alloc_count_p = false; #endif //HEAP_BALANCE_INSTRUMENTATION set_home_heap = TRUE; } else if ((acontext->alloc_count & 15) == 0) set_home_heap = TRUE; } else { if ((acontext->alloc_count & 3) == 0) set_home_heap = TRUE; } if (set_home_heap) { /* // Since we are balancing up to MAX_SUPPORTED_CPUS, no need for this. if (n_heaps > MAX_SUPPORTED_CPUS) { // on machines with many processors cache affinity is really king, so don't even try // to balance on these. acontext->home_heap = GCHeap::GetHeap( heap_select::select_heap(acontext)); acontext->alloc_heap = acontext->home_heap; } else */ { gc_heap* org_hp = acontext->get_alloc_heap ()->pGenGCHeap; int org_hp_num = org_hp->heap_number; int final_alloc_hp_num = org_hp_num; dynamic_data* dd = org_hp->dynamic_data_of (0); ptrdiff_t org_size = dd_new_allocation (dd); ptrdiff_t total_size = (ptrdiff_t)dd_desired_allocation (dd); #ifdef HEAP_BALANCE_INSTRUMENTATION dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMP[p%3d] ph h%3d, hh: %3d, ah: %3d (%dmb-%dmb), ac: %5d(%s)", proc_no, proc_hp_num, home_hp->heap_number, org_hp_num, (total_size / 1024 / 1024), (org_size / 1024 / 1024), acontext->alloc_count, ((proc_hp_num == home_hp->heap_number) ? 
"AC" : "H"))); #endif //HEAP_BALANCE_INSTRUMENTATION int org_alloc_context_count; int max_alloc_context_count; gc_heap* max_hp; int max_hp_num = 0; ptrdiff_t max_size; size_t local_delta = max (((size_t)org_size >> 6), min_gen0_balance_delta); size_t delta = local_delta; if (((size_t)org_size + 2 * delta) >= (size_t)total_size) { acontext->alloc_count++; return; } #ifdef HEAP_BALANCE_INSTRUMENTATION proc_no = GCToOSInterface::GetCurrentProcessorNumber (); if (proc_no != last_proc_no) { dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSP: %d->%d", last_proc_no, proc_no)); multiple_procs_p = true; last_proc_no = proc_no; } int new_home_hp_num = heap_select::proc_no_to_heap_no[proc_no]; #else int new_home_hp_num = heap_select::select_heap(acontext); #endif //HEAP_BALANCE_INSTRUMENTATION gc_heap* new_home_hp = gc_heap::g_heaps[new_home_hp_num]; acontext->set_home_heap (new_home_hp->vm_heap); int start, end, finish; heap_select::get_heap_range_for_heap (new_home_hp_num, &start, &end); finish = start + n_heaps; do { max_hp = org_hp; max_hp_num = org_hp_num; max_size = org_size + delta; org_alloc_context_count = org_hp->alloc_context_count; max_alloc_context_count = org_alloc_context_count; if (org_hp == new_home_hp) max_size = max_size + delta; if (max_alloc_context_count > 1) max_size /= max_alloc_context_count; // check if the new home heap has more space if (org_hp != new_home_hp) { dd = new_home_hp->dynamic_data_of(0); ptrdiff_t size = dd_new_allocation(dd); // favor new home heap over org heap size += delta * 2; int new_home_hp_alloc_context_count = new_home_hp->alloc_context_count; if (new_home_hp_alloc_context_count > 0) size /= (new_home_hp_alloc_context_count + 1); if (size > max_size) { #ifdef HEAP_BALANCE_INSTRUMENTATION dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPorg h%d(%dmb), m h%d(%dmb)", org_hp_num, (max_size / 1024 / 1024), new_home_hp_num, (size / 1024 / 1024))); #endif //HEAP_BALANCE_INSTRUMENTATION max_hp = new_home_hp; max_size = size; max_hp_num = new_home_hp_num; max_alloc_context_count = new_home_hp_alloc_context_count; } } // consider heaps both inside our local NUMA node, // and outside, but with different thresholds enum { LOCAL_NUMA_NODE, REMOTE_NUMA_NODE }; for (int pass = LOCAL_NUMA_NODE; pass <= REMOTE_NUMA_NODE; pass++) { int count = end - start; int max_tries = min(count, 4); // we will consider max_tries consecutive (in a circular sense) // other heaps from a semi random starting point // alloc_count often increases by multiples of 16 (due to logic at top of routine), // and we want to advance the starting point by 4 between successive calls, // therefore the shift right by 2 bits int heap_num = start + ((acontext->alloc_count >> 2) + new_home_hp_num) % count; #ifdef HEAP_BALANCE_INSTRUMENTATION dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMP starting at h%d (home_heap_num = %d, alloc_count = %d)", heap_num, new_home_hp_num, acontext->alloc_count)); #endif //HEAP_BALANCE_INSTRUMENTATION for (int tries = max_tries; --tries >= 0; heap_num++) { // wrap around if we hit the end of our range if (heap_num >= end) heap_num -= count; // wrap around if we hit the end of the heap numbers if (heap_num >= n_heaps) heap_num -= n_heaps; assert (heap_num < n_heaps); gc_heap* hp = gc_heap::g_heaps[heap_num]; dd = hp->dynamic_data_of(0); ptrdiff_t size = dd_new_allocation(dd); #ifdef HEAP_BALANCE_INSTRUMENTATION dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMP looking at h%d(%dmb)", heap_num, (size / 1024 / 1024))); #endif //HEAP_BALANCE_INSTRUMENTATION // if the size is not bigger than what we already have, // give 
up immediately, as it can't be a winner... // this is a micro-optimization to avoid fetching the // alloc_context_count and possibly dividing by it if (size <= max_size) continue; int hp_alloc_context_count = hp->alloc_context_count; if (hp_alloc_context_count > 0) { size /= (hp_alloc_context_count + 1); } if (size > max_size) { #ifdef HEAP_BALANCE_INSTRUMENTATION dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPorg h%d(%dmb), m h%d(%dmb)", org_hp_num, (max_size / 1024 / 1024), hp->heap_number, (size / 1024 / 1024))); #endif //HEAP_BALANCE_INSTRUMENTATION max_hp = hp; max_size = size; max_hp_num = max_hp->heap_number; max_alloc_context_count = hp_alloc_context_count; } } if ((max_hp == org_hp) && (end < finish)) { start = end; end = finish; delta = local_delta * 2; // Make it twice as hard to balance to remote nodes on NUMA. } else { // we already found a better heap, or there are no remote NUMA nodes break; } } } while (org_alloc_context_count != org_hp->alloc_context_count || max_alloc_context_count != max_hp->alloc_context_count); #ifdef HEAP_BALANCE_INSTRUMENTATION uint16_t ideal_proc_no_before_set_ideal = 0; GCToOSInterface::GetCurrentThreadIdealProc (&ideal_proc_no_before_set_ideal); #endif //HEAP_BALANCE_INSTRUMENTATION if (max_hp != org_hp) { final_alloc_hp_num = max_hp->heap_number; org_hp->alloc_context_count--; max_hp->alloc_context_count++; acontext->set_alloc_heap (GCHeap::GetHeap (final_alloc_hp_num)); if (!gc_thread_no_affinitize_p) { uint16_t src_proc_no = heap_select::find_proc_no_from_heap_no (org_hp->heap_number); uint16_t dst_proc_no = heap_select::find_proc_no_from_heap_no (max_hp->heap_number); dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSW! h%d(p%d)->h%d(p%d)", org_hp_num, src_proc_no, final_alloc_hp_num, dst_proc_no)); #ifdef HEAP_BALANCE_INSTRUMENTATION int current_proc_no_before_set_ideal = GCToOSInterface::GetCurrentProcessorNumber (); if (current_proc_no_before_set_ideal != last_proc_no) { dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSPa: %d->%d", last_proc_no, current_proc_no_before_set_ideal)); multiple_procs_p = true; } #endif //HEAP_BALANCE_INSTRUMENTATION if (!GCToOSInterface::SetCurrentThreadIdealAffinity (src_proc_no, dst_proc_no)) { dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPFailed to set the ideal processor for heap %d %d->%d", org_hp->heap_number, (int)src_proc_no, (int)dst_proc_no)); } #ifdef HEAP_BALANCE_INSTRUMENTATION else { set_ideal_p = true; } #endif //HEAP_BALANCE_INSTRUMENTATION } } #ifdef HEAP_BALANCE_INSTRUMENTATION add_to_hb_numa (proc_no, ideal_proc_no_before_set_ideal, final_alloc_hp_num, multiple_procs_p, alloc_count_p, set_ideal_p); #endif //HEAP_BALANCE_INSTRUMENTATION } } } acontext->alloc_count++; } ptrdiff_t gc_heap::get_balance_heaps_uoh_effective_budget (int generation_num) { if (heap_hard_limit) { const ptrdiff_t free_list_space = generation_free_list_space (generation_of (generation_num)); heap_segment* seg = generation_start_segment (generation_of (generation_num)); assert (heap_segment_next (seg) == nullptr); const ptrdiff_t allocated = heap_segment_allocated (seg) - seg->mem; // We could calculate the actual end_of_seg_space by taking reserved - allocated, // but all heaps have the same reserved memory and this value is only used for comparison. 
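// This value is only meaningful relative to the other heaps: under a hard limit,
// balance_heaps_uoh treats the heap with the largest (free list space minus what is
// already allocated on its single segment) as having the most effective room.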
return free_list_space - allocated; } else { return dd_new_allocation (dynamic_data_of (generation_num)); } } gc_heap* gc_heap::balance_heaps_uoh (alloc_context* acontext, size_t alloc_size, int generation_num) { const int home_hp_num = heap_select::select_heap(acontext); dprintf (3, ("[h%d] LA: %Id", home_hp_num, alloc_size)); gc_heap* home_hp = GCHeap::GetHeap(home_hp_num)->pGenGCHeap; dynamic_data* dd = home_hp->dynamic_data_of (generation_num); const ptrdiff_t home_hp_size = home_hp->get_balance_heaps_uoh_effective_budget (generation_num); size_t delta = dd_min_size (dd) / 2; int start, end; heap_select::get_heap_range_for_heap(home_hp_num, &start, &end); const int finish = start + n_heaps; try_again: gc_heap* max_hp = home_hp; ptrdiff_t max_size = home_hp_size + delta; dprintf (3, ("home hp: %d, max size: %d", home_hp_num, max_size)); for (int i = start; i < end; i++) { gc_heap* hp = GCHeap::GetHeap(i%n_heaps)->pGenGCHeap; const ptrdiff_t size = hp->get_balance_heaps_uoh_effective_budget (generation_num); dprintf (3, ("hp: %d, size: %d", hp->heap_number, size)); if (size > max_size) { max_hp = hp; max_size = size; dprintf (3, ("max hp: %d, max size: %d", max_hp->heap_number, max_size)); } } if ((max_hp == home_hp) && (end < finish)) { start = end; end = finish; delta = dd_min_size (dd) * 3 / 2; // Make it harder to balance to remote nodes on NUMA. goto try_again; } if (max_hp != home_hp) { dprintf (3, ("uoh: %d(%Id)->%d(%Id)", home_hp->heap_number, dd_new_allocation (home_hp->dynamic_data_of (generation_num)), max_hp->heap_number, dd_new_allocation (max_hp->dynamic_data_of (generation_num)))); } return max_hp; } gc_heap* gc_heap::balance_heaps_uoh_hard_limit_retry (alloc_context* acontext, size_t alloc_size, int generation_num) { assert (heap_hard_limit); const int home_heap = heap_select::select_heap(acontext); dprintf (3, ("[h%d] balance_heaps_loh_hard_limit_retry alloc_size: %d", home_heap, alloc_size)); int start, end; heap_select::get_heap_range_for_heap (home_heap, &start, &end); const int finish = start + n_heaps; gc_heap* max_hp = nullptr; size_t max_end_of_seg_space = alloc_size; // Must be more than this much, or return NULL try_again: { for (int i = start; i < end; i++) { gc_heap* hp = GCHeap::GetHeap (i%n_heaps)->pGenGCHeap; heap_segment* seg = generation_start_segment (hp->generation_of (generation_num)); // With a hard limit, there is only one segment. assert (heap_segment_next (seg) == nullptr); const size_t end_of_seg_space = heap_segment_reserved (seg) - heap_segment_allocated (seg); if (end_of_seg_space >= max_end_of_seg_space) { dprintf (3, ("Switching heaps in hard_limit_retry! To: [h%d], New end_of_seg_space: %d", hp->heap_number, end_of_seg_space)); max_end_of_seg_space = end_of_seg_space; max_hp = hp; } } } // Only switch to a remote NUMA node if we didn't find space on this one. 
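// The retry below widens the scan from the home NUMA node's heap range [start, end) to
// the remaining heaps [end, finish), the same local-then-remote pattern used by
// balance_heaps and balance_heaps_uoh.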
if ((max_hp == nullptr) && (end < finish)) { start = end; end = finish; goto try_again; } return max_hp; } #endif //MULTIPLE_HEAPS BOOL gc_heap::allocate_more_space(alloc_context* acontext, size_t size, uint32_t flags, int alloc_generation_number) { allocation_state status = a_state_start; do { #ifdef MULTIPLE_HEAPS if (alloc_generation_number == 0) { balance_heaps (acontext); status = acontext->get_alloc_heap()->pGenGCHeap->try_allocate_more_space (acontext, size, flags, alloc_generation_number); } else { gc_heap* alloc_heap; if (heap_hard_limit && (status == a_state_retry_allocate)) { alloc_heap = balance_heaps_uoh_hard_limit_retry (acontext, size, alloc_generation_number); if (alloc_heap == nullptr) { return false; } } else { alloc_heap = balance_heaps_uoh (acontext, size, alloc_generation_number); } status = alloc_heap->try_allocate_more_space (acontext, size, flags, alloc_generation_number); if (status == a_state_retry_allocate) { dprintf (3, ("UOH h%d alloc retry!", alloc_heap->heap_number)); } } #else status = try_allocate_more_space (acontext, size, flags, alloc_generation_number); #endif //MULTIPLE_HEAPS } while (status == a_state_retry_allocate); return (status == a_state_can_allocate); } inline CObjectHeader* gc_heap::allocate (size_t jsize, alloc_context* acontext, uint32_t flags) { size_t size = Align (jsize); assert (size >= Align (min_obj_size)); { retry: uint8_t* result = acontext->alloc_ptr; acontext->alloc_ptr+=size; if (acontext->alloc_ptr <= acontext->alloc_limit) { CObjectHeader* obj = (CObjectHeader*)result; assert (obj != 0); return obj; } else { acontext->alloc_ptr -= size; #ifdef _MSC_VER #pragma inline_depth(0) #endif //_MSC_VER if (! allocate_more_space (acontext, size, flags, 0)) return 0; #ifdef _MSC_VER #pragma inline_depth(20) #endif //_MSC_VER goto retry; } } } void gc_heap::leave_allocation_segment (generation* gen) { adjust_limit (0, 0, gen); } void gc_heap::init_free_and_plug() { #ifdef FREE_USAGE_STATS int i = (settings.concurrent ? max_generation : 0); for (; i <= settings.condemned_generation; i++) { generation* gen = generation_of (i); #ifdef DOUBLY_LINKED_FL print_free_and_plug ("BGC"); #else memset (gen->gen_free_spaces, 0, sizeof (gen->gen_free_spaces)); #endif //DOUBLY_LINKED_FL memset (gen->gen_plugs, 0, sizeof (gen->gen_plugs)); memset (gen->gen_current_pinned_free_spaces, 0, sizeof (gen->gen_current_pinned_free_spaces)); } if (settings.condemned_generation != max_generation) { for (int i = (settings.condemned_generation + 1); i <= max_generation; i++) { generation* gen = generation_of (i); memset (gen->gen_plugs, 0, sizeof (gen->gen_plugs)); } } #endif //FREE_USAGE_STATS } void gc_heap::print_free_and_plug (const char* msg) { #ifdef FREE_USAGE_STATS int older_gen = ((settings.condemned_generation == max_generation) ? max_generation : (settings.condemned_generation + 1)); for (int i = 0; i <= older_gen; i++) { generation* gen = generation_of (i); for (int j = 0; j < NUM_GEN_POWER2; j++) { if ((gen->gen_free_spaces[j] != 0) || (gen->gen_plugs[j] != 0)) { dprintf (2, ("[%s][h%d][%s#%d]gen%d: 2^%d: F: %Id, P: %Id", msg, heap_number, (settings.concurrent ? 
"BGC" : "GC"), settings.gc_index, i, (j + 9), gen->gen_free_spaces[j], gen->gen_plugs[j])); } } } #else UNREFERENCED_PARAMETER(msg); #endif //FREE_USAGE_STATS } // replace with allocator::first_suitable_bucket int gc_heap::find_bucket (size_t size) { size_t sz = BASE_GEN_SIZE; int i = 0; for (; i < (NUM_GEN_POWER2 - 1); i++) { if (size < sz) { break; } sz = sz * 2; } return i; } void gc_heap::add_gen_plug (int gen_number, size_t plug_size) { #ifdef FREE_USAGE_STATS dprintf (3, ("adding plug size %Id to gen%d", plug_size, gen_number)); generation* gen = generation_of (gen_number); size_t sz = BASE_GEN_SIZE; int i = find_bucket (plug_size); (gen->gen_plugs[i])++; #else UNREFERENCED_PARAMETER(gen_number); UNREFERENCED_PARAMETER(plug_size); #endif //FREE_USAGE_STATS } void gc_heap::add_item_to_current_pinned_free (int gen_number, size_t free_size) { #ifdef FREE_USAGE_STATS generation* gen = generation_of (gen_number); size_t sz = BASE_GEN_SIZE; int i = find_bucket (free_size); (gen->gen_current_pinned_free_spaces[i])++; generation_pinned_free_obj_space (gen) += free_size; dprintf (3, ("left pin free %Id(2^%d) to gen%d, total %Id bytes (%Id)", free_size, (i + 10), gen_number, generation_pinned_free_obj_space (gen), gen->gen_current_pinned_free_spaces[i])); #else UNREFERENCED_PARAMETER(gen_number); UNREFERENCED_PARAMETER(free_size); #endif //FREE_USAGE_STATS } // This is only for items large enough to be on the FL // Ideally we should keep track of smaller ones too but for now // it's easier to make the accounting right void gc_heap::add_gen_free (int gen_number, size_t free_size) { #ifdef FREE_USAGE_STATS dprintf (3, ("adding free size %Id to gen%d", free_size, gen_number)); if (free_size < min_free_list) return; generation* gen = generation_of (gen_number); size_t sz = BASE_GEN_SIZE; int i = find_bucket (free_size); (gen->gen_free_spaces[i])++; if (gen_number == max_generation) { dprintf (3, ("Mb b%d: f+ %Id (%Id)", i, free_size, gen->gen_free_spaces[i])); } #else UNREFERENCED_PARAMETER(gen_number); UNREFERENCED_PARAMETER(free_size); #endif //FREE_USAGE_STATS } void gc_heap::remove_gen_free (int gen_number, size_t free_size) { #ifdef FREE_USAGE_STATS dprintf (3, ("removing free %Id from gen%d", free_size, gen_number)); if (free_size < min_free_list) return; generation* gen = generation_of (gen_number); size_t sz = BASE_GEN_SIZE; int i = find_bucket (free_size); (gen->gen_free_spaces[i])--; if (gen_number == max_generation) { dprintf (3, ("Mb b%d: f- %Id (%Id)", i, free_size, gen->gen_free_spaces[i])); } #else UNREFERENCED_PARAMETER(gen_number); UNREFERENCED_PARAMETER(free_size); #endif //FREE_USAGE_STATS } #ifdef DOUBLY_LINKED_FL // This is only called on free spaces. BOOL gc_heap::should_set_bgc_mark_bit (uint8_t* o) { if (!current_sweep_seg) { assert (current_bgc_state == bgc_not_in_process); return FALSE; } // This is cheaper so I am doing this comparision first before having to get the seg for o. if (in_range_for_segment (o, current_sweep_seg)) { // The current sweep seg could have free spaces beyond its background_allocated so we need // to check for that. 
if ((o >= current_sweep_pos) && (o < heap_segment_background_allocated (current_sweep_seg))) { #ifndef USE_REGIONS if (current_sweep_seg == saved_sweep_ephemeral_seg) { return (o < saved_sweep_ephemeral_start); } else #endif //!USE_REGIONS { return TRUE; } } else return FALSE; } else { // We can have segments outside the BGC range that were allocated during mark - and we // wouldn't have committed the mark array for them and their background_allocated would be // non-zero. Don't set mark bits for those. // The ones allocated during BGC sweep would have their background_allocated as 0. if ((o >= background_saved_lowest_address) && (o < background_saved_highest_address)) { heap_segment* seg = seg_mapping_table_segment_of (o); // if bgc_allocated is 0 it means it was allocated during bgc sweep, // and everything on it should be considered live. uint8_t* background_allocated = heap_segment_background_allocated (seg); if (background_allocated == 0) return FALSE; // During BGC sweep gen1 GCs could add some free spaces in gen2. // If we use those, we should not set the mark bits on them. // They could either be a newly allocated seg which is covered by the // above case; or they are on a seg that's seen but beyond what BGC mark // saw. else if (o >= background_allocated) return FALSE; else return (!heap_segment_swept_p (seg)); } else return FALSE; } } #endif //DOUBLY_LINKED_FL uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size, int from_gen_number, uint8_t* old_loc REQD_ALIGN_AND_OFFSET_DCL) { size = Align (size); assert (size >= Align (min_obj_size)); assert (from_gen_number < max_generation); assert (from_gen_number >= 0); assert (generation_of (from_gen_number + 1) == gen); #ifdef DOUBLY_LINKED_FL BOOL consider_bgc_mark_p = FALSE; BOOL check_current_sweep_p = FALSE; BOOL check_saved_sweep_p = FALSE; BOOL try_added_list_p = (gen->gen_num == max_generation); BOOL record_free_list_allocated_p = ((gen->gen_num == max_generation) && (current_c_gc_state == c_gc_state_planning)); #endif //DOUBLY_LINKED_FL allocator* gen_allocator = generation_allocator (gen); BOOL discard_p = gen_allocator->discard_if_no_fit_p (); #ifdef SHORT_PLUGS int pad_in_front = ((old_loc != 0) && ((from_gen_number+1) != max_generation)) ? USE_PADDING_FRONT : 0; #else //SHORT_PLUGS int pad_in_front = 0; #endif //SHORT_PLUGS size_t real_size = size + Align (min_obj_size); if (pad_in_front) real_size += Align (min_obj_size); #ifdef RESPECT_LARGE_ALIGNMENT real_size += switch_alignment_size (pad_in_front); #endif //RESPECT_LARGE_ALIGNMENT if (! 
(size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), generation_allocation_limit (gen), old_loc, USE_PADDING_TAIL | pad_in_front))) { for (unsigned int a_l_idx = gen_allocator->first_suitable_bucket(real_size * 2); a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++) { uint8_t* free_list = 0; uint8_t* prev_free_item = 0; BOOL use_undo_p = !discard_p; #ifdef DOUBLY_LINKED_FL if (a_l_idx == 0) { use_undo_p = FALSE; } if (try_added_list_p) { free_list = gen_allocator->added_alloc_list_head_of (a_l_idx); while (free_list != 0) { dprintf (3, ("considering free list in added list%Ix", (size_t)free_list)); size_t free_list_size = unused_array_size (free_list); if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size), old_loc, USE_PADDING_TAIL | pad_in_front)) { dprintf (4, ("F:%Ix-%Id", (size_t)free_list, free_list_size)); gen_allocator->unlink_item_no_undo_added (a_l_idx, free_list, prev_free_item); generation_free_list_space (gen) -= free_list_size; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); remove_gen_free (gen->gen_num, free_list_size); if (record_free_list_allocated_p) { generation_set_bgc_mark_bit_p (gen) = should_set_bgc_mark_bit (free_list); dprintf (3333, ("SFA: %Ix->%Ix(%d)", free_list, (free_list + free_list_size), (generation_set_bgc_mark_bit_p (gen) ? 1 : 0))); } adjust_limit (free_list, free_list_size, gen); generation_allocate_end_seg_p (gen) = FALSE; goto finished; } // We do first fit on bucket 0 because we are not guaranteed to find a fit there. else if (a_l_idx == 0) { dprintf (3, ("couldn't use this free area, discarding")); generation_free_obj_space (gen) += free_list_size; gen_allocator->unlink_item_no_undo_added (a_l_idx, free_list, prev_free_item); generation_free_list_space (gen) -= free_list_size; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); remove_gen_free (gen->gen_num, free_list_size); } else { prev_free_item = free_list; } free_list = free_list_slot (free_list); } } #endif //DOUBLY_LINKED_FL free_list = gen_allocator->alloc_list_head_of (a_l_idx); prev_free_item = 0; while (free_list != 0) { dprintf (3, ("considering free list %Ix", (size_t)free_list)); size_t free_list_size = unused_array_size (free_list); if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size), old_loc, USE_PADDING_TAIL | pad_in_front)) { dprintf (4, ("F:%Ix-%Id", (size_t)free_list, free_list_size)); gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, use_undo_p); generation_free_list_space (gen) -= free_list_size; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); remove_gen_free (gen->gen_num, free_list_size); #ifdef DOUBLY_LINKED_FL if (!discard_p && !use_undo_p) { gen2_removed_no_undo += free_list_size; dprintf (3, ("h%d: remove with no undo %Id = %Id", heap_number, free_list_size, gen2_removed_no_undo)); } if (record_free_list_allocated_p) { generation_set_bgc_mark_bit_p (gen) = should_set_bgc_mark_bit (free_list); dprintf (3333, ("SF: %Ix(%d)", free_list, (generation_set_bgc_mark_bit_p (gen) ? 1 : 0))); } #endif //DOUBLY_LINKED_FL adjust_limit (free_list, free_list_size, gen); generation_allocate_end_seg_p (gen) = FALSE; goto finished; } // We do first fit on bucket 0 because we are not guaranteed to find a fit there. 
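// A misfit is discarded (unlinked and counted as free object space) when the allocator discards no-fits or when we are on bucket 0; otherwise we remember it as prev_free_item and keep walking the list.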
else if (discard_p || (a_l_idx == 0)) { dprintf (3, ("couldn't use this free area, discarding")); generation_free_obj_space (gen) += free_list_size; gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE); generation_free_list_space (gen) -= free_list_size; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); remove_gen_free (gen->gen_num, free_list_size); #ifdef DOUBLY_LINKED_FL if (!discard_p) { gen2_removed_no_undo += free_list_size; dprintf (3, ("h%d: b0 remove with no undo %Id = %Id", heap_number, free_list_size, gen2_removed_no_undo)); } #endif //DOUBLY_LINKED_FL } else { prev_free_item = free_list; } free_list = free_list_slot (free_list); } } #ifdef USE_REGIONS // We don't want to always go back to the first region since there might be many. heap_segment* seg = generation_allocation_segment (gen); dprintf (3, ("end of seg, starting from alloc seg %Ix", heap_segment_mem (seg))); assert (seg != ephemeral_heap_segment); while (true) #else //go back to the beginning of the segment list heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); if (seg != generation_allocation_segment (gen)) { leave_allocation_segment (gen); generation_allocation_segment (gen) = seg; } while (seg != ephemeral_heap_segment) #endif //USE_REGIONS { if (size_fit_p(size REQD_ALIGN_AND_OFFSET_ARG, heap_segment_plan_allocated (seg), heap_segment_committed (seg), old_loc, USE_PADDING_TAIL | pad_in_front)) { adjust_limit (heap_segment_plan_allocated (seg), (heap_segment_committed (seg) - heap_segment_plan_allocated (seg)), gen); generation_allocate_end_seg_p (gen) = TRUE; heap_segment_plan_allocated (seg) = heap_segment_committed (seg); dprintf (3, ("seg %Ix is used for end of seg alloc", heap_segment_mem (seg))); goto finished; } else { if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, heap_segment_plan_allocated (seg), heap_segment_reserved (seg), old_loc, USE_PADDING_TAIL | pad_in_front) && grow_heap_segment (seg, heap_segment_plan_allocated (seg), old_loc, size, pad_in_front REQD_ALIGN_AND_OFFSET_ARG)) { adjust_limit (heap_segment_plan_allocated (seg), (heap_segment_committed (seg) - heap_segment_plan_allocated (seg)), gen); generation_allocate_end_seg_p (gen) = TRUE; heap_segment_plan_allocated (seg) = heap_segment_committed (seg); dprintf (3, ("seg %Ix is used for end of seg alloc after grow, %Ix", heap_segment_mem (seg), heap_segment_committed (seg))); goto finished; } else { leave_allocation_segment (gen); heap_segment* next_seg = heap_segment_next_rw (seg); #ifdef USE_REGIONS assert (next_seg != ephemeral_heap_segment); #endif //USE_REGIONS if (next_seg) { generation_allocation_segment (gen) = next_seg; generation_allocation_pointer (gen) = heap_segment_mem (next_seg); generation_allocation_limit (gen) = generation_allocation_pointer (gen); dprintf (3, ("alloc region advanced to %Ix", heap_segment_mem (next_seg))); } else { size = 0; goto finished; } } } seg = generation_allocation_segment (gen); } //No need to fix the last region. 
// Will be done later.
size = 0; goto finished; } finished: if (0 == size) { return 0; } else { uint8_t* result = generation_allocation_pointer (gen); size_t pad = 0; #ifdef SHORT_PLUGS if ((pad_in_front & USE_PADDING_FRONT) && (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) || ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH))) { pad = Align (min_obj_size); set_plug_padded (old_loc); } #endif //SHORT_PLUGS #ifdef FEATURE_STRUCTALIGN _ASSERTE(!old_loc || alignmentOffset != 0); _ASSERTE(old_loc || requiredAlignment == DATA_ALIGNMENT); if (old_loc != 0) { size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset); set_node_aligninfo (old_loc, requiredAlignment, pad1); pad += pad1; } #else // FEATURE_STRUCTALIGN if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad))) { pad += switch_alignment_size (pad != 0); set_node_realigned (old_loc); dprintf (3, ("Allocation realignment old_loc: %Ix, new_loc:%Ix", (size_t)old_loc, (size_t)(result+pad))); assert (same_large_alignment_p (result + pad, old_loc)); } #endif // FEATURE_STRUCTALIGN dprintf (3, ("Allocate %Id bytes", size)); if ((old_loc == 0) || (pad != 0)) { //allocating a non plug or a gap, so reset the start region generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); } generation_allocation_pointer (gen) += size + pad; assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen)); generation_free_obj_space (gen) += pad; if (generation_allocate_end_seg_p (gen)) { generation_end_seg_allocated (gen) += size; } else { #ifdef DOUBLY_LINKED_FL if (generation_set_bgc_mark_bit_p (gen)) { dprintf (2, ("IOM: %Ix(->%Ix(%Id) (%Ix-%Ix)", old_loc, result, pad, (size_t)(&mark_array [mark_word_of (result)]), (size_t)(mark_array [mark_word_of (result)]))); set_plug_bgc_mark_bit (old_loc); } generation_last_free_list_allocated (gen) = old_loc; #endif //DOUBLY_LINKED_FL generation_free_list_allocated (gen) += size; } generation_allocation_size (gen) += size; dprintf (3, ("aio: ptr: %Ix, limit: %Ix, sr: %Ix", generation_allocation_pointer (gen), generation_allocation_limit (gen), generation_allocation_context_start_region (gen))); return (result + pad); } } #ifndef USE_REGIONS void gc_heap::repair_allocation_in_expanded_heap (generation* consing_gen) { //make sure that every generation has a planned allocation start int gen_number = max_generation - 1; while (gen_number>= 0) { generation* gen = generation_of (gen_number); if (0 == generation_plan_allocation_start (gen)) { realloc_plan_generation_start (gen, consing_gen); assert (generation_plan_allocation_start (gen)); } gen_number--; } // now we know the planned allocation size size_t size = (generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen)); heap_segment* seg = generation_allocation_segment (consing_gen); if (generation_allocation_limit (consing_gen) == heap_segment_plan_allocated (seg)) { if (size != 0) { heap_segment_plan_allocated (seg) = generation_allocation_pointer (consing_gen); } } else { assert (settings.condemned_generation == max_generation); uint8_t* first_address = generation_allocation_limit (consing_gen); //look through the pinned plugs for relevant ones. //Look for the right pinned plug to start from.
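// The pin whose plug starts exactly at the current allocation limit absorbs the leftover space: its pinned_len is set to that size.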
size_t mi = 0; mark* m = 0; while (mi != mark_stack_tos) { m = pinned_plug_of (mi); if ((pinned_plug (m) == first_address)) break; else mi++; } assert (mi != mark_stack_tos); pinned_len (m) = size; } } //tododefrag optimize for new segment (plan_allocated == mem) uint8_t* gc_heap::allocate_in_expanded_heap (generation* gen, size_t size, BOOL& adjacentp, uint8_t* old_loc, #ifdef SHORT_PLUGS BOOL set_padding_on_saved_p, mark* pinned_plug_entry, #endif //SHORT_PLUGS BOOL consider_bestfit, int active_new_gen_number REQD_ALIGN_AND_OFFSET_DCL) { dprintf (3, ("aie: P: %Ix, size: %Ix", old_loc, size)); size = Align (size); assert (size >= Align (min_obj_size)); #ifdef SHORT_PLUGS int pad_in_front = ((old_loc != 0) && (active_new_gen_number != max_generation)) ? USE_PADDING_FRONT : 0; #else //SHORT_PLUGS int pad_in_front = 0; #endif //SHORT_PLUGS if (consider_bestfit && use_bestfit) { assert (bestfit_seg); dprintf (SEG_REUSE_LOG_1, ("reallocating 0x%Ix in expanded heap, size: %Id", old_loc, size)); return bestfit_seg->fit (old_loc, size REQD_ALIGN_AND_OFFSET_ARG); } heap_segment* seg = generation_allocation_segment (gen); if (! (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), generation_allocation_limit (gen), old_loc, ((generation_allocation_limit (gen) != heap_segment_plan_allocated (seg))? USE_PADDING_TAIL : 0) | pad_in_front))) { dprintf (3, ("aie: can't fit: ptr: %Ix, limit: %Ix", generation_allocation_pointer (gen), generation_allocation_limit (gen))); adjacentp = FALSE; uint8_t* first_address = (generation_allocation_limit (gen) ? generation_allocation_limit (gen) : heap_segment_mem (seg)); assert (in_range_for_segment (first_address, seg)); uint8_t* end_address = heap_segment_reserved (seg); dprintf (3, ("aie: first_addr: %Ix, gen alloc limit: %Ix, end_address: %Ix", first_address, generation_allocation_limit (gen), end_address)); size_t mi = 0; mark* m = 0; if (heap_segment_allocated (seg) != heap_segment_mem (seg)) { assert (settings.condemned_generation == max_generation); //look through the pinned plugs for relevant ones. //Look for the right pinned plug to start from. while (mi != mark_stack_tos) { m = pinned_plug_of (mi); if ((pinned_plug (m) >= first_address) && (pinned_plug (m) < end_address)) { dprintf (3, ("aie: found pin: %Ix", pinned_plug (m))); break; } else mi++; } if (mi != mark_stack_tos) { //fix old free list. 
size_t hsize = (generation_allocation_limit (gen) - generation_allocation_pointer (gen)); { dprintf(3,("gc filling up hole")); ptrdiff_t mi1 = (ptrdiff_t)mi; while ((mi1 >= 0) && (pinned_plug (pinned_plug_of(mi1)) != generation_allocation_limit (gen))) { dprintf (3, ("aie: checking pin %Ix", pinned_plug (pinned_plug_of(mi1)))); mi1--; } if (mi1 >= 0) { size_t saved_pinned_len = pinned_len (pinned_plug_of(mi1)); pinned_len (pinned_plug_of(mi1)) = hsize; dprintf (3, ("changing %Ix len %Ix->%Ix", pinned_plug (pinned_plug_of(mi1)), saved_pinned_len, pinned_len (pinned_plug_of(mi1)))); } } } } else { assert (generation_allocation_limit (gen) == generation_allocation_pointer (gen)); mi = mark_stack_tos; } while ((mi != mark_stack_tos) && in_range_for_segment (pinned_plug (m), seg)) { size_t len = pinned_len (m); uint8_t* free_list = (pinned_plug (m) - len); dprintf (3, ("aie: testing free item: %Ix->%Ix(%Ix)", free_list, (free_list + len), len)); if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + len), old_loc, USE_PADDING_TAIL | pad_in_front)) { dprintf (3, ("aie: Found adequate unused area: %Ix, size: %Id", (size_t)free_list, len)); { generation_allocation_pointer (gen) = free_list; generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); generation_allocation_limit (gen) = (free_list + len); } goto allocate_in_free; } mi++; m = pinned_plug_of (mi); } //switch to the end of the segment. generation_allocation_pointer (gen) = heap_segment_plan_allocated (seg); generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); heap_segment_plan_allocated (seg) = heap_segment_committed (seg); generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); dprintf (3, ("aie: switching to end of seg: %Ix->%Ix(%Ix)", generation_allocation_pointer (gen), generation_allocation_limit (gen), (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); if (!size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), generation_allocation_limit (gen), old_loc, USE_PADDING_TAIL | pad_in_front)) { dprintf (3, ("aie: ptr: %Ix, limit: %Ix, can't alloc", generation_allocation_pointer (gen), generation_allocation_limit (gen))); assert (!"Can't allocate if no free space"); return 0; } } else { adjacentp = TRUE; } allocate_in_free: { uint8_t* result = generation_allocation_pointer (gen); size_t pad = 0; #ifdef SHORT_PLUGS if ((pad_in_front & USE_PADDING_FRONT) && (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) || ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH))) { pad = Align (min_obj_size); set_padding_in_expand (old_loc, set_padding_on_saved_p, pinned_plug_entry); } #endif //SHORT_PLUGS #ifdef FEATURE_STRUCTALIGN _ASSERTE(!old_loc || alignmentOffset != 0); _ASSERTE(old_loc || requiredAlignment == DATA_ALIGNMENT); if (old_loc != 0) { size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset); set_node_aligninfo (old_loc, requiredAlignment, pad1); pad += pad1; adjacentp = FALSE; } #else // FEATURE_STRUCTALIGN if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad))) { pad += switch_alignment_size (pad != 0); set_node_realigned (old_loc); dprintf (3, ("Allocation realignment old_loc: %Ix, new_loc:%Ix", (size_t)old_loc, (size_t)(result+pad))); assert (same_large_alignment_p (result + pad, old_loc)); adjacentp = FALSE; } #endif // FEATURE_STRUCTALIGN if 
((old_loc == 0) || (pad != 0)) { //allocating a non plug or a gap, so reset the start region generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); } generation_allocation_pointer (gen) += size + pad; assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen)); dprintf (3, ("Allocated in expanded heap %Ix:%Id", (size_t)(result+pad), size)); dprintf (3, ("aie: ptr: %Ix, limit: %Ix, sr: %Ix", generation_allocation_pointer (gen), generation_allocation_limit (gen), generation_allocation_context_start_region (gen))); return result + pad; } } generation* gc_heap::ensure_ephemeral_heap_segment (generation* consing_gen) { heap_segment* seg = generation_allocation_segment (consing_gen); if (seg != ephemeral_heap_segment) { assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (seg)); assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (seg)); //fix the allocated size of the segment. heap_segment_plan_allocated (seg) = generation_allocation_pointer (consing_gen); generation* new_consing_gen = generation_of (max_generation - 1); generation_allocation_pointer (new_consing_gen) = heap_segment_mem (ephemeral_heap_segment); generation_allocation_limit (new_consing_gen) = generation_allocation_pointer (new_consing_gen); generation_allocation_context_start_region (new_consing_gen) = generation_allocation_pointer (new_consing_gen); generation_allocation_segment (new_consing_gen) = ephemeral_heap_segment; return new_consing_gen; } else return consing_gen; } #endif //!USE_REGIONS inline void gc_heap::init_alloc_info (generation* gen, heap_segment* seg) { generation_allocation_segment (gen) = seg; generation_allocation_pointer (gen) = heap_segment_mem (seg); generation_allocation_limit (gen) = generation_allocation_pointer (gen); generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); } inline heap_segment* gc_heap::get_next_alloc_seg (generation* gen) { #ifdef USE_REGIONS heap_segment* saved_region = generation_allocation_segment (gen); int gen_num = heap_segment_gen_num (saved_region); heap_segment* region = saved_region; while (1) { region = heap_segment_non_sip (region); if (region) { break; } else { if (gen_num > 0) { gen_num--; region = generation_start_segment (generation_of (gen_num)); dprintf (REGIONS_LOG, ("h%d next alloc region: switching to next gen%d start %Ix(%Ix)", heap_number, heap_segment_gen_num (region), (size_t)region, heap_segment_mem (region))); } else { assert (!"ran out regions when getting the next alloc seg!"); } } } if (region != saved_region) { dprintf (REGIONS_LOG, ("init allocate region for gen%d to %Ix(%d)", gen->gen_num, heap_segment_mem (region), heap_segment_gen_num (region))); init_alloc_info (gen, region); } return region; #else return generation_allocation_segment (gen); #endif //USE_REGIONS } uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen, size_t size, int from_gen_number, #ifdef SHORT_PLUGS BOOL* convert_to_pinned_p, uint8_t* next_pinned_plug, heap_segment* current_seg, #endif //SHORT_PLUGS uint8_t* old_loc REQD_ALIGN_AND_OFFSET_DCL) { #ifndef USE_REGIONS // Make sure that the youngest generation gap hasn't been allocated if (settings.promotion) { assert (generation_plan_allocation_start (youngest_generation) == 0); } #endif //!USE_REGIONS size = Align (size); assert (size >= Align (min_obj_size)); int to_gen_number = from_gen_number; if (from_gen_number != (int)max_generation) { to_gen_number = from_gen_number + 
(settings.promotion ? 1 : 0); } dprintf (3, ("aic gen%d: s: %Id, ac: %Ix-%Ix", gen->gen_num, size, generation_allocation_pointer (gen), generation_allocation_limit (gen))); #ifdef SHORT_PLUGS int pad_in_front = ((old_loc != 0) && (to_gen_number != max_generation)) ? USE_PADDING_FRONT : 0; #else //SHORT_PLUGS int pad_in_front = 0; #endif //SHORT_PLUGS if ((from_gen_number != -1) && (from_gen_number != (int)max_generation) && settings.promotion) { generation_condemned_allocated (generation_of (from_gen_number + (settings.promotion ? 1 : 0))) += size; generation_allocation_size (generation_of (from_gen_number + (settings.promotion ? 1 : 0))) += size; } retry: { heap_segment* seg = get_next_alloc_seg (gen); if (! (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), generation_allocation_limit (gen), old_loc, ((generation_allocation_limit (gen) != heap_segment_plan_allocated (seg))?USE_PADDING_TAIL:0)|pad_in_front))) { if ((! (pinned_plug_que_empty_p()) && (generation_allocation_limit (gen) == pinned_plug (oldest_pin())))) { size_t entry = deque_pinned_plug(); mark* pinned_plug_entry = pinned_plug_of (entry); size_t len = pinned_len (pinned_plug_entry); uint8_t* plug = pinned_plug (pinned_plug_entry); set_new_pin_info (pinned_plug_entry, generation_allocation_pointer (gen)); #ifdef USE_REGIONS if (to_gen_number == 0) { update_planned_gen0_free_space (pinned_len (pinned_plug_entry), plug); dprintf (REGIONS_LOG, ("aic: not promotion, gen0 added free space %Id at %Ix", pinned_len (pinned_plug_entry), plug)); } #endif //USE_REGIONS #ifdef FREE_USAGE_STATS generation_allocated_in_pinned_free (gen) += generation_allocated_since_last_pin (gen); dprintf (3, ("allocated %Id so far within pin %Ix, total->%Id", generation_allocated_since_last_pin (gen), plug, generation_allocated_in_pinned_free (gen))); generation_allocated_since_last_pin (gen) = 0; add_item_to_current_pinned_free (gen->gen_num, pinned_len (pinned_plug_of (entry))); #endif //FREE_USAGE_STATS dprintf (3, ("mark stack bos: %Id, tos: %Id, aic: p %Ix len: %Ix->%Ix", mark_stack_bos, mark_stack_tos, plug, len, pinned_len (pinned_plug_of (entry)))); assert(mark_stack_array[entry].len == 0 || mark_stack_array[entry].len >= Align(min_obj_size)); generation_allocation_pointer (gen) = plug + len; generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); set_allocator_next_pin (gen); //Add the size of the pinned plug to the right pinned allocations //find out which gen this pinned plug came from int frgn = object_gennum (plug); if ((frgn != (int)max_generation) && settings.promotion) { generation_pinned_allocation_sweep_size (generation_of (frgn + 1)) += len; #ifdef USE_REGIONS // With regions it's a bit more complicated since we only set the plan_gen_num // of a region after we've planned it. This means if the pinning plug is in the // the same seg we are planning, we haven't set its plan_gen_num yet. So we // need to check for that first. int togn = (in_range_for_segment (plug, seg) ? 
to_gen_number : object_gennum_plan (plug)); #else int togn = object_gennum_plan (plug); #endif //USE_REGIONS if (frgn < togn) { generation_pinned_allocation_compact_size (generation_of (togn)) += len; } } goto retry; } if (generation_allocation_limit (gen) != heap_segment_plan_allocated (seg)) { generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); dprintf (3, ("changed limit to plan alloc: %Ix", generation_allocation_limit (gen))); } else { if (heap_segment_plan_allocated (seg) != heap_segment_committed (seg)) { heap_segment_plan_allocated (seg) = heap_segment_committed (seg); generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); dprintf (3, ("changed limit to commit: %Ix", generation_allocation_limit (gen))); } else { #if !defined(RESPECT_LARGE_ALIGNMENT) && !defined(USE_REGIONS) assert (gen != youngest_generation); #endif //!RESPECT_LARGE_ALIGNMENT && !USE_REGIONS if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), heap_segment_reserved (seg), old_loc, USE_PADDING_TAIL | pad_in_front) && (grow_heap_segment (seg, generation_allocation_pointer (gen), old_loc, size, pad_in_front REQD_ALIGN_AND_OFFSET_ARG))) { dprintf (3, ("Expanded segment allocation by committing more memory")); heap_segment_plan_allocated (seg) = heap_segment_committed (seg); generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); } else { heap_segment* next_seg = heap_segment_next (seg); dprintf (REGIONS_LOG, ("aic next: %Ix(%Ix,%Ix) -> %Ix(%Ix,%Ix)", heap_segment_mem (seg), heap_segment_allocated (seg), heap_segment_plan_allocated (seg), (next_seg ? heap_segment_mem (next_seg) : 0), (next_seg ? heap_segment_allocated (next_seg) : 0), (next_seg ? heap_segment_plan_allocated (next_seg) : 0))); assert (generation_allocation_pointer (gen)>= heap_segment_mem (seg)); // Verify that all pinned plugs for this segment are consumed if (!pinned_plug_que_empty_p() && ((pinned_plug (oldest_pin()) < heap_segment_allocated (seg)) && (pinned_plug (oldest_pin()) >= generation_allocation_pointer (gen)))) { LOG((LF_GC, LL_INFO10, "remaining pinned plug %Ix while leaving segment on allocation", pinned_plug (oldest_pin()))); FATAL_GC_ERROR(); } assert (generation_allocation_pointer (gen)>= heap_segment_mem (seg)); assert (generation_allocation_pointer (gen)<= heap_segment_committed (seg)); heap_segment_plan_allocated (seg) = generation_allocation_pointer (gen); #ifdef USE_REGIONS set_region_plan_gen_num (seg, to_gen_number); if ((next_seg == 0) && (heap_segment_gen_num (seg) > 0)) { // We need to switch to a younger gen's segments so the allocate seg will be in // sync with the pins. 
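// (Regions only: when this generation's region list is exhausted, planning continues on the start region of the next younger generation.)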
next_seg = generation_start_segment (generation_of (heap_segment_gen_num (seg) - 1)); dprintf (REGIONS_LOG, ("h%d aic: switching to next gen%d start %Ix(%Ix)", heap_number, heap_segment_gen_num (next_seg), (size_t)next_seg, heap_segment_mem (next_seg))); } #endif //USE_REGIONS if (next_seg) { init_alloc_info (gen, next_seg); } else { #ifdef USE_REGIONS assert (!"should not happen for regions!"); #else return 0; //should only happen during allocation of generation 0 gap // in that case we are going to grow the heap anyway #endif //USE_REGIONS } } } } set_allocator_next_pin (gen); goto retry; } } { assert (generation_allocation_pointer (gen)>= heap_segment_mem (generation_allocation_segment (gen))); uint8_t* result = generation_allocation_pointer (gen); size_t pad = 0; #ifdef SHORT_PLUGS if ((pad_in_front & USE_PADDING_FRONT) && (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) || ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH))) { ptrdiff_t dist = old_loc - result; if (dist == 0) { dprintf (3, ("old alloc: %Ix, same as new alloc, not padding", old_loc)); pad = 0; } else { if ((dist > 0) && (dist < (ptrdiff_t)Align (min_obj_size))) { dprintf (1, ("old alloc: %Ix, only %d bytes > new alloc! Shouldn't happen", old_loc, dist)); FATAL_GC_ERROR(); } pad = Align (min_obj_size); set_plug_padded (old_loc); } } #endif //SHORT_PLUGS #ifdef FEATURE_STRUCTALIGN _ASSERTE(!old_loc || alignmentOffset != 0); _ASSERTE(old_loc || requiredAlignment == DATA_ALIGNMENT); if ((old_loc != 0)) { size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset); set_node_aligninfo (old_loc, requiredAlignment, pad1); pad += pad1; } #else // FEATURE_STRUCTALIGN if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad))) { pad += switch_alignment_size (pad != 0); set_node_realigned(old_loc); dprintf (3, ("Allocation realignment old_loc: %Ix, new_loc:%Ix", (size_t)old_loc, (size_t)(result+pad))); assert (same_large_alignment_p (result + pad, old_loc)); } #endif // FEATURE_STRUCTALIGN #ifdef SHORT_PLUGS if ((next_pinned_plug != 0) && (pad != 0) && (generation_allocation_segment (gen) == current_seg)) { assert (old_loc != 0); ptrdiff_t dist_to_next_pin = (ptrdiff_t)(next_pinned_plug - (generation_allocation_pointer (gen) + size + pad)); assert (dist_to_next_pin >= 0); if ((dist_to_next_pin >= 0) && (dist_to_next_pin < (ptrdiff_t)Align (min_obj_size))) { dprintf (3, ("%Ix->(%Ix,%Ix),%Ix(%Ix)(%Ix),NP->PP", old_loc, generation_allocation_pointer (gen), generation_allocation_limit (gen), next_pinned_plug, size, dist_to_next_pin)); clear_plug_padded (old_loc); pad = 0; *convert_to_pinned_p = TRUE; record_interesting_data_point (idp_converted_pin); return 0; } } #endif //SHORT_PLUGS if ((old_loc == 0) || (pad != 0)) { //allocating a non plug or a gap, so reset the start region generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); } generation_allocation_pointer (gen) += size + pad; assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen)); if ((pad > 0) && (to_gen_number >= 0)) { generation_free_obj_space (generation_of (to_gen_number)) += pad; } #ifdef FREE_USAGE_STATS generation_allocated_since_last_pin (gen) += size; #endif //FREE_USAGE_STATS dprintf (3, ("aic: old: %Ix ptr: %Ix, limit: %Ix, sr: %Ix, res: %Ix, pad: %Id", old_loc, generation_allocation_pointer (gen), generation_allocation_limit (gen), 
generation_allocation_context_start_region (gen), result, (size_t)pad)); assert (result + pad); return result + pad; } } int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation, int initial_gen, int current_gen, BOOL* blocking_collection_p STRESS_HEAP_ARG(int n_original)) { gc_data_global.gen_to_condemn_reasons.init(); #ifdef BGC_SERVO_TUNING if (settings.entry_memory_load == 0) { uint32_t current_memory_load = 0; uint64_t current_available_physical = 0; get_memory_info (&current_memory_load, &current_available_physical); settings.entry_memory_load = current_memory_load; settings.entry_available_physical_mem = current_available_physical; } #endif //BGC_SERVO_TUNING int n = current_gen; #ifdef MULTIPLE_HEAPS BOOL joined_last_gc_before_oom = FALSE; for (int i = 0; i < n_heaps; i++) { if (g_heaps[i]->last_gc_before_oom) { dprintf (GTC_LOG, ("h%d is setting blocking to TRUE", i)); joined_last_gc_before_oom = TRUE; break; } } #else BOOL joined_last_gc_before_oom = last_gc_before_oom; #endif //MULTIPLE_HEAPS if (joined_last_gc_before_oom && settings.pause_mode != pause_low_latency) { assert (*blocking_collection_p); } if (should_evaluate_elevation && (n == max_generation)) { dprintf (GTC_LOG, ("lock: %d(%d)", (settings.should_lock_elevation ? 1 : 0), settings.elevation_locked_count)); if (settings.should_lock_elevation) { settings.elevation_locked_count++; if (settings.elevation_locked_count == 6) { settings.elevation_locked_count = 0; } else { n = max_generation - 1; gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_avoid_unproductive); settings.elevation_reduced = TRUE; } } else { settings.elevation_locked_count = 0; } } else { settings.should_lock_elevation = FALSE; settings.elevation_locked_count = 0; } if (provisional_mode_triggered && (n == max_generation)) { // There are a few cases where we should not reduce the generation. if ((initial_gen == max_generation) || (settings.reason == reason_alloc_loh)) { // If we are doing a full GC in the provisional mode, we always // make it blocking because we don't want to get into a situation // where foreground GCs are asking for a compacting full GC right away // and not getting it. dprintf (GTC_LOG, ("full GC induced, not reducing gen")); if (initial_gen == max_generation) { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_pm_induced_fullgc_p); } else { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_pm_alloc_loh); } *blocking_collection_p = TRUE; } else if (should_expand_in_full_gc || joined_last_gc_before_oom) { dprintf (GTC_LOG, ("need full blocking GCs to expand heap or avoid OOM, not reducing gen")); assert (*blocking_collection_p); } else { dprintf (GTC_LOG, ("reducing gen in PM: %d->%d->%d", initial_gen, n, (max_generation - 1))); gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_gen1_in_pm); n = max_generation - 1; } } if (should_expand_in_full_gc) { should_expand_in_full_gc = FALSE; } if (heap_hard_limit) { // If we have already consumed 90% of the limit, we should check to see if we should compact LOH. // TODO: should unify this with gen2.
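// In short: under a hard limit we do a full compacting GC (and compact LOH) if this is the last GC before OOM, or if committed memory is at 90%+ of the limit and either LOH fragmentation or the estimated LOH reclaim is at least 1/8 of the limit.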
dprintf (GTC_LOG, ("committed %Id is %d%% of limit %Id", current_total_committed, (int)((float)current_total_committed * 100.0 / (float)heap_hard_limit), heap_hard_limit)); bool full_compact_gc_p = false; if (joined_last_gc_before_oom) { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_limit_before_oom); full_compact_gc_p = true; } else if ((current_total_committed * 10) >= (heap_hard_limit * 9)) { size_t loh_frag = get_total_gen_fragmentation (loh_generation); // If the LOH frag is >= 1/8 it's worth compacting it if ((loh_frag * 8) >= heap_hard_limit) { dprintf (GTC_LOG, ("loh frag: %Id > 1/8 of limit %Id", loh_frag, (heap_hard_limit / 8))); gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_limit_loh_frag); full_compact_gc_p = true; } else { // If there's not much fragmentation but it looks like it'll be productive to // collect LOH, do that. size_t est_loh_reclaim = get_total_gen_estimated_reclaim (loh_generation); if ((est_loh_reclaim * 8) >= heap_hard_limit) { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_limit_loh_reclaim); full_compact_gc_p = true; } dprintf (GTC_LOG, ("loh est reclaim: %Id, 1/8 of limit %Id", est_loh_reclaim, (heap_hard_limit / 8))); } } if (full_compact_gc_p) { n = max_generation; *blocking_collection_p = TRUE; settings.loh_compaction = TRUE; dprintf (GTC_LOG, ("compacting LOH due to hard limit")); } } if ((conserve_mem_setting != 0) && (n == max_generation)) { float frag_limit = 1.0f - conserve_mem_setting / 10.0f; size_t loh_size = get_total_gen_size (loh_generation); size_t gen2_size = get_total_gen_size (max_generation); float loh_frag_ratio = 0.0f; float combined_frag_ratio = 0.0f; if (loh_size != 0) { size_t loh_frag = get_total_gen_fragmentation (loh_generation); size_t gen2_frag = get_total_gen_fragmentation (max_generation); loh_frag_ratio = (float)loh_frag / (float)loh_size; combined_frag_ratio = (float)(gen2_frag + loh_frag) / (float)(gen2_size + loh_size); } if (combined_frag_ratio > frag_limit) { dprintf (GTC_LOG, ("combined frag: %f > limit %f, loh frag: %f", combined_frag_ratio, frag_limit, loh_frag_ratio)); gc_data_global.gen_to_condemn_reasons.set_condition (gen_max_high_frag_p); n = max_generation; *blocking_collection_p = TRUE; if (loh_frag_ratio > frag_limit) { settings.loh_compaction = TRUE; dprintf (GTC_LOG, ("compacting LOH due to GCConserveMem setting")); } } } #ifdef BGC_SERVO_TUNING if (bgc_tuning::should_trigger_ngc2()) { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_servo_ngc); n = max_generation; *blocking_collection_p = TRUE; } if ((n < max_generation) && !gc_heap::background_running_p() && bgc_tuning::stepping_trigger (settings.entry_memory_load, get_current_gc_index (max_generation))) { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_servo_initial); n = max_generation; saved_bgc_tuning_reason = reason_bgc_stepping; } if ((n < max_generation) && bgc_tuning::should_trigger_bgc()) { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_servo_bgc); n = max_generation; } if (n == (max_generation - 1)) { if (bgc_tuning::should_delay_alloc (max_generation)) { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_servo_postpone); n -= 1; } } #endif //BGC_SERVO_TUNING if ((n == max_generation) && (*blocking_collection_p == FALSE)) { // If we are doing a gen2 we should reset elevation regardless and let the gen2 // decide if we should lock again or in the bgc case by design we will not retract // gen1 start. 
settings.should_lock_elevation = FALSE; settings.elevation_locked_count = 0; dprintf (GTC_LOG, ("doing bgc, reset elevation")); } #ifdef STRESS_HEAP #ifdef BACKGROUND_GC // We can only do Concurrent GC Stress if the caller did not explicitly ask for all // generations to be collected, // // [LOCALGC TODO] STRESS_HEAP is not defined for a standalone GC so there are multiple // things that need to be fixed in this code block. if (n_original != max_generation && g_pConfig->GetGCStressLevel() && gc_can_use_concurrent) { #ifndef FEATURE_REDHAWK if (*blocking_collection_p) { // We call StressHeap() a lot for Concurrent GC Stress. However, // if we can not do a concurrent collection, no need to stress anymore. // @TODO: Enable stress when the memory pressure goes down again GCStressPolicy::GlobalDisable(); } else #endif // !FEATURE_REDHAWK { gc_data_global.gen_to_condemn_reasons.set_condition(gen_joined_stress); n = max_generation; } } #endif //BACKGROUND_GC #endif //STRESS_HEAP #ifdef BACKGROUND_GC if ((n == max_generation) && background_running_p()) { n = max_generation - 1; dprintf (GTC_LOG, ("bgc in progress - 1 instead of 2")); } #endif //BACKGROUND_GC return n; } inline size_t get_survived_size (gc_history_per_heap* hist) { size_t surv_size = 0; gc_generation_data* gen_data; for (int gen_number = 0; gen_number < total_generation_count; gen_number++) { gen_data = &(hist->gen_data[gen_number]); surv_size += (gen_data->size_after - gen_data->free_list_space_after - gen_data->free_obj_space_after); } return surv_size; } size_t gc_heap::get_total_survived_size() { size_t total_surv_size = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap(); total_surv_size += get_survived_size (current_gc_data_per_heap); } #else gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); total_surv_size = get_survived_size (current_gc_data_per_heap); #endif //MULTIPLE_HEAPS return total_surv_size; } size_t gc_heap::get_total_allocated_since_last_gc() { size_t total_allocated_size = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS total_allocated_size += hp->allocated_since_last_gc[0] + hp->allocated_since_last_gc[1]; hp->allocated_since_last_gc[0] = 0; hp->allocated_since_last_gc[1] = 0; } return total_allocated_size; } // Gets what's allocated on both SOH, LOH, etc that hasn't been collected. size_t gc_heap::get_current_allocated() { dynamic_data* dd = dynamic_data_of (0); size_t current_alloc = dd_desired_allocation (dd) - dd_new_allocation (dd); for (int i = uoh_start_generation; i < total_generation_count; i++) { dynamic_data* dd = dynamic_data_of (i); current_alloc += dd_desired_allocation (dd) - dd_new_allocation (dd); } return current_alloc; } size_t gc_heap::get_total_allocated() { size_t total_current_allocated = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; total_current_allocated += hp->get_current_allocated(); } #else total_current_allocated = get_current_allocated(); #endif //MULTIPLE_HEAPS return total_current_allocated; } size_t gc_heap::get_total_promoted() { size_t total_promoted_size = 0; int highest_gen = ((settings.condemned_generation == max_generation) ? 
(total_generation_count - 1) : settings.condemned_generation); #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS for (int gen_number = 0; gen_number <= highest_gen; gen_number++) { total_promoted_size += dd_promoted_size (hp->dynamic_data_of (gen_number)); } } return total_promoted_size; } #ifdef BGC_SERVO_TUNING size_t gc_heap::get_total_generation_size (int gen_number) { size_t total_generation_size = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS total_generation_size += hp->generation_size (gen_number); } return total_generation_size; } // gets all that's allocated into the gen. This is only used for gen2/3 // for servo tuning. size_t gc_heap::get_total_servo_alloc (int gen_number) { size_t total_alloc = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS generation* gen = hp->generation_of (gen_number); total_alloc += generation_free_list_allocated (gen); total_alloc += generation_end_seg_allocated (gen); total_alloc += generation_condemned_allocated (gen); total_alloc += generation_sweep_allocated (gen); } return total_alloc; } size_t gc_heap::get_total_bgc_promoted() { size_t total_bgc_promoted = 0; #ifdef MULTIPLE_HEAPS int num_heaps = gc_heap::n_heaps; #else //MULTIPLE_HEAPS int num_heaps = 1; #endif //MULTIPLE_HEAPS for (int i = 0; i < num_heaps; i++) { total_bgc_promoted += bpromoted_bytes (i); } return total_bgc_promoted; } // This is called after compute_new_dynamic_data is called, at which point // dd_current_size is calculated. 
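// At that point dd_current_size holds the survived size for the generation, so summing it across heaps gives the total survived size.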
size_t gc_heap::get_total_surv_size (int gen_number) { size_t total_surv_size = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS total_surv_size += dd_current_size (hp->dynamic_data_of (gen_number)); } return total_surv_size; } size_t gc_heap::get_total_begin_data_size (int gen_number) { size_t total_begin_data_size = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS total_begin_data_size += dd_begin_data_size (hp->dynamic_data_of (gen_number)); } return total_begin_data_size; } size_t gc_heap::get_total_generation_fl_size (int gen_number) { size_t total_generation_fl_size = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS total_generation_fl_size += generation_free_list_space (hp->generation_of (gen_number)); } return total_generation_fl_size; } size_t gc_heap::get_current_gc_index (int gen_number) { #ifdef MULTIPLE_HEAPS gc_heap* hp = gc_heap::g_heaps[0]; return dd_collection_count (hp->dynamic_data_of (gen_number)); #else return dd_collection_count (dynamic_data_of (gen_number)); #endif //MULTIPLE_HEAPS } #endif //BGC_SERVO_TUNING size_t gc_heap::current_generation_size (int gen_number) { dynamic_data* dd = dynamic_data_of (gen_number); size_t gen_size = (dd_current_size (dd) + dd_desired_allocation (dd) - dd_new_allocation (dd)); return gen_size; } #ifdef USE_REGIONS // We may need a new empty region while doing a GC so try to get one now, if we don't have any // reserve in the free region list. bool gc_heap::try_get_new_free_region() { heap_segment* region = 0; if (free_regions[basic_free_region].get_num_free_regions() > 0) { dprintf (REGIONS_LOG, ("h%d has %d free regions %Ix", heap_number, free_regions[basic_free_region].get_num_free_regions(), heap_segment_mem (free_regions[basic_free_region].get_first_free_region()))); return true; } else { region = allocate_new_region (__this, 0, false); if (region) { if (init_table_for_region (0, region)) { return_free_region (region); dprintf (REGIONS_LOG, ("h%d got a new empty region %Ix", heap_number, region)); } else { region = 0; } } } return (region != 0); } bool gc_heap::init_table_for_region (int gen_number, heap_segment* region) { #ifdef BACKGROUND_GC if (is_bgc_in_progress()) { dprintf (GC_TABLE_LOG, ("new seg %Ix, mark_array is %Ix", heap_segment_mem (region), mark_array)); if (!commit_mark_array_new_seg (__this, region)) { dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new region %Ix-%Ix", get_region_start (region), heap_segment_reserved (region))); // We don't have memory to commit the mark array so we cannot use the new region. global_region_allocator.delete_region (get_region_start (region)); return false; } } #endif //BACKGROUND_GC if (gen_number <= max_generation) { size_t first_brick = brick_of (heap_segment_mem (region)); set_brick (first_brick, -1); } else { assert (brick_table[brick_of (heap_segment_mem (region))] == 0); } return true; } #endif //USE_REGIONS #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable:6326) // "Potential comparison of a constant with another constant" is intentional in this function. 
#endif //_PREFAST_ /* This is called by when we are actually doing a GC, or when we are just checking whether we would do a full blocking GC, in which case check_only_p is TRUE. The difference between calling this with check_only_p TRUE and FALSE is that when it's TRUE: settings.reason is ignored budgets are not checked (since they are checked before this is called) it doesn't change anything non local like generation_skip_ratio */ int gc_heap::generation_to_condemn (int n_initial, BOOL* blocking_collection_p, BOOL* elevation_requested_p, BOOL check_only_p) { gc_mechanisms temp_settings = settings; gen_to_condemn_tuning temp_condemn_reasons; gc_mechanisms* local_settings = (check_only_p ? &temp_settings : &settings); gen_to_condemn_tuning* local_condemn_reasons = (check_only_p ? &temp_condemn_reasons : &gen_to_condemn_reasons); if (!check_only_p) { if ((local_settings->reason == reason_oos_soh) || (local_settings->reason == reason_oos_loh)) { assert (n_initial >= 1); } assert (settings.reason != reason_empty); } local_condemn_reasons->init(); int n = n_initial; int n_alloc = n; if (heap_number == 0) { dprintf (GTC_LOG, ("init: %d(%d)", n_initial, settings.reason)); } int i = 0; int temp_gen = 0; BOOL low_memory_detected = g_low_memory_status; uint32_t memory_load = 0; uint64_t available_physical = 0; uint64_t available_page_file = 0; BOOL check_memory = FALSE; BOOL high_fragmentation = FALSE; BOOL v_high_memory_load = FALSE; BOOL high_memory_load = FALSE; BOOL low_ephemeral_space = FALSE; BOOL evaluate_elevation = TRUE; *elevation_requested_p = FALSE; *blocking_collection_p = FALSE; BOOL check_max_gen_alloc = TRUE; #ifdef STRESS_HEAP int orig_gen = n; #endif //STRESS_HEAP if (!check_only_p) { dd_fragmentation (dynamic_data_of (0)) = generation_free_list_space (youngest_generation) + generation_free_obj_space (youngest_generation); for (int i = uoh_start_generation; i < total_generation_count; i++) { dd_fragmentation (dynamic_data_of (i)) = generation_free_list_space (generation_of (i)) + generation_free_obj_space (generation_of (i)); } //save new_allocation for (i = 0; i < total_generation_count; i++) { dynamic_data* dd = dynamic_data_of (i); dprintf (GTC_LOG, ("h%d: g%d: l: %Id (%Id)", heap_number, i, dd_new_allocation (dd), dd_desired_allocation (dd))); dd_gc_new_allocation (dd) = dd_new_allocation (dd); } local_condemn_reasons->set_gen (gen_initial, n); temp_gen = n; #ifdef BACKGROUND_GC if (gc_heap::background_running_p() #ifdef BGC_SERVO_TUNING || bgc_tuning::fl_tuning_triggered || (bgc_tuning::enable_fl_tuning && bgc_tuning::use_stepping_trigger_p) #endif //BGC_SERVO_TUNING ) { check_max_gen_alloc = FALSE; } #endif //BACKGROUND_GC if (check_max_gen_alloc) { //figure out if UOH objects need to be collected. for (int i = uoh_start_generation; i < total_generation_count; i++) { if (get_new_allocation (i) <= 0) { n = max_generation; local_condemn_reasons->set_gen (gen_alloc_budget, n); dprintf (BGC_TUNING_LOG, ("BTL[GTC]: trigger based on gen%d b: %Id", (i), get_new_allocation (i))); break; } } } //figure out which generation ran out of allocation for (i = n+1; i <= (check_max_gen_alloc ? 
max_generation : (max_generation - 1)); i++) { if (get_new_allocation (i) <= 0) { n = i; if (n == max_generation) { dprintf (BGC_TUNING_LOG, ("BTL[GTC]: trigger based on gen2 b: %Id", get_new_allocation (max_generation))); } } else break; } } if (n > temp_gen) { local_condemn_reasons->set_gen (gen_alloc_budget, n); } dprintf (GTC_LOG, ("h%d: g%d budget", heap_number, ((get_new_allocation (loh_generation) <= 0) ? 3 : n))); n_alloc = n; #if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) //time based tuning // if enough time has elapsed since the last gc // and the number of gc is too low (1/10 of lower gen) then collect // This should also be enabled if we have memory concerns int n_time_max = max_generation; if (!check_only_p) { if (!check_max_gen_alloc) { n_time_max = max_generation - 1; } } if ((local_settings->pause_mode == pause_interactive) || (local_settings->pause_mode == pause_sustained_low_latency)) { dynamic_data* dd0 = dynamic_data_of (0); uint64_t now = GetHighPrecisionTimeStamp(); temp_gen = n; for (i = (temp_gen+1); i <= n_time_max; i++) { dynamic_data* dd = dynamic_data_of (i); if ((now > dd_time_clock(dd) + dd_time_clock_interval(dd)) && (dd_gc_clock (dd0) > (dd_gc_clock (dd) + dd_gc_clock_interval(dd))) && ((n < max_generation) || ((dd_current_size (dd) < dd_max_size (dd0))))) { n = min (i, n_time_max); dprintf (GTC_LOG, ("time %d", n)); } } if (n > temp_gen) { local_condemn_reasons->set_gen (gen_time_tuning, n); if (n == max_generation) { dprintf (BGC_TUNING_LOG, ("BTL[GTC]: trigger based on time")); } } } if (n != n_alloc) { dprintf (GTC_LOG, ("Condemning %d based on time tuning and fragmentation", n)); } #endif //BACKGROUND_GC && !MULTIPLE_HEAPS if (n < (max_generation - 1)) { if (dt_low_card_table_efficiency_p (tuning_deciding_condemned_gen)) { n = max (n, max_generation - 1); local_settings->promotion = TRUE; dprintf (GTC_LOG, ("h%d: skip %d, c %d", heap_number, generation_skip_ratio, n)); local_condemn_reasons->set_condition (gen_low_card_p); } } if (!check_only_p) { generation_skip_ratio = 100; } if (dt_low_ephemeral_space_p (check_only_p ? tuning_deciding_full_gc : tuning_deciding_condemned_gen)) { low_ephemeral_space = TRUE; n = max (n, max_generation - 1); local_condemn_reasons->set_condition (gen_low_ephemeral_p); dprintf (GTC_LOG, ("h%d: low eph", heap_number)); if (!provisional_mode_triggered) { #ifdef BACKGROUND_GC if (!gc_can_use_concurrent || (generation_free_list_space (generation_of (max_generation)) == 0)) #endif //BACKGROUND_GC { //It is better to defragment first if we are running out of space for //the ephemeral generation but we have enough fragmentation to make up for it //in the non ephemeral generation. Essentially we are trading a gen2 for // having to expand heap in ephemeral collections. 
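// If gen1 is highly fragmented here we note it; high_fragmentation feeds into the elevation decision below.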
if (dt_high_frag_p (tuning_deciding_condemned_gen, max_generation - 1, TRUE)) { high_fragmentation = TRUE; local_condemn_reasons->set_condition (gen_max_high_frag_e_p); dprintf (GTC_LOG, ("heap%d: gen1 frag", heap_number)); } } } } #ifdef USE_REGIONS if (!try_get_new_free_region()) { dprintf (GTC_LOG, ("can't get an empty region -> full compacting")); last_gc_before_oom = TRUE; } #endif //USE_REGIONS //figure out which ephemeral generation is too fragmented temp_gen = n; for (i = n+1; i < max_generation; i++) { if (dt_high_frag_p (tuning_deciding_condemned_gen, i)) { dprintf (GTC_LOG, ("h%d g%d too frag", heap_number, i)); n = i; } else break; } if (low_ephemeral_space) { //enable promotion local_settings->promotion = TRUE; } if (n > temp_gen) { local_condemn_reasons->set_condition (gen_eph_high_frag_p); } if (!check_only_p) { if (settings.pause_mode == pause_low_latency) { if (!is_induced (settings.reason)) { n = min (n, max_generation - 1); dprintf (GTC_LOG, ("low latency mode is enabled, condemning %d", n)); evaluate_elevation = FALSE; goto exit; } } } // It's hard to catch when we get to the point that the memory load is so high // we get an induced GC from the finalizer thread so we are checking the memory load // for every gen0 GC. check_memory = (check_only_p ? (n >= 0) : ((n >= 1) || low_memory_detected)); if (check_memory) { //find out if we are short on memory get_memory_info (&memory_load, &available_physical, &available_page_file); if (heap_number == 0) { dprintf (GTC_LOG, ("ml: %d", memory_load)); } #ifdef USE_REGIONS // For regions we want to take the VA range into consideration as well. uint32_t va_memory_load = global_region_allocator.get_va_memory_load(); if (heap_number == 0) { dprintf (GTC_LOG, ("h%d ML %d, va ML %d", heap_number, memory_load, va_memory_load)); } memory_load = max (memory_load, va_memory_load); #endif //USE_REGIONS // Need to get it early enough for all heaps to use. local_settings->entry_available_physical_mem = available_physical; local_settings->entry_memory_load = memory_load; // @TODO: Force compaction more often under GCSTRESS if (memory_load >= high_memory_load_th || low_memory_detected) { #ifdef SIMPLE_DPRINTF // stress log can't handle any parameter that's bigger than a void*. if (heap_number == 0) { dprintf (GTC_LOG, ("tp: %I64d, ap: %I64d", total_physical_mem, available_physical)); } #endif //SIMPLE_DPRINTF high_memory_load = TRUE; if (memory_load >= v_high_memory_load_th || low_memory_detected) { // TODO: Perhaps in 64-bit we should be estimating gen1's fragmentation as well since // gen1/gen0 may take a lot more memory than gen2. 
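// Under very high memory load we estimate whether a full GC would reclaim enough space; under merely high load we estimate whether gen2 fragmentation is high relative to the available physical memory.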
if (!high_fragmentation) { high_fragmentation = dt_estimate_reclaim_space_p (tuning_deciding_condemned_gen, max_generation); } v_high_memory_load = TRUE; } else { if (!high_fragmentation) { high_fragmentation = dt_estimate_high_frag_p (tuning_deciding_condemned_gen, max_generation, available_physical); } } if (high_fragmentation) { if (high_memory_load) { local_condemn_reasons->set_condition (gen_max_high_frag_m_p); } else if (v_high_memory_load) { local_condemn_reasons->set_condition (gen_max_high_frag_vm_p); } } } } dprintf (GTC_LOG, ("h%d: le: %d, hm: %d, vm: %d, f: %d", heap_number, low_ephemeral_space, high_memory_load, v_high_memory_load, high_fragmentation)); if (should_expand_in_full_gc) { dprintf (GTC_LOG, ("h%d: expand_in_full - BLOCK", heap_number)); *blocking_collection_p = TRUE; evaluate_elevation = FALSE; n = max_generation; local_condemn_reasons->set_condition (gen_expand_fullgc_p); } if (last_gc_before_oom) { dprintf (GTC_LOG, ("h%d: alloc full - BLOCK", heap_number)); n = max_generation; *blocking_collection_p = TRUE; if ((local_settings->reason == reason_oos_loh) || (local_settings->reason == reason_alloc_loh)) { evaluate_elevation = FALSE; } local_condemn_reasons->set_condition (gen_before_oom); } if (!check_only_p) { if (is_induced_blocking (settings.reason) && n_initial == max_generation IN_STRESS_HEAP( && !settings.stress_induced )) { if (heap_number == 0) { dprintf (GTC_LOG, ("induced - BLOCK")); } *blocking_collection_p = TRUE; local_condemn_reasons->set_condition (gen_induced_fullgc_p); evaluate_elevation = FALSE; } if (settings.reason == reason_induced_noforce) { local_condemn_reasons->set_condition (gen_induced_noforce_p); evaluate_elevation = FALSE; } } if (!provisional_mode_triggered && evaluate_elevation && (low_ephemeral_space || high_memory_load || v_high_memory_load)) { *elevation_requested_p = TRUE; #ifdef HOST_64BIT // if we are in high memory load and have consumed 10% of the gen2 budget, do a gen2 now. if (high_memory_load || v_high_memory_load) { dynamic_data* dd_max = dynamic_data_of (max_generation); if (((float)dd_new_allocation (dd_max) / (float)dd_desired_allocation (dd_max)) < 0.9) { dprintf (GTC_LOG, ("%Id left in gen2 alloc (%Id)", dd_new_allocation (dd_max), dd_desired_allocation (dd_max))); n = max_generation; local_condemn_reasons->set_condition (gen_almost_max_alloc); } } if (n <= max_generation) { #endif // HOST_64BIT if (high_fragmentation) { //elevate to max_generation n = max_generation; dprintf (GTC_LOG, ("h%d: f full", heap_number)); #ifdef BACKGROUND_GC if (high_memory_load || v_high_memory_load) { // For background GC we want to do blocking collections more eagerly because we don't // want to get into the situation where the memory load becomes high while we are in // a background GC and we'd have to wait for the background GC to finish to start // a blocking collection (right now the implementation doesn't handle converting // a background GC to a blocking collection midway).
dprintf (GTC_LOG, ("h%d: bgc - BLOCK", heap_number)); *blocking_collection_p = TRUE; } #else if (v_high_memory_load) { dprintf (GTC_LOG, ("h%d: - BLOCK", heap_number)); *blocking_collection_p = TRUE; } #endif //BACKGROUND_GC } else { n = max (n, max_generation - 1); dprintf (GTC_LOG, ("h%d: nf c %d", heap_number, n)); } #ifdef HOST_64BIT } #endif // HOST_64BIT } if (!provisional_mode_triggered && (n == (max_generation - 1)) && (n_alloc < (max_generation -1))) { #ifdef BGC_SERVO_TUNING if (!bgc_tuning::enable_fl_tuning) #endif //BGC_SERVO_TUNING { dprintf (GTC_LOG, ("h%d: budget %d, check 2", heap_number, n_alloc)); if (get_new_allocation (max_generation) <= 0) { dprintf (GTC_LOG, ("h%d: budget alloc", heap_number)); n = max_generation; local_condemn_reasons->set_condition (gen_max_gen1); } } } //figure out if max_generation is too fragmented -> blocking collection if (!provisional_mode_triggered #ifdef BGC_SERVO_TUNING && !bgc_tuning::enable_fl_tuning #endif //BGC_SERVO_TUNING && (n == max_generation)) { if (dt_high_frag_p (tuning_deciding_condemned_gen, n)) { dprintf (GTC_LOG, ("h%d: g%d too frag", heap_number, n)); local_condemn_reasons->set_condition (gen_max_high_frag_p); if (local_settings->pause_mode != pause_sustained_low_latency) { *blocking_collection_p = TRUE; } } } #ifdef BACKGROUND_GC if ((n == max_generation) && !(*blocking_collection_p)) { if (heap_number == 0) { BOOL bgc_heap_too_small = TRUE; size_t gen2size = 0; size_t gen3size = 0; #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { if (((g_heaps[i]->current_generation_size (max_generation)) > bgc_min_per_heap) || ((g_heaps[i]->current_generation_size (loh_generation)) > bgc_min_per_heap) || ((g_heaps[i]->current_generation_size (poh_generation)) > bgc_min_per_heap)) { bgc_heap_too_small = FALSE; break; } } #else //MULTIPLE_HEAPS if ((current_generation_size (max_generation) > bgc_min_per_heap) || (current_generation_size (loh_generation) > bgc_min_per_heap) || (current_generation_size (poh_generation) > bgc_min_per_heap)) { bgc_heap_too_small = FALSE; } #endif //MULTIPLE_HEAPS if (bgc_heap_too_small) { dprintf (GTC_LOG, ("gen2 and gen3 too small")); #ifdef STRESS_HEAP // do not turn stress-induced collections into blocking GCs if (!settings.stress_induced) #endif //STRESS_HEAP { *blocking_collection_p = TRUE; } local_condemn_reasons->set_condition (gen_gen2_too_small); } } } #endif //BACKGROUND_GC exit: if (!check_only_p) { #ifdef STRESS_HEAP #ifdef BACKGROUND_GC // We can only do Concurrent GC Stress if the caller did not explicitly ask for all // generations to be collected, if (orig_gen != max_generation && g_pConfig->GetGCStressLevel() && gc_can_use_concurrent) { *elevation_requested_p = FALSE; } #endif //BACKGROUND_GC #endif //STRESS_HEAP if (check_memory) { fgm_result.available_pagefile_mb = (size_t)(available_page_file / (1024 * 1024)); } local_condemn_reasons->set_gen (gen_final_per_heap, n); get_gc_data_per_heap()->gen_to_condemn_reasons.init (local_condemn_reasons); #ifdef DT_LOG local_condemn_reasons->print (heap_number); #endif //DT_LOG if ((local_settings->reason == reason_oos_soh) || (local_settings->reason == reason_oos_loh)) { assert (n >= 1); } } return n; } #ifdef _PREFAST_ #pragma warning(pop) #endif //_PREFAST_ inline size_t gc_heap::min_reclaim_fragmentation_threshold (uint32_t num_heaps) { // if the memory load is higher, the threshold we'd want to collect gets lower. 
size_t min_mem_based_on_available = (500 - (settings.entry_memory_load - high_memory_load_th) * 40) * 1024 * 1024 / num_heaps; size_t ten_percent_size = (size_t)((float)generation_size (max_generation) * 0.10); uint64_t three_percent_mem = mem_one_percent * 3 / num_heaps; #ifdef SIMPLE_DPRINTF dprintf (GTC_LOG, ("min av: %Id, 10%% gen2: %Id, 3%% mem: %I64d", min_mem_based_on_available, ten_percent_size, three_percent_mem)); #endif //SIMPLE_DPRINTF return (size_t)(min (min_mem_based_on_available, min (ten_percent_size, three_percent_mem))); } inline uint64_t gc_heap::min_high_fragmentation_threshold(uint64_t available_mem, uint32_t num_heaps) { return min (available_mem, (256*1024*1024)) / num_heaps; } enum { CORINFO_EXCEPTION_GC = 0xE0004743 // 'GC' }; #ifdef BACKGROUND_GC void gc_heap::init_background_gc () { //reset the allocation so foreground gc can allocate into older (max_generation) generation generation* gen = generation_of (max_generation); generation_allocation_pointer (gen)= 0; generation_allocation_limit (gen) = 0; generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen)); PREFIX_ASSUME(generation_allocation_segment(gen) != NULL); #ifdef DOUBLY_LINKED_FL generation_set_bgc_mark_bit_p (gen) = FALSE; #endif //DOUBLY_LINKED_FL #ifndef USE_REGIONS //reset the plan allocation for each segment for (heap_segment* seg = generation_allocation_segment (gen); seg != ephemeral_heap_segment; seg = heap_segment_next_rw (seg)) { heap_segment_plan_allocated (seg) = heap_segment_allocated (seg); } #endif //!USE_REGIONS if (heap_number == 0) { dprintf (2, ("heap%d: bgc lowest: %Ix, highest: %Ix", heap_number, background_saved_lowest_address, background_saved_highest_address)); } } #endif //BACKGROUND_GC inline void fire_drain_mark_list_event (size_t mark_list_objects) { FIRE_EVENT(BGCDrainMark, mark_list_objects); } inline void fire_revisit_event (size_t dirtied_pages, size_t marked_objects, BOOL large_objects_p) { FIRE_EVENT(BGCRevisit, dirtied_pages, marked_objects, large_objects_p); } inline void fire_overflow_event (uint8_t* overflow_min, uint8_t* overflow_max, size_t marked_objects, int gen_number) { FIRE_EVENT(BGCOverflow_V1, (uint64_t)overflow_min, (uint64_t)overflow_max, marked_objects, gen_number == loh_generation, gen_number); } void gc_heap::concurrent_print_time_delta (const char* msg) { #ifdef TRACE_GC uint64_t current_time = GetHighPrecisionTimeStamp(); size_t elapsed_time_ms = (size_t)((current_time - time_bgc_last) / 1000); time_bgc_last = current_time; dprintf (2, ("h%d: %s T %Id ms", heap_number, msg, elapsed_time_ms)); #else UNREFERENCED_PARAMETER(msg); #endif //TRACE_GC } void gc_heap::free_list_info (int gen_num, const char* msg) { #if defined (BACKGROUND_GC) && defined (TRACE_GC) dprintf (3, ("h%d: %s", heap_number, msg)); for (int i = 0; i < total_generation_count; i++) { generation* gen = generation_of (i); if ((generation_allocation_size (gen) == 0) && (generation_free_list_space (gen) == 0) && (generation_free_obj_space (gen) == 0)) { // don't print if everything is 0. 
} else { dprintf (3, ("h%d: g%d: a-%Id, fl-%Id, fo-%Id", heap_number, i, generation_allocation_size (gen), generation_free_list_space (gen), generation_free_obj_space (gen))); } } #else UNREFERENCED_PARAMETER(gen_num); UNREFERENCED_PARAMETER(msg); #endif // BACKGROUND_GC && TRACE_GC } void gc_heap::update_collection_counts_for_no_gc() { assert (settings.pause_mode == pause_no_gc); settings.condemned_generation = max_generation; #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) g_heaps[i]->update_collection_counts(); #else //MULTIPLE_HEAPS update_collection_counts(); #endif //MULTIPLE_HEAPS full_gc_counts[gc_type_blocking]++; } BOOL gc_heap::should_proceed_with_gc() { if (gc_heap::settings.pause_mode == pause_no_gc) { if (current_no_gc_region_info.started) { // The no_gc mode was already in progress yet we triggered another GC, // this effectively exits the no_gc mode. restore_data_for_no_gc(); } else return should_proceed_for_no_gc(); } return TRUE; } void gc_heap::update_end_gc_time_per_heap() { for (int gen_number = 0; gen_number <= settings.condemned_generation; gen_number++) { dynamic_data* dd = dynamic_data_of (gen_number); dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); } } void gc_heap::update_end_ngc_time() { end_gc_time = GetHighPrecisionTimeStamp(); #ifdef HEAP_BALANCE_INSTRUMENTATION last_gc_end_time_us = end_gc_time; dprintf (HEAP_BALANCE_LOG, ("[GC#%Id-%Id-%Id]", settings.gc_index, (last_gc_end_time_us - dd_time_clock (dynamic_data_of (0))), dd_time_clock (dynamic_data_of (0)))); #endif //HEAP_BALANCE_INSTRUMENTATION } size_t gc_heap::exponential_smoothing (int gen, size_t collection_count, size_t desired_per_heap) { // to avoid spikes in mem usage due to short terms fluctuations in survivorship, // apply some smoothing. 
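// Once a few collections have happened, the clamp below settles at smoothing == 3 and this
// becomes an exponentially weighted moving average:
//   smoothed = desired / 3 + (2 / 3) * previous_smoothed
// i.e. the newest sample carries weight 1/3 and the accumulated history 2/3.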
size_t smoothing = min(3, collection_count); size_t new_smoothed_desired_per_heap = desired_per_heap / smoothing + ((smoothed_desired_per_heap[gen] / smoothing) * (smoothing - 1)); dprintf (2, ("new smoothed_desired_per_heap for gen %d = %Id, desired_per_heap = %Id", gen, new_smoothed_desired_per_heap, desired_per_heap)); smoothed_desired_per_heap[gen] = new_smoothed_desired_per_heap; return Align (smoothed_desired_per_heap[gen], get_alignment_constant (gen <= soh_gen2)); } //internal part of gc used by the serial and concurrent version void gc_heap::gc1() { #ifdef BACKGROUND_GC assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread())); #endif //BACKGROUND_GC verify_soh_segment_list(); int n = settings.condemned_generation; if (settings.reason == reason_pm_full_gc) { assert (n == max_generation); init_records(); gen_to_condemn_tuning* local_condemn_reasons = &(get_gc_data_per_heap()->gen_to_condemn_reasons); local_condemn_reasons->init(); local_condemn_reasons->set_gen (gen_initial, n); local_condemn_reasons->set_gen (gen_final_per_heap, n); } update_collection_counts (); #ifdef BACKGROUND_GC bgc_alloc_lock->check(); #endif //BACKGROUND_GC free_list_info (max_generation, "beginning"); vm_heap->GcCondemnedGeneration = settings.condemned_generation; assert (g_gc_card_table == card_table); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES assert (g_gc_card_bundle_table == card_bundle_table); #endif { #ifndef USE_REGIONS if (n == max_generation) { gc_low = lowest_address; gc_high = highest_address; } else { gc_low = generation_allocation_start (generation_of (n)); gc_high = heap_segment_reserved (ephemeral_heap_segment); } #endif //USE_REGIONS #ifdef BACKGROUND_GC if (settings.concurrent) { #ifdef TRACE_GC time_bgc_last = GetHighPrecisionTimeStamp(); #endif //TRACE_GC FIRE_EVENT(BGCBegin); concurrent_print_time_delta ("BGC"); concurrent_print_time_delta ("RW"); background_mark_phase(); free_list_info (max_generation, "after mark phase"); background_sweep(); free_list_info (max_generation, "after sweep phase"); } else #endif //BACKGROUND_GC { mark_phase (n, FALSE); GCScan::GcRuntimeStructuresValid (FALSE); plan_phase (n); GCScan::GcRuntimeStructuresValid (TRUE); } } //adjust the allocation size from the pinned quantities. 
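// For each generation up to min (max_generation, n+1): fold the pinned bytes that survived this
// GC (the compact flavor if we compacted, the sweep flavor otherwise) into the generation's
// pinned-allocated and allocation-size counters, then reset the per-GC tallies.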
for (int gen_number = 0; gen_number <= min (max_generation,n+1); gen_number++) { generation* gn = generation_of (gen_number); if (settings.compaction) { generation_pinned_allocated (gn) += generation_pinned_allocation_compact_size (gn); generation_allocation_size (generation_of (gen_number)) += generation_pinned_allocation_compact_size (gn); } else { generation_pinned_allocated (gn) += generation_pinned_allocation_sweep_size (gn); generation_allocation_size (generation_of (gen_number)) += generation_pinned_allocation_sweep_size (gn); } generation_pinned_allocation_sweep_size (gn) = 0; generation_pinned_allocation_compact_size (gn) = 0; } #ifdef BACKGROUND_GC if (settings.concurrent) { dynamic_data* dd = dynamic_data_of (n); end_gc_time = GetHighPrecisionTimeStamp(); dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); #ifdef HEAP_BALANCE_INSTRUMENTATION if (heap_number == 0) { last_gc_end_time_us = end_gc_time; dprintf (HEAP_BALANCE_LOG, ("[GC#%Id-%Id-BGC]", settings.gc_index, dd_gc_elapsed_time (dd))); } #endif //HEAP_BALANCE_INSTRUMENTATION free_list_info (max_generation, "after computing new dynamic data"); gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); for (int gen_number = 0; gen_number < max_generation; gen_number++) { dprintf (2, ("end of BGC: gen%d new_alloc: %Id", gen_number, dd_desired_allocation (dynamic_data_of (gen_number)))); current_gc_data_per_heap->gen_data[gen_number].size_after = generation_size (gen_number); current_gc_data_per_heap->gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number)); current_gc_data_per_heap->gen_data[gen_number].free_obj_space_after = generation_free_obj_space (generation_of (gen_number)); } } else #endif //BACKGROUND_GC { free_list_info (max_generation, "end"); for (int gen_number = 0; gen_number <= n; gen_number++) { compute_new_dynamic_data (gen_number); } if (n != max_generation) { for (int gen_number = (n + 1); gen_number < total_generation_count; gen_number++) { get_gc_data_per_heap()->gen_data[gen_number].size_after = generation_size (gen_number); get_gc_data_per_heap()->gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number)); get_gc_data_per_heap()->gen_data[gen_number].free_obj_space_after = generation_free_obj_space (generation_of (gen_number)); } } get_gc_data_per_heap()->maxgen_size_info.running_free_list_efficiency = (uint32_t)(generation_allocator_efficiency (generation_of (max_generation)) * 100); free_list_info (max_generation, "after computing new dynamic data"); } if (n < max_generation) { compute_promoted_allocation (1 + n); dynamic_data* dd = dynamic_data_of (1 + n); size_t new_fragmentation = generation_free_list_space (generation_of (1 + n)) + generation_free_obj_space (generation_of (1 + n)); #ifdef BACKGROUND_GC if (current_c_gc_state != c_gc_state_planning) #endif //BACKGROUND_GC { if (settings.promotion) { dd_fragmentation (dd) = new_fragmentation; } else { //assert (dd_fragmentation (dd) == new_fragmentation); } } } #ifdef BACKGROUND_GC if (!settings.concurrent) #endif //BACKGROUND_GC { #ifndef FEATURE_REDHAWK // GCToEEInterface::IsGCThread() always returns false on CoreRT, but this assert is useful in CoreCLR. 
assert(GCToEEInterface::IsGCThread()); #endif // FEATURE_REDHAWK adjust_ephemeral_limits(); } #if defined(BACKGROUND_GC) && !defined(USE_REGIONS) assert (ephemeral_low == generation_allocation_start (generation_of ( max_generation -1))); assert (ephemeral_high == heap_segment_reserved (ephemeral_heap_segment)); #endif //BACKGROUND_GC && !USE_REGIONS if (fgn_maxgen_percent) { if (settings.condemned_generation == (max_generation - 1)) { check_for_full_gc (max_generation - 1, 0); } else if (settings.condemned_generation == max_generation) { if (full_gc_approach_event_set #ifdef MULTIPLE_HEAPS && (heap_number == 0) #endif //MULTIPLE_HEAPS ) { dprintf (2, ("FGN-GC: setting gen2 end event")); full_gc_approach_event.Reset(); #ifdef BACKGROUND_GC // By definition WaitForFullGCComplete only succeeds if it's full, *blocking* GC, otherwise need to return N/A fgn_last_gc_was_concurrent = settings.concurrent ? TRUE : FALSE; #endif //BACKGROUND_GC full_gc_end_event.Set(); full_gc_approach_event_set = false; } } } #ifdef BACKGROUND_GC if (!settings.concurrent) #endif //BACKGROUND_GC { //decide on the next allocation quantum if (alloc_contexts_used >= 1) { allocation_quantum = Align (min ((size_t)CLR_SIZE, (size_t)max (1024, get_new_allocation (0) / (2 * alloc_contexts_used))), get_alignment_constant(FALSE)); dprintf (3, ("New allocation quantum: %d(0x%Ix)", allocation_quantum, allocation_quantum)); } } descr_generations ("END"); verify_soh_segment_list(); #ifdef BACKGROUND_GC if (gc_can_use_concurrent) { check_bgc_mark_stack_length(); } assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread())); #endif //BACKGROUND_GC #if defined(VERIFY_HEAP) || (defined (FEATURE_EVENT_TRACE) && defined(BACKGROUND_GC)) if (FALSE #ifdef VERIFY_HEAP // Note that right now g_pConfig->GetHeapVerifyLevel always returns the same // value. If we ever allow randomly adjusting this as the process runs, // we cannot call it this way as joins need to match - we must have the same // value for all heaps like we do with bgc_heap_walk_for_etw_p. || (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) #endif #if defined(FEATURE_EVENT_TRACE) && defined(BACKGROUND_GC) || (bgc_heap_walk_for_etw_p && settings.concurrent) #endif ) { #ifdef BACKGROUND_GC bool cooperative_mode = true; if (settings.concurrent) { cooperative_mode = enable_preemptive (); #ifdef MULTIPLE_HEAPS bgc_t_join.join(this, gc_join_suspend_ee_verify); if (bgc_t_join.joined()) { bgc_threads_sync_event.Reset(); dprintf(2, ("Joining BGC threads to suspend EE for verify heap")); bgc_t_join.restart(); } if (heap_number == 0) { // need to take the gc_lock in preparation for verify_heap below // *before* we suspend the EE, otherwise we get a deadlock enter_gc_lock_for_verify_heap(); suspend_EE(); bgc_threads_sync_event.Set(); } else { bgc_threads_sync_event.Wait(INFINITE, FALSE); dprintf (2, ("bgc_threads_sync_event is signalled")); } #else //MULTIPLE_HEAPS // need to take the gc_lock in preparation for verify_heap below // *before* we suspend the EE, otherwise we get a deadlock enter_gc_lock_for_verify_heap(); suspend_EE(); #endif //MULTIPLE_HEAPS //fix the allocation area so verify_heap can proceed. 
fix_allocation_contexts (FALSE); } #endif //BACKGROUND_GC #ifdef BACKGROUND_GC assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread())); #ifdef FEATURE_EVENT_TRACE if (bgc_heap_walk_for_etw_p && settings.concurrent) { GCToEEInterface::DiagWalkBGCSurvivors(__this); #ifdef MULTIPLE_HEAPS bgc_t_join.join(this, gc_join_after_profiler_heap_walk); if (bgc_t_join.joined()) { bgc_t_join.restart(); } #endif // MULTIPLE_HEAPS } #endif // FEATURE_EVENT_TRACE #endif //BACKGROUND_GC #ifdef VERIFY_HEAP if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) verify_heap (FALSE); #endif // VERIFY_HEAP #ifdef BACKGROUND_GC if (settings.concurrent) { repair_allocation_contexts (TRUE); #ifdef MULTIPLE_HEAPS bgc_t_join.join(this, gc_join_restart_ee_verify); if (bgc_t_join.joined()) { bgc_threads_sync_event.Reset(); dprintf(2, ("Joining BGC threads to restart EE after verify heap")); bgc_t_join.restart(); } if (heap_number == 0) { restart_EE(); leave_gc_lock_for_verify_heap(); bgc_threads_sync_event.Set(); } else { bgc_threads_sync_event.Wait(INFINITE, FALSE); dprintf (2, ("bgc_threads_sync_event is signalled")); } #else //MULTIPLE_HEAPS restart_EE(); leave_gc_lock_for_verify_heap(); #endif //MULTIPLE_HEAPS disable_preemptive (cooperative_mode); } #endif //BACKGROUND_GC } #endif //VERIFY_HEAP || (FEATURE_EVENT_TRACE && BACKGROUND_GC) #ifdef MULTIPLE_HEAPS if (!settings.concurrent) { gc_t_join.join(this, gc_join_done); if (gc_t_join.joined ()) { gc_heap::internal_gc_done = false; //equalize the new desired size of the generations int limit = settings.condemned_generation; if (limit == max_generation) { limit = total_generation_count-1; } for (int gen = 0; gen <= limit; gen++) { size_t total_desired = 0; for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; dynamic_data* dd = hp->dynamic_data_of (gen); size_t temp_total_desired = total_desired + dd_desired_allocation (dd); if (temp_total_desired < total_desired) { // we overflowed. total_desired = (size_t)MAX_PTR; break; } total_desired = temp_total_desired; } size_t desired_per_heap = Align (total_desired/gc_heap::n_heaps, get_alignment_constant (gen <= max_generation)); if (gen == 0) { #if 1 //subsumed by the linear allocation model // to avoid spikes in mem usage due to short terms fluctuations in survivorship, // apply some smoothing. desired_per_heap = exponential_smoothing (gen, dd_collection_count (dynamic_data_of(gen)), desired_per_heap); #endif //0 if (!heap_hard_limit) { // if desired_per_heap is close to min_gc_size, trim it // down to min_gc_size to stay in the cache gc_heap* hp = gc_heap::g_heaps[0]; dynamic_data* dd = hp->dynamic_data_of (gen); size_t min_gc_size = dd_min_size(dd); // if min GC size larger than true on die cache, then don't bother // limiting the desired size if ((min_gc_size <= GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)) && desired_per_heap <= 2*min_gc_size) { desired_per_heap = min_gc_size; } } #ifdef HOST_64BIT desired_per_heap = joined_youngest_desired (desired_per_heap); dprintf (2, ("final gen0 new_alloc: %Id", desired_per_heap)); #endif // HOST_64BIT gc_data_global.final_youngest_desired = desired_per_heap; } #if 1 //subsumed by the linear allocation model if (gen >= uoh_start_generation) { // to avoid spikes in mem usage due to short terms fluctuations in survivorship, // apply some smoothing. 
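// (For the UOH generations the max_generation collection count drives the smoothing window,
// since loh/poh are only collected as part of a gen2 GC.)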
desired_per_heap = exponential_smoothing (gen, dd_collection_count (dynamic_data_of (max_generation)), desired_per_heap); } #endif //0 for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; dynamic_data* dd = hp->dynamic_data_of (gen); dd_desired_allocation (dd) = desired_per_heap; dd_gc_new_allocation (dd) = desired_per_heap; dd_new_allocation (dd) = desired_per_heap; if (gen == 0) { hp->fgn_last_alloc = desired_per_heap; } } } #ifdef FEATURE_LOH_COMPACTION BOOL all_heaps_compacted_p = TRUE; #endif //FEATURE_LOH_COMPACTION int max_gen0_must_clear_bricks = 0; for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; hp->decommit_ephemeral_segment_pages(); hp->rearrange_uoh_segments(); #ifdef FEATURE_LOH_COMPACTION all_heaps_compacted_p &= hp->loh_compacted_p; #endif //FEATURE_LOH_COMPACTION // compute max of gen0_must_clear_bricks over all heaps max_gen0_must_clear_bricks = max(max_gen0_must_clear_bricks, hp->gen0_must_clear_bricks); } #ifdef USE_REGIONS distribute_free_regions(); #endif //USE_REGIONS #ifdef FEATURE_LOH_COMPACTION check_loh_compact_mode (all_heaps_compacted_p); #endif //FEATURE_LOH_COMPACTION // if max_gen0_must_clear_bricks > 0, distribute to all heaps - // if one heap encountered an interior pointer during this GC, // the next GC might see one on another heap if (max_gen0_must_clear_bricks > 0) { for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; hp->gen0_must_clear_bricks = max_gen0_must_clear_bricks; } } fire_pevents(); update_end_ngc_time(); pm_full_gc_init_or_clear(); gc_t_join.restart(); } update_end_gc_time_per_heap(); add_to_history_per_heap(); alloc_context_count = 0; heap_select::mark_heap (heap_number); } #else //MULTIPLE_HEAPS gc_data_global.final_youngest_desired = dd_desired_allocation (dynamic_data_of (0)); #ifdef FEATURE_LOH_COMPACTION check_loh_compact_mode (loh_compacted_p); #endif //FEATURE_LOH_COMPACTION decommit_ephemeral_segment_pages(); #ifdef USE_REGIONS distribute_free_regions(); #endif //USE_REGIONS fire_pevents(); if (!(settings.concurrent)) { rearrange_uoh_segments(); update_end_ngc_time(); update_end_gc_time_per_heap(); add_to_history_per_heap(); do_post_gc(); } pm_full_gc_init_or_clear(); #ifdef BACKGROUND_GC recover_bgc_settings(); #endif //BACKGROUND_GC #endif //MULTIPLE_HEAPS } void gc_heap::save_data_for_no_gc() { current_no_gc_region_info.saved_pause_mode = settings.pause_mode; #ifdef MULTIPLE_HEAPS // This is to affect heap balancing. 
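// Roughly: while a no-GC region is in effect, each heap's gen0 minimum budget is temporarily set
// to min_balance_threshold and the loh minimum to 0 (these values feed the heap-balancing
// heuristic); restore_data_for_no_gc() puts the saved values back.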
for (int i = 0; i < n_heaps; i++) { current_no_gc_region_info.saved_gen0_min_size = dd_min_size (g_heaps[i]->dynamic_data_of (0)); dd_min_size (g_heaps[i]->dynamic_data_of (0)) = min_balance_threshold; current_no_gc_region_info.saved_gen3_min_size = dd_min_size (g_heaps[i]->dynamic_data_of (loh_generation)); dd_min_size (g_heaps[i]->dynamic_data_of (loh_generation)) = 0; } #endif //MULTIPLE_HEAPS } void gc_heap::restore_data_for_no_gc() { gc_heap::settings.pause_mode = current_no_gc_region_info.saved_pause_mode; #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { dd_min_size (g_heaps[i]->dynamic_data_of (0)) = current_no_gc_region_info.saved_gen0_min_size; dd_min_size (g_heaps[i]->dynamic_data_of (loh_generation)) = current_no_gc_region_info.saved_gen3_min_size; } #endif //MULTIPLE_HEAPS } start_no_gc_region_status gc_heap::prepare_for_no_gc_region (uint64_t total_size, BOOL loh_size_known, uint64_t loh_size, BOOL disallow_full_blocking) { if (current_no_gc_region_info.started) { return start_no_gc_in_progress; } start_no_gc_region_status status = start_no_gc_success; save_data_for_no_gc(); settings.pause_mode = pause_no_gc; current_no_gc_region_info.start_status = start_no_gc_success; uint64_t allocation_no_gc_loh = 0; uint64_t allocation_no_gc_soh = 0; assert(total_size != 0); if (loh_size_known) { assert(loh_size != 0); assert(loh_size <= total_size); allocation_no_gc_loh = loh_size; allocation_no_gc_soh = total_size - loh_size; } else { allocation_no_gc_soh = total_size; allocation_no_gc_loh = total_size; } int soh_align_const = get_alignment_constant (TRUE); #ifdef USE_REGIONS size_t max_soh_allocated = SIZE_T_MAX; #else size_t max_soh_allocated = soh_segment_size - segment_info_size - eph_gen_starts_size; #endif size_t size_per_heap = 0; const double scale_factor = 1.05; int num_heaps = get_num_heaps(); uint64_t total_allowed_soh_allocation = (uint64_t)max_soh_allocated * num_heaps; // [LOCALGC TODO] // In theory, the upper limit here is the physical memory of the machine, not // SIZE_T_MAX. This is not true today because total_physical_mem can be // larger than SIZE_T_MAX if running in wow64 on a machine with more than // 4GB of RAM. Once Local GC code divergence is resolved and code is flowing // more freely between branches, it would be good to clean this up to use // total_physical_mem instead of SIZE_T_MAX. assert(total_allowed_soh_allocation <= SIZE_T_MAX); uint64_t total_allowed_loh_allocation = SIZE_T_MAX; uint64_t total_allowed_soh_alloc_scaled = allocation_no_gc_soh > 0 ? static_cast(total_allowed_soh_allocation / scale_factor) : 0; uint64_t total_allowed_loh_alloc_scaled = allocation_no_gc_loh > 0 ? 
static_cast<uint64_t>(total_allowed_loh_allocation / scale_factor) : 0; if (allocation_no_gc_soh > total_allowed_soh_alloc_scaled || allocation_no_gc_loh > total_allowed_loh_alloc_scaled) { status = start_no_gc_too_large; goto done; } if (allocation_no_gc_soh > 0) { allocation_no_gc_soh = static_cast<uint64_t>(allocation_no_gc_soh * scale_factor); allocation_no_gc_soh = min (allocation_no_gc_soh, total_allowed_soh_alloc_scaled); } if (allocation_no_gc_loh > 0) { allocation_no_gc_loh = static_cast<uint64_t>(allocation_no_gc_loh * scale_factor); allocation_no_gc_loh = min (allocation_no_gc_loh, total_allowed_loh_alloc_scaled); } if (disallow_full_blocking) current_no_gc_region_info.minimal_gc_p = TRUE; if (allocation_no_gc_soh != 0) { current_no_gc_region_info.soh_allocation_size = (size_t)allocation_no_gc_soh; size_per_heap = current_no_gc_region_info.soh_allocation_size; #ifdef MULTIPLE_HEAPS size_per_heap /= n_heaps; for (int i = 0; i < n_heaps; i++) { // due to heap balancing we need to allow some room before we even look to balance to another heap. g_heaps[i]->soh_allocation_no_gc = min (Align ((size_per_heap + min_balance_threshold), soh_align_const), max_soh_allocated); } #else //MULTIPLE_HEAPS soh_allocation_no_gc = min (Align (size_per_heap, soh_align_const), max_soh_allocated); #endif //MULTIPLE_HEAPS } if (allocation_no_gc_loh != 0) { current_no_gc_region_info.loh_allocation_size = (size_t)allocation_no_gc_loh; size_per_heap = current_no_gc_region_info.loh_allocation_size; #ifdef MULTIPLE_HEAPS size_per_heap /= n_heaps; for (int i = 0; i < n_heaps; i++) g_heaps[i]->loh_allocation_no_gc = Align (size_per_heap, get_alignment_constant (FALSE)); #else //MULTIPLE_HEAPS loh_allocation_no_gc = Align (size_per_heap, get_alignment_constant (FALSE)); #endif //MULTIPLE_HEAPS } done: if (status != start_no_gc_success) restore_data_for_no_gc(); return status; } void gc_heap::handle_failure_for_no_gc() { gc_heap::restore_data_for_no_gc(); // sets current_no_gc_region_info.started to FALSE here. memset (&current_no_gc_region_info, 0, sizeof (current_no_gc_region_info)); } start_no_gc_region_status gc_heap::get_start_no_gc_region_status() { return current_no_gc_region_info.start_status; } void gc_heap::record_gcs_during_no_gc() { if (current_no_gc_region_info.started) { current_no_gc_region_info.num_gcs++; if (is_induced (settings.reason)) current_no_gc_region_info.num_gcs_induced++; } } BOOL gc_heap::find_loh_free_for_no_gc() { allocator* loh_allocator = generation_allocator (generation_of (loh_generation)); size_t size = loh_allocation_no_gc; for (unsigned int a_l_idx = loh_allocator->first_suitable_bucket(size); a_l_idx < loh_allocator->number_of_buckets(); a_l_idx++) { uint8_t* free_list = loh_allocator->alloc_list_head_of (a_l_idx); while (free_list) { size_t free_list_size = unused_array_size(free_list); if (free_list_size > size) { dprintf (3, ("free item %Ix(%Id) for no gc", (size_t)free_list, free_list_size)); return TRUE; } free_list = free_list_slot (free_list); } } return FALSE; } BOOL gc_heap::find_loh_space_for_no_gc() { saved_loh_segment_no_gc = 0; if (find_loh_free_for_no_gc()) return TRUE; heap_segment* seg = generation_allocation_segment (generation_of (loh_generation)); while (seg) { size_t remaining = heap_segment_reserved (seg) - heap_segment_allocated (seg); if (remaining >= loh_allocation_no_gc) { saved_loh_segment_no_gc = seg; break; } seg = heap_segment_next (seg); } if (!saved_loh_segment_no_gc && current_no_gc_region_info.minimal_gc_p) { // If no full GC is allowed, we try to get a new seg right away.
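// (minimal_gc_p means the caller disallowed a full blocking GC, so if we cannot find or commit
// space up front, starting the region fails with start_no_gc_no_memory rather than forcing a
// full GC.)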
saved_loh_segment_no_gc = get_segment_for_uoh (loh_generation, get_uoh_seg_size (loh_allocation_no_gc) #ifdef MULTIPLE_HEAPS , this #endif //MULTIPLE_HEAPS ); } return (saved_loh_segment_no_gc != 0); } BOOL gc_heap::loh_allocated_for_no_gc() { if (!saved_loh_segment_no_gc) return FALSE; heap_segment* seg = generation_allocation_segment (generation_of (loh_generation)); do { if (seg == saved_loh_segment_no_gc) { return FALSE; } seg = heap_segment_next (seg); } while (seg); return TRUE; } BOOL gc_heap::commit_loh_for_no_gc (heap_segment* seg) { uint8_t* end_committed = heap_segment_allocated (seg) + loh_allocation_no_gc; assert (end_committed <= heap_segment_reserved (seg)); return (grow_heap_segment (seg, end_committed)); } void gc_heap::thread_no_gc_loh_segments() { #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; if (hp->loh_allocated_for_no_gc()) { hp->thread_uoh_segment (loh_generation, hp->saved_loh_segment_no_gc); hp->saved_loh_segment_no_gc = 0; } } #else //MULTIPLE_HEAPS if (loh_allocated_for_no_gc()) { thread_uoh_segment (loh_generation, saved_loh_segment_no_gc); saved_loh_segment_no_gc = 0; } #endif //MULTIPLE_HEAPS } void gc_heap::set_loh_allocations_for_no_gc() { if (current_no_gc_region_info.loh_allocation_size != 0) { dynamic_data* dd = dynamic_data_of (loh_generation); dd_new_allocation (dd) = loh_allocation_no_gc; dd_gc_new_allocation (dd) = dd_new_allocation (dd); } } void gc_heap::set_soh_allocations_for_no_gc() { if (current_no_gc_region_info.soh_allocation_size != 0) { dynamic_data* dd = dynamic_data_of (0); dd_new_allocation (dd) = soh_allocation_no_gc; dd_gc_new_allocation (dd) = dd_new_allocation (dd); #ifdef MULTIPLE_HEAPS alloc_context_count = 0; #endif //MULTIPLE_HEAPS } } void gc_heap::set_allocations_for_no_gc() { #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; hp->set_loh_allocations_for_no_gc(); hp->set_soh_allocations_for_no_gc(); } #else //MULTIPLE_HEAPS set_loh_allocations_for_no_gc(); set_soh_allocations_for_no_gc(); #endif //MULTIPLE_HEAPS } BOOL gc_heap::should_proceed_for_no_gc() { BOOL gc_requested = FALSE; BOOL loh_full_gc_requested = FALSE; BOOL soh_full_gc_requested = FALSE; BOOL no_gc_requested = FALSE; BOOL get_new_loh_segments = FALSE; gc_heap* hp = nullptr; if (current_no_gc_region_info.soh_allocation_size) { #ifdef USE_REGIONS #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { hp = g_heaps[i]; #else { hp = pGenGCHeap; #endif //MULTIPLE_HEAPS if (!hp->extend_soh_for_no_gc()) { soh_full_gc_requested = TRUE; #ifdef MULTIPLE_HEAPS break; #endif //MULTIPLE_HEAPS } } #else //USE_REGIONS #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { hp = g_heaps[i]; #else //MULTIPLE_HEAPS { hp = pGenGCHeap; #endif //MULTIPLE_HEAPS size_t reserved_space = heap_segment_reserved (hp->ephemeral_heap_segment) - hp->alloc_allocated; if (reserved_space < hp->soh_allocation_no_gc) { gc_requested = TRUE; #ifdef MULTIPLE_HEAPS break; #endif //MULTIPLE_HEAPS } } if (!gc_requested) { #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { hp = g_heaps[i]; #else //MULTIPLE_HEAPS { hp = pGenGCHeap; #endif //MULTIPLE_HEAPS if (!(hp->grow_heap_segment (hp->ephemeral_heap_segment, (hp->alloc_allocated + hp->soh_allocation_no_gc)))) { soh_full_gc_requested = TRUE; #ifdef MULTIPLE_HEAPS break; #endif //MULTIPLE_HEAPS } } } #endif //USE_REGIONS } if (!current_no_gc_region_info.minimal_gc_p && gc_requested) { soh_full_gc_requested = TRUE; } no_gc_requested = !(soh_full_gc_requested || gc_requested); 
if (soh_full_gc_requested && current_no_gc_region_info.minimal_gc_p) { current_no_gc_region_info.start_status = start_no_gc_no_memory; goto done; } if (!soh_full_gc_requested && current_no_gc_region_info.loh_allocation_size) { // Check to see if we have enough reserved space. #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; if (!hp->find_loh_space_for_no_gc()) { loh_full_gc_requested = TRUE; break; } } #else //MULTIPLE_HEAPS if (!find_loh_space_for_no_gc()) loh_full_gc_requested = TRUE; #endif //MULTIPLE_HEAPS // Check to see if we have committed space. if (!loh_full_gc_requested) { #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; if (hp->saved_loh_segment_no_gc &&!hp->commit_loh_for_no_gc (hp->saved_loh_segment_no_gc)) { loh_full_gc_requested = TRUE; break; } } #else //MULTIPLE_HEAPS if (saved_loh_segment_no_gc && !commit_loh_for_no_gc (saved_loh_segment_no_gc)) loh_full_gc_requested = TRUE; #endif //MULTIPLE_HEAPS } } if (loh_full_gc_requested || soh_full_gc_requested) { if (current_no_gc_region_info.minimal_gc_p) current_no_gc_region_info.start_status = start_no_gc_no_memory; } no_gc_requested = !(loh_full_gc_requested || soh_full_gc_requested || gc_requested); if (current_no_gc_region_info.start_status == start_no_gc_success) { if (no_gc_requested) set_allocations_for_no_gc(); } done: if ((current_no_gc_region_info.start_status == start_no_gc_success) && !no_gc_requested) return TRUE; else { // We are done with starting the no_gc_region. current_no_gc_region_info.started = TRUE; return FALSE; } } end_no_gc_region_status gc_heap::end_no_gc_region() { dprintf (1, ("end no gc called")); end_no_gc_region_status status = end_no_gc_success; if (!(current_no_gc_region_info.started)) status = end_no_gc_not_in_progress; if (current_no_gc_region_info.num_gcs_induced) status = end_no_gc_induced; else if (current_no_gc_region_info.num_gcs) status = end_no_gc_alloc_exceeded; if (settings.pause_mode == pause_no_gc) restore_data_for_no_gc(); // sets current_no_gc_region_info.started to FALSE here. memset (¤t_no_gc_region_info, 0, sizeof (current_no_gc_region_info)); return status; } //update counters void gc_heap::update_collection_counts () { dynamic_data* dd0 = dynamic_data_of (0); dd_gc_clock (dd0) += 1; uint64_t now = GetHighPrecisionTimeStamp(); for (int i = 0; i <= settings.condemned_generation;i++) { dynamic_data* dd = dynamic_data_of (i); dd_collection_count (dd)++; //this is needed by the linear allocation model if (i == max_generation) { dd_collection_count (dynamic_data_of (loh_generation))++; dd_collection_count(dynamic_data_of(poh_generation))++; } dd_gc_clock (dd) = dd_gc_clock (dd0); dd_previous_time_clock (dd) = dd_time_clock (dd); dd_time_clock (dd) = now; } } #ifdef USE_REGIONS bool gc_heap::extend_soh_for_no_gc() { size_t required = soh_allocation_no_gc; heap_segment* region = ephemeral_heap_segment; while (true) { uint8_t* allocated = (region == ephemeral_heap_segment) ? 
alloc_allocated : heap_segment_allocated (region); size_t available = heap_segment_reserved (region) - allocated; size_t commit = min (available, required); if (grow_heap_segment (region, allocated + commit)) { required -= commit; if (required == 0) { break; } region = heap_segment_next (region); if (region == nullptr) { region = get_new_region (0); if (region == nullptr) { break; } else { GCToEEInterface::DiagAddNewRegion( 0, heap_segment_mem (region), heap_segment_allocated (region), heap_segment_reserved (region) ); } } } else { break; } } return (required == 0); } #else BOOL gc_heap::expand_soh_with_minimal_gc() { if ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment)) >= soh_allocation_no_gc) return TRUE; heap_segment* new_seg = soh_get_segment_to_expand(); if (new_seg) { if (g_gc_card_table != card_table) copy_brick_card_table(); settings.promotion = TRUE; settings.demotion = FALSE; ephemeral_promotion = TRUE; int condemned_gen_number = max_generation - 1; int align_const = get_alignment_constant (TRUE); for (int i = 0; i <= condemned_gen_number; i++) { generation* gen = generation_of (i); saved_ephemeral_plan_start[i] = generation_allocation_start (gen); saved_ephemeral_plan_start_size[i] = Align (size (generation_allocation_start (gen)), align_const); } // We do need to clear the bricks here as we are converting a bunch of ephemeral objects to gen2 // and need to make sure that there are no left over bricks from the previous GCs for the space // we just used for gen0 allocation. We will need to go through the bricks for these objects for // ephemeral GCs later. for (size_t b = brick_of (generation_allocation_start (generation_of (0))); b < brick_of (align_on_brick (heap_segment_allocated (ephemeral_heap_segment))); b++) { set_brick (b, -1); } size_t ephemeral_size = (heap_segment_allocated (ephemeral_heap_segment) - generation_allocation_start (generation_of (max_generation - 1))); heap_segment_next (ephemeral_heap_segment) = new_seg; ephemeral_heap_segment = new_seg; uint8_t* start = heap_segment_mem (ephemeral_heap_segment); for (int i = condemned_gen_number; i >= 0; i--) { size_t gen_start_size = Align (min_obj_size); make_generation (i, ephemeral_heap_segment, start); generation* gen = generation_of (i); generation_plan_allocation_start (gen) = start; generation_plan_allocation_start_size (gen) = gen_start_size; start += gen_start_size; } heap_segment_used (ephemeral_heap_segment) = start - plug_skew; heap_segment_plan_allocated (ephemeral_heap_segment) = start; fix_generation_bounds (condemned_gen_number, generation_of (0)); dd_gc_new_allocation (dynamic_data_of (max_generation)) -= ephemeral_size; dd_new_allocation (dynamic_data_of (max_generation)) = dd_gc_new_allocation (dynamic_data_of (max_generation)); adjust_ephemeral_limits(); return TRUE; } else { return FALSE; } } #endif //USE_REGIONS // Only to be done on the thread that calls restart in a join for server GC // and reset the oom status per heap. 
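// In other words: after the join, the restarting thread folds each heap's no_gc_oom_p into the
// global start_status (start_no_gc_no_memory if any heap failed) and clears the per-heap flag.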
void gc_heap::check_and_set_no_gc_oom() { #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; if (hp->no_gc_oom_p) { current_no_gc_region_info.start_status = start_no_gc_no_memory; hp->no_gc_oom_p = false; } } #else if (no_gc_oom_p) { current_no_gc_region_info.start_status = start_no_gc_no_memory; no_gc_oom_p = false; } #endif //MULTIPLE_HEAPS } void gc_heap::allocate_for_no_gc_after_gc() { if (current_no_gc_region_info.minimal_gc_p) repair_allocation_contexts (TRUE); no_gc_oom_p = false; if (current_no_gc_region_info.start_status != start_no_gc_no_memory) { if (current_no_gc_region_info.soh_allocation_size != 0) { #ifdef USE_REGIONS no_gc_oom_p = !extend_soh_for_no_gc(); #else if (((size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment)) < soh_allocation_no_gc) || (!grow_heap_segment (ephemeral_heap_segment, (heap_segment_allocated (ephemeral_heap_segment) + soh_allocation_no_gc)))) { no_gc_oom_p = true; } #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS gc_t_join.join(this, gc_join_after_commit_soh_no_gc); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { check_and_set_no_gc_oom(); #ifdef MULTIPLE_HEAPS gc_t_join.restart(); #endif //MULTIPLE_HEAPS } } if ((current_no_gc_region_info.start_status == start_no_gc_success) && !(current_no_gc_region_info.minimal_gc_p) && (current_no_gc_region_info.loh_allocation_size != 0)) { gc_policy = policy_compact; saved_loh_segment_no_gc = 0; if (!find_loh_free_for_no_gc()) { heap_segment* seg = generation_allocation_segment (generation_of (loh_generation)); BOOL found_seg_p = FALSE; while (seg) { if ((size_t)(heap_segment_reserved (seg) - heap_segment_allocated (seg)) >= loh_allocation_no_gc) { found_seg_p = TRUE; if (!commit_loh_for_no_gc (seg)) { no_gc_oom_p = true; break; } } seg = heap_segment_next (seg); } if (!found_seg_p) gc_policy = policy_expand; } #ifdef MULTIPLE_HEAPS gc_t_join.join(this, gc_join_expand_loh_no_gc); if (gc_t_join.joined()) { check_and_set_no_gc_oom(); if (current_no_gc_region_info.start_status == start_no_gc_success) { for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; if (hp->gc_policy == policy_expand) { hp->saved_loh_segment_no_gc = get_segment_for_uoh (loh_generation, get_uoh_seg_size (loh_allocation_no_gc), hp); if (!(hp->saved_loh_segment_no_gc)) { current_no_gc_region_info.start_status = start_no_gc_no_memory; break; } } } } gc_t_join.restart(); } #else //MULTIPLE_HEAPS check_and_set_no_gc_oom(); if ((current_no_gc_region_info.start_status == start_no_gc_success) && (gc_policy == policy_expand)) { saved_loh_segment_no_gc = get_segment_for_uoh (loh_generation, get_uoh_seg_size (loh_allocation_no_gc)); if (!saved_loh_segment_no_gc) current_no_gc_region_info.start_status = start_no_gc_no_memory; } #endif //MULTIPLE_HEAPS if ((current_no_gc_region_info.start_status == start_no_gc_success) && saved_loh_segment_no_gc) { if (!commit_loh_for_no_gc (saved_loh_segment_no_gc)) { no_gc_oom_p = true; } } } } #ifdef MULTIPLE_HEAPS gc_t_join.join(this, gc_join_final_no_gc); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { check_and_set_no_gc_oom(); if (current_no_gc_region_info.start_status == start_no_gc_success) { set_allocations_for_no_gc(); current_no_gc_region_info.started = TRUE; } #ifdef MULTIPLE_HEAPS gc_t_join.restart(); #endif //MULTIPLE_HEAPS } } void gc_heap::init_records() { // An option is to move this to be after we figure out which gen to condemn so we don't // need to clear some generations' data 'cause we know they don't change, 
but that also means // we can't simply call memset here. memset (&gc_data_per_heap, 0, sizeof (gc_data_per_heap)); gc_data_per_heap.heap_index = heap_number; if (heap_number == 0) memset (&gc_data_global, 0, sizeof (gc_data_global)); #ifdef GC_CONFIG_DRIVEN memset (interesting_data_per_gc, 0, sizeof (interesting_data_per_gc)); #endif //GC_CONFIG_DRIVEN memset (&fgm_result, 0, sizeof (fgm_result)); for (int i = 0; i < total_generation_count; i++) { gc_data_per_heap.gen_data[i].size_before = generation_size (i); generation* gen = generation_of (i); gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen); gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen); } #ifdef USE_REGIONS end_gen0_region_space = 0; gen0_pinned_free_space = 0; gen0_large_chunk_found = false; num_regions_freed_in_sweep = 0; #endif //USE_REGIONS sufficient_gen0_space_p = FALSE; #ifdef MULTIPLE_HEAPS gen0_allocated_after_gc_p = false; #endif //MULTIPLE_HEAPS #if defined (_DEBUG) && defined (VERIFY_HEAP) verify_pinned_queue_p = FALSE; #endif // _DEBUG && VERIFY_HEAP } void gc_heap::pm_full_gc_init_or_clear() { // This means the next GC will be a full blocking GC and we need to init. if (settings.condemned_generation == (max_generation - 1)) { if (pm_trigger_full_gc) { #ifdef MULTIPLE_HEAPS do_post_gc(); #endif //MULTIPLE_HEAPS dprintf (GTC_LOG, ("init for PM triggered full GC")); uint32_t saved_entry_memory_load = settings.entry_memory_load; settings.init_mechanisms(); settings.reason = reason_pm_full_gc; settings.condemned_generation = max_generation; settings.entry_memory_load = saved_entry_memory_load; // Can't assert this since we only check at the end of gen2 GCs, // during gen1 the memory load could have already dropped. // Although arguably we should just turn off PM then... //assert (settings.entry_memory_load >= high_memory_load_th); assert (settings.entry_memory_load > 0); settings.gc_index += 1; do_pre_gc(); } } // This means we are in the progress of a full blocking GC triggered by // this PM mode. 
else if (settings.reason == reason_pm_full_gc) { assert (settings.condemned_generation == max_generation); assert (pm_trigger_full_gc); pm_trigger_full_gc = false; dprintf (GTC_LOG, ("PM triggered full GC done")); } } void gc_heap::garbage_collect_pm_full_gc() { assert (settings.condemned_generation == max_generation); assert (settings.reason == reason_pm_full_gc); assert (!settings.concurrent); gc1(); } void gc_heap::garbage_collect (int n) { //reset the number of alloc contexts alloc_contexts_used = 0; fix_allocation_contexts (TRUE); #ifdef MULTIPLE_HEAPS #ifdef JOIN_STATS gc_t_join.start_ts(this); #endif //JOIN_STATS clear_gen0_bricks(); #endif //MULTIPLE_HEAPS if ((settings.pause_mode == pause_no_gc) && current_no_gc_region_info.minimal_gc_p) { #ifdef MULTIPLE_HEAPS gc_t_join.join(this, gc_join_minimal_gc); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { #ifndef USE_REGIONS #ifdef MULTIPLE_HEAPS // this is serialized because we need to get a segment for (int i = 0; i < n_heaps; i++) { if (!(g_heaps[i]->expand_soh_with_minimal_gc())) current_no_gc_region_info.start_status = start_no_gc_no_memory; } #else if (!expand_soh_with_minimal_gc()) current_no_gc_region_info.start_status = start_no_gc_no_memory; #endif //MULTIPLE_HEAPS #endif //!USE_REGIONS update_collection_counts_for_no_gc(); #ifdef MULTIPLE_HEAPS gc_start_event.Reset(); gc_t_join.restart(); #endif //MULTIPLE_HEAPS } goto done; } init_records(); settings.reason = gc_trigger_reason; num_pinned_objects = 0; #ifdef STRESS_HEAP if (settings.reason == reason_gcstress) { settings.reason = reason_induced; settings.stress_induced = TRUE; } #endif // STRESS_HEAP #ifdef MULTIPLE_HEAPS //align all heaps on the max generation to condemn dprintf (3, ("Joining for max generation to condemn")); condemned_generation_num = generation_to_condemn (n, &blocking_collection, &elevation_requested, FALSE); gc_t_join.join(this, gc_join_generation_determined); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { #ifdef FEATURE_BASICFREEZE seg_table->delete_old_slots(); #endif //FEATURE_BASICFREEZE #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; // check for card table growth if (g_gc_card_table != hp->card_table) hp->copy_brick_card_table(); hp->rearrange_uoh_segments(); #ifdef BACKGROUND_GC hp->background_delay_delete_uoh_segments(); if (!gc_heap::background_running_p()) hp->rearrange_small_heap_segments(); #endif //BACKGROUND_GC } #else //MULTIPLE_HEAPS if (g_gc_card_table != card_table) copy_brick_card_table(); rearrange_uoh_segments(); #ifdef BACKGROUND_GC background_delay_delete_uoh_segments(); if (!gc_heap::background_running_p()) rearrange_small_heap_segments(); #endif //BACKGROUND_GC #endif //MULTIPLE_HEAPS BOOL should_evaluate_elevation = TRUE; BOOL should_do_blocking_collection = FALSE; #ifdef MULTIPLE_HEAPS int gen_max = condemned_generation_num; for (int i = 0; i < n_heaps; i++) { if (gen_max < g_heaps[i]->condemned_generation_num) gen_max = g_heaps[i]->condemned_generation_num; if (should_evaluate_elevation && !(g_heaps[i]->elevation_requested)) should_evaluate_elevation = FALSE; if ((!should_do_blocking_collection) && (g_heaps[i]->blocking_collection)) should_do_blocking_collection = TRUE; } settings.condemned_generation = gen_max; #else //MULTIPLE_HEAPS settings.condemned_generation = generation_to_condemn (n, &blocking_collection, &elevation_requested, FALSE); should_evaluate_elevation = elevation_requested; should_do_blocking_collection = blocking_collection; #endif //MULTIPLE_HEAPS 
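// The joined decision below starts from the per-heap votes gathered above: with multiple heaps
// the condemned generation is the max across heaps, elevation is only evaluated if every heap
// requested it, and any single heap asking for a blocking collection makes the whole GC blocking.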
settings.condemned_generation = joined_generation_to_condemn ( should_evaluate_elevation, n, settings.condemned_generation, &should_do_blocking_collection STRESS_HEAP_ARG(n) ); STRESS_LOG1(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10, "condemned generation num: %d\n", settings.condemned_generation); record_gcs_during_no_gc(); if (settings.condemned_generation > 1) settings.promotion = TRUE; #ifdef HEAP_ANALYZE // At this point we've decided what generation is condemned // See if we've been requested to analyze survivors after the mark phase if (GCToEEInterface::AnalyzeSurvivorsRequested(settings.condemned_generation)) { heap_analyze_enabled = TRUE; } #endif // HEAP_ANALYZE GCToEEInterface::DiagGCStart(settings.condemned_generation, settings.reason == reason_induced); #ifdef BACKGROUND_GC if ((settings.condemned_generation == max_generation) && (should_do_blocking_collection == FALSE) && gc_can_use_concurrent && !temp_disable_concurrent_p && ((settings.pause_mode == pause_interactive) || (settings.pause_mode == pause_sustained_low_latency))) { keep_bgc_threads_p = TRUE; c_write (settings.concurrent, TRUE); memset (&bgc_data_global, 0, sizeof(bgc_data_global)); memcpy (&bgc_data_global, &gc_data_global, sizeof(gc_data_global)); } #endif //BACKGROUND_GC settings.gc_index = (uint32_t)dd_collection_count (dynamic_data_of (0)) + 1; #ifdef MULTIPLE_HEAPS hb_log_balance_activities(); hb_log_new_allocation(); #endif //MULTIPLE_HEAPS // Call the EE for start of GC work GCToEEInterface::GcStartWork (settings.condemned_generation, max_generation); // TODO: we could fire an ETW event to say this GC as a concurrent GC but later on due to not being able to // create threads or whatever, this could be a non concurrent GC. Maybe for concurrent GC we should fire // it in do_background_gc and if it failed to be a CGC we fire it in gc1... in other words, this should be // fired in gc1. do_pre_gc(); #ifdef MULTIPLE_HEAPS gc_start_event.Reset(); dprintf(3, ("Starting all gc threads for gc")); gc_t_join.restart(); #endif //MULTIPLE_HEAPS } descr_generations ("BEGIN"); #ifdef VERIFY_HEAP if ((GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) && !(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_POST_GC_ONLY)) { verify_heap (TRUE); } if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_BARRIERCHECK) checkGCWriteBarrier(); #endif // VERIFY_HEAP #ifdef BACKGROUND_GC if (settings.concurrent) { // We need to save the settings because we'll need to restore it after each FGC. 
assert (settings.condemned_generation == max_generation); settings.compaction = FALSE; saved_bgc_settings = settings; #ifdef MULTIPLE_HEAPS if (heap_number == 0) { for (int i = 0; i < n_heaps; i++) { prepare_bgc_thread (g_heaps[i]); } dprintf (2, ("setting bgc_threads_sync_event")); bgc_threads_sync_event.Set(); } else { bgc_threads_sync_event.Wait(INFINITE, FALSE); dprintf (2, ("bgc_threads_sync_event is signalled")); } #else prepare_bgc_thread(0); #endif //MULTIPLE_HEAPS #ifdef MULTIPLE_HEAPS gc_t_join.join(this, gc_join_start_bgc); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { do_concurrent_p = TRUE; do_ephemeral_gc_p = FALSE; #ifdef MULTIPLE_HEAPS dprintf(2, ("Joined to perform a background GC")); for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; if (!(hp->bgc_thread) || !hp->commit_mark_array_bgc_init()) { do_concurrent_p = FALSE; break; } else { hp->background_saved_lowest_address = hp->lowest_address; hp->background_saved_highest_address = hp->highest_address; } } #else do_concurrent_p = (!!bgc_thread && commit_mark_array_bgc_init()); if (do_concurrent_p) { background_saved_lowest_address = lowest_address; background_saved_highest_address = highest_address; } #endif //MULTIPLE_HEAPS if (do_concurrent_p) { #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP SoftwareWriteWatch::EnableForGCHeap(); #endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) g_heaps[i]->current_bgc_state = bgc_initialized; #else current_bgc_state = bgc_initialized; #endif //MULTIPLE_HEAPS int gen = check_for_ephemeral_alloc(); // always do a gen1 GC before we start BGC. dont_restart_ee_p = TRUE; if (gen == -1) { // If we decide to not do a GC before the BGC we need to // restore the gen0 alloc context. #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { generation_allocation_pointer (g_heaps[i]->generation_of (0)) = 0; generation_allocation_limit (g_heaps[i]->generation_of (0)) = 0; } #else generation_allocation_pointer (youngest_generation) = 0; generation_allocation_limit (youngest_generation) = 0; #endif //MULTIPLE_HEAPS } else { do_ephemeral_gc_p = TRUE; settings.init_mechanisms(); settings.condemned_generation = gen; settings.gc_index = (size_t)dd_collection_count (dynamic_data_of (0)) + 2; do_pre_gc(); // TODO BACKGROUND_GC need to add the profiling stuff here. dprintf (GTC_LOG, ("doing gen%d before doing a bgc", gen)); } //clear the cards so they don't bleed in gen 1 during collection // shouldn't this always be done at the beginning of any GC? //clear_card_for_addresses ( // generation_allocation_start (generation_of (0)), // heap_segment_allocated (ephemeral_heap_segment)); if (!do_ephemeral_gc_p) { do_background_gc(); } } else { settings.compaction = TRUE; c_write (settings.concurrent, FALSE); } #ifdef MULTIPLE_HEAPS gc_t_join.restart(); #endif //MULTIPLE_HEAPS } if (do_concurrent_p) { // At this point we are sure we'll be starting a BGC, so save its per heap data here. // global data is only calculated at the end of the GC so we don't need to worry about // FGCs overwriting it. 
memset (&bgc_data_per_heap, 0, sizeof (bgc_data_per_heap)); memcpy (&bgc_data_per_heap, &gc_data_per_heap, sizeof(gc_data_per_heap)); if (do_ephemeral_gc_p) { dprintf (2, ("GC threads running, doing gen%d GC", settings.condemned_generation)); gen_to_condemn_reasons.init(); gen_to_condemn_reasons.set_condition (gen_before_bgc); gc_data_per_heap.gen_to_condemn_reasons.init (&gen_to_condemn_reasons); gc1(); #ifdef MULTIPLE_HEAPS gc_t_join.join(this, gc_join_bgc_after_ephemeral); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { #ifdef MULTIPLE_HEAPS do_post_gc(); #endif //MULTIPLE_HEAPS settings = saved_bgc_settings; assert (settings.concurrent); do_background_gc(); #ifdef MULTIPLE_HEAPS gc_t_join.restart(); #endif //MULTIPLE_HEAPS } } } else { dprintf (2, ("couldn't create BGC threads, reverting to doing a blocking GC")); gc1(); } } else #endif //BACKGROUND_GC { gc1(); } #ifndef MULTIPLE_HEAPS allocation_running_time = (size_t)GCToOSInterface::GetLowPrecisionTimeStamp(); allocation_running_amount = dd_new_allocation (dynamic_data_of (0)); fgn_last_alloc = dd_new_allocation (dynamic_data_of (0)); #endif //MULTIPLE_HEAPS done: if (settings.pause_mode == pause_no_gc) allocate_for_no_gc_after_gc(); } #define mark_stack_empty_p() (mark_stack_base == mark_stack_tos) inline size_t gc_heap::get_promoted_bytes() { #ifdef USE_REGIONS if (!survived_per_region) { dprintf (REGIONS_LOG, ("no space to store promoted bytes")); return 0; } dprintf (3, ("h%d getting surv", heap_number)); size_t promoted = 0; for (size_t i = 0; i < region_count; i++) { if (survived_per_region[i] > 0) { heap_segment* region = get_region_at_index (i); dprintf (REGIONS_LOG, ("h%d region[%d] %Ix(g%d)(%s) surv: %Id(%Ix)", heap_number, i, heap_segment_mem (region), heap_segment_gen_num (region), (heap_segment_loh_p (region) ? "LOH" : (heap_segment_poh_p (region) ? "POH" :"SOH")), survived_per_region[i], &survived_per_region[i])); promoted += survived_per_region[i]; } } #ifdef _DEBUG dprintf (REGIONS_LOG, ("h%d global recorded %Id, regions recorded %Id", heap_number, promoted_bytes (heap_number), promoted)); assert (promoted_bytes (heap_number) == promoted); #endif //_DEBUG return promoted; #else //USE_REGIONS #ifdef MULTIPLE_HEAPS return g_promoted [heap_number*16]; #else //MULTIPLE_HEAPS return g_promoted; #endif //MULTIPLE_HEAPS #endif //USE_REGIONS } #ifdef USE_REGIONS void gc_heap::sync_promoted_bytes() { int condemned_gen_number = settings.condemned_generation; int highest_gen_number = ((condemned_gen_number == max_generation) ? (total_generation_count - 1) : settings.condemned_generation); int stop_gen_idx = get_stop_generation_index (condemned_gen_number); #ifdef MULTIPLE_HEAPS // We gather all the promoted bytes for a region recorded by all threads into that region's survived // for plan phase. sore_mark_list will be called shortly and will start using the same storage that // the GC threads used to record promoted bytes. 
for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS for (int gen_idx = highest_gen_number; gen_idx >= stop_gen_idx; gen_idx--) { generation* condemned_gen = hp->generation_of (gen_idx); heap_segment* current_region = heap_segment_rw (generation_start_segment (condemned_gen)); while (current_region) { size_t region_index = get_basic_region_index_for_address (heap_segment_mem (current_region)); #ifdef MULTIPLE_HEAPS size_t total_surv = 0; size_t total_old_card_surv = 0; for (int hp_idx = 0; hp_idx < n_heaps; hp_idx++) { total_surv += g_heaps[hp_idx]->survived_per_region[region_index]; total_old_card_surv += g_heaps[hp_idx]->old_card_survived_per_region[region_index]; } heap_segment_survived (current_region) = (int)total_surv; heap_segment_old_card_survived (current_region) = (int)total_old_card_surv; #else heap_segment_survived (current_region) = (int)(survived_per_region[region_index]); heap_segment_old_card_survived (current_region) = (int)(old_card_survived_per_region[region_index]); #endif //MULTIPLE_HEAPS dprintf (REGIONS_LOG, ("region #%d %Ix surv %Id, old card surv %Id", region_index, heap_segment_mem (current_region), heap_segment_survived (current_region), heap_segment_old_card_survived (current_region))); current_region = heap_segment_next (current_region); } } } } #endif //USE_REGIONS #if !defined(USE_REGIONS) || defined(_DEBUG) inline void gc_heap::init_promoted_bytes() { #ifdef MULTIPLE_HEAPS g_promoted [heap_number*16] = 0; #else //MULTIPLE_HEAPS g_promoted = 0; #endif //MULTIPLE_HEAPS } size_t& gc_heap::promoted_bytes (int thread) { #ifdef MULTIPLE_HEAPS return g_promoted [thread*16]; #else //MULTIPLE_HEAPS UNREFERENCED_PARAMETER(thread); return g_promoted; #endif //MULTIPLE_HEAPS } #endif //!USE_REGIONS || _DEBUG inline void gc_heap::add_to_promoted_bytes (uint8_t* object, int thread) { size_t obj_size = size (object); add_to_promoted_bytes (object, obj_size, thread); } inline void gc_heap::add_to_promoted_bytes (uint8_t* object, size_t obj_size, int thread) { assert (thread == heap_number); #ifdef USE_REGIONS if (survived_per_region) { survived_per_region[get_basic_region_index_for_address (object)] += obj_size; } #endif //USE_REGIONS #if !defined(USE_REGIONS) || defined(_DEBUG) #ifdef MULTIPLE_HEAPS g_promoted [heap_number*16] += obj_size; #else //MULTIPLE_HEAPS g_promoted += obj_size; #endif //MULTIPLE_HEAPS #endif //!USE_REGIONS || _DEBUG #ifdef _DEBUG // Verify we keep the 2 recordings in sync. //get_promoted_bytes(); #endif //_DEBUG } heap_segment* gc_heap::find_segment (uint8_t* interior, BOOL small_segment_only_p) { heap_segment* seg = seg_mapping_table_segment_of (interior); if (seg) { if (small_segment_only_p && heap_segment_uoh_p (seg)) return 0; } return seg; } #if !defined(_DEBUG) && !defined(__GNUC__) inline // This causes link errors if global optimization is off #endif //!_DEBUG && !__GNUC__ gc_heap* gc_heap::heap_of (uint8_t* o) { #ifdef MULTIPLE_HEAPS if (o == 0) return g_heaps [0]; gc_heap* hp = seg_mapping_table_heap_of (o); return (hp ? hp : g_heaps[0]); #else //MULTIPLE_HEAPS UNREFERENCED_PARAMETER(o); return __this; #endif //MULTIPLE_HEAPS } inline gc_heap* gc_heap::heap_of_gc (uint8_t* o) { #ifdef MULTIPLE_HEAPS if (o == 0) return g_heaps [0]; gc_heap* hp = seg_mapping_table_heap_of_gc (o); return (hp ? 
hp : g_heaps[0]); #else //MULTIPLE_HEAPS UNREFERENCED_PARAMETER(o); return __this; #endif //MULTIPLE_HEAPS } // will find all heap objects (large and small) // // Callers of this method need to guarantee the interior pointer is within the heap range. // // If you need it to be stricter, eg if you only want to find an object in ephemeral range, // you should make sure interior is within that range before calling this method. uint8_t* gc_heap::find_object (uint8_t* interior) { assert (interior != 0); if (!gen0_bricks_cleared) { #ifdef MULTIPLE_HEAPS assert (!"Should have already been done in server GC"); #endif //MULTIPLE_HEAPS clear_gen0_bricks(); } //indicate that in the future this needs to be done during allocation gen0_must_clear_bricks = FFIND_DECAY; int brick_entry = get_brick_entry(brick_of (interior)); if (brick_entry == 0) { // this is a pointer to a UOH object heap_segment* seg = find_segment (interior, FALSE); if (seg) { #ifdef FEATURE_CONSERVATIVE_GC if (interior >= heap_segment_allocated(seg)) return 0; #endif // If interior falls within the first free object at the beginning of a generation, // we don't have brick entry for it, and we may incorrectly treat it as on large object heap. int align_const = get_alignment_constant (heap_segment_read_only_p (seg) #ifdef FEATURE_CONSERVATIVE_GC || (GCConfig::GetConservativeGC() && !heap_segment_uoh_p (seg)) #endif ); assert (interior < heap_segment_allocated (seg)); uint8_t* o = heap_segment_mem (seg); while (o < heap_segment_allocated (seg)) { uint8_t* next_o = o + Align (size (o), align_const); assert (next_o > o); if ((o <= interior) && (interior < next_o)) return o; o = next_o; } return 0; } else { return 0; } } else { heap_segment* seg = find_segment (interior, TRUE); if (seg) { #ifdef FEATURE_CONSERVATIVE_GC if (interior >= heap_segment_allocated (seg)) return 0; #else assert (interior < heap_segment_allocated (seg)); #endif uint8_t* o = find_first_object (interior, heap_segment_mem (seg)); return o; } else return 0; } } #ifdef MULTIPLE_HEAPS #ifdef GC_CONFIG_DRIVEN #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}} #else //GC_CONFIG_DRIVEN #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}} #endif //GC_CONFIG_DRIVEN #define m_boundary_fullgc(o) {} #else //MULTIPLE_HEAPS #ifdef GC_CONFIG_DRIVEN #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;} #else #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}if (slow > o) slow = o; if (shigh < o) shigh = o;} #endif //GC_CONFIG_DRIVEN #define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;} #endif //MULTIPLE_HEAPS #define method_table(o) ((CObjectHeader*)(o))->GetMethodTable() inline BOOL gc_heap::gc_mark1 (uint8_t* o) { BOOL marked = !marked (o); set_marked (o); dprintf (3, ("*%Ix*, newly marked: %d", (size_t)o, marked)); #if defined(USE_REGIONS) && defined(_DEBUG) heap_segment* seg = seg_mapping_table_segment_of (o); if (o > heap_segment_allocated (seg)) { dprintf (REGIONS_LOG, ("%Ix is in seg %Ix(%Ix) but beyond alloc %Ix!!", o, (size_t)seg, heap_segment_mem (seg), heap_segment_allocated (seg))); GCToOSInterface::DebugBreak(); } #endif //USE_REGIONS && _DEBUG return marked; } inline BOOL gc_heap::gc_mark (uint8_t* o, uint8_t* low, uint8_t* high, int 
condemned_gen) { #ifdef USE_REGIONS assert (low == 0); assert (high == 0); if (o) { BOOL already_marked = marked (o); if (already_marked) return FALSE; if (condemned_gen == max_generation) { set_marked (o); return TRUE; } int gen = get_region_gen_num (o); if (gen <= condemned_gen) { set_marked (o); return TRUE; } } return FALSE; #else //USE_REGIONS assert (condemned_gen == -1); BOOL marked = FALSE; if ((o >= low) && (o < high)) marked = gc_mark1 (o); #ifdef MULTIPLE_HEAPS else if (o) { gc_heap* hp = heap_of_gc (o); assert (hp); if ((o >= hp->gc_low) && (o < hp->gc_high)) marked = gc_mark1 (o); } #ifdef SNOOP_STATS snoop_stat.objects_checked_count++; if (marked) { snoop_stat.objects_marked_count++; } if (!o) { snoop_stat.zero_ref_count++; } #endif //SNOOP_STATS #endif //MULTIPLE_HEAPS return marked; #endif //USE_REGIONS } #ifdef BACKGROUND_GC inline BOOL gc_heap::background_marked (uint8_t* o) { return mark_array_marked (o); } inline BOOL gc_heap::background_mark1 (uint8_t* o) { BOOL to_mark = !mark_array_marked (o); dprintf (3, ("b*%Ix*b(%d)", (size_t)o, (to_mark ? 1 : 0))); if (to_mark) { mark_array_set_marked (o); dprintf (4, ("n*%Ix*n", (size_t)o)); return TRUE; } else return FALSE; } // TODO: we could consider filtering out NULL's here instead of going to // look for it on other heaps inline BOOL gc_heap::background_mark (uint8_t* o, uint8_t* low, uint8_t* high) { BOOL marked = FALSE; if ((o >= low) && (o < high)) marked = background_mark1 (o); #ifdef MULTIPLE_HEAPS else if (o) { gc_heap* hp = heap_of (o); assert (hp); if ((o >= hp->background_saved_lowest_address) && (o < hp->background_saved_highest_address)) marked = background_mark1 (o); } #endif //MULTIPLE_HEAPS return marked; } #endif //BACKGROUND_GC #define new_start() {if (ppstop <= start) {break;} else {parm = start}} #define ignore_start 0 #define use_start 1 #define go_through_object(mt,o,size,parm,start,start_useful,limit,exp) \ { \ CGCDesc* map = CGCDesc::GetCGCDescFromMT((MethodTable*)(mt)); \ CGCDescSeries* cur = map->GetHighestSeries(); \ ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries(); \ \ if (cnt >= 0) \ { \ CGCDescSeries* last = map->GetLowestSeries(); \ uint8_t** parm = 0; \ do \ { \ assert (parm <= (uint8_t**)((o) + cur->GetSeriesOffset())); \ parm = (uint8_t**)((o) + cur->GetSeriesOffset()); \ uint8_t** ppstop = \ (uint8_t**)((uint8_t*)parm + cur->GetSeriesSize() + (size));\ if (!start_useful || (uint8_t*)ppstop > (start)) \ { \ if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);\ while (parm < ppstop) \ { \ {exp} \ parm++; \ } \ } \ cur--; \ \ } while (cur >= last); \ } \ else \ { \ /* Handle the repeating case - array of valuetypes */ \ uint8_t** parm = (uint8_t**)((o) + cur->startoffset); \ if (start_useful && start > (uint8_t*)parm) \ { \ ptrdiff_t cs = mt->RawGetComponentSize(); \ parm = (uint8_t**)((uint8_t*)parm + (((start) - (uint8_t*)parm)/cs)*cs); \ } \ while ((uint8_t*)parm < ((o)+(size)-plug_skew)) \ { \ for (ptrdiff_t __i = 0; __i > cnt; __i--) \ { \ HALF_SIZE_T skip = cur->val_serie[__i].skip; \ HALF_SIZE_T nptrs = cur->val_serie[__i].nptrs; \ uint8_t** ppstop = parm + nptrs; \ if (!start_useful || (uint8_t*)ppstop > (start)) \ { \ if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start); \ do \ { \ {exp} \ parm++; \ } while (parm < ppstop); \ } \ parm = (uint8_t**)((uint8_t*)ppstop + skip); \ } \ } \ } \ } #define go_through_object_nostart(mt,o,size,parm,exp) {go_through_object(mt,o,size,parm,o,ignore_start,(o + size),exp); } // 1 thing to note about this macro: 
// 1) you can use *parm safely but in general you don't want to use parm // because for the collectible types it's not an address on the managed heap. #ifndef COLLECTIBLE_CLASS #define go_through_object_cl(mt,o,size,parm,exp) \ { \ if (header(o)->ContainsPointers()) \ { \ go_through_object_nostart(mt,o,size,parm,exp); \ } \ } #else //COLLECTIBLE_CLASS #define go_through_object_cl(mt,o,size,parm,exp) \ { \ if (header(o)->Collectible()) \ { \ uint8_t* class_obj = get_class_object (o); \ uint8_t** parm = &class_obj; \ do {exp} while (false); \ } \ if (header(o)->ContainsPointers()) \ { \ go_through_object_nostart(mt,o,size,parm,exp); \ } \ } #endif //COLLECTIBLE_CLASS // This starts a plug. But mark_stack_tos isn't increased until set_pinned_info is called. void gc_heap::enque_pinned_plug (uint8_t* plug, BOOL save_pre_plug_info_p, uint8_t* last_object_in_last_plug) { if (mark_stack_array_length <= mark_stack_tos) { if (!grow_mark_stack (mark_stack_array, mark_stack_array_length, MARK_STACK_INITIAL_LENGTH)) { // we don't want to continue here due to security // risks. This happens very rarely and fixing it in the // way so that we can continue is a bit involved and will // not be done in Dev10. GCToEEInterface::HandleFatalError((unsigned int)CORINFO_EXCEPTION_GC); } } dprintf (3, ("enqueuing P #%Id(%Ix): %Ix. oldest: %Id, LO: %Ix, pre: %d", mark_stack_tos, &mark_stack_array[mark_stack_tos], plug, mark_stack_bos, last_object_in_last_plug, (save_pre_plug_info_p ? 1 : 0))); mark& m = mark_stack_array[mark_stack_tos]; m.first = plug; // Must be set now because if we have a short object we'll need the value of saved_pre_p. m.saved_pre_p = save_pre_plug_info_p; if (save_pre_plug_info_p) { // In the case of short plugs or doubly linked free lists, there may be extra bits // set in the method table pointer. // Clear these bits for the copy saved in saved_pre_plug, but not for the copy // saved in saved_pre_plug_reloc. // This is because we need these bits for compaction, but not for mark & sweep. size_t special_bits = clear_special_bits (last_object_in_last_plug); // now copy the bits over memcpy (&(m.saved_pre_plug), &(((plug_and_gap*)plug)[-1]), sizeof (gap_reloc_pair)); // restore the bits in the original set_special_bits (last_object_in_last_plug, special_bits); memcpy (&(m.saved_pre_plug_reloc), &(((plug_and_gap*)plug)[-1]), sizeof (gap_reloc_pair)); // If the last object in the last plug is too short, it requires special handling. size_t last_obj_size = plug - last_object_in_last_plug; if (last_obj_size < min_pre_pin_obj_size) { record_interesting_data_point (idp_pre_short); #ifdef SHORT_PLUGS if (is_plug_padded (last_object_in_last_plug)) record_interesting_data_point (idp_pre_short_padded); #endif //SHORT_PLUGS dprintf (3, ("encountered a short object %Ix right before pinned plug %Ix!", last_object_in_last_plug, plug)); // Need to set the short bit regardless of having refs or not because we need to // indicate that this object is not walkable. 
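// The mark entry itself carries the short-object info: set_pre_short() below flags that the object right
// before this pinned plug is too short to walk, and set_pre_short_bit (further down) records the offset of
// each pointer field in that object, measured in pointer-sized slots from the beginning of the gap.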
m.set_pre_short(); #ifdef COLLECTIBLE_CLASS if (is_collectible (last_object_in_last_plug)) { m.set_pre_short_collectible(); } #endif //COLLECTIBLE_CLASS if (contain_pointers (last_object_in_last_plug)) { dprintf (3, ("short object: %Ix(%Ix)", last_object_in_last_plug, last_obj_size)); go_through_object_nostart (method_table(last_object_in_last_plug), last_object_in_last_plug, last_obj_size, pval, { size_t gap_offset = (((size_t)pval - (size_t)(plug - sizeof (gap_reloc_pair) - plug_skew))) / sizeof (uint8_t*); dprintf (3, ("member: %Ix->%Ix, %Id ptrs from beginning of gap", (uint8_t*)pval, *pval, gap_offset)); m.set_pre_short_bit (gap_offset); } ); } } } m.saved_post_p = FALSE; } void gc_heap::save_post_plug_info (uint8_t* last_pinned_plug, uint8_t* last_object_in_last_plug, uint8_t* post_plug) { #ifndef _DEBUG UNREFERENCED_PARAMETER(last_pinned_plug); #endif //_DEBUG mark& m = mark_stack_array[mark_stack_tos - 1]; assert (last_pinned_plug == m.first); m.saved_post_plug_info_start = (uint8_t*)&(((plug_and_gap*)post_plug)[-1]); // In the case of short plugs or doubly linked free lists, there may be extra bits // set in the method table pointer. // Clear these bits for the copy saved in saved_post_plug, but not for the copy // saved in saved_post_plug_reloc. // This is because we need these bits for compaction, but not for mark & sweep. // Note that currently none of these bits will ever be set in the object saved *after* // a pinned plug - this object is currently pinned along with the pinned object before it size_t special_bits = clear_special_bits (last_object_in_last_plug); memcpy (&(m.saved_post_plug), m.saved_post_plug_info_start, sizeof (gap_reloc_pair)); // restore the bits in the original set_special_bits (last_object_in_last_plug, special_bits); memcpy (&(m.saved_post_plug_reloc), m.saved_post_plug_info_start, sizeof (gap_reloc_pair)); // This is important - we need to clear all bits here except the last one. m.saved_post_p = TRUE; #ifdef _DEBUG m.saved_post_plug_debug.gap = 1; #endif //_DEBUG dprintf (3, ("PP %Ix has NP %Ix right after", last_pinned_plug, post_plug)); size_t last_obj_size = post_plug - last_object_in_last_plug; if (last_obj_size < min_pre_pin_obj_size) { dprintf (3, ("PP %Ix last obj %Ix is too short", last_pinned_plug, last_object_in_last_plug)); record_interesting_data_point (idp_post_short); #ifdef SHORT_PLUGS if (is_plug_padded (last_object_in_last_plug)) record_interesting_data_point (idp_post_short_padded); #endif //SHORT_PLUGS m.set_post_short(); #if defined (_DEBUG) && defined (VERIFY_HEAP) verify_pinned_queue_p = TRUE; #endif // _DEBUG && VERIFY_HEAP #ifdef COLLECTIBLE_CLASS if (is_collectible (last_object_in_last_plug)) { m.set_post_short_collectible(); } #endif //COLLECTIBLE_CLASS if (contain_pointers (last_object_in_last_plug)) { dprintf (3, ("short object: %Ix(%Ix)", last_object_in_last_plug, last_obj_size)); // TODO: since we won't be able to walk this object in relocation, we still need to // take care of collectible assemblies here. 
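// Same idea as the pre-plug case above: record the offset (in pointer-sized slots from the beginning of
// the gap) of each pointer field in the short object via set_post_short_bit, since the object itself
// can't be walked during relocation.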
go_through_object_nostart (method_table(last_object_in_last_plug), last_object_in_last_plug, last_obj_size, pval, { size_t gap_offset = (((size_t)pval - (size_t)(post_plug - sizeof (gap_reloc_pair) - plug_skew))) / sizeof (uint8_t*); dprintf (3, ("member: %Ix->%Ix, %Id ptrs from beginning of gap", (uint8_t*)pval, *pval, gap_offset)); m.set_post_short_bit (gap_offset); } ); } } } //#define PREFETCH #ifdef PREFETCH __declspec(naked) void __fastcall Prefetch(void* addr) { __asm { PREFETCHT0 [ECX] ret }; } #else //PREFETCH inline void Prefetch (void* addr) { UNREFERENCED_PARAMETER(addr); } #endif //PREFETCH #ifdef MH_SC_MARK inline VOLATILE(uint8_t*)& gc_heap::ref_mark_stack (gc_heap* hp, int index) { return ((VOLATILE(uint8_t*)*)(hp->mark_stack_array))[index]; } #endif //MH_SC_MARK #define stolen 2 #define partial 1 #define partial_object 3 inline uint8_t* ref_from_slot (uint8_t* r) { return (uint8_t*)((size_t)r & ~(stolen | partial)); } inline BOOL stolen_p (uint8_t* r) { return (((size_t)r&2) && !((size_t)r&1)); } inline BOOL ready_p (uint8_t* r) { return ((size_t)r != 1); } inline BOOL partial_p (uint8_t* r) { return (((size_t)r&1) && !((size_t)r&2)); } inline BOOL straight_ref_p (uint8_t* r) { return (!stolen_p (r) && !partial_p (r)); } inline BOOL partial_object_p (uint8_t* r) { return (((size_t)r & partial_object) == partial_object); } inline BOOL ref_p (uint8_t* r) { return (straight_ref_p (r) || partial_object_p (r)); } void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL) { SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_tos = (SERVER_SC_MARK_VOLATILE(uint8_t*)*)mark_stack_array; SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_limit = (SERVER_SC_MARK_VOLATILE(uint8_t*)*)&mark_stack_array[mark_stack_array_length]; SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_base = mark_stack_tos; #ifdef SORT_MARK_STACK SERVER_SC_MARK_VOLATILE(uint8_t*)* sorted_tos = mark_stack_base; #endif //SORT_MARK_STACK // If we are doing a full GC we don't use mark list anyway so use m_boundary_fullgc that doesn't // update mark list. BOOL full_p = (settings.condemned_generation == max_generation); int condemned_gen = #ifdef USE_REGIONS settings.condemned_generation; #else -1; #endif //USE_REGIONS assert ((start >= oo) && (start < oo+size(oo))); #ifndef MH_SC_MARK *mark_stack_tos = oo; #endif //!MH_SC_MARK while (1) { #ifdef MULTIPLE_HEAPS #else //MULTIPLE_HEAPS const int thread = 0; #endif //MULTIPLE_HEAPS if (oo && ((size_t)oo != 4)) { size_t s = 0; if (stolen_p (oo)) { --mark_stack_tos; goto next_level; } else if (!partial_p (oo) && ((s = size (oo)) < (partial_size_th*sizeof (uint8_t*)))) { BOOL overflow_p = FALSE; if (mark_stack_tos + (s) /sizeof (uint8_t*) >= (mark_stack_limit - 1)) { size_t num_components = ((method_table(oo))->HasComponentSize() ? 
((CObjectHeader*)oo)->GetNumComponents() : 0); if (mark_stack_tos + CGCDesc::GetNumPointers(method_table(oo), s, num_components) >= (mark_stack_limit - 1)) { overflow_p = TRUE; } } if (overflow_p == FALSE) { dprintf(3,("pushing mark for %Ix ", (size_t)oo)); go_through_object_cl (method_table(oo), oo, s, ppslot, { uint8_t* o = *ppslot; Prefetch(o); if (gc_mark (o, gc_low, gc_high, condemned_gen)) { if (full_p) { m_boundary_fullgc (o); } else { m_boundary (o); } add_to_promoted_bytes (o, thread); if (contain_pointers_or_collectible (o)) { *(mark_stack_tos++) = o; } } } ); } else { dprintf(3,("mark stack overflow for object %Ix ", (size_t)oo)); min_overflow_address = min (min_overflow_address, oo); max_overflow_address = max (max_overflow_address, oo); } } else { if (partial_p (oo)) { start = ref_from_slot (oo); oo = ref_from_slot (*(--mark_stack_tos)); dprintf (4, ("oo: %Ix, start: %Ix\n", (size_t)oo, (size_t)start)); assert ((oo < start) && (start < (oo + size (oo)))); } #ifdef COLLECTIBLE_CLASS else { // If there's a class object, push it now. We are guaranteed to have the slot since // we just popped one object off. if (is_collectible (oo)) { uint8_t* class_obj = get_class_object (oo); if (gc_mark (class_obj, gc_low, gc_high, condemned_gen)) { if (full_p) { m_boundary_fullgc (class_obj); } else { m_boundary (class_obj); } add_to_promoted_bytes (class_obj, thread); *(mark_stack_tos++) = class_obj; // The code below expects that the oo is still stored in the stack slot that was // just popped and it "pushes" it back just by incrementing the mark_stack_tos. // But the class_obj has just overwritten that stack slot and so the oo needs to // be stored to the new slot that's pointed to by the mark_stack_tos. *mark_stack_tos = oo; } } if (!contain_pointers (oo)) { goto next_level; } } #endif //COLLECTIBLE_CLASS s = size (oo); BOOL overflow_p = FALSE; if (mark_stack_tos + (num_partial_refs + 2) >= mark_stack_limit) { overflow_p = TRUE; } if (overflow_p == FALSE) { dprintf(3,("pushing mark for %Ix ", (size_t)oo)); //push the object and its current SERVER_SC_MARK_VOLATILE(uint8_t*)* place = ++mark_stack_tos; mark_stack_tos++; #ifdef MH_SC_MARK *(place-1) = 0; *(place) = (uint8_t*)partial; #endif //MH_SC_MARK int i = num_partial_refs; uint8_t* ref_to_continue = 0; go_through_object (method_table(oo), oo, s, ppslot, start, use_start, (oo + s), { uint8_t* o = *ppslot; Prefetch(o); if (gc_mark (o, gc_low, gc_high,condemned_gen)) { if (full_p) { m_boundary_fullgc (o); } else { m_boundary (o); } add_to_promoted_bytes (o, thread); if (contain_pointers_or_collectible (o)) { *(mark_stack_tos++) = o; if (--i == 0) { ref_to_continue = (uint8_t*)((size_t)(ppslot+1) | partial); goto more_to_do; } } } } ); //we are finished with this object assert (ref_to_continue == 0); #ifdef MH_SC_MARK assert ((*(place-1)) == (uint8_t*)0); #else //MH_SC_MARK *(place-1) = 0; #endif //MH_SC_MARK *place = 0; // shouldn't we decrease tos by 2 here?? 
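// more_to_do below is reached when the per-object budget (num_partial_refs) runs out before all of this
// object's fields have been visited. ref_to_continue is the next field slot tagged with the partial bit;
// storing it in *place lets the walk resume from that field later, and under MH_SC_MARK the slot below it
// gets the parent tagged as partial_object so another heap's thread can steal it.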
more_to_do: if (ref_to_continue) { //update the start #ifdef MH_SC_MARK assert ((*(place-1)) == (uint8_t*)0); *(place-1) = (uint8_t*)((size_t)oo | partial_object); assert (((*place) == (uint8_t*)1) || ((*place) == (uint8_t*)2)); #endif //MH_SC_MARK *place = ref_to_continue; } } else { dprintf(3,("mark stack overflow for object %Ix ", (size_t)oo)); min_overflow_address = min (min_overflow_address, oo); max_overflow_address = max (max_overflow_address, oo); } } #ifdef SORT_MARK_STACK if (mark_stack_tos > sorted_tos + mark_stack_array_length/8) { rqsort1 (sorted_tos, mark_stack_tos-1); sorted_tos = mark_stack_tos-1; } #endif //SORT_MARK_STACK } next_level: if (!(mark_stack_empty_p())) { oo = *(--mark_stack_tos); start = oo; #ifdef SORT_MARK_STACK sorted_tos = min ((size_t)sorted_tos, (size_t)mark_stack_tos); #endif //SORT_MARK_STACK } else break; } } #ifdef MH_SC_MARK BOOL same_numa_node_p (int hn1, int hn2) { return (heap_select::find_numa_node_from_heap_no (hn1) == heap_select::find_numa_node_from_heap_no (hn2)); } int find_next_buddy_heap (int this_heap_number, int current_buddy, int n_heaps) { int hn = (current_buddy+1)%n_heaps; while (hn != current_buddy) { if ((this_heap_number != hn) && (same_numa_node_p (this_heap_number, hn))) return hn; hn = (hn+1)%n_heaps; } return current_buddy; } void gc_heap::mark_steal() { mark_stack_busy() = 0; //clear the mark stack in the snooping range for (int i = 0; i < max_snoop_level; i++) { ((VOLATILE(uint8_t*)*)(mark_stack_array))[i] = 0; } //pick the next heap as our buddy int thpn = find_next_buddy_heap (heap_number, heap_number, n_heaps); #ifdef SNOOP_STATS dprintf (SNOOP_LOG, ("(GC%d)heap%d: start snooping %d", settings.gc_index, heap_number, (heap_number+1)%n_heaps)); uint32_t begin_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); #endif //SNOOP_STATS int idle_loop_count = 0; int first_not_ready_level = 0; while (1) { gc_heap* hp = g_heaps [thpn]; int level = first_not_ready_level; first_not_ready_level = 0; while (check_next_mark_stack (hp) && (level < (max_snoop_level-1))) { idle_loop_count = 0; #ifdef SNOOP_STATS snoop_stat.busy_count++; dprintf (SNOOP_LOG, ("heap%d: looking at next heap level %d stack contents: %Ix", heap_number, level, (int)((uint8_t**)(hp->mark_stack_array))[level])); #endif //SNOOP_STATS uint8_t* o = ref_mark_stack (hp, level); uint8_t* start = o; if (ref_p (o)) { mark_stack_busy() = 1; BOOL success = TRUE; uint8_t* next = (ref_mark_stack (hp, level+1)); if (ref_p (next)) { if (((size_t)o > 4) && !partial_object_p (o)) { //this is a normal object, not a partial mark tuple //success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level), 0, o)==o); success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level), (uint8_t*)4, o)==o); #ifdef SNOOP_STATS snoop_stat.interlocked_count++; if (success) snoop_stat.normal_count++; #endif //SNOOP_STATS } else { //it is a stolen entry, or beginning/ending of a partial mark level++; #ifdef SNOOP_STATS snoop_stat.stolen_or_pm_count++; #endif //SNOOP_STATS success = FALSE; } } else if (stolen_p (next)) { //ignore the stolen guy and go to the next level success = FALSE; level+=2; #ifdef SNOOP_STATS snoop_stat.stolen_entry_count++; #endif //SNOOP_STATS } else { assert (partial_p (next)); start = ref_from_slot (next); //re-read the object o = ref_from_slot (ref_mark_stack (hp, level)); if (o && start) { //steal the object success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level+1), (uint8_t*)stolen, next) == next); #ifdef SNOOP_STATS 
snoop_stat.interlocked_count++; if (success) { snoop_stat.partial_mark_parent_count++; } #endif //SNOOP_STATS } else { // stack is not ready, or o is completely different from the last time we read from this stack level. // go up 2 levels to steal children or totally unrelated objects. success = FALSE; if (first_not_ready_level == 0) { first_not_ready_level = level; } level+=2; #ifdef SNOOP_STATS snoop_stat.pm_not_ready_count++; #endif //SNOOP_STATS } } if (success) { #ifdef SNOOP_STATS dprintf (SNOOP_LOG, ("heap%d: marking %Ix from %d [%d] tl:%dms", heap_number, (size_t)o, (heap_number+1)%n_heaps, level, (GCToOSInterface::GetLowPrecisionTimeStamp()-begin_tick))); uint32_t start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); #endif //SNOOP_STATS mark_object_simple1 (o, start, heap_number); #ifdef SNOOP_STATS dprintf (SNOOP_LOG, ("heap%d: done marking %Ix from %d [%d] %dms tl:%dms", heap_number, (size_t)o, (heap_number+1)%n_heaps, level, (GCToOSInterface::GetLowPrecisionTimeStamp()-start_tick),(GCToOSInterface::GetLowPrecisionTimeStamp()-begin_tick))); #endif //SNOOP_STATS mark_stack_busy() = 0; //clear the mark stack in snooping range for (int i = 0; i < max_snoop_level; i++) { if (((uint8_t**)mark_stack_array)[i] != 0) { ((VOLATILE(uint8_t*)*)(mark_stack_array))[i] = 0; #ifdef SNOOP_STATS snoop_stat.stack_bottom_clear_count++; #endif //SNOOP_STATS } } level = 0; } mark_stack_busy() = 0; } else { //slot is either partial or stolen level++; } } if ((first_not_ready_level != 0) && hp->mark_stack_busy()) { continue; } if (!hp->mark_stack_busy()) { first_not_ready_level = 0; idle_loop_count++; if ((idle_loop_count % (6) )==1) { #ifdef SNOOP_STATS snoop_stat.switch_to_thread_count++; #endif //SNOOP_STATS GCToOSInterface::Sleep(1); } int free_count = 1; #ifdef SNOOP_STATS snoop_stat.stack_idle_count++; //dprintf (SNOOP_LOG, ("heap%d: counting idle threads", heap_number)); #endif //SNOOP_STATS for (int hpn = (heap_number+1)%n_heaps; hpn != heap_number;) { if (!((g_heaps [hpn])->mark_stack_busy())) { free_count++; #ifdef SNOOP_STATS dprintf (SNOOP_LOG, ("heap%d: %d idle", heap_number, free_count)); #endif //SNOOP_STATS } else if (same_numa_node_p (hpn, heap_number) || ((idle_loop_count%1000))==999) { thpn = hpn; break; } hpn = (hpn+1)%n_heaps; YieldProcessor(); } if (free_count == n_heaps) { break; } } } } inline BOOL gc_heap::check_next_mark_stack (gc_heap* next_heap) { #ifdef SNOOP_STATS snoop_stat.check_level_count++; #endif //SNOOP_STATS return (next_heap->mark_stack_busy()>=1); } #endif //MH_SC_MARK #ifdef SNOOP_STATS void gc_heap::print_snoop_stat() { dprintf (1234, ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s", "heap", "check", "zero", "mark", "stole", "pstack", "nstack", "nonsk")); dprintf (1234, ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d", snoop_stat.heap_index, snoop_stat.objects_checked_count, snoop_stat.zero_ref_count, snoop_stat.objects_marked_count, snoop_stat.stolen_stack_count, snoop_stat.partial_stack_count, snoop_stat.normal_stack_count, snoop_stat.non_stack_count)); dprintf (1234, ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s", "heap", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "clear")); dprintf (1234, ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d\n", snoop_stat.heap_index, snoop_stat.check_level_count, snoop_stat.busy_count, snoop_stat.interlocked_count, snoop_stat.partial_mark_parent_count, snoop_stat.stolen_or_pm_count, snoop_stat.stolen_entry_count, snoop_stat.pm_not_ready_count, snoop_stat.normal_count, 
snoop_stat.stack_bottom_clear_count)); printf ("\n%4s | %8s | %8s | %8s | %8s | %8s\n", "heap", "check", "zero", "mark", "idle", "switch"); printf ("%4d | %8d | %8d | %8d | %8d | %8d\n", snoop_stat.heap_index, snoop_stat.objects_checked_count, snoop_stat.zero_ref_count, snoop_stat.objects_marked_count, snoop_stat.stack_idle_count, snoop_stat.switch_to_thread_count); printf ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s\n", "heap", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "normal", "clear"); printf ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d\n", snoop_stat.heap_index, snoop_stat.check_level_count, snoop_stat.busy_count, snoop_stat.interlocked_count, snoop_stat.partial_mark_parent_count, snoop_stat.stolen_or_pm_count, snoop_stat.stolen_entry_count, snoop_stat.pm_not_ready_count, snoop_stat.normal_count, snoop_stat.stack_bottom_clear_count); } #endif //SNOOP_STATS #ifdef HEAP_ANALYZE void gc_heap::ha_mark_object_simple (uint8_t** po THREAD_NUMBER_DCL) { if (!internal_root_array) { internal_root_array = new (nothrow) uint8_t* [internal_root_array_length]; if (!internal_root_array) { heap_analyze_success = FALSE; } } if (heap_analyze_success && (internal_root_array_length <= internal_root_array_index)) { size_t new_size = 2*internal_root_array_length; uint64_t available_physical = 0; get_memory_info (NULL, &available_physical); if (new_size > (size_t)(available_physical / 10)) { heap_analyze_success = FALSE; } else { uint8_t** tmp = new (nothrow) uint8_t* [new_size]; if (tmp) { memcpy (tmp, internal_root_array, internal_root_array_length*sizeof (uint8_t*)); delete[] internal_root_array; internal_root_array = tmp; internal_root_array_length = new_size; } else { heap_analyze_success = FALSE; } } } if (heap_analyze_success) { PREFIX_ASSUME(internal_root_array_index < internal_root_array_length); uint8_t* ref = (uint8_t*)po; if (!current_obj || !((ref >= current_obj) && (ref < (current_obj + current_obj_size)))) { gc_heap* hp = gc_heap::heap_of (ref); current_obj = hp->find_object (ref); current_obj_size = size (current_obj); internal_root_array[internal_root_array_index] = current_obj; internal_root_array_index++; } } mark_object_simple (po THREAD_NUMBER_ARG); } #endif //HEAP_ANALYZE //this method assumes that *po is in the [low. 
high[ range void gc_heap::mark_object_simple (uint8_t** po THREAD_NUMBER_DCL) { int condemned_gen = #ifdef USE_REGIONS settings.condemned_generation; #else -1; #endif //USE_REGIONS uint8_t* o = *po; #ifndef MULTIPLE_HEAPS const int thread = 0; #endif //MULTIPLE_HEAPS { #ifdef SNOOP_STATS snoop_stat.objects_checked_count++; #endif //SNOOP_STATS if (gc_mark1 (o)) { m_boundary (o); size_t s = size (o); add_to_promoted_bytes (o, s, thread); { go_through_object_cl (method_table(o), o, s, poo, { uint8_t* oo = *poo; if (gc_mark (oo, gc_low, gc_high, condemned_gen)) { m_boundary (oo); add_to_promoted_bytes (oo, thread); if (contain_pointers_or_collectible (oo)) mark_object_simple1 (oo, oo THREAD_NUMBER_ARG); } } ); } } } } inline void gc_heap::mark_object (uint8_t* o THREAD_NUMBER_DCL) { #ifdef USE_REGIONS if ((o != nullptr) && is_in_condemned_gc (o)) { mark_object_simple (&o THREAD_NUMBER_ARG); } #else //USE_REGIONS if ((o >= gc_low) && (o < gc_high)) mark_object_simple (&o THREAD_NUMBER_ARG); #ifdef MULTIPLE_HEAPS else if (o) { gc_heap* hp = heap_of (o); assert (hp); if ((o >= hp->gc_low) && (o < hp->gc_high)) mark_object_simple (&o THREAD_NUMBER_ARG); } #endif //MULTIPLE_HEAPS #endif //USE_REGIONS } #ifdef BACKGROUND_GC void gc_heap::background_mark_simple1 (uint8_t* oo THREAD_NUMBER_DCL) { uint8_t** mark_stack_limit = &background_mark_stack_array[background_mark_stack_array_length]; #ifdef SORT_MARK_STACK uint8_t** sorted_tos = background_mark_stack_array; #endif //SORT_MARK_STACK background_mark_stack_tos = background_mark_stack_array; while (1) { #ifdef MULTIPLE_HEAPS #else //MULTIPLE_HEAPS const int thread = 0; #endif //MULTIPLE_HEAPS if (oo) { size_t s = 0; if ((((size_t)oo & 1) == 0) && ((s = size (oo)) < (partial_size_th*sizeof (uint8_t*)))) { BOOL overflow_p = FALSE; if (background_mark_stack_tos + (s) /sizeof (uint8_t*) >= (mark_stack_limit - 1)) { size_t num_components = ((method_table(oo))->HasComponentSize() ? ((CObjectHeader*)oo)->GetNumComponents() : 0); size_t num_pointers = CGCDesc::GetNumPointers(method_table(oo), s, num_components); if (background_mark_stack_tos + num_pointers >= (mark_stack_limit - 1)) { dprintf (2, ("h%d: %Id left, obj (mt: %Ix) %Id ptrs", heap_number, (size_t)(mark_stack_limit - 1 - background_mark_stack_tos), method_table(oo), num_pointers)); bgc_overflow_count++; overflow_p = TRUE; } } if (overflow_p == FALSE) { dprintf(3,("pushing mark for %Ix ", (size_t)oo)); go_through_object_cl (method_table(oo), oo, s, ppslot, { uint8_t* o = *ppslot; Prefetch(o); if (background_mark (o, background_saved_lowest_address, background_saved_highest_address)) { //m_boundary (o); size_t obj_size = size (o); bpromoted_bytes (thread) += obj_size; if (contain_pointers_or_collectible (o)) { *(background_mark_stack_tos++) = o; } } } ); } else { dprintf (3,("mark stack overflow for object %Ix ", (size_t)oo)); background_min_overflow_address = min (background_min_overflow_address, oo); background_max_overflow_address = max (background_max_overflow_address, oo); } } else { uint8_t* start = oo; if ((size_t)oo & 1) { oo = (uint8_t*)((size_t)oo & ~1); start = *(--background_mark_stack_tos); dprintf (4, ("oo: %Ix, start: %Ix\n", (size_t)oo, (size_t)start)); } #ifdef COLLECTIBLE_CLASS else { // If there's a class object, push it now. We are guaranteed to have the slot since // we just popped one object off. 
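// (For collectible types, marking the class object is what keeps the collectible assembly's loader
// allocator alive, so it has to be treated as a reference of the instance.)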
if (is_collectible (oo)) { uint8_t* class_obj = get_class_object (oo); if (background_mark (class_obj, background_saved_lowest_address, background_saved_highest_address)) { size_t obj_size = size (class_obj); bpromoted_bytes (thread) += obj_size; *(background_mark_stack_tos++) = class_obj; } } if (!contain_pointers (oo)) { goto next_level; } } #endif //COLLECTIBLE_CLASS s = size (oo); BOOL overflow_p = FALSE; if (background_mark_stack_tos + (num_partial_refs + 2) >= mark_stack_limit) { size_t num_components = ((method_table(oo))->HasComponentSize() ? ((CObjectHeader*)oo)->GetNumComponents() : 0); size_t num_pointers = CGCDesc::GetNumPointers(method_table(oo), s, num_components); dprintf (2, ("h%d: PM: %Id left, obj %Ix (mt: %Ix) start: %Ix, total: %Id", heap_number, (size_t)(mark_stack_limit - background_mark_stack_tos), oo, method_table(oo), start, num_pointers)); bgc_overflow_count++; overflow_p = TRUE; } if (overflow_p == FALSE) { dprintf(3,("pushing mark for %Ix ", (size_t)oo)); //push the object and its current uint8_t** place = background_mark_stack_tos++; *(place) = start; *(background_mark_stack_tos++) = (uint8_t*)((size_t)oo | 1); int num_pushed_refs = num_partial_refs; int num_processed_refs = num_pushed_refs * 16; go_through_object (method_table(oo), oo, s, ppslot, start, use_start, (oo + s), { uint8_t* o = *ppslot; Prefetch(o); if (background_mark (o, background_saved_lowest_address, background_saved_highest_address)) { //m_boundary (o); size_t obj_size = size (o); bpromoted_bytes (thread) += obj_size; if (contain_pointers_or_collectible (o)) { *(background_mark_stack_tos++) = o; if (--num_pushed_refs == 0) { //update the start *place = (uint8_t*)(ppslot+1); goto more_to_do; } } } if (--num_processed_refs == 0) { // give foreground GC a chance to run *place = (uint8_t*)(ppslot + 1); goto more_to_do; } } ); //we are finished with this object *place = 0; *(place+1) = 0; more_to_do:; } else { dprintf (3,("mark stack overflow for object %Ix ", (size_t)oo)); background_min_overflow_address = min (background_min_overflow_address, oo); background_max_overflow_address = max (background_max_overflow_address, oo); } } } #ifdef SORT_MARK_STACK if (background_mark_stack_tos > sorted_tos + mark_stack_array_length/8) { rqsort1 (sorted_tos, background_mark_stack_tos-1); sorted_tos = background_mark_stack_tos-1; } #endif //SORT_MARK_STACK #ifdef COLLECTIBLE_CLASS next_level: #endif // COLLECTIBLE_CLASS allow_fgc(); if (!(background_mark_stack_tos == background_mark_stack_array)) { oo = *(--background_mark_stack_tos); #ifdef SORT_MARK_STACK sorted_tos = (uint8_t**)min ((size_t)sorted_tos, (size_t)background_mark_stack_tos); #endif //SORT_MARK_STACK } else break; } assert (background_mark_stack_tos == background_mark_stack_array); } //this version is different than the foreground GC because //it can't keep pointers to the inside of an object //while calling background_mark_simple1. The object could be moved //by an intervening foreground gc. //this method assumes that *po is in the [low. 
high[ range void gc_heap::background_mark_simple (uint8_t* o THREAD_NUMBER_DCL) { #ifdef MULTIPLE_HEAPS #else //MULTIPLE_HEAPS const int thread = 0; #endif //MULTIPLE_HEAPS { dprintf (3, ("bmarking %Ix", o)); if (background_mark1 (o)) { //m_boundary (o); size_t s = size (o); bpromoted_bytes (thread) += s; if (contain_pointers_or_collectible (o)) { background_mark_simple1 (o THREAD_NUMBER_ARG); } } allow_fgc(); } } inline uint8_t* gc_heap::background_mark_object (uint8_t* o THREAD_NUMBER_DCL) { if ((o >= background_saved_lowest_address) && (o < background_saved_highest_address)) { background_mark_simple (o THREAD_NUMBER_ARG); } else { if (o) { dprintf (3, ("or-%Ix", o)); } } return o; } void gc_heap::background_promote (Object** ppObject, ScanContext* sc, uint32_t flags) { UNREFERENCED_PARAMETER(sc); //in order to save space on the array, mark the object, //knowing that it will be visited later assert (settings.concurrent); THREAD_NUMBER_FROM_CONTEXT; #ifndef MULTIPLE_HEAPS const int thread = 0; #endif //!MULTIPLE_HEAPS uint8_t* o = (uint8_t*)*ppObject; if (o == 0) return; #ifdef DEBUG_DestroyedHandleValue // we can race with destroy handle during concurrent scan if (o == (uint8_t*)DEBUG_DestroyedHandleValue) return; #endif //DEBUG_DestroyedHandleValue HEAP_FROM_THREAD; gc_heap* hp = gc_heap::heap_of (o); if ((o < hp->background_saved_lowest_address) || (o >= hp->background_saved_highest_address)) { return; } if (flags & GC_CALL_INTERIOR) { o = hp->find_object (o); if (o == 0) return; } #ifdef FEATURE_CONSERVATIVE_GC // For conservative GC, a value on stack may point to middle of a free object. // In this case, we don't need to promote the pointer. if (GCConfig::GetConservativeGC() && ((CObjectHeader*)o)->IsFree()) { return; } #endif //FEATURE_CONSERVATIVE_GC #ifdef _DEBUG ((CObjectHeader*)o)->Validate(); #endif //_DEBUG //needs to be called before the marking because it is possible for a foreground //gc to take place during the mark and move the object STRESS_LOG3(LF_GC|LF_GCROOTS, LL_INFO1000000, " GCHeap::Promote: Promote GC Root *%p = %p MT = %pT", ppObject, o, o ? ((Object*) o)->GetGCSafeMethodTable() : NULL); hpt->background_mark_simple (o THREAD_NUMBER_ARG); } //used by the ephemeral collection to scan the local background structures //containing references. void gc_heap::scan_background_roots (promote_func* fn, int hn, ScanContext *pSC) { ScanContext sc; if (pSC == 0) pSC = ≻ pSC->thread_number = hn; BOOL relocate_p = (fn == &GCHeap::Relocate); dprintf (3, ("Scanning background mark list")); //scan mark_list size_t mark_list_finger = 0; while (mark_list_finger < c_mark_list_index) { uint8_t** o = &c_mark_list [mark_list_finger]; if (!relocate_p) { // We may not be able to calculate the size during relocate as POPO // may have written over the object. size_t s = size (*o); assert (Align (s) >= Align (min_obj_size)); dprintf(3,("background root %Ix", (size_t)*o)); } (*fn) ((Object**)o, pSC, 0); mark_list_finger++; } //scan the mark stack dprintf (3, ("Scanning background mark stack")); uint8_t** finger = background_mark_stack_array; while (finger < background_mark_stack_tos) { if ((finger + 1) < background_mark_stack_tos) { // We need to check for the partial mark case here. 
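// A partial mark occupies two adjacent stack slots: *finger is the position inside the parent where
// marking will resume, and *(finger + 1) is the parent object with its low bit set as the tag. That is
// why the tag is tested on the second slot below, and why both slots get fixed up together (and the
// finger advanced by 2) when relocating.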
uint8_t* parent_obj = *(finger + 1); if ((size_t)parent_obj & 1) { uint8_t* place = *finger; size_t place_offset = 0; uint8_t* real_parent_obj = (uint8_t*)((size_t)parent_obj & ~1); if (relocate_p) { *(finger + 1) = real_parent_obj; place_offset = place - real_parent_obj; dprintf(3,("relocating background root %Ix", (size_t)real_parent_obj)); (*fn) ((Object**)(finger + 1), pSC, 0); real_parent_obj = *(finger + 1); *finger = real_parent_obj + place_offset; *(finger + 1) = (uint8_t*)((size_t)real_parent_obj | 1); dprintf(3,("roots changed to %Ix, %Ix", *finger, *(finger + 1))); } else { uint8_t** temp = &real_parent_obj; dprintf(3,("marking background root %Ix", (size_t)real_parent_obj)); (*fn) ((Object**)temp, pSC, 0); } finger += 2; continue; } } dprintf(3,("background root %Ix", (size_t)*finger)); (*fn) ((Object**)finger, pSC, 0); finger++; } } void gc_heap::grow_bgc_mark_stack (size_t new_size) { if ((background_mark_stack_array_length < new_size) && ((new_size - background_mark_stack_array_length) > (background_mark_stack_array_length / 2))) { dprintf (2, ("h%d: ov grow to %Id", heap_number, new_size)); uint8_t** tmp = new (nothrow) uint8_t* [new_size]; if (tmp) { delete [] background_mark_stack_array; background_mark_stack_array = tmp; background_mark_stack_array_length = new_size; background_mark_stack_tos = background_mark_stack_array; } } } void gc_heap::check_bgc_mark_stack_length() { if ((settings.condemned_generation < (max_generation - 1)) || gc_heap::background_running_p()) return; size_t total_heap_size = get_total_heap_size(); if (total_heap_size < ((size_t)4*1024*1024*1024)) return; #ifdef MULTIPLE_HEAPS int total_heaps = n_heaps; #else int total_heaps = 1; #endif //MULTIPLE_HEAPS size_t size_based_on_heap = total_heap_size / (size_t)(100 * 100 * total_heaps * sizeof (uint8_t*)); size_t new_size = max (background_mark_stack_array_length, size_based_on_heap); grow_bgc_mark_stack (new_size); } uint8_t* gc_heap::background_seg_end (heap_segment* seg, BOOL concurrent_p) { #ifndef USE_REGIONS if (concurrent_p && (seg == saved_overflow_ephemeral_seg)) { // for now we stop at where gen1 started when we started processing return background_min_soh_overflow_address; } else #endif //!USE_REGIONS { return heap_segment_allocated (seg); } } uint8_t* gc_heap::background_first_overflow (uint8_t* min_add, heap_segment* seg, BOOL concurrent_p, BOOL small_object_p) { uint8_t* o = 0; if (small_object_p) { #ifdef USE_REGIONS return find_first_object (min_add, heap_segment_mem (seg)); #else if (in_range_for_segment (min_add, seg)) { // min_add was the beginning of gen1 when we did the concurrent // overflow. Now we could be in a situation where min_add is // actually the same as allocated for that segment (because // we expanded heap), in which case we can not call // find first on this address or we will AV. 
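// Three cases below: min_add is already at or past allocated (just return it), we are concurrently
// re-scanning the saved ephemeral segment (start from the recorded background_min_soh_overflow_address
// instead), or the normal case where we find the first object at or after min_add.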
if (min_add >= heap_segment_allocated (seg)) { return min_add; } else { if (concurrent_p && ((seg == saved_overflow_ephemeral_seg) && (min_add >= background_min_soh_overflow_address))) { return background_min_soh_overflow_address; } else { o = find_first_object (min_add, heap_segment_mem (seg)); return o; } } } #endif //USE_REGIONS } o = max (heap_segment_mem (seg), min_add); return o; } void gc_heap::background_process_mark_overflow_internal (uint8_t* min_add, uint8_t* max_add, BOOL concurrent_p) { if (concurrent_p) { current_bgc_state = bgc_overflow_soh; } size_t total_marked_objects = 0; #ifdef MULTIPLE_HEAPS int thread = heap_number; #endif //MULTIPLE_HEAPS int start_gen_idx = get_start_generation_index(); #ifdef USE_REGIONS if (concurrent_p) start_gen_idx = max_generation; #endif //USE_REGIONS exclusive_sync* loh_alloc_lock = 0; dprintf (2,("Processing Mark overflow [%Ix %Ix]", (size_t)min_add, (size_t)max_add)); #ifdef MULTIPLE_HEAPS // We don't have each heap scan all heaps concurrently because we are worried about // multiple threads calling things like find_first_object. int h_start = (concurrent_p ? heap_number : 0); int h_end = (concurrent_p ? (heap_number + 1) : n_heaps); for (int hi = h_start; hi < h_end; hi++) { gc_heap* hp = (concurrent_p ? this : g_heaps [(heap_number + hi) % n_heaps]); #else { gc_heap* hp = 0; #endif //MULTIPLE_HEAPS BOOL small_object_segments = TRUE; loh_alloc_lock = hp->bgc_alloc_lock; for (int i = start_gen_idx; i < total_generation_count; i++) { int align_const = get_alignment_constant (small_object_segments); generation* gen = hp->generation_of (i); heap_segment* seg = heap_segment_in_range (generation_start_segment (gen)); PREFIX_ASSUME(seg != NULL); uint8_t* current_min_add = min_add; uint8_t* current_max_add = max_add; while (seg) { #ifdef USE_REGIONS if (heap_segment_overflow_p (seg)) { assert (!concurrent_p); current_min_add = max (heap_segment_mem (seg), min_add); current_max_add = min (heap_segment_allocated (seg), max_add); } #endif //USE_REGIONS uint8_t* o = hp->background_first_overflow (current_min_add, seg, concurrent_p, small_object_segments); while ((o < hp->background_seg_end (seg, concurrent_p)) && (o <= current_max_add)) { dprintf (3, ("considering %Ix", (size_t)o)); size_t s; if (concurrent_p && !small_object_segments) { loh_alloc_lock->bgc_mark_set (o); if (((CObjectHeader*)o)->IsFree()) { s = unused_array_size (o); } else { s = size (o); } } else { s = size (o); } if (background_object_marked (o, FALSE) && contain_pointers_or_collectible (o)) { total_marked_objects++; go_through_object_cl (method_table(o), o, s, poo, uint8_t* oo = *poo; background_mark_object (oo THREAD_NUMBER_ARG); ); } if (concurrent_p && !small_object_segments) { loh_alloc_lock->bgc_mark_done (); } o = o + Align (s, align_const); if (concurrent_p) { allow_fgc(); } } dprintf (2, ("went through overflow objects in segment %Ix (%d) (so far %Id marked)", heap_segment_mem (seg), (small_object_segments ? 0 : 1), total_marked_objects)); #ifndef USE_REGIONS if (concurrent_p && (seg == hp->saved_overflow_ephemeral_seg)) { break; } #endif //USE_REGIONS seg = heap_segment_next_in_range (seg); } if (concurrent_p) { current_bgc_state = bgc_overflow_uoh; } dprintf (2, ("h%d: SOH: ov-mo: %Id", heap_number, total_marked_objects)); fire_overflow_event (min_add, max_add, total_marked_objects, i); if (small_object_segments) { concurrent_print_time_delta (concurrent_p ? 
"Cov SOH" : "Nov SOH"); } total_marked_objects = 0; small_object_segments = FALSE; } } } BOOL gc_heap::background_process_mark_overflow (BOOL concurrent_p) { BOOL grow_mark_array_p = TRUE; if (concurrent_p) { assert (!processed_eph_overflow_p); if ((background_max_overflow_address != 0) && (background_min_overflow_address != MAX_PTR)) { #ifdef USE_REGIONS // We don't want to step into the ephemeral regions so remember these regions and // be sure to process them later. An FGC cannot happen while we are going through // the region lists. for (int i = 0; i < max_generation; i++) { heap_segment* region = generation_start_segment (generation_of (i)); while (region) { if ((heap_segment_mem (region) <= background_max_overflow_address) && (heap_segment_allocated (region) >= background_min_overflow_address)) { region->flags |= heap_segment_flags_overflow; } region = heap_segment_next (region); } } #else //USE_REGIONS // We have overflow to process but we know we can't process the ephemeral generations // now (we actually could process till the current gen1 start but since we are going to // make overflow per segment, for now I'll just stop at the saved gen1 start. saved_overflow_ephemeral_seg = ephemeral_heap_segment; background_max_soh_overflow_address = heap_segment_reserved (saved_overflow_ephemeral_seg); background_min_soh_overflow_address = generation_allocation_start (generation_of (max_generation - 1)); #endif //USE_REGIONS } } else { #ifndef USE_REGIONS assert ((saved_overflow_ephemeral_seg == 0) || ((background_max_soh_overflow_address != 0) && (background_min_soh_overflow_address != MAX_PTR))); #endif //!USE_REGIONS if (!processed_eph_overflow_p) { // if there was no more overflow we just need to process what we didn't process // on the saved ephemeral segment. if ((background_max_overflow_address == 0) && (background_min_overflow_address == MAX_PTR)) { dprintf (2, ("final processing mark overflow - no more overflow since last time")); grow_mark_array_p = FALSE; } #ifndef USE_REGIONS background_min_overflow_address = min (background_min_overflow_address, background_min_soh_overflow_address); background_max_overflow_address = max (background_max_overflow_address, background_max_soh_overflow_address); #endif //!USE_REGIONS processed_eph_overflow_p = TRUE; } } BOOL overflow_p = FALSE; recheck: if ((! ((background_max_overflow_address == 0)) || ! ((background_min_overflow_address == MAX_PTR)))) { overflow_p = TRUE; if (grow_mark_array_p) { // Try to grow the array. 
size_t new_size = max (MARK_STACK_INITIAL_LENGTH, 2*background_mark_stack_array_length); if ((new_size * sizeof(mark)) > 100*1024) { size_t new_max_size = (get_total_heap_size() / 10) / sizeof(mark); new_size = min(new_max_size, new_size); } grow_bgc_mark_stack (new_size); } else { grow_mark_array_p = TRUE; } uint8_t* min_add = background_min_overflow_address; uint8_t* max_add = background_max_overflow_address; background_max_overflow_address = 0; background_min_overflow_address = MAX_PTR; background_process_mark_overflow_internal (min_add, max_add, concurrent_p); if (!concurrent_p) { goto recheck; } } return overflow_p; } #endif //BACKGROUND_GC inline void gc_heap::mark_through_object (uint8_t* oo, BOOL mark_class_object_p THREAD_NUMBER_DCL) { #ifndef COLLECTIBLE_CLASS UNREFERENCED_PARAMETER(mark_class_object_p); BOOL to_mark_class_object = FALSE; #else //COLLECTIBLE_CLASS BOOL to_mark_class_object = (mark_class_object_p && (is_collectible(oo))); #endif //COLLECTIBLE_CLASS if (contain_pointers (oo) || to_mark_class_object) { dprintf(3,( "Marking through %Ix", (size_t)oo)); size_t s = size (oo); #ifdef COLLECTIBLE_CLASS if (to_mark_class_object) { uint8_t* class_obj = get_class_object (oo); mark_object (class_obj THREAD_NUMBER_ARG); } #endif //COLLECTIBLE_CLASS if (contain_pointers (oo)) { go_through_object_nostart (method_table(oo), oo, s, po, uint8_t* o = *po; mark_object (o THREAD_NUMBER_ARG); ); } } } size_t gc_heap::get_total_heap_size() { size_t total_heap_size = 0; // It's correct to start from max_generation for this method because // generation_sizes will return all SOH sizes when passed max_generation. #ifdef MULTIPLE_HEAPS int hn = 0; for (hn = 0; hn < gc_heap::n_heaps; hn++) { gc_heap* hp2 = gc_heap::g_heaps [hn]; for (int i = max_generation; i < total_generation_count; i++) { total_heap_size += hp2->generation_sizes (hp2->generation_of (i)); } } #else for (int i = max_generation; i < total_generation_count; i++) { total_heap_size += generation_sizes (generation_of (i)); } #endif //MULTIPLE_HEAPS return total_heap_size; } size_t gc_heap::get_total_fragmentation() { size_t total_fragmentation = 0; #ifdef MULTIPLE_HEAPS for (int hn = 0; hn < gc_heap::n_heaps; hn++) { gc_heap* hp = gc_heap::g_heaps[hn]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS for (int i = 0; i < total_generation_count; i++) { generation* gen = hp->generation_of (i); total_fragmentation += (generation_free_list_space (gen) + generation_free_obj_space (gen)); } } return total_fragmentation; } size_t gc_heap::get_total_gen_fragmentation (int gen_number) { size_t total_fragmentation = 0; #ifdef MULTIPLE_HEAPS for (int hn = 0; hn < gc_heap::n_heaps; hn++) { gc_heap* hp = gc_heap::g_heaps[hn]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS generation* gen = hp->generation_of (gen_number); total_fragmentation += (generation_free_list_space (gen) + generation_free_obj_space (gen)); } return total_fragmentation; } size_t gc_heap::get_total_gen_estimated_reclaim (int gen_number) { size_t total_estimated_reclaim = 0; #ifdef MULTIPLE_HEAPS for (int hn = 0; hn < gc_heap::n_heaps; hn++) { gc_heap* hp = gc_heap::g_heaps[hn]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS total_estimated_reclaim += hp->estimated_reclaim (gen_number); } return total_estimated_reclaim; } size_t gc_heap::get_total_gen_size (int gen_number) { #ifdef MULTIPLE_HEAPS size_t size = 0; for (int hn = 0; hn < gc_heap::n_heaps; hn++) { gc_heap* hp = 
gc_heap::g_heaps[hn]; size += hp->generation_size (gen_number); } #else size_t size = generation_size (gen_number); #endif //MULTIPLE_HEAPS return size; } size_t gc_heap::committed_size() { size_t total_committed = 0; for (int i = get_start_generation_index(); i < total_generation_count; i++) { generation* gen = generation_of (i); heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); while (seg) { total_committed += heap_segment_committed (seg) - #ifdef USE_REGIONS get_region_start (seg); #else (uint8_t*)seg; #endif //USE_REGIONS seg = heap_segment_next (seg); } } #ifdef USE_REGIONS total_committed += committed_in_free; #endif //USE_REGIONS return total_committed; } size_t gc_heap::get_total_committed_size() { size_t total_committed = 0; #ifdef MULTIPLE_HEAPS int hn = 0; for (hn = 0; hn < gc_heap::n_heaps; hn++) { gc_heap* hp = gc_heap::g_heaps [hn]; total_committed += hp->committed_size(); } #else total_committed = committed_size(); #endif //MULTIPLE_HEAPS return total_committed; } size_t gc_heap::uoh_committed_size (int gen_number, size_t* allocated) { generation* gen = generation_of (gen_number); heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); size_t total_committed = 0; size_t total_allocated = 0; while (seg) { uint8_t* start = #ifdef USE_REGIONS get_region_start (seg); #else (uint8_t*)seg; #endif //USE_REGIONS total_committed += heap_segment_committed (seg) - start; total_allocated += heap_segment_allocated (seg) - start; seg = heap_segment_next (seg); } *allocated = total_allocated; return total_committed; } void gc_heap::get_memory_info (uint32_t* memory_load, uint64_t* available_physical, uint64_t* available_page_file) { GCToOSInterface::GetMemoryStatus(is_restricted_physical_mem ? total_physical_mem : 0, memory_load, available_physical, available_page_file); } //returns TRUE if an overflow happened. BOOL gc_heap::process_mark_overflow(int condemned_gen_number) { size_t last_promoted_bytes = get_promoted_bytes(); BOOL overflow_p = FALSE; recheck: if ((! (max_overflow_address == 0) || ! (min_overflow_address == MAX_PTR))) { overflow_p = TRUE; // Try to grow the array.
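// Same sizing heuristic as the BGC case above: double the mark stack (but at least
// MARK_STACK_INITIAL_LENGTH), capping at about a tenth of the total heap size once it passes 100KB.
// Note the recheck loop around this: processing the overflow can itself cause new overflow, so we keep
// going until the [min_overflow_address, max_overflow_address] window stays empty.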
size_t new_size = max (MARK_STACK_INITIAL_LENGTH, 2*mark_stack_array_length); if ((new_size * sizeof(mark)) > 100*1024) { size_t new_max_size = (get_total_heap_size() / 10) / sizeof(mark); new_size = min(new_max_size, new_size); } if ((mark_stack_array_length < new_size) && ((new_size - mark_stack_array_length) > (mark_stack_array_length / 2))) { mark* tmp = new (nothrow) mark [new_size]; if (tmp) { delete mark_stack_array; mark_stack_array = tmp; mark_stack_array_length = new_size; } } uint8_t* min_add = min_overflow_address; uint8_t* max_add = max_overflow_address; max_overflow_address = 0; min_overflow_address = MAX_PTR; process_mark_overflow_internal (condemned_gen_number, min_add, max_add); goto recheck; } size_t current_promoted_bytes = get_promoted_bytes(); if (current_promoted_bytes != last_promoted_bytes) fire_mark_event (ETW::GC_ROOT_OVERFLOW, current_promoted_bytes, last_promoted_bytes); return overflow_p; } void gc_heap::process_mark_overflow_internal (int condemned_gen_number, uint8_t* min_add, uint8_t* max_add) { #ifdef MULTIPLE_HEAPS int thread = heap_number; #endif //MULTIPLE_HEAPS BOOL full_p = (condemned_gen_number == max_generation); dprintf(3,("Processing Mark overflow [%Ix %Ix]", (size_t)min_add, (size_t)max_add)); size_t obj_count = 0; #ifdef MULTIPLE_HEAPS for (int hi = 0; hi < n_heaps; hi++) { gc_heap* hp = g_heaps [(heap_number + hi) % n_heaps]; #else { gc_heap* hp = 0; #endif //MULTIPLE_HEAPS int gen_limit = full_p ? total_generation_count : condemned_gen_number + 1; for (int i = get_stop_generation_index (condemned_gen_number); i < gen_limit; i++) { generation* gen = hp->generation_of (i); heap_segment* seg = heap_segment_in_range (generation_start_segment (gen)); int align_const = get_alignment_constant (i < uoh_start_generation); PREFIX_ASSUME(seg != NULL); while (seg) { uint8_t* o = max (heap_segment_mem (seg), min_add); uint8_t* end = heap_segment_allocated (seg); while ((o < end) && (o <= max_add)) { assert ((min_add <= o) && (max_add >= o)); dprintf (3, ("considering %Ix", (size_t)o)); if (marked (o)) { mark_through_object (o, TRUE THREAD_NUMBER_ARG); obj_count++; } o = o + Align (size (o), align_const); } seg = heap_segment_next_in_range (seg); } } #ifndef MULTIPLE_HEAPS // we should have found at least one object assert (obj_count > 0); #endif //MULTIPLE_HEAPS } } // Scanning for promotion for dependent handles need special handling. Because the primary holds a strong // reference to the secondary (when the primary itself is reachable) and this can cause a cascading series of // promotions (the secondary of one handle is or promotes the primary of another) we might need to perform the // promotion scan multiple times. // This helper encapsulates the logic to complete all dependent handle promotions when running a server GC. It // also has the effect of processing any mark stack overflow. #ifdef MULTIPLE_HEAPS // When multiple heaps are enabled we have must utilize a more complex algorithm in order to keep all the GC // worker threads synchronized. The algorithms are sufficiently divergent that we have different // implementations based on whether MULTIPLE_HEAPS is defined or not. // // Define some static variables used for synchronization in the method below. These should really be defined // locally but MSVC complains when the VOLATILE macro is expanded into an instantiation of the Volatile class. // // A note about the synchronization used within this method. 
Communication between the worker threads is // achieved via two shared booleans (defined below). These both act as latches that are transitioned only from // false -> true by unsynchronized code. They are only read or reset to false by a single thread under the // protection of a join. static VOLATILE(BOOL) s_fUnpromotedHandles = FALSE; static VOLATILE(BOOL) s_fUnscannedPromotions = FALSE; static VOLATILE(BOOL) s_fScanRequired; void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, BOOL initial_scan_p) { // Whenever we call this method there may have been preceding object promotions. So set // s_fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set // based on the how the scanning proceeded). s_fUnscannedPromotions = TRUE; // We don't know how many times we need to loop yet. In particular we can't base the loop condition on // the state of this thread's portion of the dependent handle table. That's because promotions on other // threads could cause handle promotions to become necessary here. Even if there are definitely no more // promotions possible in this thread's handles, we still have to stay in lock-step with those worker // threads that haven't finished yet (each GC worker thread has to join exactly the same number of times // as all the others or they'll get out of step). while (true) { // The various worker threads are all currently racing in this code. We need to work out if at least // one of them think they have work to do this cycle. Each thread needs to rescan its portion of the // dependent handle table when both of the following conditions apply: // 1) At least one (arbitrary) object might have been promoted since the last scan (because if this // object happens to correspond to a primary in one of our handles we might potentially have to // promote the associated secondary). // 2) The table for this thread has at least one handle with a secondary that isn't promoted yet. // // The first condition is represented by s_fUnscannedPromotions. This is always non-zero for the first // iteration of this loop (see comment above) and in subsequent cycles each thread updates this // whenever a mark stack overflow occurs or scanning their dependent handles results in a secondary // being promoted. This value is cleared back to zero in a synchronized fashion in the join that // follows below. Note that we can't read this outside of the join since on any iteration apart from // the first threads will be racing between reading this value and completing their previous // iteration's table scan. // // The second condition is tracked by the dependent handle code itself on a per worker thread basis // (and updated by the GcDhReScan() method). We call GcDhUnpromotedHandlesExist() on each thread to // determine the local value and collect the results into the s_fUnpromotedHandles variable in what is // effectively an OR operation. As per s_fUnscannedPromotions we can't read the final result until // we're safely joined. if (GCScan::GcDhUnpromotedHandlesExist(sc)) s_fUnpromotedHandles = TRUE; // Synchronize all the threads so we can read our state variables safely. The shared variable // s_fScanRequired, indicating whether we should scan the tables or terminate the loop, will be set by // a single thread inside the join. gc_t_join.join(this, gc_join_scan_dependent_handles); if (gc_t_join.joined()) { // We're synchronized so it's safe to read our shared state variables. 
We update another shared // variable to indicate to all threads whether we'll be scanning for another cycle or terminating // the loop. We scan if there has been at least one object promotion since last time and at least // one thread has a dependent handle table with a potential handle promotion possible. s_fScanRequired = s_fUnscannedPromotions && s_fUnpromotedHandles; // Reset our shared state variables (ready to be set again on this scan or with a good initial // value for the next call if we're terminating the loop). s_fUnscannedPromotions = FALSE; s_fUnpromotedHandles = FALSE; if (!s_fScanRequired) { // We're terminating the loop. Perform any last operations that require single threaded access. if (!initial_scan_p) { // On the second invocation we reconcile all mark overflow ranges across the heaps. This can help // load balance if some of the heaps have an abnormally large workload. uint8_t* all_heaps_max = 0; uint8_t* all_heaps_min = MAX_PTR; int i; for (i = 0; i < n_heaps; i++) { if (all_heaps_max < g_heaps[i]->max_overflow_address) all_heaps_max = g_heaps[i]->max_overflow_address; if (all_heaps_min > g_heaps[i]->min_overflow_address) all_heaps_min = g_heaps[i]->min_overflow_address; } for (i = 0; i < n_heaps; i++) { g_heaps[i]->max_overflow_address = all_heaps_max; g_heaps[i]->min_overflow_address = all_heaps_min; } } } dprintf(3, ("Starting all gc thread mark stack overflow processing")); gc_t_join.restart(); } // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions // being visible. If there really was an overflow (process_mark_overflow returns true) then set the // global flag indicating that at least one object promotion may have occurred (the usual comment // about races applies). (Note it's OK to set this flag even if we're about to terminate the loop and // exit the method since we unconditionally set this variable on method entry anyway). if (process_mark_overflow(condemned_gen_number)) s_fUnscannedPromotions = TRUE; // If we decided that no scan was required we can terminate the loop now. if (!s_fScanRequired) break; // Otherwise we must join with the other workers to ensure that all mark stack overflows have been // processed before we start scanning dependent handle tables (if overflows remain while we scan we // could miss noting the promotion of some primary objects). gc_t_join.join(this, gc_join_rescan_dependent_handles); if (gc_t_join.joined()) { dprintf(3, ("Starting all gc thread for dependent handle promotion")); gc_t_join.restart(); } // If the portion of the dependent handle table managed by this worker has handles that could still be // promoted perform a rescan. If the rescan resulted in at least one promotion note this fact since it // could require a rescan of handles on this or other workers. if (GCScan::GcDhUnpromotedHandlesExist(sc)) if (GCScan::GcDhReScan(sc)) s_fUnscannedPromotions = TRUE; } } #else //MULTIPLE_HEAPS // Non-multiple heap version of scan_dependent_handles: much simpler without the need to keep multiple worker // threads synchronized. void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, BOOL initial_scan_p) { UNREFERENCED_PARAMETER(initial_scan_p); // Whenever we call this method there may have been preceding object promotions. So set // fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set // based on the how the scanning proceeded). 
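    // Illustrative walkthrough (the handle names are hypothetical, for exposition only): suppose
    // there are two dependent handles, H1 = (primary A, secondary B) and H2 = (primary B,
    // secondary C), and only A was marked by the regular root scan.
    //   pass 1: H1's primary A is marked, so B is promoted; H2's primary was not yet marked when H2 was visited.
    //   pass 2: H2's primary B is now marked, so C is promoted.
    //   pass 3: nothing new is promoted, so the loop terminates.
    // This cascade is why both versions of this method keep rescanning until a pass promotes nothing.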
bool fUnscannedPromotions = true; // Loop until there are either no more dependent handles that can have their secondary promoted or we've // managed to perform a scan without promoting anything new. while (GCScan::GcDhUnpromotedHandlesExist(sc) && fUnscannedPromotions) { // On each iteration of the loop start with the assumption that no further objects have been promoted. fUnscannedPromotions = false; // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions // being visible. If there was an overflow (process_mark_overflow returned true) then additional // objects now appear to be promoted and we should set the flag. if (process_mark_overflow(condemned_gen_number)) fUnscannedPromotions = true; // Perform the scan and set the flag if any promotions resulted. if (GCScan::GcDhReScan(sc)) fUnscannedPromotions = true; } // Process any mark stack overflow that may have resulted from scanning handles (or if we didn't need to // scan any handles at all this is the processing of overflows that may have occurred prior to this method // invocation). process_mark_overflow(condemned_gen_number); } #endif //MULTIPLE_HEAPS size_t gc_heap::get_generation_start_size (int gen_number) { #ifdef USE_REGIONS return 0; #else return Align (size (generation_allocation_start (generation_of (gen_number))), get_alignment_constant (gen_number <= max_generation)); #endif //!USE_REGIONS } inline int gc_heap::get_num_heaps() { #ifdef MULTIPLE_HEAPS return n_heaps; #else return 1; #endif //MULTIPLE_HEAPS } BOOL gc_heap::decide_on_promotion_surv (size_t threshold) { #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; #else //MULTIPLE_HEAPS { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS dynamic_data* dd = hp->dynamic_data_of (min ((settings.condemned_generation + 1), max_generation)); size_t older_gen_size = dd_current_size (dd) + (dd_desired_allocation (dd) - dd_new_allocation (dd)); size_t promoted = hp->total_promoted_bytes; dprintf (2, ("promotion threshold: %Id, promoted bytes: %Id size n+1: %Id", threshold, promoted, older_gen_size)); if ((threshold > (older_gen_size)) || (promoted > threshold)) { return TRUE; } } return FALSE; } inline void gc_heap::fire_mark_event (int root_type, size_t& current_promoted_bytes, size_t& last_promoted_bytes) { #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { current_promoted_bytes = get_promoted_bytes(); size_t root_promoted = current_promoted_bytes - last_promoted_bytes; dprintf (3, ("h%d marked root %s: %Id (%Id - %Id)", heap_number, str_root_kinds[root_type], root_promoted, current_promoted_bytes, last_promoted_bytes)); FIRE_EVENT(GCMarkWithType, heap_number, root_type, root_promoted); last_promoted_bytes = current_promoted_bytes; } #endif // FEATURE_EVENT_TRACE } #ifdef FEATURE_EVENT_TRACE inline void gc_heap::record_mark_time (uint64_t& mark_time, uint64_t& current_mark_time, uint64_t& last_mark_time) { if (informational_event_enabled_p) { current_mark_time = GetHighPrecisionTimeStamp(); mark_time = limit_time_to_uint32 (current_mark_time - last_mark_time); dprintf (3, ("%I64d - %I64d = %I64d", current_mark_time, last_mark_time, (current_mark_time - last_mark_time))); last_mark_time = current_mark_time; } } #endif // FEATURE_EVENT_TRACE void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) { assert (settings.concurrent == FALSE); ScanContext sc; sc.thread_number = heap_number; sc.promotion = TRUE; sc.concurrent = FALSE; dprintf (2, 
(ThreadStressLog::gcStartMarkMsg(), heap_number, condemned_gen_number)); BOOL full_p = (condemned_gen_number == max_generation); int gen_to_init = condemned_gen_number; if (condemned_gen_number == max_generation) { gen_to_init = total_generation_count - 1; } for (int gen_idx = 0; gen_idx <= gen_to_init; gen_idx++) { dynamic_data* dd = dynamic_data_of (gen_idx); dd_begin_data_size (dd) = generation_size (gen_idx) - dd_fragmentation (dd) - #ifdef USE_REGIONS 0; #else get_generation_start_size (gen_idx); #endif //USE_REGIONS dprintf (2, ("begin data size for gen%d is %Id", gen_idx, dd_begin_data_size (dd))); dd_survived_size (dd) = 0; dd_pinned_survived_size (dd) = 0; dd_artificial_pinned_survived_size (dd) = 0; dd_added_pinned_size (dd) = 0; #ifdef SHORT_PLUGS dd_padding_size (dd) = 0; #endif //SHORT_PLUGS #if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN) dd_num_npinned_plugs (dd) = 0; #endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN } if (gen0_must_clear_bricks > 0) gen0_must_clear_bricks--; size_t last_promoted_bytes = 0; size_t current_promoted_bytes = 0; #if !defined(USE_REGIONS) || defined(_DEBUG) init_promoted_bytes(); #endif //!USE_REGIONS || _DEBUG reset_mark_stack(); #ifdef SNOOP_STATS memset (&snoop_stat, 0, sizeof(snoop_stat)); snoop_stat.heap_index = heap_number; #endif //SNOOP_STATS #ifdef MH_SC_MARK if (full_p) { //initialize the mark stack for (int i = 0; i < max_snoop_level; i++) { ((uint8_t**)(mark_stack_array))[i] = 0; } mark_stack_busy() = 1; } #endif //MH_SC_MARK static uint32_t num_sizedrefs = 0; #ifdef MH_SC_MARK static BOOL do_mark_steal_p = FALSE; #endif //MH_SC_MARK #ifdef FEATURE_CARD_MARKING_STEALING reset_card_marking_enumerators(); #endif // FEATURE_CARD_MARKING_STEALING #ifdef STRESS_REGIONS heap_segment* gen0_region = generation_start_segment (generation_of (0)); while (gen0_region) { size_t gen0_region_size = heap_segment_allocated (gen0_region) - heap_segment_mem (gen0_region); if (gen0_region_size > 0) { if ((num_gen0_regions % pinning_seg_interval) == 0) { dprintf (REGIONS_LOG, ("h%d potentially creating pinning in region %Ix", heap_number, heap_segment_mem (gen0_region))); int align_const = get_alignment_constant (TRUE); // Pinning the first and the middle object in the region. 
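                // Concretely (descriptive note only): the loop below starts with 'boundary' at the
                // region start, pins the first non-free object it reaches at or past 'boundary',
                // then moves 'boundary' just past the midpoint of the region and pins the first
                // non-free object at or past that point, stopping once two objects are pinned.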
uint8_t* boundary = heap_segment_mem (gen0_region); uint8_t* obj_to_pin = boundary; int num_pinned_objs = 0; while (obj_to_pin < heap_segment_allocated (gen0_region)) { if (obj_to_pin >= boundary && !((CObjectHeader*)obj_to_pin)->IsFree()) { pin_by_gc (obj_to_pin); num_pinned_objs++; if (num_pinned_objs >= 2) break; boundary += (gen0_region_size / 2) + 1; } obj_to_pin += Align (size (obj_to_pin), align_const); } } } num_gen0_regions++; gen0_region = heap_segment_next (gen0_region); } #endif //STRESS_REGIONS #ifdef FEATURE_EVENT_TRACE static uint64_t current_mark_time = 0; static uint64_t last_mark_time = 0; #endif //FEATURE_EVENT_TRACE #ifdef MULTIPLE_HEAPS gc_t_join.join(this, gc_join_begin_mark_phase); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { maxgen_size_inc_p = false; #ifdef USE_REGIONS special_sweep_p = false; region_count = global_region_allocator.get_used_region_count(); grow_mark_list_piece(); #endif //USE_REGIONS GCToEEInterface::BeforeGcScanRoots(condemned_gen_number, /* is_bgc */ false, /* is_concurrent */ false); num_sizedrefs = GCToEEInterface::GetTotalNumSizedRefHandles(); #ifdef FEATURE_EVENT_TRACE informational_event_enabled_p = EVENT_ENABLED (GCMarkWithType); if (informational_event_enabled_p) { last_mark_time = GetHighPrecisionTimeStamp(); // We may not have SizedRefs to mark so init it to 0. gc_time_info[time_mark_sizedref] = 0; } #endif //FEATURE_EVENT_TRACE #ifdef MULTIPLE_HEAPS #ifdef MH_SC_MARK if (full_p) { size_t total_heap_size = get_total_heap_size(); if (total_heap_size > (100 * 1024 * 1024)) { do_mark_steal_p = TRUE; } else { do_mark_steal_p = FALSE; } } else { do_mark_steal_p = FALSE; } #endif //MH_SC_MARK gc_t_join.restart(); #endif //MULTIPLE_HEAPS } { //set up the mark lists from g_mark_list assert (g_mark_list); #ifdef MULTIPLE_HEAPS mark_list = &g_mark_list [heap_number*mark_list_size]; #else mark_list = g_mark_list; #endif //MULTIPLE_HEAPS //dont use the mark list for full gc //because multiple segments are more complex to handle and the list //is likely to overflow if (condemned_gen_number < max_generation) mark_list_end = &mark_list [mark_list_size-1]; else mark_list_end = &mark_list [0]; mark_list_index = &mark_list [0]; #ifdef USE_REGIONS if (g_mark_list_piece != nullptr) { #ifdef MULTIPLE_HEAPS // two arrays with alloc_count entries per heap mark_list_piece_start = &g_mark_list_piece[heap_number * 2 * g_mark_list_piece_size]; mark_list_piece_end = &mark_list_piece_start[g_mark_list_piece_size]; #endif //MULTIPLE_HEAPS survived_per_region = (size_t*)&g_mark_list_piece[heap_number * 2 * g_mark_list_piece_size]; old_card_survived_per_region = (size_t*)&survived_per_region[g_mark_list_piece_size]; size_t region_info_to_clear = region_count * sizeof (size_t); memset (survived_per_region, 0, region_info_to_clear); memset (old_card_survived_per_region, 0, region_info_to_clear); } else { #ifdef MULTIPLE_HEAPS // disable use of mark list altogether mark_list_piece_start = nullptr; mark_list_piece_end = nullptr; mark_list_end = &mark_list[0]; #endif //MULTIPLE_HEAPS survived_per_region = nullptr; old_card_survived_per_region = nullptr; } #endif // USE_REGIONS && MULTIPLE_HEAPS #ifndef MULTIPLE_HEAPS shigh = (uint8_t*) 0; slow = MAX_PTR; #endif //MULTIPLE_HEAPS if ((condemned_gen_number == max_generation) && (num_sizedrefs > 0)) { GCScan::GcScanSizedRefs(GCHeap::Promote, condemned_gen_number, max_generation, &sc); fire_mark_event (ETW::GC_ROOT_SIZEDREF, current_promoted_bytes, last_promoted_bytes); #ifdef MULTIPLE_HEAPS gc_t_join.join(this, 
gc_join_scan_sizedref_done); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { #ifdef FEATURE_EVENT_TRACE record_mark_time (gc_time_info[time_mark_sizedref], current_mark_time, last_mark_time); #endif //FEATURE_EVENT_TRACE #ifdef MULTIPLE_HEAPS dprintf(3, ("Done with marking all sized refs. Starting all gc thread for marking other strong roots")); gc_t_join.restart(); #endif //MULTIPLE_HEAPS } } dprintf(3,("Marking Roots")); GCScan::GcScanRoots(GCHeap::Promote, condemned_gen_number, max_generation, &sc); fire_mark_event (ETW::GC_ROOT_STACK, current_promoted_bytes, last_promoted_bytes); #ifdef BACKGROUND_GC if (gc_heap::background_running_p()) { scan_background_roots (GCHeap::Promote, heap_number, &sc); fire_mark_event (ETW::GC_ROOT_BGC, current_promoted_bytes, last_promoted_bytes); } #endif //BACKGROUND_GC #ifdef FEATURE_PREMORTEM_FINALIZATION dprintf(3, ("Marking finalization data")); finalize_queue->GcScanRoots(GCHeap::Promote, heap_number, 0); fire_mark_event (ETW::GC_ROOT_FQ, current_promoted_bytes, last_promoted_bytes); #endif // FEATURE_PREMORTEM_FINALIZATION dprintf(3,("Marking handle table")); GCScan::GcScanHandles(GCHeap::Promote, condemned_gen_number, max_generation, &sc); fire_mark_event (ETW::GC_ROOT_HANDLES, current_promoted_bytes, last_promoted_bytes); if (!full_p) { #ifdef USE_REGIONS save_current_survived(); #endif //USE_REGIONS #ifdef FEATURE_CARD_MARKING_STEALING n_eph_soh = 0; n_gen_soh = 0; n_eph_loh = 0; n_gen_loh = 0; #endif //FEATURE_CARD_MARKING_STEALING #ifdef CARD_BUNDLE #ifdef MULTIPLE_HEAPS if (gc_t_join.r_join(this, gc_r_join_update_card_bundle)) { #endif //MULTIPLE_HEAPS #ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // If we are manually managing card bundles, every write to the card table should already be // accounted for in the card bundle table so there's nothing to update here. 
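            // (Background, stated loosely: the card bundle table is a coarser summary of the card
            // table -- each bundle bit covers a contiguous chunk of card table entries -- so an
            // ephemeral GC can skip whole stretches of the card table whose bundle bits are clear.
            // The call below refreshes those summary bits to cover the parts of the card table
            // that have been written since the last GC.)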
update_card_table_bundle(); #endif if (card_bundles_enabled()) { verify_card_bundles(); } #ifdef MULTIPLE_HEAPS gc_t_join.r_restart(); } #endif //MULTIPLE_HEAPS #endif //CARD_BUNDLE card_fn mark_object_fn = &gc_heap::mark_object_simple; #ifdef HEAP_ANALYZE heap_analyze_success = TRUE; if (heap_analyze_enabled) { internal_root_array_index = 0; current_obj = 0; current_obj_size = 0; mark_object_fn = &gc_heap::ha_mark_object_simple; } #endif //HEAP_ANALYZE #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING) if (!card_mark_done_soh) #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING { dprintf (3, ("Marking cross generation pointers on heap %d", heap_number)); mark_through_cards_for_segments(mark_object_fn, FALSE THIS_ARG); #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING) card_mark_done_soh = true; #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING } #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING) if (!card_mark_done_uoh) #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING { dprintf (3, ("Marking cross generation pointers for uoh objects on heap %d", heap_number)); for (int i = uoh_start_generation; i < total_generation_count; i++) { #ifndef ALLOW_REFERENCES_IN_POH if (i != poh_generation) #endif //ALLOW_REFERENCES_IN_POH mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG); } #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING) card_mark_done_uoh = true; #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING } #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING) // check the other heaps cyclically and try to help out where the marking isn't done for (int i = 0; i < gc_heap::n_heaps; i++) { int heap_number_to_look_at = (i + heap_number) % gc_heap::n_heaps; gc_heap* hp = gc_heap::g_heaps[heap_number_to_look_at]; if (!hp->card_mark_done_soh) { dprintf(3, ("Marking cross generation pointers on heap %d", hp->heap_number)); hp->mark_through_cards_for_segments(mark_object_fn, FALSE THIS_ARG); hp->card_mark_done_soh = true; } if (!hp->card_mark_done_uoh) { dprintf(3, ("Marking cross generation pointers for large objects on heap %d", hp->heap_number)); for (int i = uoh_start_generation; i < total_generation_count; i++) { #ifndef ALLOW_REFERENCES_IN_POH if (i != poh_generation) #endif //ALLOW_REFERENCES_IN_POH hp->mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG); } hp->card_mark_done_uoh = true; } } #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING #ifdef USE_REGIONS update_old_card_survived(); #endif //USE_REGIONS fire_mark_event (ETW::GC_ROOT_OLDER, current_promoted_bytes, last_promoted_bytes); } } #ifdef MH_SC_MARK if (do_mark_steal_p) { mark_steal(); fire_mark_event (ETW::GC_ROOT_STEAL, current_promoted_bytes, last_promoted_bytes); } #endif //MH_SC_MARK // Dependent handles need to be scanned with a special algorithm (see the header comment on // scan_dependent_handles for more detail). We perform an initial scan without synchronizing with other // worker threads or processing any mark stack overflow. This is not guaranteed to complete the operation // but in a common case (where there are no dependent handles that are due to be collected) it allows us // to optimize away further scans. The call to scan_dependent_handles is what will cycle through more // iterations if required and will also perform processing of any mark stack overflow once the dependent // handle table has been fully promoted. 
GCScan::GcDhInitialScan(GCHeap::Promote, condemned_gen_number, max_generation, &sc); scan_dependent_handles(condemned_gen_number, &sc, true); fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes); #ifdef MULTIPLE_HEAPS dprintf(3, ("Joining for short weak handle scan")); gc_t_join.join(this, gc_join_null_dead_short_weak); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { #ifdef FEATURE_EVENT_TRACE record_mark_time (gc_time_info[time_mark_roots], current_mark_time, last_mark_time); #endif //FEATURE_EVENT_TRACE uint64_t promoted_bytes_global = 0; #ifdef HEAP_ANALYZE heap_analyze_enabled = FALSE; #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { promoted_bytes_global += g_heaps[i]->get_promoted_bytes(); } #else promoted_bytes_global = get_promoted_bytes(); #endif //MULTIPLE_HEAPS GCToEEInterface::AnalyzeSurvivorsFinished (settings.gc_index, condemned_gen_number, promoted_bytes_global, GCHeap::ReportGenerationBounds); #endif // HEAP_ANALYZE GCToEEInterface::AfterGcScanRoots (condemned_gen_number, max_generation, &sc); #ifdef MULTIPLE_HEAPS if (!full_p) { // we used r_join and need to reinitialize states for it here. gc_t_join.r_init(); } dprintf(3, ("Starting all gc thread for short weak handle scan")); gc_t_join.restart(); #endif //MULTIPLE_HEAPS } #ifdef FEATURE_CARD_MARKING_STEALING reset_card_marking_enumerators(); if (!full_p) { int generation_skip_ratio_soh = ((n_eph_soh > MIN_SOH_CROSS_GEN_REFS) ? (int)(((float)n_gen_soh / (float)n_eph_soh) * 100) : 100); int generation_skip_ratio_loh = ((n_eph_loh > MIN_LOH_CROSS_GEN_REFS) ? (int)(((float)n_gen_loh / (float)n_eph_loh) * 100) : 100); generation_skip_ratio = min (generation_skip_ratio_soh, generation_skip_ratio_loh); dprintf (2, ("h%d skip ratio soh: %d, loh: %d", heap_number, generation_skip_ratio_soh, generation_skip_ratio_loh)); } #endif // FEATURE_CARD_MARKING_STEALING // null out the target of short weakref that were not promoted. GCScan::GcShortWeakPtrScan (condemned_gen_number, max_generation,&sc); #ifdef MULTIPLE_HEAPS dprintf(3, ("Joining for finalization")); gc_t_join.join(this, gc_join_scan_finalization); if (gc_t_join.joined()) { #endif //MULTIPLE_HEAPS #ifdef FEATURE_EVENT_TRACE record_mark_time (gc_time_info[time_mark_short_weak], current_mark_time, last_mark_time); #endif //FEATURE_EVENT_TRACE #ifdef MULTIPLE_HEAPS dprintf(3, ("Starting all gc thread for Finalization")); gc_t_join.restart(); } #endif //MULTIPLE_HEAPS //Handle finalization. size_t promoted_bytes_live = get_promoted_bytes(); #ifdef FEATURE_PREMORTEM_FINALIZATION dprintf (3, ("Finalize marking")); finalize_queue->ScanForFinalization (GCHeap::Promote, condemned_gen_number, mark_only_p, __this); fire_mark_event (ETW::GC_ROOT_NEW_FQ, current_promoted_bytes, last_promoted_bytes); GCToEEInterface::DiagWalkFReachableObjects(__this); // Scan dependent handles again to promote any secondaries associated with primaries that were promoted // for finalization. As before scan_dependent_handles will also process any mark stack overflow. 
scan_dependent_handles(condemned_gen_number, &sc, false); fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes); #endif //FEATURE_PREMORTEM_FINALIZATION total_promoted_bytes = get_promoted_bytes(); #ifdef MULTIPLE_HEAPS static VOLATILE(int32_t) syncblock_scan_p; dprintf(3, ("Joining for weak pointer deletion")); gc_t_join.join(this, gc_join_null_dead_long_weak); if (gc_t_join.joined()) { dprintf(3, ("Starting all gc thread for weak pointer deletion")); #endif //MULTIPLE_HEAPS #ifdef FEATURE_EVENT_TRACE record_mark_time (gc_time_info[time_mark_scan_finalization], current_mark_time, last_mark_time); #endif //FEATURE_EVENT_TRACE #ifdef USE_REGIONS sync_promoted_bytes(); #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS syncblock_scan_p = 0; gc_t_join.restart(); } #endif //MULTIPLE_HEAPS // null out the target of long weakref that were not promoted. GCScan::GcWeakPtrScan (condemned_gen_number, max_generation, &sc); #ifdef MULTIPLE_HEAPS size_t total_mark_list_size = sort_mark_list(); // first thread to finish sorting will scan the sync syncblk cache if ((syncblock_scan_p == 0) && (Interlocked::Increment(&syncblock_scan_p) == 1)) #endif //MULTIPLE_HEAPS { // scan for deleted entries in the syncblk cache GCScan::GcWeakPtrScanBySingleThread(condemned_gen_number, max_generation, &sc); } #ifdef MULTIPLE_HEAPS dprintf (3, ("Joining for sync block cache entry scanning")); gc_t_join.join(this, gc_join_null_dead_syncblk); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { #ifdef FEATURE_EVENT_TRACE record_mark_time (gc_time_info[time_plan - 1], current_mark_time, last_mark_time); gc_time_info[time_plan] = last_mark_time; #endif //FEATURE_EVENT_TRACE //decide on promotion if (!settings.promotion) { size_t m = 0; for (int n = 0; n <= condemned_gen_number;n++) { #ifdef MULTIPLE_HEAPS m += (size_t)(dd_min_size (dynamic_data_of (n))*(n+1)*0.1); #else m += (size_t)(dd_min_size (dynamic_data_of (n))*(n+1)*0.06); #endif //MULTIPLE_HEAPS } settings.promotion = decide_on_promotion_surv (m); } #ifdef MULTIPLE_HEAPS #ifdef SNOOP_STATS if (do_mark_steal_p) { size_t objects_checked_count = 0; size_t zero_ref_count = 0; size_t objects_marked_count = 0; size_t check_level_count = 0; size_t busy_count = 0; size_t interlocked_count = 0; size_t partial_mark_parent_count = 0; size_t stolen_or_pm_count = 0; size_t stolen_entry_count = 0; size_t pm_not_ready_count = 0; size_t normal_count = 0; size_t stack_bottom_clear_count = 0; for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; hp->print_snoop_stat(); objects_checked_count += hp->snoop_stat.objects_checked_count; zero_ref_count += hp->snoop_stat.zero_ref_count; objects_marked_count += hp->snoop_stat.objects_marked_count; check_level_count += hp->snoop_stat.check_level_count; busy_count += hp->snoop_stat.busy_count; interlocked_count += hp->snoop_stat.interlocked_count; partial_mark_parent_count += hp->snoop_stat.partial_mark_parent_count; stolen_or_pm_count += hp->snoop_stat.stolen_or_pm_count; stolen_entry_count += hp->snoop_stat.stolen_entry_count; pm_not_ready_count += hp->snoop_stat.pm_not_ready_count; normal_count += hp->snoop_stat.normal_count; stack_bottom_clear_count += hp->snoop_stat.stack_bottom_clear_count; } fflush (stdout); printf ("-------total stats-------\n"); printf ("%8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s\n", "checked", "zero", "marked", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "normal", "clear"); printf ("%8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | 
%8d | %8d\n", objects_checked_count, zero_ref_count, objects_marked_count, check_level_count, busy_count, interlocked_count, partial_mark_parent_count, stolen_or_pm_count, stolen_entry_count, pm_not_ready_count, normal_count, stack_bottom_clear_count); } #endif //SNOOP_STATS dprintf(3, ("Starting all threads for end of mark phase")); gc_t_join.restart(); #endif //MULTIPLE_HEAPS } #if defined(MULTIPLE_HEAPS) && !defined(USE_REGIONS) merge_mark_lists (total_mark_list_size); #endif //MULTIPLE_HEAPS && !USE_REGIONS finalization_promoted_bytes = total_promoted_bytes - promoted_bytes_live; dprintf(2,("---- End of mark phase ----")); } inline void gc_heap::pin_object (uint8_t* o, uint8_t** ppObject) { dprintf (3, ("Pinning %Ix->%Ix", (size_t)ppObject, (size_t)o)); set_pinned (o); #ifdef FEATURE_EVENT_TRACE if(EVENT_ENABLED(PinObjectAtGCTime)) { fire_etw_pin_object_event(o, ppObject); } #endif // FEATURE_EVENT_TRACE num_pinned_objects++; } size_t gc_heap::get_total_pinned_objects() { #ifdef MULTIPLE_HEAPS size_t total_num_pinned_objects = 0; for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; total_num_pinned_objects += hp->num_pinned_objects; } return total_num_pinned_objects; #else //MULTIPLE_HEAPS return num_pinned_objects; #endif //MULTIPLE_HEAPS } void gc_heap::reinit_pinned_objects() { #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap::g_heaps[i]->num_pinned_objects = 0; } #else //MULTIPLE_HEAPS num_pinned_objects = 0; #endif //MULTIPLE_HEAPS } void gc_heap::reset_mark_stack () { reset_pinned_queue(); max_overflow_address = 0; min_overflow_address = MAX_PTR; } #ifdef FEATURE_STRUCTALIGN // // The word with left child, right child, and align info is laid out as follows: // // | upper short word | lower short word | // |<------------> <----->|<------------> <----->| // | left child info hi| right child info lo| // x86: | 10 bits 6 bits| 10 bits 6 bits| // // where left/right child are signed values and concat(info hi, info lo) is unsigned. // // The "align info" encodes two numbers: the required alignment (a power of two) // and the misalignment (the number of machine words the destination address needs // to be adjusted by to provide alignment - so this number is always smaller than // the required alignment). Thus, the two can be represented as the "logical or" // of the two numbers. Note that the actual pad is computed from the misalignment // by adding the alignment iff the misalignment is non-zero and less than min_obj_size. // // The number of bits in a brick. #if defined (TARGET_AMD64) #define brick_bits (12) #else #define brick_bits (11) #endif //TARGET_AMD64 C_ASSERT(brick_size == (1 << brick_bits)); // The number of bits needed to represent the offset to a child node. // "brick_bits + 1" allows us to represent a signed offset within a brick. #define child_bits (brick_bits + 1 - LOG2_PTRSIZE) // The number of bits in each of the pad hi, pad lo fields. 
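// (Worked example, for orientation: on a 32-bit target brick_bits is 11 and the pointer size is
// 4 bytes so LOG2_PTRSIZE is 2, giving child_bits = 11 + 1 - 2 = 10 and pad_bits = 16 - 10 = 6,
// which matches the "10 bits / 6 bits" split in the diagram above. On TARGET_AMD64 brick_bits is
// 12 and LOG2_PTRSIZE is 3, which yields the same 10/6 split.)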
#define pad_bits (sizeof(short) * 8 - child_bits) #define child_from_short(w) (((signed short)(w) / (1 << (pad_bits - LOG2_PTRSIZE))) & ~((1 << LOG2_PTRSIZE) - 1)) #define pad_mask ((1 << pad_bits) - 1) #define pad_from_short(w) ((size_t)(w) & pad_mask) #else // FEATURE_STRUCTALIGN #define child_from_short(w) (w) #endif // FEATURE_STRUCTALIGN inline short node_left_child(uint8_t* node) { return child_from_short(((plug_and_pair*)node)[-1].m_pair.left); } inline void set_node_left_child(uint8_t* node, ptrdiff_t val) { assert (val > -(ptrdiff_t)brick_size); assert (val < (ptrdiff_t)brick_size); assert (Aligned (val)); #ifdef FEATURE_STRUCTALIGN size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.left); ((plug_and_pair*)node)[-1].m_pair.left = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; #else // FEATURE_STRUCTALIGN ((plug_and_pair*)node)[-1].m_pair.left = (short)val; #endif // FEATURE_STRUCTALIGN assert (node_left_child (node) == val); } inline short node_right_child(uint8_t* node) { return child_from_short(((plug_and_pair*)node)[-1].m_pair.right); } inline void set_node_right_child(uint8_t* node, ptrdiff_t val) { assert (val > -(ptrdiff_t)brick_size); assert (val < (ptrdiff_t)brick_size); assert (Aligned (val)); #ifdef FEATURE_STRUCTALIGN size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.right); ((plug_and_pair*)node)[-1].m_pair.right = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; #else // FEATURE_STRUCTALIGN ((plug_and_pair*)node)[-1].m_pair.right = (short)val; #endif // FEATURE_STRUCTALIGN assert (node_right_child (node) == val); } #ifdef FEATURE_STRUCTALIGN void node_aligninfo (uint8_t* node, int& requiredAlignment, ptrdiff_t& pad) { // Extract the single-number aligninfo from the fields. short left = ((plug_and_pair*)node)[-1].m_pair.left; short right = ((plug_and_pair*)node)[-1].m_pair.right; ptrdiff_t pad_shifted = (pad_from_short(left) << pad_bits) | pad_from_short(right); ptrdiff_t aligninfo = pad_shifted * DATA_ALIGNMENT; // Replicate the topmost bit into all lower bits. ptrdiff_t x = aligninfo; x |= x >> 8; x |= x >> 4; x |= x >> 2; x |= x >> 1; // Clear all bits but the highest. requiredAlignment = (int)(x ^ (x >> 1)); pad = aligninfo - requiredAlignment; pad += AdjustmentForMinPadSize(pad, requiredAlignment); } inline ptrdiff_t node_alignpad (uint8_t* node) { int requiredAlignment; ptrdiff_t alignpad; node_aligninfo (node, requiredAlignment, alignpad); return alignpad; } void clear_node_aligninfo (uint8_t* node) { ((plug_and_pair*)node)[-1].m_pair.left &= ~0 << pad_bits; ((plug_and_pair*)node)[-1].m_pair.right &= ~0 << pad_bits; } void set_node_aligninfo (uint8_t* node, int requiredAlignment, ptrdiff_t pad) { // Encode the alignment requirement and alignment offset as a single number // as described above. 
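    // (Worked example, assuming DATA_ALIGNMENT is 8: requiredAlignment = 16 and pad = 8 encode as
    // aligninfo = 16 + 8 = 24, stored as 24 / 8 = 3 split across the two pad fields. Decoding in
    // node_aligninfo recovers 24, isolates its highest set bit to get requiredAlignment = 16, and
    // the remainder 24 - 16 = 8 is the pad (before the min-pad-size adjustment). The scheme works
    // because the misalignment is always strictly smaller than the required alignment, so the two
    // values never collide in the combined number.)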
ptrdiff_t aligninfo = (size_t)requiredAlignment + (pad & (requiredAlignment-1)); assert (Aligned (aligninfo)); ptrdiff_t aligninfo_shifted = aligninfo / DATA_ALIGNMENT; assert (aligninfo_shifted < (1 << (pad_bits + pad_bits))); ptrdiff_t hi = aligninfo_shifted >> pad_bits; assert (pad_from_short(((plug_and_gap*)node)[-1].m_pair.left) == 0); ((plug_and_pair*)node)[-1].m_pair.left |= hi; ptrdiff_t lo = aligninfo_shifted & pad_mask; assert (pad_from_short(((plug_and_gap*)node)[-1].m_pair.right) == 0); ((plug_and_pair*)node)[-1].m_pair.right |= lo; #ifdef _DEBUG int requiredAlignment2; ptrdiff_t pad2; node_aligninfo (node, requiredAlignment2, pad2); assert (requiredAlignment == requiredAlignment2); assert (pad == pad2); #endif // _DEBUG } #endif // FEATURE_STRUCTALIGN inline void loh_set_node_relocation_distance(uint8_t* node, ptrdiff_t val) { ptrdiff_t* place = &(((loh_obj_and_pad*)node)[-1].reloc); *place = val; } inline ptrdiff_t loh_node_relocation_distance(uint8_t* node) { return (((loh_obj_and_pad*)node)[-1].reloc); } inline ptrdiff_t node_relocation_distance (uint8_t* node) { return (((plug_and_reloc*)(node))[-1].reloc & ~3); } inline void set_node_relocation_distance(uint8_t* node, ptrdiff_t val) { assert (val == (val & ~3)); ptrdiff_t* place = &(((plug_and_reloc*)node)[-1].reloc); //clear the left bit and the relocation field *place &= 1; *place |= val; } #define node_left_p(node) (((plug_and_reloc*)(node))[-1].reloc & 2) #define set_node_left(node) ((plug_and_reloc*)(node))[-1].reloc |= 2; #ifndef FEATURE_STRUCTALIGN void set_node_realigned(uint8_t* node) { ((plug_and_reloc*)(node))[-1].reloc |= 1; } void clear_node_realigned(uint8_t* node) { #ifdef RESPECT_LARGE_ALIGNMENT ((plug_and_reloc*)(node))[-1].reloc &= ~1; #else UNREFERENCED_PARAMETER(node); #endif //RESPECT_LARGE_ALIGNMENT } #endif // FEATURE_STRUCTALIGN inline size_t node_gap_size (uint8_t* node) { return ((plug_and_gap *)node)[-1].gap; } void set_gap_size (uint8_t* node, size_t size) { assert (Aligned (size)); // clear the 2 uint32_t used by the node. 
((plug_and_gap *)node)[-1].reloc = 0; ((plug_and_gap *)node)[-1].lr =0; ((plug_and_gap *)node)[-1].gap = size; assert ((size == 0 )||(size >= sizeof(plug_and_reloc))); } uint8_t* gc_heap::insert_node (uint8_t* new_node, size_t sequence_number, uint8_t* tree, uint8_t* last_node) { dprintf (3, ("IN: %Ix(%Ix), T: %Ix(%Ix), L: %Ix(%Ix) [%Ix]", (size_t)new_node, brick_of(new_node), (size_t)tree, brick_of(tree), (size_t)last_node, brick_of(last_node), sequence_number)); if (power_of_two_p (sequence_number)) { set_node_left_child (new_node, (tree - new_node)); dprintf (3, ("NT: %Ix, LC->%Ix", (size_t)new_node, (tree - new_node))); tree = new_node; } else { if (oddp (sequence_number)) { set_node_right_child (last_node, (new_node - last_node)); dprintf (3, ("%Ix RC->%Ix", last_node, (new_node - last_node))); } else { uint8_t* earlier_node = tree; size_t imax = logcount(sequence_number) - 2; for (size_t i = 0; i != imax; i++) { earlier_node = earlier_node + node_right_child (earlier_node); } int tmp_offset = node_right_child (earlier_node); assert (tmp_offset); // should never be empty set_node_left_child (new_node, ((earlier_node + tmp_offset ) - new_node)); set_node_right_child (earlier_node, (new_node - earlier_node)); dprintf (3, ("%Ix LC->%Ix, %Ix RC->%Ix", new_node, ((earlier_node + tmp_offset ) - new_node), earlier_node, (new_node - earlier_node))); } } return tree; } size_t gc_heap::update_brick_table (uint8_t* tree, size_t current_brick, uint8_t* x, uint8_t* plug_end) { dprintf (3, ("tree: %Ix, current b: %Ix, x: %Ix, plug_end: %Ix", tree, current_brick, x, plug_end)); if (tree != NULL) { dprintf (3, ("b- %Ix->%Ix pointing to tree %Ix", current_brick, (size_t)(tree - brick_address (current_brick)), tree)); set_brick (current_brick, (tree - brick_address (current_brick))); } else { dprintf (3, ("b- %Ix->-1", current_brick)); set_brick (current_brick, -1); } size_t b = 1 + current_brick; ptrdiff_t offset = 0; size_t last_br = brick_of (plug_end-1); current_brick = brick_of (x-1); dprintf (3, ("ubt: %Ix->%Ix]->%Ix]", b, last_br, current_brick)); while (b <= current_brick) { if (b <= last_br) { set_brick (b, --offset); } else { set_brick (b,-1); } b++; } return brick_of (x); } #ifndef USE_REGIONS void gc_heap::plan_generation_start (generation* gen, generation* consing_gen, uint8_t* next_plug_to_allocate) { #ifdef HOST_64BIT // We should never demote big plugs to gen0. 
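    // What the block below does, in outline: when planning the start of gen0, walk the remaining
    // queued pinned plugs; whenever one of them is larger than demotion_plug_len_th, dequeue every
    // pin up to and including it and advance the consing generation's allocation pointer past
    // them, so the oversized pinned plugs land ahead of the planned gen0 start instead of being
    // demoted into gen0.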
if (gen == youngest_generation) { heap_segment* seg = ephemeral_heap_segment; size_t mark_stack_large_bos = mark_stack_bos; size_t large_plug_pos = 0; while (mark_stack_large_bos < mark_stack_tos) { if (mark_stack_array[mark_stack_large_bos].len > demotion_plug_len_th) { while (mark_stack_bos <= mark_stack_large_bos) { size_t entry = deque_pinned_plug(); size_t len = pinned_len (pinned_plug_of (entry)); uint8_t* plug = pinned_plug (pinned_plug_of(entry)); if (len > demotion_plug_len_th) { dprintf (2, ("ps(%d): S %Ix (%Id)(%Ix)", gen->gen_num, plug, len, (plug+len))); } pinned_len (pinned_plug_of (entry)) = plug - generation_allocation_pointer (consing_gen); assert(mark_stack_array[entry].len == 0 || mark_stack_array[entry].len >= Align(min_obj_size)); generation_allocation_pointer (consing_gen) = plug + len; generation_allocation_limit (consing_gen) = heap_segment_plan_allocated (seg); set_allocator_next_pin (consing_gen); } } mark_stack_large_bos++; } } #endif // HOST_64BIT generation_plan_allocation_start (gen) = allocate_in_condemned_generations (consing_gen, Align (min_obj_size), -1); generation_plan_allocation_start_size (gen) = Align (min_obj_size); size_t allocation_left = (size_t)(generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen)); if (next_plug_to_allocate) { size_t dist_to_next_plug = (size_t)(next_plug_to_allocate - generation_allocation_pointer (consing_gen)); if (allocation_left > dist_to_next_plug) { allocation_left = dist_to_next_plug; } } if (allocation_left < Align (min_obj_size)) { generation_plan_allocation_start_size (gen) += allocation_left; generation_allocation_pointer (consing_gen) += allocation_left; } dprintf (2, ("plan alloc gen%d(%Ix) start at %Ix (ptr: %Ix, limit: %Ix, next: %Ix)", gen->gen_num, generation_plan_allocation_start (gen), generation_plan_allocation_start_size (gen), generation_allocation_pointer (consing_gen), generation_allocation_limit (consing_gen), next_plug_to_allocate)); } void gc_heap::realloc_plan_generation_start (generation* gen, generation* consing_gen) { BOOL adjacentp = FALSE; generation_plan_allocation_start (gen) = allocate_in_expanded_heap (consing_gen, Align(min_obj_size), adjacentp, 0, #ifdef SHORT_PLUGS FALSE, NULL, #endif //SHORT_PLUGS FALSE, -1 REQD_ALIGN_AND_OFFSET_ARG); generation_plan_allocation_start_size (gen) = Align (min_obj_size); size_t allocation_left = (size_t)(generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen)); if ((allocation_left < Align (min_obj_size)) && (generation_allocation_limit (consing_gen)!=heap_segment_plan_allocated (generation_allocation_segment (consing_gen)))) { generation_plan_allocation_start_size (gen) += allocation_left; generation_allocation_pointer (consing_gen) += allocation_left; } dprintf (1, ("plan re-alloc gen%d start at %Ix (ptr: %Ix, limit: %Ix)", gen->gen_num, generation_plan_allocation_start (consing_gen), generation_allocation_pointer (consing_gen), generation_allocation_limit (consing_gen))); } void gc_heap::plan_generation_starts (generation*& consing_gen) { //make sure that every generation has a planned allocation start int gen_number = settings.condemned_generation; while (gen_number >= 0) { if (gen_number < max_generation) { consing_gen = ensure_ephemeral_heap_segment (consing_gen); } generation* gen = generation_of (gen_number); if (0 == generation_plan_allocation_start (gen)) { plan_generation_start (gen, consing_gen, 0); assert (generation_plan_allocation_start (gen)); } gen_number--; } // now we know 
the planned allocation size heap_segment_plan_allocated (ephemeral_heap_segment) = generation_allocation_pointer (consing_gen); } void gc_heap::advance_pins_for_demotion (generation* gen) { uint8_t* original_youngest_start = generation_allocation_start (youngest_generation); heap_segment* seg = ephemeral_heap_segment; if ((!(pinned_plug_que_empty_p()))) { size_t gen1_pinned_promoted = generation_pinned_allocation_compact_size (generation_of (max_generation)); size_t gen1_pins_left = dd_pinned_survived_size (dynamic_data_of (max_generation - 1)) - gen1_pinned_promoted; size_t total_space_to_skip = last_gen1_pin_end - generation_allocation_pointer (gen); float pin_frag_ratio = (float)gen1_pins_left / (float)total_space_to_skip; float pin_surv_ratio = (float)gen1_pins_left / (float)(dd_survived_size (dynamic_data_of (max_generation - 1))); if ((pin_frag_ratio > 0.15) && (pin_surv_ratio > 0.30)) { while (!pinned_plug_que_empty_p() && (pinned_plug (oldest_pin()) < original_youngest_start)) { size_t entry = deque_pinned_plug(); size_t len = pinned_len (pinned_plug_of (entry)); uint8_t* plug = pinned_plug (pinned_plug_of(entry)); pinned_len (pinned_plug_of (entry)) = plug - generation_allocation_pointer (gen); assert(mark_stack_array[entry].len == 0 || mark_stack_array[entry].len >= Align(min_obj_size)); generation_allocation_pointer (gen) = plug + len; generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); set_allocator_next_pin (gen); //Add the size of the pinned plug to the right pinned allocations //find out which gen this pinned plug came from int frgn = object_gennum (plug); if ((frgn != (int)max_generation) && settings.promotion) { int togn = object_gennum_plan (plug); generation_pinned_allocation_sweep_size ((generation_of (frgn +1))) += len; if (frgn < togn) { generation_pinned_allocation_compact_size (generation_of (togn)) += len; } } dprintf (2, ("skipping gap %d, pin %Ix (%Id)", pinned_len (pinned_plug_of (entry)), plug, len)); } } dprintf (2, ("ad_p_d: PL: %Id, SL: %Id, pfr: %d, psr: %d", gen1_pins_left, total_space_to_skip, (int)(pin_frag_ratio*100), (int)(pin_surv_ratio*100))); } } void gc_heap::process_ephemeral_boundaries (uint8_t* x, int& active_new_gen_number, int& active_old_gen_number, generation*& consing_gen, BOOL& allocate_in_condemned) { retry: if ((active_old_gen_number > 0) && (x >= generation_allocation_start (generation_of (active_old_gen_number - 1)))) { dprintf (2, ("crossing gen%d, x is %Ix", active_old_gen_number - 1, x)); if (!pinned_plug_que_empty_p()) { dprintf (2, ("oldest pin: %Ix(%Id)", pinned_plug (oldest_pin()), (x - pinned_plug (oldest_pin())))); } if (active_old_gen_number <= (settings.promotion ? (max_generation - 1) : max_generation)) { active_new_gen_number--; } active_old_gen_number--; assert ((!settings.promotion) || (active_new_gen_number>0)); if (active_new_gen_number == (max_generation - 1)) { #ifdef FREE_USAGE_STATS if (settings.condemned_generation == max_generation) { // We need to do this before we skip the rest of the pinned plugs. generation* gen_2 = generation_of (max_generation); generation* gen_1 = generation_of (max_generation - 1); size_t total_num_pinned_free_spaces_left = 0; // We are about to allocate gen1, check to see how efficient fitting in gen2 pinned free spaces is. 
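        // The efficiency computed a few lines below is simply
        //   allocated_in_pinned_free / (allocated_in_pinned_free + pinned_free_obj_space)
        // i.e. the fraction of gen2's pinned free space that has actually been used for
        // allocation, with the remainder left behind as free objects.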
for (int j = 0; j < NUM_GEN_POWER2; j++) { dprintf (1, ("[h%d][#%Id]2^%d: current: %Id, S: 2: %Id, 1: %Id(%Id)", heap_number, settings.gc_index, (j + 10), gen_2->gen_current_pinned_free_spaces[j], gen_2->gen_plugs[j], gen_1->gen_plugs[j], (gen_2->gen_plugs[j] + gen_1->gen_plugs[j]))); total_num_pinned_free_spaces_left += gen_2->gen_current_pinned_free_spaces[j]; } float pinned_free_list_efficiency = 0; size_t total_pinned_free_space = generation_allocated_in_pinned_free (gen_2) + generation_pinned_free_obj_space (gen_2); if (total_pinned_free_space != 0) { pinned_free_list_efficiency = (float)(generation_allocated_in_pinned_free (gen_2)) / (float)total_pinned_free_space; } dprintf (1, ("[h%d] gen2 allocated %Id bytes with %Id bytes pinned free spaces (effi: %d%%), %Id (%Id) left", heap_number, generation_allocated_in_pinned_free (gen_2), total_pinned_free_space, (int)(pinned_free_list_efficiency * 100), generation_pinned_free_obj_space (gen_2), total_num_pinned_free_spaces_left)); } #endif //FREE_USAGE_STATS //Go past all of the pinned plugs for this generation. while (!pinned_plug_que_empty_p() && (!in_range_for_segment ((pinned_plug (oldest_pin())), ephemeral_heap_segment))) { size_t entry = deque_pinned_plug(); mark* m = pinned_plug_of (entry); uint8_t* plug = pinned_plug (m); size_t len = pinned_len (m); // detect pinned block in different segment (later) than // allocation segment, skip those until the oldest pin is in the ephemeral seg. // adjust the allocation segment along the way (at the end it will // be the ephemeral segment. heap_segment* nseg = heap_segment_in_range (generation_allocation_segment (consing_gen)); PREFIX_ASSUME(nseg != NULL); while (!((plug >= generation_allocation_pointer (consing_gen))&& (plug < heap_segment_allocated (nseg)))) { //adjust the end of the segment to be the end of the plug assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (nseg)); assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (nseg)); heap_segment_plan_allocated (nseg) = generation_allocation_pointer (consing_gen); //switch allocation segment nseg = heap_segment_next_rw (nseg); generation_allocation_segment (consing_gen) = nseg; //reset the allocation pointer and limits generation_allocation_pointer (consing_gen) = heap_segment_mem (nseg); } set_new_pin_info (m, generation_allocation_pointer (consing_gen)); assert(pinned_len(m) == 0 || pinned_len(m) >= Align(min_obj_size)); generation_allocation_pointer (consing_gen) = plug + len; generation_allocation_limit (consing_gen) = generation_allocation_pointer (consing_gen); } allocate_in_condemned = TRUE; consing_gen = ensure_ephemeral_heap_segment (consing_gen); } if (active_new_gen_number != max_generation) { if (active_new_gen_number == (max_generation - 1)) { maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); if (!demote_gen1_p) advance_pins_for_demotion (consing_gen); } plan_generation_start (generation_of (active_new_gen_number), consing_gen, x); dprintf (2, ("process eph: allocated gen%d start at %Ix", active_new_gen_number, generation_plan_allocation_start (generation_of (active_new_gen_number)))); if ((demotion_low == MAX_PTR) && !pinned_plug_que_empty_p()) { uint8_t* pplug = pinned_plug (oldest_pin()); if (object_gennum (pplug) > 0) { demotion_low = pplug; dprintf (3, ("process eph: dlow->%Ix", demotion_low)); } } assert (generation_plan_allocation_start (generation_of (active_new_gen_number))); } goto retry; } } #endif 
//!USE_REGIONS inline void gc_heap::seg_clear_mark_bits (heap_segment* seg) { uint8_t* o = heap_segment_mem (seg); while (o < heap_segment_allocated (seg)) { if (marked (o)) { clear_marked (o); } o = o + Align (size (o)); } } #ifdef FEATURE_BASICFREEZE void gc_heap::sweep_ro_segments (heap_segment* start_seg) { //go through all of the segment in range and reset the mark bit heap_segment* seg = start_seg; while (seg) { if (heap_segment_read_only_p (seg) && heap_segment_in_range_p (seg)) { #ifdef BACKGROUND_GC if (settings.concurrent) { seg_clear_mark_array_bits_soh (seg); } else { seg_clear_mark_bits (seg); } #else //BACKGROUND_GC seg_clear_mark_bits (seg); #endif //BACKGROUND_GC } seg = heap_segment_next (seg); } } #endif // FEATURE_BASICFREEZE #ifdef FEATURE_LOH_COMPACTION inline BOOL gc_heap::loh_pinned_plug_que_empty_p() { return (loh_pinned_queue_bos == loh_pinned_queue_tos); } void gc_heap::loh_set_allocator_next_pin() { if (!(loh_pinned_plug_que_empty_p())) { mark* oldest_entry = loh_oldest_pin(); uint8_t* plug = pinned_plug (oldest_entry); generation* gen = large_object_generation; if ((plug >= generation_allocation_pointer (gen)) && (plug < generation_allocation_limit (gen))) { generation_allocation_limit (gen) = pinned_plug (oldest_entry); } else assert (!((plug < generation_allocation_pointer (gen)) && (plug >= heap_segment_mem (generation_allocation_segment (gen))))); } } size_t gc_heap::loh_deque_pinned_plug () { size_t m = loh_pinned_queue_bos; loh_pinned_queue_bos++; return m; } inline mark* gc_heap::loh_pinned_plug_of (size_t bos) { return &loh_pinned_queue[bos]; } inline mark* gc_heap::loh_oldest_pin() { return loh_pinned_plug_of (loh_pinned_queue_bos); } // If we can't grow the queue, then don't compact. BOOL gc_heap::loh_enque_pinned_plug (uint8_t* plug, size_t len) { assert(len >= Align(min_obj_size, get_alignment_constant (FALSE))); if (loh_pinned_queue_length <= loh_pinned_queue_tos) { if (!grow_mark_stack (loh_pinned_queue, loh_pinned_queue_length, LOH_PIN_QUEUE_LENGTH)) { return FALSE; } } dprintf (3, (" P: %Ix(%Id)", plug, len)); mark& m = loh_pinned_queue[loh_pinned_queue_tos]; m.first = plug; m.len = len; loh_pinned_queue_tos++; loh_set_allocator_next_pin(); return TRUE; } inline BOOL gc_heap::loh_size_fit_p (size_t size, uint8_t* alloc_pointer, uint8_t* alloc_limit) { dprintf (1235, ("trying to fit %Id(%Id) between %Ix and %Ix (%Id)", size, (2* AlignQword (loh_padding_obj_size) + size), alloc_pointer, alloc_limit, (alloc_limit - alloc_pointer))); return ((alloc_pointer + 2* AlignQword (loh_padding_obj_size) + size) <= alloc_limit); } uint8_t* gc_heap::loh_allocate_in_condemned (size_t size) { generation* gen = large_object_generation; dprintf (1235, ("E: p:%Ix, l:%Ix, s: %Id", generation_allocation_pointer (gen), generation_allocation_limit (gen), size)); retry: { heap_segment* seg = generation_allocation_segment (gen); if (!(loh_size_fit_p (size, generation_allocation_pointer (gen), generation_allocation_limit (gen)))) { if ((!(loh_pinned_plug_que_empty_p()) && (generation_allocation_limit (gen) == pinned_plug (loh_oldest_pin())))) { mark* m = loh_pinned_plug_of (loh_deque_pinned_plug()); size_t len = pinned_len (m); uint8_t* plug = pinned_plug (m); dprintf (1235, ("AIC: %Ix->%Ix(%Id)", generation_allocation_pointer (gen), plug, plug - generation_allocation_pointer (gen))); pinned_len (m) = plug - generation_allocation_pointer (gen); generation_allocation_pointer (gen) = plug + len; generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); 
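                // At this point the oldest pinned plug has been consumed: its pinned_len records
                // the gap between the old allocation pointer and the plug, the allocation pointer
                // now sits just past the plug, and the limit has been opened up to the planned end
                // of the segment. The call below then pulls the limit back in to the next queued
                // pin, if one falls on this segment.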
loh_set_allocator_next_pin(); dprintf (1235, ("s: p: %Ix, l: %Ix (%Id)", generation_allocation_pointer (gen), generation_allocation_limit (gen), (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); goto retry; } if (generation_allocation_limit (gen) != heap_segment_plan_allocated (seg)) { generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); dprintf (1235, ("l->pa(%Ix)", generation_allocation_limit (gen))); } else { if (heap_segment_plan_allocated (seg) != heap_segment_committed (seg)) { heap_segment_plan_allocated (seg) = heap_segment_committed (seg); generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); dprintf (1235, ("l->c(%Ix)", generation_allocation_limit (gen))); } else { if (loh_size_fit_p (size, generation_allocation_pointer (gen), heap_segment_reserved (seg)) && (grow_heap_segment (seg, (generation_allocation_pointer (gen) + size + 2* AlignQword (loh_padding_obj_size))))) { dprintf (1235, ("growing seg from %Ix to %Ix\n", heap_segment_committed (seg), (generation_allocation_pointer (gen) + size))); heap_segment_plan_allocated (seg) = heap_segment_committed (seg); generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); dprintf (1235, ("g: p: %Ix, l: %Ix (%Id)", generation_allocation_pointer (gen), generation_allocation_limit (gen), (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); } else { heap_segment* next_seg = heap_segment_next (seg); assert (generation_allocation_pointer (gen)>= heap_segment_mem (seg)); // Verify that all pinned plugs for this segment are consumed if (!loh_pinned_plug_que_empty_p() && ((pinned_plug (loh_oldest_pin()) < heap_segment_allocated (seg)) && (pinned_plug (loh_oldest_pin()) >= generation_allocation_pointer (gen)))) { LOG((LF_GC, LL_INFO10, "remaining pinned plug %Ix while leaving segment on allocation", pinned_plug (loh_oldest_pin()))); dprintf (1, ("queue empty: %d", loh_pinned_plug_que_empty_p())); FATAL_GC_ERROR(); } assert (generation_allocation_pointer (gen)>= heap_segment_mem (seg)); assert (generation_allocation_pointer (gen)<= heap_segment_committed (seg)); heap_segment_plan_allocated (seg) = generation_allocation_pointer (gen); if (next_seg) { // for LOH do we want to try starting from the first LOH every time though? 
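                        // Move the compaction allocation context onto the next LOH segment and
                        // reset the allocation pointer and limit to its start.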
generation_allocation_segment (gen) = next_seg; generation_allocation_pointer (gen) = heap_segment_mem (next_seg); generation_allocation_limit (gen) = generation_allocation_pointer (gen); dprintf (1235, ("n: p: %Ix, l: %Ix (%Id)", generation_allocation_pointer (gen), generation_allocation_limit (gen), (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); } else { dprintf (1, ("We ran out of space compacting, shouldn't happen")); FATAL_GC_ERROR(); } } } } loh_set_allocator_next_pin(); dprintf (1235, ("r: p: %Ix, l: %Ix (%Id)", generation_allocation_pointer (gen), generation_allocation_limit (gen), (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); goto retry; } } { assert (generation_allocation_pointer (gen)>= heap_segment_mem (generation_allocation_segment (gen))); uint8_t* result = generation_allocation_pointer (gen); size_t loh_pad = AlignQword (loh_padding_obj_size); generation_allocation_pointer (gen) += size + loh_pad; assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen)); dprintf (1235, ("p: %Ix, l: %Ix (%Id)", generation_allocation_pointer (gen), generation_allocation_limit (gen), (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); assert (result + loh_pad); return result + loh_pad; } } BOOL gc_heap::loh_compaction_requested() { // If hard limit is specified GC will automatically decide if LOH needs to be compacted. return (loh_compaction_always_p || (loh_compaction_mode != loh_compaction_default)); } inline void gc_heap::check_loh_compact_mode (BOOL all_heaps_compacted_p) { if (settings.loh_compaction && (loh_compaction_mode == loh_compaction_once)) { if (all_heaps_compacted_p) { // If the compaction mode says to compact once and we are going to compact LOH, // we need to revert it back to no compaction. loh_compaction_mode = loh_compaction_default; } } } BOOL gc_heap::plan_loh() { #ifdef FEATURE_EVENT_TRACE uint64_t start_time, end_time; if (informational_event_enabled_p) { memset (loh_compact_info, 0, (sizeof (etw_loh_compact_info) * get_num_heaps())); start_time = GetHighPrecisionTimeStamp(); } #endif //FEATURE_EVENT_TRACE if (!loh_pinned_queue) { loh_pinned_queue = new (nothrow) (mark [LOH_PIN_QUEUE_LENGTH]); if (!loh_pinned_queue) { dprintf (1, ("Cannot allocate the LOH pinned queue (%Id bytes), no compaction", LOH_PIN_QUEUE_LENGTH * sizeof (mark))); return FALSE; } loh_pinned_queue_length = LOH_PIN_QUEUE_LENGTH; } if (heap_number == 0) loh_pinned_queue_decay = LOH_PIN_DECAY; loh_pinned_queue_tos = 0; loh_pinned_queue_bos = 0; generation* gen = large_object_generation; heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); PREFIX_ASSUME(start_seg != NULL); heap_segment* seg = start_seg; uint8_t* o = get_uoh_start_object (seg, gen); dprintf (1235, ("before GC LOH size: %Id, free list: %Id, free obj: %Id\n", generation_size (loh_generation), generation_free_list_space (gen), generation_free_obj_space (gen))); while (seg) { heap_segment_plan_allocated (seg) = heap_segment_mem (seg); seg = heap_segment_next (seg); } seg = start_seg; // We don't need to ever realloc gen3 start so don't touch it. 
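    // Outline of the planning walk that follows (descriptive note only): for every marked object,
    // either enqueue it as a pinned plug (a pinned object keeps its address, so its relocation
    // distance is 0) or ask loh_allocate_in_condemned for its new address, and record
    // new_address - o as the object's relocation distance; runs of unmarked objects are simply
    // skipped. Afterwards any pins still in the queue are drained so each segment's planned end
    // is known.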
heap_segment_plan_allocated (seg) = o; generation_allocation_pointer (gen) = o; generation_allocation_limit (gen) = generation_allocation_pointer (gen); generation_allocation_segment (gen) = start_seg; uint8_t* free_space_start = o; uint8_t* free_space_end = o; uint8_t* new_address = 0; while (1) { if (o >= heap_segment_allocated (seg)) { seg = heap_segment_next (seg); if (seg == 0) { break; } o = heap_segment_mem (seg); } if (marked (o)) { free_space_end = o; size_t size = AlignQword (size (o)); dprintf (1235, ("%Ix(%Id) M", o, size)); if (pinned (o)) { // We don't clear the pinned bit yet so we can check in // compact phase how big a free object we should allocate // in front of the pinned object. We use the reloc address // field to store this. if (!loh_enque_pinned_plug (o, size)) { return FALSE; } new_address = o; } else { new_address = loh_allocate_in_condemned (size); } loh_set_node_relocation_distance (o, (new_address - o)); dprintf (1235, ("lobj %Ix-%Ix -> %Ix-%Ix (%Id)", o, (o + size), new_address, (new_address + size), (new_address - o))); o = o + size; free_space_start = o; if (o < heap_segment_allocated (seg)) { assert (!marked (o)); } } else { while (o < heap_segment_allocated (seg) && !marked (o)) { dprintf (1235, ("%Ix(%Id) F (%d)", o, AlignQword (size (o)), ((method_table (o) == g_gc_pFreeObjectMethodTable) ? 1 : 0))); o = o + AlignQword (size (o)); } } } while (!loh_pinned_plug_que_empty_p()) { mark* m = loh_pinned_plug_of (loh_deque_pinned_plug()); size_t len = pinned_len (m); uint8_t* plug = pinned_plug (m); // detect pinned block in different segment (later) than // allocation segment heap_segment* nseg = heap_segment_rw (generation_allocation_segment (gen)); while ((plug < generation_allocation_pointer (gen)) || (plug >= heap_segment_allocated (nseg))) { assert ((plug < heap_segment_mem (nseg)) || (plug > heap_segment_reserved (nseg))); //adjust the end of the segment to be the end of the plug assert (generation_allocation_pointer (gen)>= heap_segment_mem (nseg)); assert (generation_allocation_pointer (gen)<= heap_segment_committed (nseg)); heap_segment_plan_allocated (nseg) = generation_allocation_pointer (gen); //switch allocation segment nseg = heap_segment_next_rw (nseg); generation_allocation_segment (gen) = nseg; //reset the allocation pointer and limits generation_allocation_pointer (gen) = heap_segment_mem (nseg); } dprintf (1235, ("SP: %Ix->%Ix(%Id)", generation_allocation_pointer (gen), plug, plug - generation_allocation_pointer (gen))); pinned_len (m) = plug - generation_allocation_pointer (gen); generation_allocation_pointer (gen) = plug + len; } heap_segment_plan_allocated (generation_allocation_segment (gen)) = generation_allocation_pointer (gen); generation_allocation_pointer (gen) = 0; generation_allocation_limit (gen) = 0; #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { end_time = GetHighPrecisionTimeStamp(); loh_compact_info[heap_number].time_plan = limit_time_to_uint32 (end_time - start_time); } #endif //FEATURE_EVENT_TRACE return TRUE; } void gc_heap::compact_loh() { assert (loh_compaction_requested() || heap_hard_limit || conserve_mem_setting); #ifdef FEATURE_EVENT_TRACE uint64_t start_time, end_time; if (informational_event_enabled_p) { start_time = GetHighPrecisionTimeStamp(); } #endif //FEATURE_EVENT_TRACE generation* gen = large_object_generation; heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); PREFIX_ASSUME(start_seg != NULL); heap_segment* seg = start_seg; heap_segment* prev_seg = 0; uint8_t* o = 
get_uoh_start_object (seg, gen); // We don't need to ever realloc gen3 start so don't touch it. uint8_t* free_space_start = o; uint8_t* free_space_end = o; generation_allocator (gen)->clear(); generation_free_list_space (gen) = 0; generation_free_obj_space (gen) = 0; loh_pinned_queue_bos = 0; while (1) { if (o >= heap_segment_allocated (seg)) { heap_segment* next_seg = heap_segment_next (seg); // REGIONS TODO: for regions we can get rid of the start_seg. Just need // to update start region accordingly. if ((heap_segment_plan_allocated (seg) == heap_segment_mem (seg)) && (seg != start_seg) && !heap_segment_read_only_p (seg)) { dprintf (3, ("Preparing empty large segment %Ix", (size_t)seg)); assert (prev_seg); heap_segment_next (prev_seg) = next_seg; heap_segment_next (seg) = freeable_uoh_segment; freeable_uoh_segment = seg; #ifdef USE_REGIONS update_start_tail_regions (gen, seg, prev_seg, next_seg); #endif //USE_REGIONS } else { if (!heap_segment_read_only_p (seg)) { // We grew the segment to accommodate allocations. if (heap_segment_plan_allocated (seg) > heap_segment_allocated (seg)) { if ((heap_segment_plan_allocated (seg) - plug_skew) > heap_segment_used (seg)) { heap_segment_used (seg) = heap_segment_plan_allocated (seg) - plug_skew; } } heap_segment_allocated (seg) = heap_segment_plan_allocated (seg); dprintf (3, ("Trimming seg to %Ix[", heap_segment_allocated (seg))); decommit_heap_segment_pages (seg, 0); dprintf (1236, ("CLOH: seg: %Ix, alloc: %Ix, used: %Ix, committed: %Ix", seg, heap_segment_allocated (seg), heap_segment_used (seg), heap_segment_committed (seg))); //heap_segment_used (seg) = heap_segment_allocated (seg) - plug_skew; dprintf (1236, ("CLOH: used is set to %Ix", heap_segment_used (seg))); } prev_seg = seg; } seg = next_seg; if (seg == 0) break; else { o = heap_segment_mem (seg); } } if (marked (o)) { free_space_end = o; size_t size = AlignQword (size (o)); size_t loh_pad; uint8_t* reloc = o; clear_marked (o); if (pinned (o)) { // We are relying on the fact the pinned objects are always looked at in the same order // in plan phase and in compact phase. 
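// The compact phase consumes the LOH pin queue in the same order plan_loh filled it: for a
// pinned plug the gap in front is the free space saved in pinned_len during planning, while a
// non-pinned plug is copied to its planned destination with the standard LOH padding in
// front. Either way thread_gap records that gap as free space for the generation.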
mark* m = loh_pinned_plug_of (loh_deque_pinned_plug()); uint8_t* plug = pinned_plug (m); assert (plug == o); loh_pad = pinned_len (m); clear_pinned (o); } else { loh_pad = AlignQword (loh_padding_obj_size); reloc += loh_node_relocation_distance (o); gcmemcopy (reloc, o, size, TRUE); } thread_gap ((reloc - loh_pad), loh_pad, gen); o = o + size; free_space_start = o; if (o < heap_segment_allocated (seg)) { assert (!marked (o)); } } else { while (o < heap_segment_allocated (seg) && !marked (o)) { o = o + AlignQword (size (o)); } } } #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { end_time = GetHighPrecisionTimeStamp(); loh_compact_info[heap_number].time_compact = limit_time_to_uint32 (end_time - start_time); } #endif //FEATURE_EVENT_TRACE assert (loh_pinned_plug_que_empty_p()); dprintf (1235, ("after GC LOH size: %Id, free list: %Id, free obj: %Id\n\n", generation_size (loh_generation), generation_free_list_space (gen), generation_free_obj_space (gen))); } #ifdef FEATURE_EVENT_TRACE inline void gc_heap::loh_reloc_survivor_helper (uint8_t** pval, size_t& total_refs, size_t& zero_refs) { uint8_t* val = *pval; if (!val) zero_refs++; total_refs++; reloc_survivor_helper (pval); } #endif //FEATURE_EVENT_TRACE void gc_heap::relocate_in_loh_compact() { generation* gen = large_object_generation; heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); uint8_t* o = get_uoh_start_object (seg, gen); #ifdef FEATURE_EVENT_TRACE size_t total_refs = 0; size_t zero_refs = 0; uint64_t start_time, end_time; if (informational_event_enabled_p) { start_time = GetHighPrecisionTimeStamp(); } #endif //FEATURE_EVENT_TRACE while (1) { if (o >= heap_segment_allocated (seg)) { seg = heap_segment_next (seg); if (seg == 0) { break; } o = heap_segment_mem (seg); } if (marked (o)) { size_t size = AlignQword (size (o)); check_class_object_demotion (o); if (contain_pointers (o)) { #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { go_through_object_nostart (method_table (o), o, size(o), pval, { loh_reloc_survivor_helper (pval, total_refs, zero_refs); }); } else #endif //FEATURE_EVENT_TRACE { go_through_object_nostart (method_table (o), o, size(o), pval, { reloc_survivor_helper (pval); }); } } o = o + size; if (o < heap_segment_allocated (seg)) { assert (!marked (o)); } } else { while (o < heap_segment_allocated (seg) && !marked (o)) { o = o + AlignQword (size (o)); } } } #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { end_time = GetHighPrecisionTimeStamp(); loh_compact_info[heap_number].time_relocate = limit_time_to_uint32 (end_time - start_time); loh_compact_info[heap_number].total_refs = total_refs; loh_compact_info[heap_number].zero_refs = zero_refs; } #endif //FEATURE_EVENT_TRACE dprintf (1235, ("after GC LOH size: %Id, free list: %Id, free obj: %Id\n\n", generation_size (loh_generation), generation_free_list_space (gen), generation_free_obj_space (gen))); } void gc_heap::walk_relocation_for_loh (void* profiling_context, record_surv_fn fn) { generation* gen = large_object_generation; heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); uint8_t* o = get_uoh_start_object (seg, gen); while (1) { if (o >= heap_segment_allocated (seg)) { seg = heap_segment_next (seg); if (seg == 0) { break; } o = heap_segment_mem (seg); } if (marked (o)) { size_t size = AlignQword (size (o)); ptrdiff_t reloc = loh_node_relocation_distance (o); STRESS_LOG_PLUG_MOVE(o, (o + size), -reloc); fn (o, (o + size), reloc, profiling_context, !!settings.compaction, false); o = o + 
size; if (o < heap_segment_allocated (seg)) { assert (!marked (o)); } } else { while (o < heap_segment_allocated (seg) && !marked (o)) { o = o + AlignQword (size (o)); } } } } BOOL gc_heap::loh_object_p (uint8_t* o) { #ifdef MULTIPLE_HEAPS gc_heap* hp = gc_heap::g_heaps [0]; int brick_entry = hp->brick_table[hp->brick_of (o)]; #else //MULTIPLE_HEAPS int brick_entry = brick_table[brick_of (o)]; #endif //MULTIPLE_HEAPS return (brick_entry == 0); } #endif //FEATURE_LOH_COMPACTION void gc_heap::convert_to_pinned_plug (BOOL& last_npinned_plug_p, BOOL& last_pinned_plug_p, BOOL& pinned_plug_p, size_t ps, size_t& artificial_pinned_size) { last_npinned_plug_p = FALSE; last_pinned_plug_p = TRUE; pinned_plug_p = TRUE; artificial_pinned_size = ps; } // Because we have the artificial pinning, we can't guarantee that pinned and npinned // plugs are always interleaved. void gc_heap::store_plug_gap_info (uint8_t* plug_start, uint8_t* plug_end, BOOL& last_npinned_plug_p, BOOL& last_pinned_plug_p, uint8_t*& last_pinned_plug, BOOL& pinned_plug_p, uint8_t* last_object_in_last_plug, BOOL& merge_with_last_pin_p, // this is only for verification purpose size_t last_plug_len) { UNREFERENCED_PARAMETER(last_plug_len); if (!last_npinned_plug_p && !last_pinned_plug_p) { //dprintf (3, ("last full plug end: %Ix, full plug start: %Ix", plug_end, plug_start)); dprintf (3, ("Free: %Ix", (plug_start - plug_end))); assert ((plug_start == plug_end) || ((size_t)(plug_start - plug_end) >= Align (min_obj_size))); set_gap_size (plug_start, plug_start - plug_end); } if (pinned (plug_start)) { BOOL save_pre_plug_info_p = FALSE; if (last_npinned_plug_p || last_pinned_plug_p) { //if (last_plug_len == Align (min_obj_size)) //{ // dprintf (3, ("debugging only - last npinned plug is min, check to see if it's correct")); // GCToOSInterface::DebugBreak(); //} save_pre_plug_info_p = TRUE; } pinned_plug_p = TRUE; last_npinned_plug_p = FALSE; if (last_pinned_plug_p) { dprintf (3, ("last plug %Ix was also pinned, should merge", last_pinned_plug)); merge_with_last_pin_p = TRUE; } else { last_pinned_plug_p = TRUE; last_pinned_plug = plug_start; enque_pinned_plug (last_pinned_plug, save_pre_plug_info_p, last_object_in_last_plug); if (save_pre_plug_info_p) { #ifdef DOUBLY_LINKED_FL if (last_object_in_last_plug == generation_last_free_list_allocated(generation_of(max_generation))) { saved_pinned_plug_index = mark_stack_tos; } #endif //DOUBLY_LINKED_FL set_gap_size (plug_start, sizeof (gap_reloc_pair)); } } } else { if (last_pinned_plug_p) { //if (Align (last_plug_len) < min_pre_pin_obj_size) //{ // dprintf (3, ("debugging only - last pinned plug is min, check to see if it's correct")); // GCToOSInterface::DebugBreak(); //} save_post_plug_info (last_pinned_plug, last_object_in_last_plug, plug_start); set_gap_size (plug_start, sizeof (gap_reloc_pair)); verify_pins_with_post_plug_info("after saving post plug info"); } last_npinned_plug_p = TRUE; last_pinned_plug_p = FALSE; } } void gc_heap::record_interesting_data_point (interesting_data_point idp) { #ifdef GC_CONFIG_DRIVEN (interesting_data_per_gc[idp])++; #else UNREFERENCED_PARAMETER(idp); #endif //GC_CONFIG_DRIVEN } #ifdef USE_REGIONS void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num) { heap_segment* alloc_region = generation_allocation_segment (consing_gen); while (!pinned_plug_que_empty_p()) { uint8_t* oldest_plug = pinned_plug (oldest_pin()); if ((oldest_plug >= generation_allocation_pointer (consing_gen)) && (oldest_plug < heap_segment_allocated 
(alloc_region))) { mark* m = pinned_plug_of (deque_pinned_plug()); uint8_t* plug = pinned_plug (m); size_t len = pinned_len (m); set_new_pin_info (m, generation_allocation_pointer (consing_gen)); dprintf (REGIONS_LOG, ("pin %Ix b: %Ix->%Ix", plug, brick_of (plug), (size_t)(brick_table[brick_of (plug)]))); generation_allocation_pointer (consing_gen) = plug + len; } else { // Exit when we detect the first pin that's not on the alloc seg anymore. break; } } dprintf (REGIONS_LOG, ("finished with alloc region %Ix, (%s) plan gen -> %d", heap_segment_mem (alloc_region), (heap_segment_swept_in_plan (alloc_region) ? "SIP" : "non SIP"), (heap_segment_swept_in_plan (alloc_region) ? heap_segment_plan_gen_num (alloc_region) : plan_gen_num))); set_region_plan_gen_num_sip (alloc_region, plan_gen_num); heap_segment_plan_allocated (alloc_region) = generation_allocation_pointer (consing_gen); } void gc_heap::decide_on_demotion_pin_surv (heap_segment* region) { int new_gen_num = 0; if (settings.promotion) { // If this region doesn't have much pinned surv left, we demote it; otherwise the region // will be promoted like normal. size_t basic_region_size = (size_t)1 << min_segment_size_shr; if ((int)(((double)heap_segment_pinned_survived (region) * 100.0) / (double)basic_region_size) >= demotion_pinned_ratio_th) { new_gen_num = get_plan_gen_num (heap_segment_gen_num (region)); } } set_region_plan_gen_num_sip (region, new_gen_num); } // If the next plan gen number is different, since different generations cannot share the same // region, we need to get a new alloc region and skip all remaining pins in the alloc region if // any. void gc_heap::process_last_np_surv_region (generation* consing_gen, int current_plan_gen_num, int next_plan_gen_num) { heap_segment* alloc_region = generation_allocation_segment (consing_gen); //assert (in_range_for_segment (generation_allocation_pointer (consing_gen), alloc_region)); // I'm not using in_range_for_segment here because alloc pointer/limit can be exactly the same // as reserved. size_fit_p in allocate_in_condemned_generations can be used to fit the exact // size of a plug at the end of the segment which makes alloc pointer/limit both reserved // on exit of that method. uint8_t* consing_gen_alloc_ptr = generation_allocation_pointer (consing_gen); assert ((consing_gen_alloc_ptr >= heap_segment_mem (alloc_region)) && (consing_gen_alloc_ptr <= heap_segment_reserved (alloc_region))); dprintf (REGIONS_LOG, ("h%d next need to plan gen%d, consing alloc region: %Ix, ptr: %Ix(consing gen: %d)", heap_number, next_plan_gen_num, heap_segment_mem (alloc_region), generation_allocation_pointer (consing_gen), consing_gen->gen_num)); if (current_plan_gen_num != next_plan_gen_num) { // If we haven't needed to consume this alloc region at all, we can use it to allocate the new // gen. if (generation_allocation_pointer (consing_gen) == heap_segment_mem (alloc_region)) { dprintf (REGIONS_LOG, ("h%d alloc region %Ix unused, using it to plan %d", heap_number, heap_segment_mem (alloc_region), next_plan_gen_num)); return; } // skip all the pins in this region since we cannot use it to plan the next gen. 
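// After the remaining pins in this region are skipped below, pick the region the consing gen
// will plan the next generation into: the next region in the current generation if there is
// one, otherwise the start region of the next younger generation, and for gen0 under
// promotion a brand new region; if none can be obtained, special sweep mode is turned on.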
skip_pins_in_alloc_region (consing_gen, current_plan_gen_num); heap_segment* next_region = heap_segment_next (alloc_region); if (!next_region) { int gen_num = heap_segment_gen_num (alloc_region); if (gen_num > 0) { next_region = generation_start_segment (generation_of (gen_num - 1)); dprintf (REGIONS_LOG, ("h%d consing switching to next gen%d seg %Ix", heap_number, heap_segment_gen_num (next_region), heap_segment_mem (next_region))); } else { if (settings.promotion) { assert (next_plan_gen_num == 0); next_region = get_new_region (0); if (next_region) { dprintf (REGIONS_LOG, ("h%d getting a new region for gen0 plan start seg to %Ix", heap_number, heap_segment_mem (next_region))); } else { dprintf (REGIONS_LOG, ("h%d couldn't get a region to plan gen0, special sweep on", heap_number)); special_sweep_p = true; } } else { assert (!"ran out of regions for non promotion case??"); } } } else { dprintf (REGIONS_LOG, ("h%d consing switching to next seg %Ix in gen%d to alloc in", heap_number, heap_segment_mem (next_region), heap_segment_gen_num (next_region))); } if (next_region) { init_alloc_info (consing_gen, next_region); dprintf (REGIONS_LOG, ("h%d consing(%d) alloc seg: %Ix(%Ix, %Ix), ptr: %Ix, planning gen%d", heap_number, consing_gen->gen_num, heap_segment_mem (generation_allocation_segment (consing_gen)), heap_segment_allocated (generation_allocation_segment (consing_gen)), heap_segment_plan_allocated (generation_allocation_segment (consing_gen)), generation_allocation_pointer (consing_gen), next_plan_gen_num)); } else { assert (special_sweep_p); } } } void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* consing_gen) { assert ((current_plan_gen_num == 0) || (!settings.promotion && (current_plan_gen_num == -1))); if (special_sweep_p) { assert (pinned_plug_que_empty_p()); } dprintf (REGIONS_LOG, ("h%d PRR: plan %d: consing alloc seg: %Ix, ptr: %Ix", heap_number, current_plan_gen_num, heap_segment_mem (generation_allocation_segment (consing_gen)), generation_allocation_pointer (consing_gen))); if (current_plan_gen_num == -1) { assert (!settings.promotion); current_plan_gen_num = 0; } while (!pinned_plug_que_empty_p()) { uint8_t* oldest_plug = pinned_plug (oldest_pin()); // detect pinned block in segments without pins heap_segment* nseg = heap_segment_rw (generation_allocation_segment (consing_gen)); dprintf (3, ("h%d oldest pin: %Ix, consing alloc %Ix, ptr %Ix, limit %Ix", heap_number, oldest_plug, heap_segment_mem (nseg), generation_allocation_pointer (consing_gen), generation_allocation_limit (consing_gen))); while ((oldest_plug < generation_allocation_pointer (consing_gen)) || (oldest_plug >= heap_segment_allocated (nseg))) { assert ((oldest_plug < heap_segment_mem (nseg)) || (oldest_plug > heap_segment_reserved (nseg))); assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (nseg)); assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (nseg)); dprintf (3, ("h%d PRR: in loop, seg %Ix pa %Ix -> alloc ptr %Ix, plan gen %d->%d", heap_number, heap_segment_mem (nseg), heap_segment_plan_allocated (nseg), generation_allocation_pointer (consing_gen), heap_segment_plan_gen_num (nseg), current_plan_gen_num)); if (!heap_segment_swept_in_plan (nseg)) { heap_segment_plan_allocated (nseg) = generation_allocation_pointer (consing_gen); } decide_on_demotion_pin_surv (nseg); heap_segment* next_seg = heap_segment_next_non_sip (nseg); if ((next_seg == 0) && (heap_segment_gen_num (nseg) > 0)) { next_seg = generation_start_segment (generation_of 
(heap_segment_gen_num (nseg) - 1)); dprintf (3, ("h%d PRR: switching to next gen%d start %Ix", heap_number, heap_segment_gen_num (next_seg), (size_t)next_seg)); } assert (next_seg != 0); nseg = next_seg; generation_allocation_segment (consing_gen) = nseg; generation_allocation_pointer (consing_gen) = heap_segment_mem (nseg); } mark* m = pinned_plug_of (deque_pinned_plug()); uint8_t* plug = pinned_plug (m); size_t len = pinned_len (m); set_new_pin_info (m, generation_allocation_pointer (consing_gen)); size_t free_size = pinned_len (m); update_planned_gen0_free_space (free_size, plug); dprintf (2, ("h%d plug %Ix-%Ix(%Id), free space before %Ix-%Ix(%Id)", heap_number, plug, (plug + len), len, generation_allocation_pointer (consing_gen), plug, free_size)); generation_allocation_pointer (consing_gen) = plug + len; generation_allocation_limit (consing_gen) = generation_allocation_pointer (consing_gen); } heap_segment* current_region = generation_allocation_segment (consing_gen); if (special_sweep_p) { assert (heap_segment_next_rw (current_region) == 0); return; } set_region_plan_gen_num_sip (current_region, current_plan_gen_num); if (!heap_segment_swept_in_plan (current_region)) { heap_segment_plan_allocated (current_region) = generation_allocation_pointer (consing_gen); dprintf (REGIONS_LOG, ("h%d setting alloc seg %Ix plan alloc to %Ix", heap_number, heap_segment_mem (current_region), heap_segment_plan_allocated (current_region))); } heap_segment* region_no_pins = heap_segment_next (current_region); int region_no_pins_gen_num = heap_segment_gen_num (current_region); do { region_no_pins = heap_segment_non_sip (region_no_pins); if (region_no_pins) { set_region_plan_gen_num (region_no_pins, current_plan_gen_num); heap_segment_plan_allocated (region_no_pins) = heap_segment_mem (region_no_pins); dprintf (REGIONS_LOG, ("h%d setting seg %Ix(no pins) plan gen to 0, plan alloc to %Ix", heap_number, heap_segment_mem (region_no_pins), heap_segment_plan_allocated (region_no_pins))); region_no_pins = heap_segment_next (region_no_pins); } else { if (region_no_pins_gen_num > 0) { region_no_pins_gen_num--; region_no_pins = generation_start_segment (generation_of (region_no_pins_gen_num)); } else break; } } while (region_no_pins); } void gc_heap::grow_mark_list_piece() { if (g_mark_list_piece_size < region_count) { delete[] g_mark_list_piece; // at least double the size size_t alloc_count = max ((g_mark_list_piece_size * 2), region_count); // we need two arrays with alloc_count entries per heap g_mark_list_piece = new (nothrow) uint8_t * *[alloc_count * 2 * get_num_heaps()]; if (g_mark_list_piece != nullptr) { g_mark_list_piece_size = alloc_count; } else { g_mark_list_piece_size = 0; } } } void gc_heap::save_current_survived() { if (!survived_per_region) return; size_t region_info_to_copy = region_count * sizeof (size_t); memcpy (old_card_survived_per_region, survived_per_region, region_info_to_copy); #ifdef _DEBUG for (size_t region_index = 0; region_index < region_count; region_index++) { if (survived_per_region[region_index] != 0) { dprintf (REGIONS_LOG, ("region#[%3d]: %Id", region_index, survived_per_region[region_index])); } } dprintf (REGIONS_LOG, ("global reported %Id", promoted_bytes (heap_number))); #endif //_DEBUG } void gc_heap::update_old_card_survived() { if (!survived_per_region) return; for (size_t region_index = 0; region_index < region_count; region_index++) { old_card_survived_per_region[region_index] = survived_per_region[region_index] - old_card_survived_per_region[region_index]; if 
(survived_per_region[region_index] != 0) { dprintf (REGIONS_LOG, ("region#[%3d]: %Id (card: %Id)", region_index, survived_per_region[region_index], old_card_survived_per_region[region_index])); } } } void gc_heap::update_planned_gen0_free_space (size_t free_size, uint8_t* plug) { gen0_pinned_free_space += free_size; if (!gen0_large_chunk_found) { gen0_large_chunk_found = (free_size >= END_SPACE_AFTER_GC_FL); if (gen0_large_chunk_found) { dprintf (3, ("h%d found large pin free space: %Id at %Ix", heap_number, free_size, plug)); } } } // REGIONS TODO: I wrote this in the same spirit as ephemeral_gen_fit_p but we really should // take committed into consideration instead of reserved. We could also avoid going through // the regions again and do this update in plan phase. void gc_heap::get_gen0_end_plan_space() { for (int gen_idx = settings.condemned_generation; gen_idx >= 0; gen_idx--) { generation* gen = generation_of (gen_idx); heap_segment* region = heap_segment_rw (generation_start_segment (gen)); while (region) { if (heap_segment_plan_gen_num (region) == 0) { size_t end_plan_space = heap_segment_reserved (region) - heap_segment_plan_allocated (region); if (!gen0_large_chunk_found) { gen0_large_chunk_found = (end_plan_space >= END_SPACE_AFTER_GC_FL); if (gen0_large_chunk_found) { dprintf (REGIONS_LOG, ("h%d found large end space: %Id in region %Ix", heap_number, end_plan_space, heap_segment_mem (region))); } } dprintf (REGIONS_LOG, ("h%d found end space: %Id in region %Ix, total %Id->%Id", heap_number, end_plan_space, heap_segment_mem (region), end_gen0_region_space, (end_gen0_region_space + end_plan_space))); end_gen0_region_space += end_plan_space; } region = heap_segment_next (region); } } } size_t gc_heap::get_gen0_end_space() { size_t end_space = 0; heap_segment* seg = generation_start_segment (generation_of (0)); while (seg) { // TODO - // This method can also be called concurrently by full GC notification but // there's no synchronization between checking for ephemeral_heap_segment and // getting alloc_allocated so for now we just always use heap_segment_allocated. //uint8_t* allocated = ((seg == ephemeral_heap_segment) ? 
// alloc_allocated : heap_segment_allocated (seg)); uint8_t* allocated = heap_segment_allocated (seg); end_space += heap_segment_reserved (seg) - allocated; dprintf (REGIONS_LOG, ("h%d gen0 seg %Ix, end %Ix-%Ix=%Ix, end_space->%Id", heap_number, heap_segment_mem (seg), heap_segment_reserved (seg), allocated, (heap_segment_reserved (seg) - allocated), end_space)); seg = heap_segment_next (seg); } return end_space; } #endif //USE_REGIONS inline uint8_t* gc_heap::find_next_marked (uint8_t* x, uint8_t* end, BOOL use_mark_list, uint8_t**& mark_list_next, uint8_t** mark_list_index) { if (use_mark_list) { uint8_t* old_x = x; while ((mark_list_next < mark_list_index) && (*mark_list_next <= x)) { mark_list_next++; } x = end; if ((mark_list_next < mark_list_index) #ifdef MULTIPLE_HEAPS && (*mark_list_next < end) //for multiple segments #endif //MULTIPLE_HEAPS ) x = *mark_list_next; if (current_c_gc_state == c_gc_state_marking) { assert(gc_heap::background_running_p()); bgc_clear_batch_mark_array_bits (old_x, x); } } else { uint8_t* xl = x; #ifdef BACKGROUND_GC if (current_c_gc_state == c_gc_state_marking) { assert (gc_heap::background_running_p()); while ((xl < end) && !marked (xl)) { dprintf (4, ("-%Ix-", (size_t)xl)); assert ((size (xl) > 0)); background_object_marked (xl, TRUE); xl = xl + Align (size (xl)); Prefetch (xl); } } else #endif //BACKGROUND_GC { while ((xl < end) && !marked (xl)) { dprintf (4, ("-%Ix-", (size_t)xl)); assert ((size (xl) > 0)); xl = xl + Align (size (xl)); Prefetch (xl); } } assert (xl <= end); x = xl; } return x; } #ifdef FEATURE_EVENT_TRACE void gc_heap::init_bucket_info() { memset (bucket_info, 0, sizeof (bucket_info)); } void gc_heap::add_plug_in_condemned_info (generation* gen, size_t plug_size) { uint32_t bucket_index = generation_allocator (gen)->first_suitable_bucket (plug_size); (bucket_info[bucket_index].count)++; bucket_info[bucket_index].size += plug_size; } #endif //FEATURE_EVENT_TRACE #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable:21000) // Suppress PREFast warning about overly large function #endif //_PREFAST_ void gc_heap::plan_phase (int condemned_gen_number) { size_t old_gen2_allocated = 0; size_t old_gen2_size = 0; if (condemned_gen_number == (max_generation - 1)) { old_gen2_allocated = generation_free_list_allocated (generation_of (max_generation)); old_gen2_size = generation_size (max_generation); } assert (settings.concurrent == FALSE); dprintf (2,(ThreadStressLog::gcStartPlanMsg(), heap_number, condemned_gen_number, settings.promotion ? 
1 : 0)); generation* condemned_gen1 = generation_of (condemned_gen_number); BOOL use_mark_list = FALSE; #ifdef GC_CONFIG_DRIVEN dprintf (3, ("total number of marked objects: %Id (%Id)", (mark_list_index - &mark_list[0]), (mark_list_end - &mark_list[0]))); if (mark_list_index >= (mark_list_end + 1)) { mark_list_index = mark_list_end + 1; #ifndef MULTIPLE_HEAPS // in Server GC, we check for mark list overflow in sort_mark_list mark_list_overflow = true; #endif } #else //GC_CONFIG_DRIVEN dprintf (3, ("mark_list length: %Id", (mark_list_index - &mark_list[0]))); #endif //GC_CONFIG_DRIVEN if ((condemned_gen_number < max_generation) && (mark_list_index <= mark_list_end)) { #ifndef MULTIPLE_HEAPS #ifdef USE_VXSORT do_vxsort (mark_list, mark_list_index - mark_list, slow, shigh); #else //USE_VXSORT _sort (&mark_list[0], mark_list_index - 1, 0); #endif //USE_VXSORT dprintf (3, ("using mark list at GC #%d", settings.gc_index)); //verify_qsort_array (&mark_list[0], mark_list_index-1); #endif //!MULTIPLE_HEAPS use_mark_list = TRUE; get_gc_data_per_heap()->set_mechanism_bit(gc_mark_list_bit); } else { dprintf (3, ("mark_list not used")); } #ifdef FEATURE_BASICFREEZE #ifdef USE_REGIONS assert (!ro_segments_in_range); #else //USE_REGIONS if ((generation_start_segment (condemned_gen1) != ephemeral_heap_segment) && ro_segments_in_range) { sweep_ro_segments (generation_start_segment (condemned_gen1)); } #endif //USE_REGIONS #endif // FEATURE_BASICFREEZE #ifndef MULTIPLE_HEAPS int condemned_gen_index = get_stop_generation_index (condemned_gen_number); for (; condemned_gen_index <= condemned_gen_number; condemned_gen_index++) { generation* current_gen = generation_of (condemned_gen_index); if (shigh != (uint8_t*)0) { heap_segment* seg = heap_segment_rw (generation_start_segment (current_gen)); PREFIX_ASSUME(seg != NULL); heap_segment* fseg = seg; do { if (in_range_for_segment (slow, seg)) { uint8_t* start_unmarked = 0; #ifdef USE_REGIONS start_unmarked = heap_segment_mem (seg); #else //USE_REGIONS if (seg == fseg) { uint8_t* o = generation_allocation_start (current_gen); o += get_soh_start_obj_len (o); if (slow > o) { start_unmarked = o; assert ((slow - o) >= (int)Align (min_obj_size)); } } else { assert (condemned_gen_number == max_generation); start_unmarked = heap_segment_mem (seg); } #endif //USE_REGIONS if (start_unmarked) { size_t unmarked_size = slow - start_unmarked; if (unmarked_size > 0) { #ifdef BACKGROUND_GC if (current_c_gc_state == c_gc_state_marking) { bgc_clear_batch_mark_array_bits (start_unmarked, slow); } #endif //BACKGROUND_GC make_unused_array (start_unmarked, unmarked_size); } } } if (in_range_for_segment (shigh, seg)) { #ifdef BACKGROUND_GC if (current_c_gc_state == c_gc_state_marking) { bgc_clear_batch_mark_array_bits ((shigh + Align (size (shigh))), heap_segment_allocated (seg)); } #endif //BACKGROUND_GC heap_segment_saved_allocated (seg) = heap_segment_allocated (seg); heap_segment_allocated (seg) = shigh + Align (size (shigh)); } // test if the segment is in the range of [slow, shigh] if (!((heap_segment_reserved (seg) >= slow) && (heap_segment_mem (seg) <= shigh))) { heap_segment_saved_allocated (seg) = heap_segment_allocated (seg); // shorten it to minimum heap_segment_allocated (seg) = heap_segment_mem (seg); } seg = heap_segment_next_rw (seg); } while (seg); } else { heap_segment* seg = heap_segment_rw (generation_start_segment (current_gen)); PREFIX_ASSUME(seg != NULL); heap_segment* sseg = seg; do { uint8_t* start_unmarked = heap_segment_mem (seg); #ifndef USE_REGIONS // 
shorten it to minimum if (seg == sseg) { // no survivors make all generations look empty uint8_t* o = generation_allocation_start (current_gen); o += get_soh_start_obj_len (o); start_unmarked = o; } #endif //!USE_REGIONS #ifdef BACKGROUND_GC if (current_c_gc_state == c_gc_state_marking) { bgc_clear_batch_mark_array_bits (start_unmarked, heap_segment_allocated (seg)); } #endif //BACKGROUND_GC heap_segment_saved_allocated (seg) = heap_segment_allocated (seg); heap_segment_allocated (seg) = start_unmarked; seg = heap_segment_next_rw (seg); } while (seg); } } #endif //MULTIPLE_HEAPS heap_segment* seg1 = heap_segment_rw (generation_start_segment (condemned_gen1)); PREFIX_ASSUME(seg1 != NULL); uint8_t* end = heap_segment_allocated (seg1); uint8_t* first_condemned_address = get_soh_start_object (seg1, condemned_gen1); uint8_t* x = first_condemned_address; #ifdef USE_REGIONS memset (regions_per_gen, 0, sizeof (regions_per_gen)); memset (sip_maxgen_regions_per_gen, 0, sizeof (sip_maxgen_regions_per_gen)); memset (reserved_free_regions_sip, 0, sizeof (reserved_free_regions_sip)); int pinned_survived_region = 0; uint8_t** mark_list_index = nullptr; uint8_t** mark_list_next = nullptr; if (use_mark_list) mark_list_next = get_region_mark_list (x, end, &mark_list_index); #else // USE_REGIONS assert (!marked (x)); uint8_t** mark_list_next = &mark_list[0]; #endif //USE_REGIONS uint8_t* plug_end = x; uint8_t* tree = 0; size_t sequence_number = 0; uint8_t* last_node = 0; size_t current_brick = brick_of (x); BOOL allocate_in_condemned = ((condemned_gen_number == max_generation)|| (settings.promotion == FALSE)); int active_old_gen_number = condemned_gen_number; int active_new_gen_number = (allocate_in_condemned ? condemned_gen_number: (1 + condemned_gen_number)); generation* older_gen = 0; generation* consing_gen = condemned_gen1; alloc_list r_free_list [MAX_SOH_BUCKET_COUNT]; size_t r_free_list_space = 0; size_t r_free_obj_space = 0; size_t r_older_gen_free_list_allocated = 0; size_t r_older_gen_condemned_allocated = 0; size_t r_older_gen_end_seg_allocated = 0; uint8_t* r_allocation_pointer = 0; uint8_t* r_allocation_limit = 0; uint8_t* r_allocation_start_region = 0; heap_segment* r_allocation_segment = 0; #ifdef FREE_USAGE_STATS size_t r_older_gen_free_space[NUM_GEN_POWER2]; #endif //FREE_USAGE_STATS if ((condemned_gen_number < max_generation)) { older_gen = generation_of (min (max_generation, 1 + condemned_gen_number)); generation_allocator (older_gen)->copy_to_alloc_list (r_free_list); r_free_list_space = generation_free_list_space (older_gen); r_free_obj_space = generation_free_obj_space (older_gen); #ifdef FREE_USAGE_STATS memcpy (r_older_gen_free_space, older_gen->gen_free_spaces, sizeof (r_older_gen_free_space)); #endif //FREE_USAGE_STATS generation_allocate_end_seg_p (older_gen) = FALSE; #ifdef DOUBLY_LINKED_FL if (older_gen->gen_num == max_generation) { generation_set_bgc_mark_bit_p (older_gen) = FALSE; generation_last_free_list_allocated (older_gen) = 0; } #endif //DOUBLY_LINKED_FL r_older_gen_free_list_allocated = generation_free_list_allocated (older_gen); r_older_gen_condemned_allocated = generation_condemned_allocated (older_gen); r_older_gen_end_seg_allocated = generation_end_seg_allocated (older_gen); r_allocation_limit = generation_allocation_limit (older_gen); r_allocation_pointer = generation_allocation_pointer (older_gen); r_allocation_start_region = generation_allocation_context_start_region (older_gen); r_allocation_segment = generation_allocation_segment (older_gen); #ifdef USE_REGIONS 
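// The r_* values captured above snapshot the older generation's free list and allocation
// context before planning starts consuming its free space, so the original state is still
// available if this GC does not end up compacting.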
if (older_gen->gen_num == max_generation) { check_seg_gen_num (r_allocation_segment); } #endif //USE_REGIONS heap_segment* start_seg = heap_segment_rw (generation_start_segment (older_gen)); PREFIX_ASSUME(start_seg != NULL); #ifdef USE_REGIONS heap_segment* skip_seg = 0; assert (generation_allocation_pointer (older_gen) == 0); assert (generation_allocation_limit (older_gen) == 0); #else //USE_REGIONS heap_segment* skip_seg = ephemeral_heap_segment; if (start_seg != ephemeral_heap_segment) { assert (condemned_gen_number == (max_generation - 1)); } #endif //USE_REGIONS if (start_seg != skip_seg) { while (start_seg && (start_seg != skip_seg)) { assert (heap_segment_allocated (start_seg) >= heap_segment_mem (start_seg)); assert (heap_segment_allocated (start_seg) <= heap_segment_reserved (start_seg)); heap_segment_plan_allocated (start_seg) = heap_segment_allocated (start_seg); start_seg = heap_segment_next_rw (start_seg); } } } //reset all of the segment's plan_allocated { int condemned_gen_index1 = get_stop_generation_index (condemned_gen_number); for (; condemned_gen_index1 <= condemned_gen_number; condemned_gen_index1++) { generation* current_gen = generation_of (condemned_gen_index1); heap_segment* seg2 = heap_segment_rw (generation_start_segment (current_gen)); PREFIX_ASSUME(seg2 != NULL); while (seg2) { #ifdef USE_REGIONS regions_per_gen[condemned_gen_index1]++; dprintf (REGIONS_LOG, ("h%d gen%d %Ix-%Ix", heap_number, condemned_gen_index1, heap_segment_mem (seg2), heap_segment_allocated (seg2))); #endif //USE_REGIONS heap_segment_plan_allocated (seg2) = heap_segment_mem (seg2); seg2 = heap_segment_next_rw (seg2); } } } int condemned_gn = condemned_gen_number; int bottom_gen = 0; init_free_and_plug(); while (condemned_gn >= bottom_gen) { generation* condemned_gen2 = generation_of (condemned_gn); generation_allocator (condemned_gen2)->clear(); generation_free_list_space (condemned_gen2) = 0; generation_free_obj_space (condemned_gen2) = 0; generation_allocation_size (condemned_gen2) = 0; generation_condemned_allocated (condemned_gen2) = 0; generation_sweep_allocated (condemned_gen2) = 0; generation_pinned_allocated (condemned_gen2) = 0; generation_free_list_allocated(condemned_gen2) = 0; generation_end_seg_allocated (condemned_gen2) = 0; generation_pinned_allocation_sweep_size (condemned_gen2) = 0; generation_pinned_allocation_compact_size (condemned_gen2) = 0; #ifdef FREE_USAGE_STATS generation_pinned_free_obj_space (condemned_gen2) = 0; generation_allocated_in_pinned_free (condemned_gen2) = 0; generation_allocated_since_last_pin (condemned_gen2) = 0; #endif //FREE_USAGE_STATS #ifndef USE_REGIONS generation_plan_allocation_start (condemned_gen2) = 0; #endif //!USE_REGIONS generation_allocation_segment (condemned_gen2) = heap_segment_rw (generation_start_segment (condemned_gen2)); PREFIX_ASSUME(generation_allocation_segment(condemned_gen2) != NULL); #ifdef USE_REGIONS generation_allocation_pointer (condemned_gen2) = heap_segment_mem (generation_allocation_segment (condemned_gen2)); #else //USE_REGIONS if (generation_start_segment (condemned_gen2) != ephemeral_heap_segment) { generation_allocation_pointer (condemned_gen2) = heap_segment_mem (generation_allocation_segment (condemned_gen2)); } else { generation_allocation_pointer (condemned_gen2) = generation_allocation_start (condemned_gen2); } #endif //USE_REGIONS generation_allocation_limit (condemned_gen2) = generation_allocation_pointer (condemned_gen2); generation_allocation_context_start_region (condemned_gen2) = 
generation_allocation_pointer (condemned_gen2); condemned_gn--; } BOOL allocate_first_generation_start = FALSE; if (allocate_in_condemned) { allocate_first_generation_start = TRUE; } dprintf(3,( " From %Ix to %Ix", (size_t)x, (size_t)end)); #ifdef USE_REGIONS if (should_sweep_in_plan (seg1)) { sweep_region_in_plan (seg1, use_mark_list, mark_list_next, mark_list_index); x = end; } #else demotion_low = MAX_PTR; demotion_high = heap_segment_allocated (ephemeral_heap_segment); #endif //!USE_REGIONS // If we are doing a gen1 only because of cards, it means we should not demote any pinned plugs // from gen1. They should get promoted to gen2. demote_gen1_p = !(settings.promotion && (settings.condemned_generation == (max_generation - 1)) && gen_to_condemn_reasons.is_only_condition (gen_low_card_p)); total_ephemeral_size = 0; print_free_and_plug ("BP"); #ifndef USE_REGIONS for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++) { generation* temp_gen = generation_of (gen_idx); dprintf (2, ("gen%d start %Ix, plan start %Ix", gen_idx, generation_allocation_start (temp_gen), generation_plan_allocation_start (temp_gen))); } #endif //!USE_REGIONS #ifdef FEATURE_EVENT_TRACE // When verbose level is enabled we want to record some info about gen2 FL usage during gen1 GCs. // We record the bucket info for the largest FL items and plugs that we have to allocate in condemned. bool record_fl_info_p = (EVENT_ENABLED (GCFitBucketInfo) && (condemned_gen_number == (max_generation - 1))); size_t recorded_fl_info_size = 0; if (record_fl_info_p) init_bucket_info(); bool fire_pinned_plug_events_p = EVENT_ENABLED(PinPlugAtGCTime); #endif //FEATURE_EVENT_TRACE size_t last_plug_len = 0; #ifdef DOUBLY_LINKED_FL gen2_removed_no_undo = 0; saved_pinned_plug_index = 0; #endif //DOUBLY_LINKED_FL while (1) { if (x >= end) { if (!use_mark_list) { assert (x == end); } #ifdef USE_REGIONS if (heap_segment_swept_in_plan (seg1)) { assert (heap_segment_gen_num (seg1) == active_old_gen_number); dynamic_data* dd_active_old = dynamic_data_of (active_old_gen_number); dd_survived_size (dd_active_old) += heap_segment_survived (seg1); dprintf (REGIONS_LOG, ("region %Ix-%Ix SIP", heap_segment_mem (seg1), heap_segment_allocated (seg1))); } else #endif //USE_REGIONS { assert (heap_segment_allocated (seg1) == end); heap_segment_saved_allocated (seg1) = heap_segment_allocated (seg1); heap_segment_allocated (seg1) = plug_end; current_brick = update_brick_table (tree, current_brick, x, plug_end); dprintf (REGIONS_LOG, ("region %Ix-%Ix(%Ix) non SIP", heap_segment_mem (seg1), heap_segment_allocated (seg1), heap_segment_plan_allocated (seg1))); dprintf (3, ("end of seg: new tree, sequence# 0")); sequence_number = 0; tree = 0; } #ifdef USE_REGIONS heap_segment_pinned_survived (seg1) = pinned_survived_region; dprintf (REGIONS_LOG, ("h%d setting seg %Ix pin surv: %Ix", heap_number, heap_segment_mem (seg1), pinned_survived_region)); pinned_survived_region = 0; if (heap_segment_mem (seg1) == heap_segment_allocated (seg1)) { num_regions_freed_in_sweep++; } #endif //USE_REGIONS if (heap_segment_next_rw (seg1)) { seg1 = heap_segment_next_rw (seg1); end = heap_segment_allocated (seg1); plug_end = x = heap_segment_mem (seg1); current_brick = brick_of (x); #ifdef USE_REGIONS if (use_mark_list) mark_list_next = get_region_mark_list (x, end, &mark_list_index); if (should_sweep_in_plan (seg1)) { sweep_region_in_plan (seg1, use_mark_list, mark_list_next, mark_list_index); x = end; } #endif //USE_REGIONS dprintf(3,( " From %Ix to %Ix", (size_t)x, (size_t)end)); 
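// Restart the scan on the newly selected segment; a region swept in plan above has x set to
// end, so it falls straight into the end-of-segment handling at the top of the loop.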
continue; } else { #ifdef USE_REGIONS // We have a few task here when we ran out of regions to go through for the // active_old_gen_number - // // + decide on which pins to skip // + set the planned gen for the regions we process here // + set the consing gen's alloc ptr/limit // + decide on the new active_old_gen_number (which is just the current one - 1) // + decide on the new active_new_gen_number (which depends on settings.promotion) // // Important differences between process_last_np_surv_region and process_ephemeral_boundaries // - it's guaranteed we would ask to allocate gen1 start for promotion and gen0 // start for non promotion case. // - consing_gen is never changed. In fact we really don't need consing_gen, we just // need the alloc ptr/limit pair and the alloc seg. // TODO : should just get rid of consing_gen. // These make things more regular and easier to keep track of. // // Also I'm doing everything here instead of having to have separate code to go // through the left over pins after the main loop in plan phase. int saved_active_new_gen_number = active_new_gen_number; BOOL saved_allocate_in_condemned = allocate_in_condemned; dprintf (REGIONS_LOG, ("h%d switching to look at next gen - current active old %d, new %d, alloc_in_condemned: %d", heap_number, active_old_gen_number, active_new_gen_number, allocate_in_condemned)); if (active_old_gen_number <= (settings.promotion ? (max_generation - 1) : max_generation)) { dprintf (REGIONS_LOG, ("h%d active old: %d, new: %d->%d, allocate_in_condemned %d->1", heap_number, active_old_gen_number, active_new_gen_number, (active_new_gen_number - 1), allocate_in_condemned)); active_new_gen_number--; allocate_in_condemned = TRUE; } if (active_new_gen_number >= 0) { process_last_np_surv_region (consing_gen, saved_active_new_gen_number, active_new_gen_number); } if (active_old_gen_number == 0) { // We need to process the pins on the remaining regions if any. process_remaining_regions (active_new_gen_number, consing_gen); break; } else { active_old_gen_number--; seg1 = heap_segment_rw (generation_start_segment (generation_of (active_old_gen_number))); end = heap_segment_allocated (seg1); plug_end = x = heap_segment_mem (seg1); current_brick = brick_of (x); if (use_mark_list) mark_list_next = get_region_mark_list (x, end, &mark_list_index); if (should_sweep_in_plan (seg1)) { sweep_region_in_plan (seg1, use_mark_list, mark_list_next, mark_list_index); x = end; } dprintf (REGIONS_LOG,("h%d switching to gen%d start region %Ix, %Ix-%Ix", heap_number, active_old_gen_number, heap_segment_mem (seg1), x, end)); continue; } #else //USE_REGIONS break; #endif //USE_REGIONS } } BOOL last_npinned_plug_p = FALSE; BOOL last_pinned_plug_p = FALSE; // last_pinned_plug is the beginning of the last pinned plug. If we merge a plug into a pinned // plug we do not change the value of last_pinned_plug. This happens with artificially pinned plugs - // it can be merged with a previous pinned plug and a pinned plug after it can be merged with it. 
uint8_t* last_pinned_plug = 0; size_t num_pinned_plugs_in_plug = 0; uint8_t* last_object_in_plug = 0; while ((x < end) && marked (x)) { uint8_t* plug_start = x; uint8_t* saved_plug_end = plug_end; BOOL pinned_plug_p = FALSE; BOOL npin_before_pin_p = FALSE; BOOL saved_last_npinned_plug_p = last_npinned_plug_p; uint8_t* saved_last_object_in_plug = last_object_in_plug; BOOL merge_with_last_pin_p = FALSE; size_t added_pinning_size = 0; size_t artificial_pinned_size = 0; store_plug_gap_info (plug_start, plug_end, last_npinned_plug_p, last_pinned_plug_p, last_pinned_plug, pinned_plug_p, last_object_in_plug, merge_with_last_pin_p, last_plug_len); #ifdef FEATURE_STRUCTALIGN int requiredAlignment = ((CObjectHeader*)plug_start)->GetRequiredAlignment(); size_t alignmentOffset = OBJECT_ALIGNMENT_OFFSET; #endif // FEATURE_STRUCTALIGN { uint8_t* xl = x; while ((xl < end) && marked (xl) && (pinned (xl) == pinned_plug_p)) { assert (xl < end); if (pinned(xl)) { clear_pinned (xl); } #ifdef FEATURE_STRUCTALIGN else { int obj_requiredAlignment = ((CObjectHeader*)xl)->GetRequiredAlignment(); if (obj_requiredAlignment > requiredAlignment) { requiredAlignment = obj_requiredAlignment; alignmentOffset = xl - plug_start + OBJECT_ALIGNMENT_OFFSET; } } #endif // FEATURE_STRUCTALIGN clear_marked (xl); dprintf(4, ("+%Ix+", (size_t)xl)); assert ((size (xl) > 0)); assert ((size (xl) <= loh_size_threshold)); last_object_in_plug = xl; xl = xl + Align (size (xl)); Prefetch (xl); } BOOL next_object_marked_p = ((xl < end) && marked (xl)); if (pinned_plug_p) { // If it is pinned we need to extend to the next marked object as we can't use part of // a pinned object to make the artificial gap (unless the last 3 ptr sized words are all // references but for now I am just using the next non pinned object for that). if (next_object_marked_p) { clear_marked (xl); last_object_in_plug = xl; size_t extra_size = Align (size (xl)); xl = xl + extra_size; added_pinning_size = extra_size; } } else { if (next_object_marked_p) npin_before_pin_p = TRUE; } assert (xl <= end); x = xl; } dprintf (3, ( "%Ix[", (size_t)plug_start)); plug_end = x; size_t ps = plug_end - plug_start; last_plug_len = ps; dprintf (3, ( "%Ix[(%Ix)", (size_t)x, ps)); uint8_t* new_address = 0; if (!pinned_plug_p) { if (allocate_in_condemned && (settings.condemned_generation == max_generation) && (ps > OS_PAGE_SIZE)) { ptrdiff_t reloc = plug_start - generation_allocation_pointer (consing_gen); //reloc should >=0 except when we relocate //across segments and the dest seg is higher then the src if ((ps > (8*OS_PAGE_SIZE)) && (reloc > 0) && ((size_t)reloc < (ps/16))) { dprintf (3, ("Pinning %Ix; reloc would have been: %Ix", (size_t)plug_start, reloc)); // The last plug couldn't have been a npinned plug or it would have // included this plug. 
assert (!saved_last_npinned_plug_p); if (last_pinned_plug) { dprintf (3, ("artificially pinned plug merged with last pinned plug")); merge_with_last_pin_p = TRUE; } else { enque_pinned_plug (plug_start, FALSE, 0); last_pinned_plug = plug_start; } convert_to_pinned_plug (last_npinned_plug_p, last_pinned_plug_p, pinned_plug_p, ps, artificial_pinned_size); } } } #ifndef USE_REGIONS if (allocate_first_generation_start) { allocate_first_generation_start = FALSE; plan_generation_start (condemned_gen1, consing_gen, plug_start); assert (generation_plan_allocation_start (condemned_gen1)); } if (seg1 == ephemeral_heap_segment) { process_ephemeral_boundaries (plug_start, active_new_gen_number, active_old_gen_number, consing_gen, allocate_in_condemned); } #endif //!USE_REGIONS dprintf (3, ("adding %Id to gen%d surv", ps, active_old_gen_number)); dynamic_data* dd_active_old = dynamic_data_of (active_old_gen_number); dd_survived_size (dd_active_old) += ps; BOOL convert_to_pinned_p = FALSE; BOOL allocated_in_older_p = FALSE; if (!pinned_plug_p) { #if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN) dd_num_npinned_plugs (dd_active_old)++; #endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN add_gen_plug (active_old_gen_number, ps); if (allocate_in_condemned) { verify_pins_with_post_plug_info("before aic"); new_address = allocate_in_condemned_generations (consing_gen, ps, active_old_gen_number, #ifdef SHORT_PLUGS &convert_to_pinned_p, (npin_before_pin_p ? plug_end : 0), seg1, #endif //SHORT_PLUGS plug_start REQD_ALIGN_AND_OFFSET_ARG); verify_pins_with_post_plug_info("after aic"); } else { new_address = allocate_in_older_generation (older_gen, ps, active_old_gen_number, plug_start REQD_ALIGN_AND_OFFSET_ARG); if (new_address != 0) { allocated_in_older_p = TRUE; if (settings.condemned_generation == (max_generation - 1)) { dprintf (3, (" NA: %Ix-%Ix -> %Ix, %Ix (%Ix)", plug_start, plug_end, (size_t)new_address, (size_t)new_address + (plug_end - plug_start), (size_t)(plug_end - plug_start))); } } else { if (generation_allocator(older_gen)->discard_if_no_fit_p()) { allocate_in_condemned = TRUE; } new_address = allocate_in_condemned_generations (consing_gen, ps, active_old_gen_number, #ifdef SHORT_PLUGS &convert_to_pinned_p, (npin_before_pin_p ? plug_end : 0), seg1, #endif //SHORT_PLUGS plug_start REQD_ALIGN_AND_OFFSET_ARG); } } #ifdef FEATURE_EVENT_TRACE if (record_fl_info_p && !allocated_in_older_p) { add_plug_in_condemned_info (older_gen, ps); recorded_fl_info_size += ps; } #endif //FEATURE_EVENT_TRACE if (convert_to_pinned_p) { assert (last_npinned_plug_p != FALSE); assert (last_pinned_plug_p == FALSE); convert_to_pinned_plug (last_npinned_plug_p, last_pinned_plug_p, pinned_plug_p, ps, artificial_pinned_size); enque_pinned_plug (plug_start, FALSE, 0); last_pinned_plug = plug_start; } else { if (!new_address) { //verify that we are at then end of the ephemeral segment assert (generation_allocation_segment (consing_gen) == ephemeral_heap_segment); //verify that we are near the end assert ((generation_allocation_pointer (consing_gen) + Align (ps)) < heap_segment_allocated (ephemeral_heap_segment)); assert ((generation_allocation_pointer (consing_gen) + Align (ps)) > (heap_segment_allocated (ephemeral_heap_segment) + Align (min_obj_size))); } else { dprintf (3, (ThreadStressLog::gcPlanPlugMsg(), (size_t)(node_gap_size (plug_start)), plug_start, plug_end, (size_t)new_address, (size_t)(plug_start - new_address), (size_t)new_address + ps, ps, (is_plug_padded (plug_start) ? 
1 : 0), x, (allocated_in_older_p ? "O" : "C"))); #ifdef SHORT_PLUGS if (is_plug_padded (plug_start)) { dprintf (3, ("%Ix was padded", plug_start)); dd_padding_size (dd_active_old) += Align (min_obj_size); } #endif //SHORT_PLUGS } } } if (pinned_plug_p) { #ifdef FEATURE_EVENT_TRACE if (fire_pinned_plug_events_p) { FIRE_EVENT(PinPlugAtGCTime, plug_start, plug_end, (merge_with_last_pin_p ? 0 : (uint8_t*)node_gap_size (plug_start))); } #endif //FEATURE_EVENT_TRACE if (merge_with_last_pin_p) { merge_with_last_pinned_plug (last_pinned_plug, ps); } else { assert (last_pinned_plug == plug_start); set_pinned_info (plug_start, ps, consing_gen); } new_address = plug_start; dprintf (3, (ThreadStressLog::gcPlanPinnedPlugMsg(), (size_t)(node_gap_size (plug_start)), (size_t)plug_start, (size_t)plug_end, ps, (merge_with_last_pin_p ? 1 : 0))); dprintf (3, ("adding %Id to gen%d pinned surv", plug_end - plug_start, active_old_gen_number)); size_t pinned_plug_size = plug_end - plug_start; #ifdef USE_REGIONS pinned_survived_region += (int)pinned_plug_size; #endif //USE_REGIONS dd_pinned_survived_size (dd_active_old) += pinned_plug_size; dd_added_pinned_size (dd_active_old) += added_pinning_size; dd_artificial_pinned_survived_size (dd_active_old) += artificial_pinned_size; if (!demote_gen1_p && (active_old_gen_number == (max_generation - 1))) { last_gen1_pin_end = plug_end; } } #ifdef _DEBUG // detect forward allocation in the same segment assert (!((new_address > plug_start) && (new_address < heap_segment_reserved (seg1)))); #endif //_DEBUG if (!merge_with_last_pin_p) { if (current_brick != brick_of (plug_start)) { current_brick = update_brick_table (tree, current_brick, plug_start, saved_plug_end); sequence_number = 0; tree = 0; } set_node_relocation_distance (plug_start, (new_address - plug_start)); if (last_node && (node_relocation_distance (last_node) == (node_relocation_distance (plug_start) + (ptrdiff_t)node_gap_size (plug_start)))) { //dprintf(3,( " Lb")); dprintf (3, ("%Ix Lb", plug_start)); set_node_left (plug_start); } if (0 == sequence_number) { dprintf (2, ("sn: 0, tree is set to %Ix", plug_start)); tree = plug_start; } verify_pins_with_post_plug_info("before insert node"); tree = insert_node (plug_start, ++sequence_number, tree, last_node); dprintf (3, ("tree is %Ix (b: %Ix) after insert_node(lc: %Ix, rc: %Ix)", tree, brick_of (tree), (tree + node_left_child (tree)), (tree + node_right_child (tree)))); last_node = plug_start; #ifdef _DEBUG // If we detect if the last plug is pinned plug right before us, we should save this gap info if (!pinned_plug_p) { if (mark_stack_tos > 0) { mark& m = mark_stack_array[mark_stack_tos - 1]; if (m.has_post_plug_info()) { uint8_t* post_plug_info_start = m.saved_post_plug_info_start; size_t* current_plug_gap_start = (size_t*)(plug_start - sizeof (plug_and_gap)); if ((uint8_t*)current_plug_gap_start == post_plug_info_start) { dprintf (3, ("Ginfo: %Ix, %Ix, %Ix", *current_plug_gap_start, *(current_plug_gap_start + 1), *(current_plug_gap_start + 2))); memcpy (&(m.saved_post_plug_debug), current_plug_gap_start, sizeof (gap_reloc_pair)); } } } } #endif //_DEBUG verify_pins_with_post_plug_info("after insert node"); } } if (num_pinned_plugs_in_plug > 1) { dprintf (3, ("more than %Id pinned plugs in this plug", num_pinned_plugs_in_plug)); } x = find_next_marked (x, end, use_mark_list, mark_list_next, mark_list_index); } #ifndef USE_REGIONS while (!pinned_plug_que_empty_p()) { if (settings.promotion) { uint8_t* pplug = pinned_plug (oldest_pin()); if (in_range_for_segment 
(pplug, ephemeral_heap_segment)) { consing_gen = ensure_ephemeral_heap_segment (consing_gen); //allocate all of the generation gaps while (active_new_gen_number > 0) { active_new_gen_number--; if (active_new_gen_number == (max_generation - 1)) { maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); if (!demote_gen1_p) advance_pins_for_demotion (consing_gen); } generation* gen = generation_of (active_new_gen_number); plan_generation_start (gen, consing_gen, 0); if (demotion_low == MAX_PTR) { demotion_low = pplug; dprintf (3, ("end plan: dlow->%Ix", demotion_low)); } dprintf (2, ("(%d)gen%d plan start: %Ix", heap_number, active_new_gen_number, (size_t)generation_plan_allocation_start (gen))); assert (generation_plan_allocation_start (gen)); } } } if (pinned_plug_que_empty_p()) break; size_t entry = deque_pinned_plug(); mark* m = pinned_plug_of (entry); uint8_t* plug = pinned_plug (m); size_t len = pinned_len (m); // detect pinned block in different segment (later) than // allocation segment heap_segment* nseg = heap_segment_rw (generation_allocation_segment (consing_gen)); while ((plug < generation_allocation_pointer (consing_gen)) || (plug >= heap_segment_allocated (nseg))) { assert ((plug < heap_segment_mem (nseg)) || (plug > heap_segment_reserved (nseg))); //adjust the end of the segment to be the end of the plug assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (nseg)); assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (nseg)); heap_segment_plan_allocated (nseg) = generation_allocation_pointer (consing_gen); //switch allocation segment nseg = heap_segment_next_rw (nseg); generation_allocation_segment (consing_gen) = nseg; //reset the allocation pointer and limits generation_allocation_pointer (consing_gen) = heap_segment_mem (nseg); } set_new_pin_info (m, generation_allocation_pointer (consing_gen)); dprintf (2, ("pin %Ix b: %Ix->%Ix", plug, brick_of (plug), (size_t)(brick_table[brick_of (plug)]))); generation_allocation_pointer (consing_gen) = plug + len; generation_allocation_limit (consing_gen) = generation_allocation_pointer (consing_gen); //Add the size of the pinned plug to the right pinned allocations //find out which gen this pinned plug came from int frgn = object_gennum (plug); if ((frgn != (int)max_generation) && settings.promotion) { generation_pinned_allocation_sweep_size ((generation_of (frgn +1))) += len; } } plan_generation_starts (consing_gen); #endif //!USE_REGIONS descr_generations ("AP"); print_free_and_plug ("AP"); { #ifdef SIMPLE_DPRINTF for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++) { generation* temp_gen = generation_of (gen_idx); dynamic_data* temp_dd = dynamic_data_of (gen_idx); int added_pinning_ratio = 0; int artificial_pinned_ratio = 0; if (dd_pinned_survived_size (temp_dd) != 0) { added_pinning_ratio = (int)((float)dd_added_pinned_size (temp_dd) * 100 / (float)dd_pinned_survived_size (temp_dd)); artificial_pinned_ratio = (int)((float)dd_artificial_pinned_survived_size (temp_dd) * 100 / (float)dd_pinned_survived_size (temp_dd)); } size_t padding_size = #ifdef SHORT_PLUGS dd_padding_size (temp_dd); #else 0; #endif //SHORT_PLUGS dprintf (1, ("gen%d: NON PIN alloc: %Id, pin com: %Id, sweep: %Id, surv: %Id, pinsurv: %Id(%d%% added, %d%% art), np surv: %Id, pad: %Id", gen_idx, generation_allocation_size (temp_gen), generation_pinned_allocation_compact_size (temp_gen), generation_pinned_allocation_sweep_size (temp_gen), dd_survived_size 
(temp_dd), dd_pinned_survived_size (temp_dd), added_pinning_ratio, artificial_pinned_ratio, (dd_survived_size (temp_dd) - dd_pinned_survived_size (temp_dd)), padding_size)); #ifndef USE_REGIONS dprintf (1, ("gen%d: %Ix, %Ix(%Id)", gen_idx, generation_allocation_start (temp_gen), generation_plan_allocation_start (temp_gen), (size_t)(generation_plan_allocation_start (temp_gen) - generation_allocation_start (temp_gen)))); #endif //USE_REGIONS } #endif //SIMPLE_DPRINTF } if (settings.condemned_generation == (max_generation - 1 )) { generation* older_gen = generation_of (settings.condemned_generation + 1); size_t rejected_free_space = generation_free_obj_space (older_gen) - r_free_obj_space; size_t free_list_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated; size_t end_seg_allocated = generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated; size_t condemned_allocated = generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated; size_t growth = end_seg_allocated + condemned_allocated; if (growth > 0) { dprintf (1, ("gen2 grew %Id (end seg alloc: %Id, condemned alloc: %Id", growth, end_seg_allocated, condemned_allocated)); maxgen_size_inc_p = true; } else { dprintf (2, ("gen2 didn't grow (end seg alloc: %Id, , condemned alloc: %Id, gen1 c alloc: %Id", end_seg_allocated, condemned_allocated, generation_condemned_allocated (generation_of (max_generation - 1)))); } dprintf (1, ("older gen's free alloc: %Id->%Id, seg alloc: %Id->%Id, condemned alloc: %Id->%Id", r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen), r_older_gen_end_seg_allocated, generation_end_seg_allocated (older_gen), r_older_gen_condemned_allocated, generation_condemned_allocated (older_gen))); dprintf (1, ("this GC did %Id free list alloc(%Id bytes free space rejected)", free_list_allocated, rejected_free_space)); maxgen_size_increase* maxgen_size_info = &(get_gc_data_per_heap()->maxgen_size_info); maxgen_size_info->free_list_allocated = free_list_allocated; maxgen_size_info->free_list_rejected = rejected_free_space; maxgen_size_info->end_seg_allocated = end_seg_allocated; maxgen_size_info->condemned_allocated = condemned_allocated; maxgen_size_info->pinned_allocated = maxgen_pinned_compact_before_advance; maxgen_size_info->pinned_allocated_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)) - maxgen_pinned_compact_before_advance; #ifdef FREE_USAGE_STATS int free_list_efficiency = 0; if ((free_list_allocated + rejected_free_space) != 0) free_list_efficiency = (int)(((float) (free_list_allocated) / (float)(free_list_allocated + rejected_free_space)) * (float)100); int running_free_list_efficiency = (int)(generation_allocator_efficiency(older_gen)*100); dprintf (1, ("gen%d free list alloc effi: %d%%, current effi: %d%%", older_gen->gen_num, free_list_efficiency, running_free_list_efficiency)); dprintf (1, ("gen2 free list change")); for (int j = 0; j < NUM_GEN_POWER2; j++) { dprintf (1, ("[h%d][#%Id]: 2^%d: F: %Id->%Id(%Id), P: %Id", heap_number, settings.gc_index, (j + 10), r_older_gen_free_space[j], older_gen->gen_free_spaces[j], (ptrdiff_t)(r_older_gen_free_space[j] - older_gen->gen_free_spaces[j]), (generation_of(max_generation - 1))->gen_plugs[j])); } #endif //FREE_USAGE_STATS } size_t fragmentation = generation_fragmentation (generation_of (condemned_gen_number), consing_gen, heap_segment_allocated (ephemeral_heap_segment)); dprintf (2,("Fragmentation: %Id", fragmentation)); dprintf (2,("---- End of 
Plan phase ----")); // We may update write barrier code. We assume here EE has been suspended if we are on a GC thread. assert(IsGCInProgress()); BOOL should_expand = FALSE; BOOL should_compact= FALSE; ephemeral_promotion = FALSE; #ifdef HOST_64BIT if ((!settings.concurrent) && #ifdef USE_REGIONS !special_sweep_p && #endif //USE_REGIONS !provisional_mode_triggered && ((condemned_gen_number < max_generation) && ((settings.gen0_reduction_count > 0) || (settings.entry_memory_load >= 95)))) { dprintf (GTC_LOG, ("gen0 reduction count is %d, condemning %d, mem load %d", settings.gen0_reduction_count, condemned_gen_number, settings.entry_memory_load)); should_compact = TRUE; get_gc_data_per_heap()->set_mechanism (gc_heap_compact, ((settings.gen0_reduction_count > 0) ? compact_fragmented_gen0 : compact_high_mem_load)); #ifndef USE_REGIONS if ((condemned_gen_number >= (max_generation - 1)) && dt_low_ephemeral_space_p (tuning_deciding_expansion)) { dprintf (GTC_LOG, ("Not enough space for all ephemeral generations with compaction")); should_expand = TRUE; } #endif //!USE_REGIONS } else #endif // HOST_64BIT { should_compact = decide_on_compacting (condemned_gen_number, fragmentation, should_expand); } #ifdef FEATURE_LOH_COMPACTION loh_compacted_p = FALSE; #endif //FEATURE_LOH_COMPACTION if (condemned_gen_number == max_generation) { #ifdef FEATURE_LOH_COMPACTION if (settings.loh_compaction) { if (plan_loh()) { should_compact = TRUE; get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_loh_forced); loh_compacted_p = TRUE; } } else { if ((heap_number == 0) && (loh_pinned_queue)) { loh_pinned_queue_decay--; if (!loh_pinned_queue_decay) { delete loh_pinned_queue; loh_pinned_queue = 0; } } } if (!loh_compacted_p) #endif //FEATURE_LOH_COMPACTION { GCToEEInterface::DiagWalkUOHSurvivors(__this, loh_generation); sweep_uoh_objects (loh_generation); } GCToEEInterface::DiagWalkUOHSurvivors(__this, poh_generation); sweep_uoh_objects (poh_generation); } else { settings.loh_compaction = FALSE; } #ifdef MULTIPLE_HEAPS new_heap_segment = NULL; if (should_compact && should_expand) gc_policy = policy_expand; else if (should_compact) gc_policy = policy_compact; else gc_policy = policy_sweep; //vote for result of should_compact dprintf (3, ("Joining for compaction decision")); gc_t_join.join(this, gc_join_decide_on_compaction); if (gc_t_join.joined()) { //safe place to delete large heap segments if (condemned_gen_number == max_generation) { for (int i = 0; i < n_heaps; i++) { g_heaps [i]->rearrange_uoh_segments (); } } if (maxgen_size_inc_p && provisional_mode_triggered && !is_bgc_in_progress()) { pm_trigger_full_gc = true; dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2")); } else { #ifndef USE_REGIONS settings.demotion = FALSE; #endif //!USE_REGIONS int pol_max = policy_sweep; #ifdef GC_CONFIG_DRIVEN BOOL is_compaction_mandatory = FALSE; #endif //GC_CONFIG_DRIVEN int i; for (i = 0; i < n_heaps; i++) { if (pol_max < g_heaps[i]->gc_policy) pol_max = policy_compact; #ifndef USE_REGIONS // set the demotion flag is any of the heap has demotion if (g_heaps[i]->demotion_high >= g_heaps[i]->demotion_low) { (g_heaps[i]->get_gc_data_per_heap())->set_mechanism_bit (gc_demotion_bit); settings.demotion = TRUE; } #endif //!USE_REGIONS #ifdef GC_CONFIG_DRIVEN if (!is_compaction_mandatory) { int compact_reason = (g_heaps[i]->get_gc_data_per_heap())->get_mechanism (gc_heap_compact); if (compact_reason >= 0) { if (gc_heap_compact_reason_mandatory_p[compact_reason]) is_compaction_mandatory = 
TRUE; } } #endif //GC_CONFIG_DRIVEN } #ifdef GC_CONFIG_DRIVEN if (!is_compaction_mandatory) { // If compaction is not mandatory we can feel free to change it to a sweeping GC. // Note that we may want to change this to only checking every so often instead of every single GC. if (should_do_sweeping_gc (pol_max >= policy_compact)) { pol_max = policy_sweep; } else { if (pol_max == policy_sweep) pol_max = policy_compact; } } #endif //GC_CONFIG_DRIVEN for (i = 0; i < n_heaps; i++) { if (pol_max > g_heaps[i]->gc_policy) g_heaps[i]->gc_policy = pol_max; #ifndef USE_REGIONS //get the segment while we are serialized if (g_heaps[i]->gc_policy == policy_expand) { g_heaps[i]->new_heap_segment = g_heaps[i]->soh_get_segment_to_expand(); if (!g_heaps[i]->new_heap_segment) { set_expand_in_full_gc (condemned_gen_number); //we are out of memory, cancel the expansion g_heaps[i]->gc_policy = policy_compact; } } #endif //!USE_REGIONS } BOOL is_full_compacting_gc = FALSE; if ((gc_policy >= policy_compact) && (condemned_gen_number == max_generation)) { full_gc_counts[gc_type_compacting]++; is_full_compacting_gc = TRUE; } for (i = 0; i < n_heaps; i++) { //copy the card and brick tables if (g_gc_card_table!= g_heaps[i]->card_table) { g_heaps[i]->copy_brick_card_table(); } if (is_full_compacting_gc) { g_heaps[i]->loh_alloc_since_cg = 0; } } } #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { gc_time_info[time_sweep] = GetHighPrecisionTimeStamp(); gc_time_info[time_plan] = gc_time_info[time_sweep] - gc_time_info[time_plan]; } #endif //FEATURE_EVENT_TRACE dprintf(3, ("Starting all gc threads after compaction decision")); gc_t_join.restart(); } should_compact = (gc_policy >= policy_compact); should_expand = (gc_policy >= policy_expand); #else //MULTIPLE_HEAPS //safe place to delete large heap segments if (condemned_gen_number == max_generation) { rearrange_uoh_segments (); } if (maxgen_size_inc_p && provisional_mode_triggered && !is_bgc_in_progress()) { pm_trigger_full_gc = true; dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2")); } else { #ifndef USE_REGIONS // for regions it was already set when we set plan_gen_num for regions. settings.demotion = ((demotion_high >= demotion_low) ? 
TRUE : FALSE); if (settings.demotion) get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit); #endif //!USE_REGIONS #ifdef GC_CONFIG_DRIVEN BOOL is_compaction_mandatory = FALSE; int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact); if (compact_reason >= 0) is_compaction_mandatory = gc_heap_compact_reason_mandatory_p[compact_reason]; if (!is_compaction_mandatory) { if (should_do_sweeping_gc (should_compact)) should_compact = FALSE; else should_compact = TRUE; } #endif //GC_CONFIG_DRIVEN if (should_compact && (condemned_gen_number == max_generation)) { full_gc_counts[gc_type_compacting]++; loh_alloc_since_cg = 0; } } #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { gc_time_info[time_sweep] = GetHighPrecisionTimeStamp(); gc_time_info[time_plan] = gc_time_info[time_sweep] - gc_time_info[time_plan]; } #endif //FEATURE_EVENT_TRACE #endif //MULTIPLE_HEAPS if (!pm_trigger_full_gc && pm_stress_on && provisional_mode_triggered) { if ((settings.condemned_generation == (max_generation - 1)) && ((settings.gc_index % 5) == 0) && !is_bgc_in_progress()) { pm_trigger_full_gc = true; } } if (settings.condemned_generation == (max_generation - 1)) { if (provisional_mode_triggered) { if (should_expand) { should_expand = FALSE; dprintf (GTC_LOG, ("h%d in PM cannot expand", heap_number)); } } if (pm_trigger_full_gc) { should_compact = FALSE; dprintf (GTC_LOG, ("h%d PM doing sweeping", heap_number)); } } if (should_compact) { dprintf (2,( "**** Doing Compacting GC ****")); #ifndef USE_REGIONS if (should_expand) { #ifndef MULTIPLE_HEAPS heap_segment* new_heap_segment = soh_get_segment_to_expand(); #endif //!MULTIPLE_HEAPS if (new_heap_segment) { consing_gen = expand_heap(condemned_gen_number, consing_gen, new_heap_segment); } // If we couldn't get a new segment, or we were able to // reserve one but no space to commit, we couldn't // expand heap. if (ephemeral_heap_segment != new_heap_segment) { set_expand_in_full_gc (condemned_gen_number); should_expand = FALSE; } } #endif //!USE_REGIONS generation_allocation_limit (condemned_gen1) = generation_allocation_pointer (condemned_gen1); if ((condemned_gen_number < max_generation)) { generation_allocator (older_gen)->commit_alloc_list_changes(); // Fix the allocation area of the older generation fix_older_allocation_area (older_gen); #ifdef FEATURE_EVENT_TRACE if (record_fl_info_p) { // For plugs allocated in condemned we kept track of each one but only fire the // event for buckets with non zero items. uint16_t non_zero_buckets = 0; for (uint16_t bucket_index = 0; bucket_index < NUM_GEN2_ALIST; bucket_index++) { if (bucket_info[bucket_index].count != 0) { if (bucket_index != non_zero_buckets) { bucket_info[non_zero_buckets].set (bucket_index, bucket_info[bucket_index].count, bucket_info[bucket_index].size); } else { bucket_info[bucket_index].index = bucket_index; } non_zero_buckets++; } } if (non_zero_buckets) { FIRE_EVENT(GCFitBucketInfo, (uint16_t)etw_bucket_kind::plugs_in_condemned, recorded_fl_info_size, non_zero_buckets, (uint32_t)(sizeof (etw_bucket_info)), (void *)bucket_info); init_bucket_info(); } // We want to get an idea of the sizes of free items in the top 25% of the free list // for gen2 (to be accurate - we stop as soon as the size we count exceeds 25%. This // is just so that if we have a really big free item we will still count that one). // The idea is we want to see if they all in a few big ones or many smaller ones? 
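// (As a concrete example: with 100mb of gen2 free list space, max_size_to_count below comes out
// to 25mb, so we keep taking the largest free items until their combined size passes 25mb or we
// have looked at max_etw_item_count of them, whichever happens first.)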
// To limit the amount of time we spend counting, we stop till we have counted the // top percentage, or exceeded max_etw_item_count items. size_t max_size_to_count = generation_free_list_space (older_gen) / 4; non_zero_buckets = generation_allocator (older_gen)->count_largest_items (bucket_info, max_size_to_count, max_etw_item_count, &recorded_fl_info_size); if (non_zero_buckets) { FIRE_EVENT(GCFitBucketInfo, (uint16_t)etw_bucket_kind::largest_fl_items, recorded_fl_info_size, non_zero_buckets, (uint32_t)(sizeof (etw_bucket_info)), (void *)bucket_info); } } #endif //FEATURE_EVENT_TRACE } #ifndef USE_REGIONS assert (generation_allocation_segment (consing_gen) == ephemeral_heap_segment); #endif //!USE_REGIONS GCToEEInterface::DiagWalkSurvivors(__this, true); relocate_phase (condemned_gen_number, first_condemned_address); compact_phase (condemned_gen_number, first_condemned_address, (!settings.demotion && settings.promotion)); fix_generation_bounds (condemned_gen_number, consing_gen); assert (generation_allocation_limit (youngest_generation) == generation_allocation_pointer (youngest_generation)); #ifndef USE_REGIONS if (condemned_gen_number >= (max_generation -1)) { #ifdef MULTIPLE_HEAPS // this needs be serialized just because we have one // segment_standby_list/seg_table for all heaps. We should make it at least // so that when hoarding is not on we don't need this join because // decommitting memory can take a long time. //must serialize on deleting segments gc_t_join.join(this, gc_join_rearrange_segs_compaction); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { uint64_t current_time = GetHighPrecisionTimeStamp(); gc_time_info[time_compact] = current_time - gc_time_info[time_compact]; } #endif //FEATURE_EVENT_TRACE #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { g_heaps[i]->rearrange_heap_segments(TRUE); } #else //MULTIPLE_HEAPS rearrange_heap_segments(TRUE); #endif //MULTIPLE_HEAPS #ifdef MULTIPLE_HEAPS gc_t_join.restart(); #endif //MULTIPLE_HEAPS } if (should_expand) { //fix the start_segment for the ephemeral generations for (int i = 0; i < max_generation; i++) { generation* gen = generation_of (i); generation_start_segment (gen) = ephemeral_heap_segment; generation_allocation_segment (gen) = ephemeral_heap_segment; } } } #endif //!USE_REGIONS { #ifdef FEATURE_PREMORTEM_FINALIZATION finalize_queue->UpdatePromotedGenerations (condemned_gen_number, (!settings.demotion && settings.promotion)); #endif // FEATURE_PREMORTEM_FINALIZATION #ifdef MULTIPLE_HEAPS dprintf(3, ("Joining after end of compaction")); gc_t_join.join(this, gc_join_adjust_handle_age_compact); if (gc_t_join.joined()) { #endif //MULTIPLE_HEAPS #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p && (condemned_gen_number < (max_generation -1))) { uint64_t current_time = GetHighPrecisionTimeStamp(); gc_time_info[time_compact] = current_time - gc_time_info[time_compact]; } #endif //FEATURE_EVENT_TRACE #ifdef MULTIPLE_HEAPS //join all threads to make sure they are synchronized dprintf(3, ("Restarting after Promotion granted")); gc_t_join.restart(); } #endif //MULTIPLE_HEAPS ScanContext sc; sc.thread_number = heap_number; sc.promotion = FALSE; sc.concurrent = FALSE; // new generations bounds are set can call this guy if (settings.promotion && !settings.demotion) { dprintf (2, ("Promoting EE roots for gen %d", condemned_gen_number)); GCScan::GcPromotionsGranted(condemned_gen_number, max_generation, &sc); } else if (settings.demotion) { dprintf (2, 
("Demoting EE roots for gen %d", condemned_gen_number)); GCScan::GcDemote (condemned_gen_number, max_generation, &sc); } } { reset_pinned_queue_bos(); #ifndef USE_REGIONS unsigned int gen_number = min (max_generation, 1 + condemned_gen_number); generation* gen = generation_of (gen_number); uint8_t* low = generation_allocation_start (generation_of (gen_number-1)); uint8_t* high = heap_segment_allocated (ephemeral_heap_segment); #endif //!USE_REGIONS while (!pinned_plug_que_empty_p()) { mark* m = pinned_plug_of (deque_pinned_plug()); size_t len = pinned_len (m); uint8_t* arr = (pinned_plug (m) - len); dprintf(3,("free [%Ix %Ix[ pin", (size_t)arr, (size_t)arr + len)); if (len != 0) { assert (len >= Align (min_obj_size)); make_unused_array (arr, len); // fix fully contained bricks + first one // if the array goes beyond the first brick size_t start_brick = brick_of (arr); size_t end_brick = brick_of (arr + len); if (end_brick != start_brick) { dprintf (3, ("Fixing bricks [%Ix, %Ix[ to point to unused array %Ix", start_brick, end_brick, (size_t)arr)); set_brick (start_brick, arr - brick_address (start_brick)); size_t brick = start_brick+1; while (brick < end_brick) { set_brick (brick, start_brick - brick); brick++; } } #ifdef USE_REGIONS int gen_number = object_gennum_plan (arr); generation* gen = generation_of (gen_number); #else //when we take an old segment to make the new //ephemeral segment. we can have a bunch of //pinned plugs out of order going to the new ephemeral seg //and then the next plugs go back to max_generation if ((heap_segment_mem (ephemeral_heap_segment) <= arr) && (heap_segment_reserved (ephemeral_heap_segment) > arr)) { while ((low <= arr) && (high > arr)) { gen_number--; assert ((gen_number >= 1) || (demotion_low != MAX_PTR) || settings.demotion || !settings.promotion); dprintf (3, ("new free list generation %d", gen_number)); gen = generation_of (gen_number); if (gen_number >= 1) low = generation_allocation_start (generation_of (gen_number-1)); else low = high; } } else { dprintf (3, ("new free list generation %d", max_generation)); gen_number = max_generation; gen = generation_of (gen_number); } #endif //USE_REGIONS dprintf(3,("h%d threading %Ix (%Id) before pin in gen %d", heap_number, arr, len, gen_number)); thread_gap (arr, len, gen); add_gen_free (gen_number, len); } } } clear_gen1_cards(); } else { //force promotion for sweep settings.promotion = TRUE; settings.compaction = FALSE; #ifdef USE_REGIONS // This should be set for segs too actually. We should always reset demotion // if we sweep. 
settings.demotion = FALSE; #endif //USE_REGIONS ScanContext sc; sc.thread_number = heap_number; sc.promotion = FALSE; sc.concurrent = FALSE; dprintf (2, ("**** Doing Mark and Sweep GC****")); if ((condemned_gen_number < max_generation)) { #ifdef FREE_USAGE_STATS memcpy (older_gen->gen_free_spaces, r_older_gen_free_space, sizeof (r_older_gen_free_space)); #endif //FREE_USAGE_STATS generation_allocator (older_gen)->copy_from_alloc_list (r_free_list); generation_free_list_space (older_gen) = r_free_list_space; generation_free_obj_space (older_gen) = r_free_obj_space; #ifdef DOUBLY_LINKED_FL if (condemned_gen_number == (max_generation - 1)) { dprintf (2, ("[h%d] no undo, FL %Id-%Id -> %Id, FO %Id+%Id=%Id", heap_number, generation_free_list_space (older_gen), gen2_removed_no_undo, (generation_free_list_space (older_gen) - gen2_removed_no_undo), generation_free_obj_space (older_gen), gen2_removed_no_undo, (generation_free_obj_space (older_gen) + gen2_removed_no_undo))); generation_free_list_space (older_gen) -= gen2_removed_no_undo; generation_free_obj_space (older_gen) += gen2_removed_no_undo; } #endif //DOUBLY_LINKED_FL generation_free_list_allocated (older_gen) = r_older_gen_free_list_allocated; generation_end_seg_allocated (older_gen) = r_older_gen_end_seg_allocated; generation_condemned_allocated (older_gen) = r_older_gen_condemned_allocated; generation_sweep_allocated (older_gen) += dd_survived_size (dynamic_data_of (condemned_gen_number)); generation_allocation_limit (older_gen) = r_allocation_limit; generation_allocation_pointer (older_gen) = r_allocation_pointer; generation_allocation_context_start_region (older_gen) = r_allocation_start_region; generation_allocation_segment (older_gen) = r_allocation_segment; #ifdef USE_REGIONS if (older_gen->gen_num == max_generation) { check_seg_gen_num (r_allocation_segment); } #endif //USE_REGIONS } if ((condemned_gen_number < max_generation)) { // Fix the allocation area of the older generation fix_older_allocation_area (older_gen); } GCToEEInterface::DiagWalkSurvivors(__this, false); make_free_lists (condemned_gen_number); size_t total_recovered_sweep_size = recover_saved_pinned_info(); if (total_recovered_sweep_size > 0) { generation_free_obj_space (generation_of (max_generation)) -= total_recovered_sweep_size; dprintf (2, ("h%d: deduct %Id for pin, fo->%Id", heap_number, total_recovered_sweep_size, generation_free_obj_space (generation_of (max_generation)))); } #ifdef FEATURE_PREMORTEM_FINALIZATION if (!special_sweep_p) { finalize_queue->UpdatePromotedGenerations (condemned_gen_number, TRUE); } #endif // FEATURE_PREMORTEM_FINALIZATION #ifdef MULTIPLE_HEAPS dprintf(3, ("Joining after end of sweep")); gc_t_join.join(this, gc_join_adjust_handle_age_sweep); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { #ifdef FEATURE_EVENT_TRACE if (informational_event_enabled_p) { uint64_t current_time = GetHighPrecisionTimeStamp(); gc_time_info[time_sweep] = current_time - gc_time_info[time_sweep]; } #endif //FEATURE_EVENT_TRACE if (!special_sweep_p) { GCScan::GcPromotionsGranted(condemned_gen_number, max_generation, &sc); } #ifndef USE_REGIONS if (condemned_gen_number >= (max_generation -1)) { #ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { g_heaps[i]->rearrange_heap_segments(FALSE); } #else rearrange_heap_segments(FALSE); #endif //MULTIPLE_HEAPS } #endif //!USE_REGIONS #ifdef MULTIPLE_HEAPS //join all threads to make sure they are synchronized dprintf(3, ("Restarting after Promotion granted")); gc_t_join.restart(); #endif //MULTIPLE_HEAPS } if 
(!special_sweep_p) { clear_gen1_cards(); } } //verify_partial(); } #ifdef _PREFAST_ #pragma warning(pop) #endif //_PREFAST_ /***************************** Called after compact phase to fix all generation gaps ********************************/ void gc_heap::fix_generation_bounds (int condemned_gen_number, generation* consing_gen) { #ifndef _DEBUG UNREFERENCED_PARAMETER(consing_gen); #endif //_DEBUG int gen_number = condemned_gen_number; dprintf (2, ("---- thread regions gen%d GC ----", gen_number)); #ifdef USE_REGIONS // For ephemeral GCs, we handle up till the generation_allocation_segment as that's the last one we // changed in the older gen. if (settings.promotion && (condemned_gen_number < max_generation)) { int older_gen_number = condemned_gen_number + 1; generation* older_gen = generation_of (older_gen_number); heap_segment* last_alloc_region = generation_allocation_segment (older_gen); dprintf (REGIONS_LOG, ("fix till we see alloc region which is %Ix", heap_segment_mem (last_alloc_region))); heap_segment* region = heap_segment_rw (generation_start_segment (older_gen)); while (region) { heap_segment_allocated (region) = heap_segment_plan_allocated (region); if (region == last_alloc_region) break; region = heap_segment_next (region); } } thread_final_regions (true); ephemeral_heap_segment = generation_start_segment (generation_of (0)); alloc_allocated = heap_segment_allocated (ephemeral_heap_segment); #else //USE_REGIONS assert (generation_allocation_segment (consing_gen) == ephemeral_heap_segment); int bottom_gen = 0; while (gen_number >= bottom_gen) { generation* gen = generation_of (gen_number); dprintf(3,("Fixing generation pointers for %Ix", gen_number)); if ((gen_number < max_generation) && ephemeral_promotion) { size_t saved_eph_start_size = saved_ephemeral_plan_start_size[gen_number]; make_unused_array (saved_ephemeral_plan_start[gen_number], saved_eph_start_size); generation_free_obj_space (generation_of (max_generation)) += saved_eph_start_size; dprintf (2, ("[h%d] EP %Ix(%Id)", heap_number, saved_ephemeral_plan_start[gen_number], saved_ephemeral_plan_start_size[gen_number])); } reset_allocation_pointers (gen, generation_plan_allocation_start (gen)); make_unused_array (generation_allocation_start (gen), generation_plan_allocation_start_size (gen)); dprintf(3,(" start %Ix", (size_t)generation_allocation_start (gen))); gen_number--; } #ifdef MULTIPLE_HEAPS if (ephemeral_promotion) { //we are creating a generation fault. set the cards. // and we are only doing this for multiple heaps because in the single heap scenario the // new ephemeral generations will be empty and there'll be no need to set cards for the // old ephemeral generations that got promoted into max_generation. 
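// The range we card below runs from the saved gen1 plan start up to the plan allocated of the
// segment it lives on; dirtying every card in that range is conservative but cheap, and it makes
// sure any old to young references in the space we just promoted get looked at by the next
// ephemeral GC's card scan.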
ptrdiff_t delta = 0; heap_segment* old_ephemeral_seg = seg_mapping_table_segment_of (saved_ephemeral_plan_start[max_generation-1]); assert (in_range_for_segment (saved_ephemeral_plan_start[max_generation-1], old_ephemeral_seg)); size_t end_card = card_of (align_on_card (heap_segment_plan_allocated (old_ephemeral_seg))); size_t card = card_of (saved_ephemeral_plan_start[max_generation-1]); while (card != end_card) { set_card (card); card++; } } #endif //MULTIPLE_HEAPS #endif //!USE_REGIONS { alloc_allocated = heap_segment_plan_allocated(ephemeral_heap_segment); //reset the allocated size #ifdef _DEBUG uint8_t* start = get_soh_start_object (ephemeral_heap_segment, youngest_generation); if (settings.promotion && !settings.demotion) { assert ((start + get_soh_start_obj_len (start)) == heap_segment_plan_allocated(ephemeral_heap_segment)); } #endif //_DEBUG heap_segment_allocated(ephemeral_heap_segment)= heap_segment_plan_allocated(ephemeral_heap_segment); } } #ifndef USE_REGIONS uint8_t* gc_heap::generation_limit (int gen_number) { if (settings.promotion) { if (gen_number <= 1) return heap_segment_reserved (ephemeral_heap_segment); else return generation_allocation_start (generation_of ((gen_number - 2))); } else { if (gen_number <= 0) return heap_segment_reserved (ephemeral_heap_segment); else return generation_allocation_start (generation_of ((gen_number - 1))); } } #endif //!USE_REGIONS BOOL gc_heap::ensure_gap_allocation (int condemned_gen_number) { #ifndef USE_REGIONS uint8_t* start = heap_segment_allocated (ephemeral_heap_segment); size_t size = Align (min_obj_size)*(condemned_gen_number+1); assert ((start + size) <= heap_segment_reserved (ephemeral_heap_segment)); if ((start + size) > heap_segment_committed (ephemeral_heap_segment)) { if (!grow_heap_segment (ephemeral_heap_segment, start + size)) { return FALSE; } } #endif //USE_REGIONS return TRUE; } uint8_t* gc_heap::allocate_at_end (size_t size) { uint8_t* start = heap_segment_allocated (ephemeral_heap_segment); size = Align (size); uint8_t* result = start; // only called to allocate a min obj so can't overflow here. assert ((start + size) <= heap_segment_reserved (ephemeral_heap_segment)); //ensure_gap_allocation took care of it assert ((start + size) <= heap_segment_committed (ephemeral_heap_segment)); heap_segment_allocated (ephemeral_heap_segment) += size; return result; } #ifdef USE_REGIONS // Find the first non empty region and also does the following in the process - // + decommit end of region if it's not a gen0 region; // + set the region gen_num to the new one; // // For empty regions, we always return empty regions to free unless it's a gen // start region. Note that I'm returning gen0 empty regions as well, however, // returning a region to free does not decommit. // // If this is called for a compacting GC, we know we always take the planned generation // on the region (and set the new allocated); else this is called for sweep in which case // it's more complicated - // // + if we are in the special sweep mode, we don't change the old gen number at all // + if we are not in special sweep we need to promote all regions, including the SIP ones // because we make the assumption that this is the case for sweep for handles. heap_segment* gc_heap::find_first_valid_region (heap_segment* region, bool compact_p) { check_seg_gen_num (generation_allocation_segment (generation_of (max_generation))); dprintf (REGIONS_LOG, (" FFVR region %Ix(%Ix), gen%d", (size_t)region, (region ? heap_segment_mem (region) : 0), (region ? 
heap_segment_gen_num (region) : 0))); if (!region) return 0; heap_segment* current_region = region; do { int gen_num = heap_segment_gen_num (current_region); int plan_gen_num = -1; if (compact_p) { assert (settings.compaction); plan_gen_num = heap_segment_plan_gen_num (current_region); dprintf (REGIONS_LOG, (" gen%d->%d", gen_num, plan_gen_num)); } else { plan_gen_num = (special_sweep_p ? gen_num : get_plan_gen_num (gen_num)); dprintf (REGIONS_LOG, (" gen%d->%d, special_sweep_p %d, swept_in_plan %d", gen_num, plan_gen_num, (int)special_sweep_p, (int)heap_segment_swept_in_plan (current_region))); } uint8_t* allocated = (compact_p ? heap_segment_plan_allocated (current_region) : heap_segment_allocated (current_region)); if (heap_segment_mem (current_region) == allocated) { heap_segment* region_to_delete = current_region; current_region = heap_segment_next (current_region); return_free_region (region_to_delete); dprintf (REGIONS_LOG, (" h%d gen%d return region %Ix to free, current->%Ix(%Ix)", heap_number, gen_num, heap_segment_mem (region_to_delete), current_region, (current_region ? heap_segment_mem (current_region) : 0))); if (!current_region) return 0; } else { if (compact_p) { dprintf (REGIONS_LOG, (" gen%d setting region %Ix alloc %Ix to plan %Ix", gen_num, heap_segment_mem (current_region), heap_segment_allocated (current_region), heap_segment_plan_allocated (current_region))); if (heap_segment_swept_in_plan (current_region)) { assert (heap_segment_allocated (current_region) == heap_segment_plan_allocated (current_region)); } else { heap_segment_allocated (current_region) = heap_segment_plan_allocated (current_region); } } else { // Set this so we keep plan gen and gen the same. set_region_plan_gen_num (current_region, plan_gen_num); } if (gen_num != 0) { dprintf (REGIONS_LOG, (" gen%d decommit end of region %Ix(%Ix)", gen_num, current_region, heap_segment_mem (current_region))); decommit_heap_segment_pages (current_region, 0); } dprintf (REGIONS_LOG, (" set region %Ix(%Ix) gen num to %d", current_region, heap_segment_mem (current_region), plan_gen_num)); set_region_gen_num (current_region, plan_gen_num); break; } } while (current_region); assert (current_region); if (heap_segment_swept_in_plan (current_region)) { int gen_num = heap_segment_gen_num (current_region); dprintf (REGIONS_LOG, ("threading SIP region %Ix surv %Id onto gen%d", heap_segment_mem (current_region), heap_segment_survived (current_region), gen_num)); generation* gen = generation_of (gen_num); generation_allocator (gen)->thread_sip_fl (current_region); generation_free_list_space (gen) += heap_segment_free_list_size (current_region); generation_free_obj_space (gen) += heap_segment_free_obj_size (current_region); } // Take this opportunity to make sure all the regions left with flags only for this GC are reset. heap_segment_swept_in_plan (current_region) = false; current_region->flags &= ~heap_segment_flags_demoted; return current_region; } void gc_heap::thread_final_regions (bool compact_p) { for (int i = 0; i < max_generation; i++) { if (reserved_free_regions_sip[i]) { return_free_region (reserved_free_regions_sip[i]); } } int condemned_gen_number = settings.condemned_generation; generation_region_info generation_final_regions[max_generation + 1]; memset (generation_final_regions, 0, sizeof (generation_final_regions)); // Step 1: we initialize all the regions for generations we are not condemning with their // current head and tail as we know these regions will for sure exist. 
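// (generation_final_regions[] is just a scratch array of head/tail pairs, one per generation;
// steps 2-4 below fill it in and then publish the result back as each generation's real start
// and tail regions.)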
for (int gen_idx = max_generation; gen_idx > condemned_gen_number; gen_idx--) { generation* gen = generation_of (gen_idx); // Note this needs to be the first rw region as we will not be changing any ro regions and // we will work on thread rw regions here. generation_final_regions[gen_idx].head = heap_segment_rw (generation_start_segment (gen)); generation_final_regions[gen_idx].tail = generation_tail_region (gen); } // Step 2: for each region in the condemned generations, we thread it onto its planned generation // in our generation_final_regions array. for (int gen_idx = condemned_gen_number; gen_idx >= 0; gen_idx--) { heap_segment* current_region = heap_segment_rw (generation_start_segment (generation_of (gen_idx))); dprintf (REGIONS_LOG, ("gen%d start from %Ix", gen_idx, heap_segment_mem (current_region))); while ((current_region = find_first_valid_region (current_region, compact_p))) { assert (!compact_p || (heap_segment_plan_gen_num (current_region) == heap_segment_gen_num (current_region))); int new_gen_num = heap_segment_plan_gen_num (current_region); generation* new_gen = generation_of (new_gen_num); heap_segment* next_region = heap_segment_next (current_region); if (generation_final_regions[new_gen_num].head) { assert (generation_final_regions[new_gen_num].tail); // The new gen already exists, just thread this region onto it. dprintf (REGIONS_LOG, ("gen%d exists, tail region %Ix next -> %Ix", new_gen_num, heap_segment_mem (generation_final_regions[new_gen_num].tail), heap_segment_mem (current_region))); heap_segment_next (generation_final_regions[new_gen_num].tail) = current_region; generation_final_regions[new_gen_num].tail = current_region; } else { generation_final_regions[new_gen_num].head = current_region; generation_final_regions[new_gen_num].tail = current_region; } current_region = next_region; } } // Step 3: all the tail regions' next needs to be set to 0. for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++) { generation* gen = generation_of (gen_idx); if (generation_final_regions[gen_idx].tail) { heap_segment_next (generation_final_regions[gen_idx].tail) = 0; //if (heap_segment_next (generation_final_regions[gen_idx].tail) != 0) //{ // dprintf (REGIONS_LOG, ("tail->next is %Ix", // heap_segment_next (generation_final_regions[gen_idx].tail))); // GCToOSInterface::DebugBreak(); //} } } // Step 4: if a generation doesn't have any regions, we need to get a new one for it; // otherwise we just set the head region as the start region for that generation. 
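// Note that only the condemned generations get their start region re-threaded and their
// allocation pointers reset below; for the generations we didn't condemn, step 1 already
// captured the existing head and we only need to update the tail.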
for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++) { bool condemned_p = (gen_idx <= condemned_gen_number); assert (condemned_p || generation_final_regions[gen_idx].head); generation* gen = generation_of (gen_idx); heap_segment* start_region = 0; if (generation_final_regions[gen_idx].head) { if (condemned_p) { start_region = generation_final_regions[gen_idx].head; thread_start_region (gen, start_region); } generation_tail_region (gen) = generation_final_regions[gen_idx].tail; dprintf (REGIONS_LOG, ("setting gen%d start %Ix, tail %Ix", gen_idx, heap_segment_mem (heap_segment_rw (generation_start_segment (gen))), heap_segment_mem (generation_tail_region (gen)))); } else { start_region = get_free_region (gen_idx); thread_start_region (gen, start_region); dprintf (REGIONS_LOG, ("creating new gen%d at %Ix", gen_idx, heap_segment_mem (start_region))); } if (condemned_p) { uint8_t* gen_start = heap_segment_mem (start_region); reset_allocation_pointers (gen, gen_start); } } verify_regions (true); } void gc_heap::thread_start_region (generation* gen, heap_segment* region) { heap_segment* prev_region = generation_tail_ro_region (gen); if (prev_region) { heap_segment_next (prev_region) = region; dprintf (REGIONS_LOG,("gen%d tail ro %Ix(%Ix) next -> %Ix(%Ix)", gen->gen_num, (size_t)prev_region, heap_segment_mem (prev_region), (size_t)region, heap_segment_mem (region))); } else { generation_start_segment (gen) = region; dprintf (REGIONS_LOG, ("start region of gen%d -> %Ix(%Ix)", gen->gen_num, (size_t)region, heap_segment_mem (region))); } dprintf (REGIONS_LOG, ("tail region of gen%d -> %Ix(%Ix)", gen->gen_num, (size_t)region, heap_segment_mem (region))); generation_tail_region (gen) = region; } heap_segment* gc_heap::get_new_region (int gen_number, size_t size) { heap_segment* new_region = get_free_region (gen_number, size); if (new_region) { switch (gen_number) { default: assert ((new_region->flags & (heap_segment_flags_loh | heap_segment_flags_poh)) == 0); break; case loh_generation: new_region->flags |= heap_segment_flags_loh; break; case poh_generation: new_region->flags |= heap_segment_flags_poh; break; } generation* gen = generation_of (gen_number); heap_segment_next (generation_tail_region (gen)) = new_region; generation_tail_region (gen) = new_region; verify_regions (gen_number, false); } return new_region; } heap_segment* gc_heap::allocate_new_region (gc_heap* hp, int gen_num, bool uoh_p, size_t size) { uint8_t* start = 0; uint8_t* end = 0; // size parameter should be non-zero only for large regions assert (uoh_p || size == 0); // REGIONS TODO: allocate POH regions on the right bool allocated_p = (uoh_p ? 
global_region_allocator.allocate_large_region (&start, &end, allocate_forward, size) : global_region_allocator.allocate_basic_region (&start, &end)); if (!allocated_p) { return 0; } heap_segment* res = make_heap_segment (start, (end - start), hp, gen_num); dprintf (REGIONS_LOG, ("got a new region %Ix %Ix->%Ix", (size_t)res, start, end)); return res; } void gc_heap::update_start_tail_regions (generation* gen, heap_segment* region_to_delete, heap_segment* prev_region, heap_segment* next_region) { if (region_to_delete == heap_segment_rw (generation_start_segment (gen))) { assert (!prev_region); heap_segment* tail_ro_region = generation_tail_ro_region (gen); if (tail_ro_region) { heap_segment_next (tail_ro_region) = next_region; dprintf (REGIONS_LOG, ("gen%d tail ro %Ix(%Ix) next updated to %Ix(%Ix)", gen->gen_num, (size_t)tail_ro_region, heap_segment_mem (tail_ro_region), (size_t)next_region, heap_segment_mem (next_region))); } else { generation_start_segment (gen) = next_region; dprintf (REGIONS_LOG, ("start region of gen%d updated to %Ix(%Ix)", gen->gen_num, (size_t)next_region, heap_segment_mem (next_region))); } } if (region_to_delete == generation_tail_region (gen)) { assert (!next_region); generation_tail_region (gen) = prev_region; dprintf (REGIONS_LOG, ("tail region of gen%d updated to %Ix(%Ix)", gen->gen_num, (size_t)prev_region, heap_segment_mem (prev_region))); } verify_regions (false); } // There's one complication with deciding whether we can make a region SIP or not - if the plan_gen_num of // a generation is not maxgen, and if we want to make every region in that generation maxgen, we need to // make sure we can get a new region for this generation so we can guarantee each generation has at least // one region. If we can't get a new region, we need to make sure we leave at least one region in that gen // to guarantee our invariant. // // This new region we get needs to be temporarily recorded instead of being on the free_regions list because // we can't use it for other purposes. inline bool gc_heap::should_sweep_in_plan (heap_segment* region) { bool sip_p = false; int gen_num = get_region_gen_num (region); int new_gen_num = get_plan_gen_num (gen_num); heap_segment_swept_in_plan (region) = false; dprintf (REGIONS_LOG, ("checking if region %Ix should be SIP", heap_segment_mem (region))); #ifdef STRESS_REGIONS // Only do this for testing or it would keep too much swept. 
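// The stress path is currently disabled (note the if (0) below); when enabled it forces every
// sip_seg_interval-th condemned region to be swept in plan, and every sip_seg_maxgen_interval-th
// one to be swept in plan straight into maxgen, regardless of how much actually survived.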
if (0) { num_condemned_regions++; if ((num_condemned_regions % sip_seg_interval) == 0) { set_region_plan_gen_num (region, new_gen_num); sip_p = true; } if ((num_condemned_regions % sip_seg_maxgen_interval) == 0) { set_region_plan_gen_num (region, max_generation); sip_maxgen_regions_per_gen[gen_num]++; sip_p = true; } } else #endif //STRESS_REGIONS { size_t basic_region_size = (size_t)1 << min_segment_size_shr; assert (heap_segment_gen_num (region) == heap_segment_plan_gen_num (region)); int surv_ratio = (int)(((double)heap_segment_survived (region) * 100.0) / (double)basic_region_size); dprintf (2222, ("SSIP: region %Ix surv %Id / %Id = %d%%(%d)", heap_segment_mem (region), heap_segment_survived (region), basic_region_size, surv_ratio, sip_surv_ratio_th)); if (surv_ratio >= sip_surv_ratio_th) { set_region_plan_gen_num (region, new_gen_num); sip_p = true; } if (new_gen_num < max_generation) { int old_card_surv_ratio = (int)(((double)heap_segment_old_card_survived (region) * 100.0) / (double)basic_region_size); dprintf (2222, ("SSIP: region %Ix old card surv %Id / %Id = %d%%(%d)", heap_segment_mem (region), heap_segment_old_card_survived (region), basic_region_size, old_card_surv_ratio, sip_surv_ratio_th)); if (old_card_surv_ratio >= sip_old_card_surv_ratio_th) { set_region_plan_gen_num (region, max_generation); sip_maxgen_regions_per_gen[gen_num]++; sip_p = true; } } } if (sip_p) { num_sip_regions++; if ((new_gen_num < max_generation) && (sip_maxgen_regions_per_gen[gen_num] == regions_per_gen[gen_num])) { assert (get_region_gen_num (region) == 0); assert (new_gen_num < max_generation); heap_segment* reserved_free_region = get_free_region (gen_num); if (reserved_free_region) { dprintf (REGIONS_LOG, ("all regions in gen%d -> SIP 2, get a new region for it %Ix", gen_num, heap_segment_mem (reserved_free_region))); reserved_free_regions_sip[gen_num] = reserved_free_region; } else { // If we cannot get another region, simply revert our decision. sip_maxgen_regions_per_gen[gen_num]--; set_region_plan_gen_num (region, new_gen_num); } } } dprintf (REGIONS_LOG, ("region %Ix %s SIP", heap_segment_mem (region), (sip_p ? "is" : "is not"))); return sip_p; } void heap_segment::thread_free_obj (uint8_t* obj, size_t s) { //dprintf (REGIONS_LOG, ("threading SIP free obj %Ix-%Ix(%Id)", obj, (obj + s), s)); if (s >= min_free_list) { free_list_slot (obj) = 0; if (free_list_head) { assert (free_list_tail); free_list_slot (free_list_tail) = obj; } else { free_list_head = obj; } free_list_tail = obj; free_list_size += s; } else { free_obj_size += s; } } // For a region that we sweep in plan, we need to do the following - // // + set the swept_in_plan_p for this region. // + update allocated for this region. // + build bricks. // + build free objects. We keep a list of them which will then be threaded onto the appropriate generation's // free list. This can be optimized, both gen0 and gen2 GCs are easy to handle - need to see how easy it is // to handle gen1 GCs as the commit/repair there is complicated. // // in plan_phase we also need to make sure to not call update_brick_table when handling end of this region, // and the plan gen num is set accordingly. 
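// Concretely, the loop below walks the region from heap_segment_mem to heap_segment_allocated:
// marked objects just get their mark/pinned bits cleared in place (nothing moves), each unmarked
// gap in the middle becomes a free object (and is threaded onto the region's own free list if it
// is at least min_free_list), and bricks are fixed up as we cross them. At the end, allocated is
// pulled back to the end of the last live object and plan_allocated is set to match.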
void gc_heap::sweep_region_in_plan (heap_segment* region, BOOL use_mark_list, uint8_t**& mark_list_next, uint8_t** mark_list_index) { heap_segment_swept_in_plan (region) = true; region->init_free_list(); uint8_t* x = heap_segment_mem (region); uint8_t* last_marked_obj_start = 0; uint8_t* last_marked_obj_end = 0; uint8_t* end = heap_segment_allocated (region); dprintf (2222, ("h%d region %Ix->%Ix SIP, gen %d->%d, %s mark list(%Ix->%Ix, %Ix->%Ix)", heap_number, x, end, heap_segment_gen_num (region), heap_segment_plan_gen_num (region), (use_mark_list ? "using" : "not using"), (uint8_t*)mark_list_next, (mark_list_next ? *mark_list_next : 0), (uint8_t*)mark_list_index, (mark_list_index ? *mark_list_index : 0))); #ifdef _DEBUG size_t survived = 0; uint8_t* saved_last_unmarked_obj_start = 0; uint8_t* saved_last_unmarked_obj_end = 0; size_t saved_obj_brick = 0; size_t saved_next_obj_brick = 0; #endif //_DEBUG while (x < end) { uint8_t* obj = x; size_t obj_brick = (size_t)obj / brick_size; uint8_t* next_obj = 0; if (marked (obj)) { if (pinned(obj)) { clear_pinned (obj); } clear_marked (obj); size_t s = size (obj); next_obj = obj + Align (s); last_marked_obj_start = obj; last_marked_obj_end = next_obj; #ifdef _DEBUG survived += s; #endif //_DEBUG dprintf (4444, ("M: %Ix-%Ix(%Id)", obj, next_obj, s)); } else { next_obj = find_next_marked (x, end, use_mark_list, mark_list_next, mark_list_index); #ifdef _DEBUG saved_last_unmarked_obj_start = obj; saved_last_unmarked_obj_end = next_obj; #endif //_DEBUG if ((next_obj > obj) && (next_obj != end)) { size_t free_obj_size = next_obj - obj; make_unused_array (obj, free_obj_size); region->thread_free_obj (obj, free_obj_size); dprintf (4444, ("UM threading: %Ix-%Ix(%Id)", obj, next_obj, (next_obj - obj))); } } size_t next_obj_brick = (size_t)next_obj / brick_size; #ifdef _DEBUG saved_obj_brick = obj_brick; saved_next_obj_brick = next_obj_brick; #endif //_DEBUG if (next_obj_brick != obj_brick) { fix_brick_to_highest (obj, next_obj); } x = next_obj; } if (last_marked_obj_start) { // We only need to make sure we fix the brick the last marked object's end is in. // Note this brick could have been fixed already. size_t last_marked_obj_start_b = brick_of (last_marked_obj_start); size_t last_marked_obj_end_b = brick_of (last_marked_obj_end - 1); dprintf (REGIONS_LOG, ("last live obj %Ix(%Ix)-%Ix, fixing its brick(s) %Ix-%Ix", last_marked_obj_start, method_table (last_marked_obj_start), last_marked_obj_end, last_marked_obj_start_b, last_marked_obj_end_b)); if (last_marked_obj_start_b == last_marked_obj_end_b) { set_brick (last_marked_obj_start_b, (last_marked_obj_start - brick_address (last_marked_obj_start_b))); } else { set_brick (last_marked_obj_end_b, (last_marked_obj_start_b - last_marked_obj_end_b)); } } else { last_marked_obj_end = heap_segment_mem (region); } #ifdef _DEBUG size_t region_index = get_basic_region_index_for_address (heap_segment_mem (region)); dprintf (REGIONS_LOG, ("region #%d %Ix survived %Id, %s recorded %Id", region_index, heap_segment_mem (region), survived, ((survived == heap_segment_survived (region)) ? 
"same as" : "diff from"), heap_segment_survived (region))); #ifdef MULTIPLE_HEAPS assert (survived <= (size_t)heap_segment_survived (region)); #else assert (survived == (size_t)heap_segment_survived (region)); #endif //MULTIPLE_HEAPS #endif //_DEBUG assert (last_marked_obj_end); heap_segment_saved_allocated (region) = heap_segment_allocated (region); heap_segment_allocated (region) = last_marked_obj_end; heap_segment_plan_allocated (region) = heap_segment_allocated (region); } inline void gc_heap::check_demotion_helper_sip (uint8_t** pval, int parent_gen_num, uint8_t* parent_loc) { uint8_t* child_object = *pval; if (!child_object) return; int child_object_plan_gen = get_region_plan_gen_num (child_object); if (child_object_plan_gen < parent_gen_num) { set_card (card_of (parent_loc)); } dprintf (3, ("SCS %d, %d", child_object_plan_gen, parent_gen_num)); } heap_segment* gc_heap::relocate_advance_to_non_sip (heap_segment* region) { THREAD_FROM_HEAP; heap_segment* current_region = region; dprintf (REGIONS_LOG, ("Relocate searching for next non SIP, starting from %Ix", (region ? heap_segment_mem (region) : 0))); while (current_region) { if (heap_segment_swept_in_plan (current_region)) { int gen_num = heap_segment_gen_num (current_region); int plan_gen_num = heap_segment_plan_gen_num (current_region); bool use_sip_demotion = (plan_gen_num > get_plan_gen_num (gen_num)); dprintf (REGIONS_LOG, ("region %Ix is SIP, relocating, gen %d, plan gen: %d(supposed to be %d) %s", heap_segment_mem (current_region), gen_num, plan_gen_num, get_plan_gen_num (gen_num), (use_sip_demotion ? "Sd" : "d"))); uint8_t* x = heap_segment_mem (current_region); uint8_t* end = heap_segment_allocated (current_region); // For SIP regions, we go linearly in the region and relocate each object's references. while (x < end) { size_t s = size (x); assert (s > 0); uint8_t* next_obj = x + Align (s); Prefetch (next_obj); if (!(((CObjectHeader*)x)->IsFree())) { //relocate_obj_helper (x, s); if (contain_pointers (x)) { dprintf (3, ("$%Ix$", (size_t)x)); go_through_object_nostart (method_table(x), x, s, pval, { uint8_t* child = *pval; //reloc_survivor_helper (pval); relocate_address (pval THREAD_NUMBER_ARG); if (use_sip_demotion) check_demotion_helper_sip (pval, plan_gen_num, (uint8_t*)pval); else check_demotion_helper (pval, (uint8_t*)pval); if (child) { dprintf (4444, ("SIP %Ix(%Ix)->%Ix->%Ix(%Ix)", x, (uint8_t*)pval, child, *pval, method_table (child))); } }); } check_class_object_demotion (x); } x = next_obj; } } else { int gen_num = heap_segment_gen_num (current_region); int plan_gen_num = heap_segment_plan_gen_num (current_region); dprintf (REGIONS_LOG, ("region %Ix is not SIP, relocating, gen %d, plan gen: %d", heap_segment_mem (current_region), gen_num, plan_gen_num)); return current_region; } current_region = heap_segment_next (current_region); } return 0; } #ifdef STRESS_REGIONS void gc_heap::pin_by_gc (uint8_t* object) { heap_segment* region = region_of (object); HndAssignHandleGC(pinning_handles_for_alloc[ph_index_per_heap], object); dprintf (REGIONS_LOG, ("h%d pinning object at %Ix on eph seg %Ix (ph#%d)", heap_number, object, heap_segment_mem (region), ph_index_per_heap)); ph_index_per_heap++; if (ph_index_per_heap == PINNING_HANDLE_INITIAL_LENGTH) { ph_index_per_heap = 0; } } #endif //STRESS_REGIONS #endif //USE_REGIONS void gc_heap::make_free_lists (int condemned_gen_number) { //Promotion has to happen in sweep case. 
assert (settings.promotion); make_free_args args; int stop_gen_idx = get_stop_generation_index (condemned_gen_number); for (int i = condemned_gen_number; i >= stop_gen_idx; i--) { generation* condemned_gen = generation_of (i); heap_segment* current_heap_segment = get_start_segment (condemned_gen); #ifdef USE_REGIONS if (!current_heap_segment) continue; #endif //USE_REGIONS uint8_t* start_address = get_soh_start_object (current_heap_segment, condemned_gen); size_t current_brick = brick_of (start_address); PREFIX_ASSUME(current_heap_segment != NULL); uint8_t* end_address = heap_segment_allocated (current_heap_segment); size_t end_brick = brick_of (end_address-1); int current_gen_num = i; args.free_list_gen_number = (special_sweep_p ? current_gen_num : get_plan_gen_num (current_gen_num)); args.free_list_gen = generation_of (args.free_list_gen_number); args.highest_plug = 0; #ifdef USE_REGIONS dprintf (REGIONS_LOG, ("starting at gen%d %Ix -> %Ix", i, start_address, end_address)); #else assert (!special_sweep_p); args.current_gen_limit = (((current_gen_num == max_generation)) ? MAX_PTR : (generation_limit (args.free_list_gen_number))); #endif //USE_REGIONS #ifndef USE_REGIONS if ((start_address >= end_address) && (condemned_gen_number < max_generation)) { break; } #endif //!USE_REGIONS while (1) { if ((current_brick > end_brick)) { #ifndef USE_REGIONS if (args.current_gen_limit == MAX_PTR) { //We had an empty segment //need to allocate the generation start generation* gen = generation_of (max_generation); heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); PREFIX_ASSUME(start_seg != NULL); uint8_t* gap = heap_segment_mem (start_seg); generation_allocation_start (gen) = gap; heap_segment_allocated (start_seg) = gap + Align (min_obj_size); make_unused_array (gap, Align (min_obj_size)); reset_allocation_pointers (gen, gap); dprintf (3, ("Start segment empty, fixing generation start of %d to: %Ix", max_generation, (size_t)gap)); args.current_gen_limit = generation_limit (args.free_list_gen_number); } #endif //!USE_REGIONS if (heap_segment_next_non_sip (current_heap_segment)) { current_heap_segment = heap_segment_next_non_sip (current_heap_segment); } else { break; } current_brick = brick_of (heap_segment_mem (current_heap_segment)); end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1); continue; } { int brick_entry = brick_table [ current_brick ]; if ((brick_entry >= 0)) { make_free_list_in_brick (brick_address (current_brick) + brick_entry-1, &args); dprintf(3,("Fixing brick entry %Ix to %Ix", current_brick, (size_t)args.highest_plug)); set_brick (current_brick, (args.highest_plug - brick_address (current_brick))); } else { if ((brick_entry > -32768)) { #ifdef _DEBUG ptrdiff_t offset = brick_of (args.highest_plug) - current_brick; if ((brick_entry != -32767) && (! ((offset == brick_entry)))) { assert ((brick_entry == -1)); } #endif //_DEBUG //init to -1 for faster find_first_object set_brick (current_brick, -1); } } } current_brick++; } } { #ifdef USE_REGIONS check_seg_gen_num (generation_allocation_segment (generation_of (max_generation))); thread_final_regions (false); generation* gen_gen0 = generation_of (0); ephemeral_heap_segment = generation_start_segment (gen_gen0); alloc_allocated = heap_segment_allocated (ephemeral_heap_segment); // Since we didn't compact, we should recalculate the end_gen0_region_space. 
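// (This looks like it simply re-derives how much unused space is left at the tail ends of the
// gen0 regions: sweep settled each region's allocated in place, so the number cannot just be
// carried over from what the plan phase computed.)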
end_gen0_region_space = get_gen0_end_space(); #else //USE_REGIONS int bottom_gen = 0; args.free_list_gen_number--; while (args.free_list_gen_number >= bottom_gen) { uint8_t* gap = 0; generation* gen2 = generation_of (args.free_list_gen_number); gap = allocate_at_end (Align(min_obj_size)); generation_allocation_start (gen2) = gap; reset_allocation_pointers (gen2, gap); dprintf(3,("Fixing generation start of %d to: %Ix", args.free_list_gen_number, (size_t)gap)); PREFIX_ASSUME(gap != NULL); make_unused_array (gap, Align (min_obj_size)); args.free_list_gen_number--; } //reset the allocated size uint8_t* start2 = generation_allocation_start (youngest_generation); alloc_allocated = start2 + Align (size (start2)); #endif //USE_REGIONS } } void gc_heap::make_free_list_in_brick (uint8_t* tree, make_free_args* args) { assert ((tree != NULL)); { int right_node = node_right_child (tree); int left_node = node_left_child (tree); args->highest_plug = 0; if (! (0 == tree)) { if (! (0 == left_node)) { make_free_list_in_brick (tree + left_node, args); } { uint8_t* plug = tree; size_t gap_size = node_gap_size (tree); uint8_t* gap = (plug - gap_size); args->highest_plug = tree; dprintf (3,("plug: %Ix (highest p: %Ix), free %Ix len %Id in %d", plug, args->highest_plug, (size_t)gap, gap_size, args->free_list_gen_number)); #ifdef SHORT_PLUGS if (is_plug_padded (plug)) { dprintf (3, ("%Ix padded", plug)); clear_plug_padded (plug); } #endif //SHORT_PLUGS #ifdef DOUBLY_LINKED_FL // These 2 checks should really just be merged into one. if (is_plug_bgc_mark_bit_set (plug)) { dprintf (3333, ("cbgcm: %Ix", plug)); clear_plug_bgc_mark_bit (plug); } if (is_free_obj_in_compact_bit_set (plug)) { dprintf (3333, ("cfoc: %Ix", plug)); clear_free_obj_in_compact_bit (plug); } #endif //DOUBLY_LINKED_FL #ifndef USE_REGIONS gen_crossing: { if ((args->current_gen_limit == MAX_PTR) || ((plug >= args->current_gen_limit) && ephemeral_pointer_p (plug))) { dprintf(3,(" Crossing Generation boundary at %Ix", (size_t)args->current_gen_limit)); if (!(args->current_gen_limit == MAX_PTR)) { args->free_list_gen_number--; args->free_list_gen = generation_of (args->free_list_gen_number); } dprintf(3,( " Fixing generation start of %d to: %Ix", args->free_list_gen_number, (size_t)gap)); reset_allocation_pointers (args->free_list_gen, gap); args->current_gen_limit = generation_limit (args->free_list_gen_number); if ((gap_size >= (2*Align (min_obj_size)))) { dprintf(3,(" Splitting the gap in two %Id left", gap_size)); make_unused_array (gap, Align(min_obj_size)); gap_size = (gap_size - Align(min_obj_size)); gap = (gap + Align(min_obj_size)); } else { make_unused_array (gap, gap_size); gap_size = 0; } goto gen_crossing; } } #endif //!USE_REGIONS thread_gap (gap, gap_size, args->free_list_gen); add_gen_free (args->free_list_gen->gen_num, gap_size); } if (! 
(0 == right_node)) { make_free_list_in_brick (tree + right_node, args); } } } } void gc_heap::thread_gap (uint8_t* gap_start, size_t size, generation* gen) { #ifndef USE_REGIONS assert (generation_allocation_start (gen)); #endif if ((size > 0)) { #ifndef USE_REGIONS assert ((heap_segment_rw (generation_start_segment (gen)) != ephemeral_heap_segment) || (gap_start > generation_allocation_start (gen))); #endif //USE_REGIONS // The beginning of a segment gap is not aligned assert (size >= Align (min_obj_size)); make_unused_array (gap_start, size, (!settings.concurrent && (gen != youngest_generation)), (gen->gen_num == max_generation)); dprintf (3, ("fr: [%Ix, %Ix[", (size_t)gap_start, (size_t)gap_start+size)); if ((size >= min_free_list)) { generation_free_list_space (gen) += size; generation_allocator (gen)->thread_item (gap_start, size); } else { generation_free_obj_space (gen) += size; } } } void gc_heap::uoh_thread_gap_front (uint8_t* gap_start, size_t size, generation* gen) { #ifndef USE_REGIONS assert (generation_allocation_start (gen)); #endif if (size >= min_free_list) { generation_free_list_space (gen) += size; generation_allocator (gen)->thread_item_front (gap_start, size); } } void gc_heap::make_unused_array (uint8_t* x, size_t size, BOOL clearp, BOOL resetp) { dprintf (3, (ThreadStressLog::gcMakeUnusedArrayMsg(), (size_t)x, (size_t)(x+size))); assert (size >= Align (min_obj_size)); //#if defined (VERIFY_HEAP) && defined (BACKGROUND_GC) // check_batch_mark_array_bits (x, x+size); //#endif //VERIFY_HEAP && BACKGROUND_GC if (resetp) { #ifdef BGC_SERVO_TUNING // Don't do this for servo tuning because it makes it even harder to regulate WS. if (!(bgc_tuning::enable_fl_tuning && bgc_tuning::fl_tuning_triggered)) #endif //BGC_SERVO_TUNING { reset_memory (x, size); } } ((CObjectHeader*)x)->SetFree(size); #ifdef HOST_64BIT #if BIGENDIAN #error "This won't work on big endian platforms" #endif size_t size_as_object = (uint32_t)(size - free_object_base_size) + free_object_base_size; if (size_as_object < size) { // // If the size is more than 4GB, we need to create multiple objects because of // the Array::m_NumComponents is uint32_t and the high 32 bits of unused array // size is ignored in regular object size computation. // uint8_t * tmp = x + size_as_object; size_t remaining_size = size - size_as_object; while (remaining_size > UINT32_MAX) { // Make sure that there will be at least Align(min_obj_size) left size_t current_size = UINT32_MAX - get_alignment_constant (FALSE) - Align (min_obj_size, get_alignment_constant (FALSE)); ((CObjectHeader*)tmp)->SetFree(current_size); remaining_size -= current_size; tmp += current_size; } ((CObjectHeader*)tmp)->SetFree(remaining_size); } #endif if (clearp) clear_card_for_addresses (x, x + Align(size)); } // Clear memory set by make_unused_array. void gc_heap::clear_unused_array (uint8_t* x, size_t size) { // Also clear the sync block *(((PTR_PTR)x)-1) = 0; ((CObjectHeader*)x)->UnsetFree(); #ifdef HOST_64BIT #if BIGENDIAN #error "This won't work on big endian platforms" #endif // The memory could have been cleared in the meantime. We have to mirror the algorithm // from make_unused_array since we cannot depend on the object sizes in memory. 
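// The mirroring matters because a free block bigger than 4gb is laid down as a chain of free
// objects (the unused array's component count is only 32 bits), so we recompute where each piece
// starts purely from the requested size: the first piece is (uint32_t)(size -
// free_object_base_size) + free_object_base_size bytes and every later piece is just under
// UINT32_MAX, exactly the way make_unused_array carved them up above.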
size_t size_as_object = (uint32_t)(size - free_object_base_size) + free_object_base_size; if (size_as_object < size) { uint8_t * tmp = x + size_as_object; size_t remaining_size = size - size_as_object; while (remaining_size > UINT32_MAX) { size_t current_size = UINT32_MAX - get_alignment_constant (FALSE) - Align (min_obj_size, get_alignment_constant (FALSE)); ((CObjectHeader*)tmp)->UnsetFree(); remaining_size -= current_size; tmp += current_size; } ((CObjectHeader*)tmp)->UnsetFree(); } #else UNREFERENCED_PARAMETER(size); #endif } inline uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) { uint8_t* candidate = 0; int cn; while (1) { if (tree < old_address) { if ((cn = node_right_child (tree)) != 0) { assert (candidate < tree); candidate = tree; tree = tree + cn; Prefetch (tree - 8); continue; } else break; } else if (tree > old_address) { if ((cn = node_left_child (tree)) != 0) { tree = tree + cn; Prefetch (tree - 8); continue; } else break; } else break; } if (tree <= old_address) return tree; else if (candidate) return candidate; else return tree; } #ifdef FEATURE_BASICFREEZE bool gc_heap::frozen_object_p (Object* obj) { heap_segment* seg = seg_mapping_table_segment_of ((uint8_t*)obj); return heap_segment_read_only_p (seg); } #endif // FEATURE_BASICFREEZE void gc_heap::relocate_address (uint8_t** pold_address THREAD_NUMBER_DCL) { uint8_t* old_address = *pold_address; #ifdef USE_REGIONS if (!old_address || !should_check_brick_for_reloc (old_address)) { return; } #else //USE_REGIONS if (!((old_address >= gc_low) && (old_address < gc_high))) #ifdef MULTIPLE_HEAPS { UNREFERENCED_PARAMETER(thread); if (old_address == 0) return; gc_heap* hp = heap_of (old_address); if ((hp == this) || !((old_address >= hp->gc_low) && (old_address < hp->gc_high))) return; } #else //MULTIPLE_HEAPS return ; #endif //MULTIPLE_HEAPS #endif //USE_REGIONS // delta translates old_address into address_gc (old_address); size_t brick = brick_of (old_address); int brick_entry = brick_table [ brick ]; uint8_t* new_address = old_address; if (! ((brick_entry == 0))) { retry: { while (brick_entry < 0) { brick = (brick + brick_entry); brick_entry = brick_table [ brick ]; } uint8_t* old_loc = old_address; uint8_t* node = tree_search ((brick_address (brick) + brick_entry-1), old_loc); if ((node <= old_loc)) new_address = (old_address + node_relocation_distance (node)); else { if (node_left_p (node)) { dprintf(3,(" L: %Ix", (size_t)node)); new_address = (old_address + (node_relocation_distance (node) + node_gap_size (node))); } else { brick = brick - 1; brick_entry = brick_table [ brick ]; goto retry; } } } dprintf (4, (ThreadStressLog::gcRelocateReferenceMsg(), pold_address, old_address, new_address)); *pold_address = new_address; return; } #ifdef FEATURE_LOH_COMPACTION if (settings.loh_compaction) { heap_segment* pSegment = seg_mapping_table_segment_of ((uint8_t*)old_address); #ifdef USE_REGIONS // pSegment could be 0 for regions, see comment for is_in_condemned. if (!pSegment) { return; } #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS if (heap_segment_heap (pSegment)->loh_compacted_p) #else if (loh_compacted_p) #endif { size_t flags = pSegment->flags; if ((flags & heap_segment_flags_loh) #ifdef FEATURE_BASICFREEZE && !(flags & heap_segment_flags_readonly) #endif ) { new_address = old_address + loh_node_relocation_distance (old_address); dprintf (4, (ThreadStressLog::gcRelocateRefe