// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // // #Overview // // GC automatically manages memory allocated by managed code. // The design doc for GC can be found at docs/design/coreclr/botr/garbage-collection.md // // This file includes both the code for GC and the allocator. The most common // case for a GC to be triggered is from the allocator code. See // code:#try_allocate_more_space where it calls GarbageCollectGeneration. // // Entry points for the allocator are GCHeap::Alloc* which are called by the // allocation helpers in gcscan.cpp // #include "common.h" #include "gcenv.h" #include "gc.h" #include "gcscan.h" #include "gcdesc.h" #include "softwarewritewatch.h" #include "handletable.h" #include "handletable.inl" #include "gcenv.inl" #include "gceventstatus.h" #include // If FEATURE_INTERPRETER is set, always enable the GC side of FEATURE_CONSERVATIVE_GC #ifdef FEATURE_INTERPRETER #ifndef FEATURE_CONSERVATIVE_GC #define FEATURE_CONSERVATIVE_GC #endif #endif // FEATURE_INTERPRETER #ifdef __INTELLISENSE__ #if defined(FEATURE_SVR_GC) #define SERVER_GC 1 #else // defined(FEATURE_SVR_GC) #ifdef SERVER_GC #undef SERVER_GC #endif #endif // defined(FEATURE_SVR_GC) #endif // __INTELLISENSE__ #if defined(TARGET_AMD64) || defined(TARGET_ARM64) #include "vxsort/do_vxsort.h" #define USE_VXSORT #else #define USE_INTROSORT #endif // TARGET_AMD64 || TARGET_ARM64 #include "introsort.h" #ifdef SERVER_GC namespace SVR { #else // SERVER_GC namespace WKS { #endif // SERVER_GC #include "gcimpl.h" #include "gcpriv.h" #ifdef DACCESS_COMPILE #error this source file should not be compiled with DACCESS_COMPILE! #endif //DACCESS_COMPILE // We just needed a simple random number generator for testing. class gc_rand { public: static uint64_t x; static uint64_t get_rand() { x = (314159269*x+278281) & 0x7FFFFFFF; return x; } // obtain random number in the range 0 .. r-1 static uint64_t get_rand(uint64_t r) { // require r >= 0 uint64_t x = (uint64_t)((get_rand() * r) >> 31); return x; } }; uint64_t gc_rand::x = 0; #if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) BOOL bgc_heap_walk_for_etw_p = FALSE; #endif //BACKGROUND_GC && FEATURE_EVENT_TRACE #define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0)) #define commit_min_th (16*OS_PAGE_SIZE) #define MIN_SOH_CROSS_GEN_REFS (400) #define MIN_LOH_CROSS_GEN_REFS (800) #ifdef SERVER_GC #define partial_size_th 100 #define num_partial_refs 64 #else //SERVER_GC #define partial_size_th 100 #define num_partial_refs 32 #endif //SERVER_GC #define demotion_plug_len_th (6*1024*1024) #ifdef USE_REGIONS // If the survived / region_size is 90+%, we don't compact this region. #define sip_surv_ratio_th (90) // If the survived due to cards from old generations / region_size is 90+%, // we don't compact this region, also we immediately promote it to gen2. #define sip_old_card_surv_ratio_th (90) #endif //USE_REGIONS #ifdef HOST_64BIT #define MARK_STACK_INITIAL_LENGTH 1024 #else #define MARK_STACK_INITIAL_LENGTH 128 #endif // HOST_64BIT #define LOH_PIN_QUEUE_LENGTH 100 #define LOH_PIN_DECAY 10 #define UOH_ALLOCATION_RETRY_MAX_COUNT 2 #define MAX_YP_SPIN_COUNT_UNIT 32768 uint32_t yp_spin_count_unit = 0; uint32_t original_spin_count_unit = 0; size_t loh_size_threshold = LARGE_OBJECT_SIZE; #ifdef GC_CONFIG_DRIVEN int compact_ratio = 0; #endif //GC_CONFIG_DRIVEN #ifdef FEATURE_SVR_GC bool g_built_with_svr_gc = true; #else bool g_built_with_svr_gc = false; #endif // FEATURE_SVR_GC #if defined(BUILDENV_DEBUG) uint8_t g_build_variant = 0; #elif defined(BUILDENV_CHECKED) uint8_t g_build_variant = 1; #else uint8_t g_build_variant = 2; #endif //BUILDENV_DEBUG VOLATILE(int32_t) g_no_gc_lock = -1; #ifdef TRACE_GC const char * const allocation_state_str[] = { "start", "can_allocate", "cant_allocate", "retry_allocate", "try_fit", "try_fit_new_seg", "try_fit_after_cg", "try_fit_after_bgc", "try_free_full_seg_in_bgc", "try_free_after_bgc", "try_seg_end", "acquire_seg", "acquire_seg_after_cg", "acquire_seg_after_bgc", "check_and_wait_for_bgc", "trigger_full_compact_gc", "trigger_ephemeral_gc", "trigger_2nd_ephemeral_gc", "check_retry_seg" }; const char * const msl_take_state_str[] = { "get_large_seg", "bgc_loh_sweep", "wait_bgc", "block_gc", "clr_mem", "clr_large_mem", "t_eph_gc", "t_full_gc", "alloc_small", "alloc_large", "alloc_small_cant", "alloc_large_cant", "try_alloc", "try_budget" }; #endif //TRACE_GC // Keep this in sync with the definition of gc_reason #if (defined(DT_LOG) || defined(TRACE_GC)) static const char* const str_gc_reasons[] = { "alloc_soh", "induced", "lowmem", "empty", "alloc_loh", "oos_soh", "oos_loh", "induced_noforce", "gcstress", "induced_lowmem", "induced_compacting", "lowmemory_host", "pm_full_gc", "lowmemory_host_blocking" }; static const char* const str_gc_pause_modes[] = { "batch", "interactive", "low_latency", "sustained_low_latency", "no_gc" }; static const char* const str_root_kinds[] = { "Stack", "FinalizeQueue", "Handles", "OlderGen", "SizedRef", "Overflow", "DependentHandles", "NewFQ", "Steal", "BGC" }; #endif //DT_LOG || TRACE_GC inline BOOL is_induced (gc_reason reason) { return ((reason == reason_induced) || (reason == reason_induced_noforce) || (reason == reason_lowmemory) || (reason == reason_lowmemory_blocking) || (reason == reason_induced_compacting) || (reason == reason_induced_aggressive) || (reason == reason_lowmemory_host) || (reason == reason_lowmemory_host_blocking)); } gc_oh_num gen_to_oh(int gen) { switch (gen) { case soh_gen0: return gc_oh_num::soh; case soh_gen1: return gc_oh_num::soh; case soh_gen2: return gc_oh_num::soh; case loh_generation: return gc_oh_num::loh; case poh_generation: return gc_oh_num::poh; default: assert(false); return gc_oh_num::unknown; } } uint64_t qpf; double qpf_ms; double qpf_us; uint64_t RawGetHighPrecisionTimeStamp() { return (uint64_t)GCToOSInterface::QueryPerformanceCounter(); } #ifdef BGC_SERVO_TUNING bool gc_heap::bgc_tuning::enable_fl_tuning = false; uint32_t gc_heap::bgc_tuning::memory_load_goal = 0; uint32_t gc_heap::bgc_tuning::memory_load_goal_slack = 0; uint64_t gc_heap::bgc_tuning::available_memory_goal = 0; bool gc_heap::bgc_tuning::panic_activated_p = false; double gc_heap::bgc_tuning::accu_error_panic = 0.0; double gc_heap::bgc_tuning::above_goal_kp = 0.0; double gc_heap::bgc_tuning::above_goal_ki = 0.0; bool gc_heap::bgc_tuning::enable_kd = false; bool gc_heap::bgc_tuning::enable_ki = false; bool gc_heap::bgc_tuning::enable_smooth = false; bool gc_heap::bgc_tuning::enable_tbh = false; bool gc_heap::bgc_tuning::enable_ff = false; bool gc_heap::bgc_tuning::enable_gradual_d = false; double gc_heap::bgc_tuning::above_goal_kd = 0.0; double gc_heap::bgc_tuning::above_goal_ff = 0.0; double gc_heap::bgc_tuning::num_gen1s_smooth_factor = 0.0; double gc_heap::bgc_tuning::ml_kp = 0.0; double gc_heap::bgc_tuning::ml_ki = 0.0; double gc_heap::bgc_tuning::accu_error = 0.0; bool gc_heap::bgc_tuning::fl_tuning_triggered = false; size_t gc_heap::bgc_tuning::num_bgcs_since_tuning_trigger = 0; bool gc_heap::bgc_tuning::next_bgc_p = false; size_t gc_heap::bgc_tuning::gen1_index_last_bgc_end; size_t gc_heap::bgc_tuning::gen1_index_last_bgc_start; size_t gc_heap::bgc_tuning::gen1_index_last_bgc_sweep; size_t gc_heap::bgc_tuning::actual_num_gen1s_to_trigger; gc_heap::bgc_tuning::tuning_calculation gc_heap::bgc_tuning::gen_calc[2]; gc_heap::bgc_tuning::tuning_stats gc_heap::bgc_tuning::gen_stats[2]; gc_heap::bgc_tuning::bgc_size_data gc_heap::bgc_tuning::current_bgc_end_data[2]; size_t gc_heap::bgc_tuning::last_stepping_bgc_count = 0; uint32_t gc_heap::bgc_tuning::last_stepping_mem_load = 0; uint32_t gc_heap::bgc_tuning::stepping_interval = 0; bool gc_heap::bgc_tuning::use_stepping_trigger_p = true; double gc_heap::bgc_tuning::gen2_ratio_correction = 0.0; double gc_heap::bgc_tuning::ratio_correction_step = 0.0; int gc_heap::saved_bgc_tuning_reason = -1; #endif //BGC_SERVO_TUNING inline size_t round_up_power2 (size_t size) { // Get the 0-based index of the most-significant bit in size-1. // If the call failed (because size-1 is zero), size must be 1, // so return 1 (because 1 rounds up to itself). DWORD highest_set_bit_index; if (0 == #ifdef HOST_64BIT BitScanReverse64( #else BitScanReverse( #endif &highest_set_bit_index, size - 1)) { return 1; } // The size == 0 case (which would have overflowed to SIZE_MAX when decremented) // is handled below by relying on the fact that highest_set_bit_index is the maximum value // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that // number of bits shifts in zeros from the right, resulting in an output of zero. return static_cast(2) << highest_set_bit_index; } inline size_t round_down_power2 (size_t size) { // Get the 0-based index of the most-significant bit in size. // If the call failed, size must be zero so return zero. DWORD highest_set_bit_index; if (0 == #ifdef HOST_64BIT BitScanReverse64( #else BitScanReverse( #endif &highest_set_bit_index, size)) { return 0; } // Left-shift 1 by highest_set_bit_index to get back a value containing only // the most-significant set bit of size, i.e. size rounded down // to the next power-of-two value. return static_cast(1) << highest_set_bit_index; } // Get the 0-based index of the most-significant bit in the value. // Returns -1 if the input value is zero (i.e. has no set bits). inline int index_of_highest_set_bit (size_t value) { // Get the 0-based index of the most-significant bit in the value. // If the call failed (because value is zero), return -1. DWORD highest_set_bit_index; return (0 == #ifdef HOST_64BIT BitScanReverse64( #else BitScanReverse( #endif &highest_set_bit_index, value)) ? -1 : static_cast(highest_set_bit_index); } inline float mb (size_t num) { return (float)((float)num / 1000.0 / 1000.0); } inline size_t gib (size_t num) { return (num / 1024 / 1024 / 1024); } #ifdef BACKGROUND_GC uint32_t bgc_alloc_spin_count = 140; uint32_t bgc_alloc_spin = 2; // The following 2 ratios dictate how UOH allocations that happen during a BGC should be handled. Because // UOH is not collected till the very end of a BGC, by default we don't want to allow UOH to grow too large // during a BGC. So if we only increase the size by 10%, we will allow to allocate normally. But if it's // too much (ie, > bgc_uoh_inc_ratio_alloc_wait), we will make the allocation wait till the BGC is done. // // This means threads that allocate heavily on UOH may be paused during a BGC. If you're willing to accept // larger UOH sizes in exchange for fewer pauses, you can use the UOHWaitBGCSizeIncPercent config to increase // the wait ratio. Likewise, set it to use a smaller ratio if you observe that UOH grows too large during // BGCs. float bgc_uoh_inc_ratio_alloc_normal = 0.1f; // This ratio is 2x for regions because regions could start with a much smaller size since a lot of // memory could be in the free pool. #ifdef USE_REGIONS float bgc_uoh_inc_ratio_alloc_wait = 2.0f; #else float bgc_uoh_inc_ratio_alloc_wait = 1.0f; #endif //USE_REGIONS inline void c_write (uint32_t& place, uint32_t value) { Interlocked::Exchange (&place, value); } // If every heap's gen2 or gen3 size is less than this threshold we will do a blocking GC. const size_t bgc_min_per_heap = 4*1024*1024; int gc_heap::gchist_index = 0; gc_mechanisms_store gc_heap::gchist[max_history_count]; #ifndef MULTIPLE_HEAPS VOLATILE(bgc_state) gc_heap::current_bgc_state = bgc_not_in_process; int gc_heap::gchist_index_per_heap = 0; gc_heap::gc_history gc_heap::gchist_per_heap[max_history_count]; #endif //MULTIPLE_HEAPS #endif //BACKGROUND_GC #ifdef GC_CONFIG_DRIVEN BOOL gc_config_log_on = FALSE; FILE* gc_config_log = NULL; // we keep this much in a buffer and only flush when the buffer is full #define gc_config_log_buffer_size (1*1024) // TEMP uint8_t* gc_config_log_buffer = 0; size_t gc_config_log_buffer_offset = 0; // For config since we log so little we keep the whole history. Also it's only // ever logged by one thread so no need to synchronize. void log_va_msg_config(const char *fmt, va_list args) { const int BUFFERSIZE = 256; static char rgchBuffer[BUFFERSIZE]; char * pBuffer = &rgchBuffer[0]; pBuffer[0] = '\n'; int buffer_start = 1; int msg_len = _vsnprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, _TRUNCATE, fmt, args ); assert (msg_len != -1); msg_len += buffer_start; if ((gc_config_log_buffer_offset + msg_len) > gc_config_log_buffer_size) { fwrite(gc_config_log_buffer, gc_config_log_buffer_offset, 1, gc_config_log); fflush(gc_config_log); gc_config_log_buffer_offset = 0; } memcpy (gc_config_log_buffer + gc_config_log_buffer_offset, pBuffer, msg_len); gc_config_log_buffer_offset += msg_len; } void GCLogConfig (const char *fmt, ... ) { if (gc_config_log_on && (gc_config_log != NULL)) { va_list args; va_start( args, fmt ); log_va_msg_config (fmt, args); } } #endif // GC_CONFIG_DRIVEN #ifdef SYNCHRONIZATION_STATS // Number of GCs have we done since we last logged. static unsigned int gc_count_during_log; // In ms. This is how often we print out stats. static const unsigned int log_interval = 5000; // Time (in ms) when we start a new log interval. static uint64_t log_start_tick; static unsigned int gc_lock_contended; static int64_t log_start_hires; // Cycles accumulated in SuspendEE during log_interval. static uint64_t suspend_ee_during_log; // Cycles accumulated in RestartEE during log_interval. static uint64_t restart_ee_during_log; static uint64_t gc_during_log; #endif //SYNCHRONIZATION_STATS void process_sync_log_stats() { #ifdef SYNCHRONIZATION_STATS uint64_t log_elapsed = GCToOSInterface::GetLowPrecisionTimeStamp() - log_start_tick; if (log_elapsed > log_interval) { uint64_t total = GCToOSInterface::QueryPerformanceCounter() - log_start_hires; // Print out the cycles we spent on average in each suspend and restart. printf("\n_________________________________________________________________________________\n" "Past %d(s): #%3d GCs; Total gc_lock contended: %8u; GC: %12u\n" "SuspendEE: %8u; RestartEE: %8u GC %.3f%%\n", log_interval / 1000, gc_count_during_log, gc_lock_contended, (unsigned int)(gc_during_log / gc_count_during_log), (unsigned int)(suspend_ee_during_log / gc_count_during_log), (unsigned int)(restart_ee_during_log / gc_count_during_log), (double)(100.0f * gc_during_log / total)); gc_heap::print_sync_stats(gc_count_during_log); gc_count_during_log = 0; } #endif //SYNCHRONIZATION_STATS } #ifdef MULTIPLE_HEAPS uint32_t g_num_active_processors = 0; // Note that when a join is no longer used we still keep the values here because // tooling already recognized them as having the meaning they were assigned originally. // It doesn't break tooling if we stop using them but does if we assign a new meaning // to them. enum gc_join_stage { gc_join_init_cpu_mapping = 0, gc_join_done = 1, gc_join_generation_determined = 2, gc_join_begin_mark_phase = 3, gc_join_scan_dependent_handles = 4, gc_join_rescan_dependent_handles = 5, gc_join_scan_sizedref_done = 6, gc_join_null_dead_short_weak = 7, gc_join_scan_finalization = 8, gc_join_null_dead_long_weak = 9, gc_join_null_dead_syncblk = 10, gc_join_decide_on_compaction = 11, gc_join_rearrange_segs_compaction = 12, gc_join_adjust_handle_age_compact = 13, gc_join_adjust_handle_age_sweep = 14, gc_join_begin_relocate_phase = 15, gc_join_relocate_phase_done = 16, gc_join_verify_objects_done = 17, gc_join_start_bgc = 18, gc_join_restart_ee = 19, gc_join_concurrent_overflow = 20, gc_join_suspend_ee = 21, gc_join_bgc_after_ephemeral = 22, gc_join_allow_fgc = 23, gc_join_bgc_sweep = 24, gc_join_suspend_ee_verify = 25, gc_join_restart_ee_verify = 26, gc_join_set_state_free = 27, gc_r_join_update_card_bundle = 28, gc_join_after_absorb = 29, gc_join_verify_copy_table = 30, gc_join_after_reset = 31, gc_join_after_ephemeral_sweep = 32, gc_join_after_profiler_heap_walk = 33, gc_join_minimal_gc = 34, gc_join_after_commit_soh_no_gc = 35, gc_join_expand_loh_no_gc = 36, gc_join_final_no_gc = 37, // No longer in use but do not remove, see comments for this enum. gc_join_disable_software_write_watch = 38, gc_join_merge_temp_fl = 39, gc_join_bridge_processing = 40, gc_join_max = 41 }; enum gc_join_flavor { join_flavor_server_gc = 0, join_flavor_bgc = 1 }; #define first_thread_arrived 2 #pragma warning(push) #pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure { // Shared non volatile keep on separate line to prevent eviction int n_threads; // Keep polling/wait structures on separate line write once per join DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) GCEvent joined_event[3]; // the last event in the array is only used for first_thread_arrived. Volatile lock_color; VOLATILE(BOOL) wait_done; VOLATILE(BOOL) joined_p; // Keep volatile counted locks on separate cache line write many per join DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) VOLATILE(int) join_lock; VOLATILE(int) r_join_lock; }; #pragma warning(pop) enum join_type { type_last_join = 0, type_join = 1, type_restart = 2, type_first_r_join = 3, type_r_join = 4 }; enum join_time { time_start = 0, time_end = 1 }; enum join_heap_index { join_heap_restart = 100, join_heap_r_restart = 200 }; class t_join { join_structure join_struct; int id; gc_join_flavor flavor; #ifdef JOIN_STATS uint64_t start[MAX_SUPPORTED_HEAPS], end[MAX_SUPPORTED_HEAPS], start_seq; // remember join id and last thread to arrive so restart can use these int thd; // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval uint64_t start_tick; // counters for joins, in 1000's of clock cycles uint64_t elapsed_total[gc_join_max], wake_total[gc_join_max], seq_loss_total[gc_join_max], par_loss_total[gc_join_max], in_join_total[gc_join_max]; #endif //JOIN_STATS public: BOOL init (int n_th, gc_join_flavor f) { dprintf (JOIN_LOG, ("Initializing join structure")); join_struct.n_threads = n_th; join_struct.lock_color = 0; for (int i = 0; i < 3; i++) { if (!join_struct.joined_event[i].IsValid()) { join_struct.joined_p = FALSE; dprintf (JOIN_LOG, ("Creating join event %d", i)); // TODO - changing this to a non OS event // because this is also used by BGC threads which are // managed threads and WaitEx does not allow you to wait // for an OS event on a managed thread. // But we are not sure if this plays well in the hosting // environment. //join_struct.joined_event[i].CreateOSManualEventNoThrow(FALSE); if (!join_struct.joined_event[i].CreateManualEventNoThrow(FALSE)) return FALSE; } } join_struct.join_lock = join_struct.n_threads; join_struct.r_join_lock = join_struct.n_threads; join_struct.wait_done = FALSE; flavor = f; #ifdef JOIN_STATS start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); #endif //JOIN_STATS return TRUE; } void update_n_threads(int n_th) { join_struct.n_threads = n_th; join_struct.join_lock = n_th; join_struct.r_join_lock = n_th; } int get_num_threads() { return join_struct.n_threads; } // This is for instrumentation only. int get_join_lock() { return VolatileLoadWithoutBarrier (&join_struct.join_lock); } void destroy () { dprintf (JOIN_LOG, ("Destroying join structure")); for (int i = 0; i < 3; i++) { if (join_struct.joined_event[i].IsValid()) join_struct.joined_event[i].CloseEvent(); } } inline void fire_event (int heap, join_time time, join_type type, int join_id) { FIRE_EVENT(GCJoin_V2, heap, time, type, join_id); } void join (gc_heap* gch, int join_id) { #ifdef JOIN_STATS // parallel execution ends here end[gch->heap_number] = get_ts(); #endif //JOIN_STATS assert (!join_struct.joined_p); int color = join_struct.lock_color.LoadWithoutBarrier(); if (Interlocked::Decrement(&join_struct.join_lock) != 0) { dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d", flavor, join_id, (int32_t)(join_struct.join_lock))); fire_event (gch->heap_number, time_start, type_join, join_id); //busy wait around the color if (color == join_struct.lock_color.LoadWithoutBarrier()) { respin: int spin_count = 128 * yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (color != join_struct.lock_color.LoadWithoutBarrier()) { break; } YieldProcessor(); // indicate to the processor that we are spinning } // we've spun, and if color still hasn't changed, fall into hard wait if (color == join_struct.lock_color.LoadWithoutBarrier()) { dprintf (JOIN_LOG, ("join%d(%d): Join() hard wait on reset event %d, join_lock is now %d", flavor, join_id, color, (int32_t)(join_struct.join_lock))); uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE); if (dwJoinWait != WAIT_OBJECT_0) { STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); FATAL_GC_ERROR (); } } // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() if (color == join_struct.lock_color.LoadWithoutBarrier()) { dprintf (9999, ("---h%d %d j%d %d - respin!!! (c:%d-%d)", gch->heap_number, join_id, join_struct.n_threads, color, join_struct.lock_color.LoadWithoutBarrier())); goto respin; } dprintf (JOIN_LOG, ("join%d(%d): Join() done, join_lock is %d", flavor, join_id, (int32_t)(join_struct.join_lock))); } fire_event (gch->heap_number, time_end, type_join, join_id); #ifdef JOIN_STATS // parallel execution starts here start[gch->heap_number] = get_ts(); Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number])); #endif //JOIN_STATS } else { fire_event (gch->heap_number, time_start, type_last_join, join_id); join_struct.joined_p = TRUE; dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id)); join_struct.joined_event[!color].Reset(); id = join_id; #ifdef JOIN_STATS // remember the join id, the last thread arriving, the start of the sequential phase, // and keep track of the cycles spent waiting in the join thd = gch->heap_number; start_seq = get_ts(); Interlocked::ExchangeAdd(&in_join_total[join_id], (start_seq - end[gch->heap_number])); #endif //JOIN_STATS } } // Reverse join - first thread gets here does the work; other threads will only proceed // after the work is done. // Note that you cannot call this twice in a row on the same thread. Plus there's no // need to call it twice in row - you should just merge the work. BOOL r_join (gc_heap* gch, int join_id) { if (join_struct.n_threads == 1) { return TRUE; } if (Interlocked::CompareExchange(&join_struct.r_join_lock, 0, join_struct.n_threads) == 0) { fire_event (gch->heap_number, time_start, type_join, join_id); dprintf (JOIN_LOG, ("r_join() Waiting...")); //busy wait around the color respin: int spin_count = 256 * yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (join_struct.wait_done) { break; } YieldProcessor(); // indicate to the processor that we are spinning } // we've spun, and if color still hasn't changed, fall into hard wait if (!join_struct.wait_done) { dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived)); uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE); if (dwJoinWait != WAIT_OBJECT_0) { STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); FATAL_GC_ERROR (); } } // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() if (!join_struct.wait_done) { goto respin; } dprintf (JOIN_LOG, ("r_join() done")); fire_event (gch->heap_number, time_end, type_join, join_id); return FALSE; } else { fire_event (gch->heap_number, time_start, type_first_r_join, join_id); return TRUE; } } #ifdef JOIN_STATS uint64_t get_ts() { return GCToOSInterface::QueryPerformanceCounter(); } void start_ts (gc_heap* gch) { // parallel execution ends here start[gch->heap_number] = get_ts(); } #endif //JOIN_STATS void restart() { #ifdef JOIN_STATS uint64_t elapsed_seq = get_ts() - start_seq; uint64_t max = 0, sum = 0, wake = 0; uint64_t min_ts = start[0]; for (int i = 1; i < join_struct.n_threads; i++) { if(min_ts > start[i]) min_ts = start[i]; } for (int i = 0; i < join_struct.n_threads; i++) { uint64_t wake_delay = start[i] - min_ts; uint64_t elapsed = end[i] - start[i]; if (max < elapsed) max = elapsed; sum += elapsed; wake += wake_delay; } uint64_t seq_loss = (join_struct.n_threads - 1)*elapsed_seq; uint64_t par_loss = join_struct.n_threads*max - sum; double efficiency = 0.0; if (max > 0) efficiency = sum*100.0/(join_struct.n_threads*max); const double ts_scale = 1e-6; // enable this printf to get statistics on each individual join as it occurs //printf("join #%3d seq_loss = %5g par_loss = %5g efficiency = %3.0f%%\n", join_id, ts_scale*seq_loss, ts_scale*par_loss, efficiency); elapsed_total[id] += sum; wake_total[id] += wake; seq_loss_total[id] += seq_loss; par_loss_total[id] += par_loss; // every 10 seconds, print a summary of the time spent in each type of join if (GCToOSInterface::GetLowPrecisionTimeStamp() - start_tick > 10*1000) { printf("**** summary *****\n"); for (int i = 0; i < 16; i++) { printf("join #%3d elapsed_total = %8g wake_loss = %8g seq_loss = %8g par_loss = %8g in_join_total = %8g\n", i, ts_scale*elapsed_total[i], ts_scale*wake_total[i], ts_scale*seq_loss_total[i], ts_scale*par_loss_total[i], ts_scale*in_join_total[i]); elapsed_total[i] = wake_total[i] = seq_loss_total[i] = par_loss_total[i] = in_join_total[i] = 0; } start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); } #endif //JOIN_STATS fire_event (join_heap_restart, time_start, type_restart, -1); assert (join_struct.joined_p); join_struct.joined_p = FALSE; join_struct.join_lock = join_struct.n_threads; dprintf (JOIN_LOG, ("join%d(%d): Restarting from join: join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); int color = join_struct.lock_color.LoadWithoutBarrier(); join_struct.lock_color = !color; join_struct.joined_event[color].Set(); fire_event (join_heap_restart, time_end, type_restart, -1); #ifdef JOIN_STATS start[thd] = get_ts(); #endif //JOIN_STATS } BOOL joined() { dprintf (JOIN_LOG, ("join%d(%d): joined, join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); return join_struct.joined_p; } void r_restart() { if (join_struct.n_threads != 1) { fire_event (join_heap_r_restart, time_start, type_restart, -1); join_struct.wait_done = TRUE; join_struct.joined_event[first_thread_arrived].Set(); fire_event (join_heap_r_restart, time_end, type_restart, -1); } } void r_init() { if (join_struct.n_threads != 1) { join_struct.r_join_lock = join_struct.n_threads; join_struct.wait_done = FALSE; join_struct.joined_event[first_thread_arrived].Reset(); } } }; t_join gc_t_join; #ifdef BACKGROUND_GC t_join bgc_t_join; #endif //BACKGROUND_GC #endif //MULTIPLE_HEAPS #define spin_and_switch(count_to_spin, expr) \ { \ for (int j = 0; j < count_to_spin; j++) \ { \ if (expr) \ { \ break;\ } \ YieldProcessor(); \ } \ if (!(expr)) \ { \ GCToOSInterface::YieldThread(0); \ } \ } #define spin_and_wait(count_to_spin, expr) \ { \ while (!expr) \ { \ for (int j = 0; j < count_to_spin; j++) \ { \ if (expr) \ { \ break; \ } \ YieldProcessor (); \ } \ if (!(expr)) \ { \ GCToOSInterface::YieldThread (0); \ } \ } \ } #ifdef BACKGROUND_GC #define max_pending_allocs 64 class exclusive_sync { VOLATILE(uint8_t*) rwp_object; VOLATILE(int32_t) needs_checking; int spin_count; uint8_t cache_separator[HS_CACHE_LINE_SIZE - (sizeof (spin_count) + sizeof (needs_checking) + sizeof (rwp_object))]; // TODO - perhaps each object should be on its own cache line... VOLATILE(uint8_t*) alloc_objects[max_pending_allocs]; int find_free_index () { for (int i = 0; i < max_pending_allocs; i++) { if (alloc_objects [i] == (uint8_t*)0) { return i; } } return -1; } public: void init() { spin_count = 32 * (g_num_processors - 1); rwp_object = 0; needs_checking = 0; for (int i = 0; i < max_pending_allocs; i++) { alloc_objects [i] = (uint8_t*)0; } } void check() { for (int i = 0; i < max_pending_allocs; i++) { if (alloc_objects [i] != (uint8_t*)0) { FATAL_GC_ERROR(); } } } void bgc_mark_set (uint8_t* obj) { dprintf (3, ("cm: probing %p", obj)); retry: if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0) { // If we spend too much time spending all the allocs, // consider adding a high water mark and scan up // to that; we'll need to interlock in done when // we update the high watermark. for (int i = 0; i < max_pending_allocs; i++) { if (obj == alloc_objects[i]) { needs_checking = 0; dprintf (3, ("cm: will spin")); spin_and_switch (spin_count, (obj != alloc_objects[i])); goto retry; } } rwp_object = obj; needs_checking = 0; dprintf (3, ("cm: set %p", obj)); return; } else { spin_and_switch (spin_count, (needs_checking == 0)); goto retry; } } int uoh_alloc_set (uint8_t* obj) { if (!gc_heap::cm_in_progress) { return -1; } retry: dprintf (3, ("uoh alloc: probing %p", obj)); if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0) { if (obj == rwp_object) { needs_checking = 0; spin_and_switch (spin_count, (obj != rwp_object)); goto retry; } else { int cookie = find_free_index(); if (cookie != -1) { alloc_objects[cookie] = obj; needs_checking = 0; //if (cookie >= 4) //{ // GCToOSInterface::DebugBreak(); //} dprintf (3, ("uoh alloc: set %p at %d", obj, cookie)); return cookie; } else { needs_checking = 0; dprintf (3, ("uoh alloc: setting %p will spin to acquire a free index", obj)); spin_and_switch (spin_count, (find_free_index () != -1)); goto retry; } } } else { dprintf (3, ("uoh alloc: will spin on checking %p", obj)); spin_and_switch (spin_count, (needs_checking == 0)); goto retry; } } void bgc_mark_done () { dprintf (3, ("cm: release lock on %p", (uint8_t *)rwp_object)); rwp_object = 0; } void uoh_alloc_done_with_index (int index) { dprintf (3, ("uoh alloc: release lock on %p based on %d", (uint8_t *)alloc_objects[index], index)); assert ((index >= 0) && (index < max_pending_allocs)); alloc_objects[index] = (uint8_t*)0; } void uoh_alloc_done (uint8_t* obj) { if (!gc_heap::cm_in_progress) { return; } for (int i = 0; i < max_pending_allocs; i++) { if (alloc_objects [i] == obj) { uoh_alloc_done_with_index(i); return; } } dprintf (3, ("uoh alloc: could not release lock on %p", obj)); } }; #endif //BACKGROUND_GC void reset_memory (uint8_t* o, size_t sizeo); #ifdef WRITE_WATCH #ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP static bool virtual_alloc_hardware_write_watch = false; #endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP static bool hardware_write_watch_capability = false; inline bool can_use_hardware_write_watch() { return hardware_write_watch_capability; } inline bool can_use_write_watch_for_card_table() { #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES return true; #else return can_use_hardware_write_watch(); #endif } #else //WRITE_WATCH #define mem_reserve (MEM_RESERVE) #endif //WRITE_WATCH void WaitLongerNoInstru (int i) { // every 8th attempt: bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); // if we're waiting for gc to finish, we should block immediately if (g_fSuspensionPending == 0) { if (g_num_processors > 1) { YieldProcessor(); // indicate to the processor that we are spinning if (i & 0x01f) GCToOSInterface::YieldThread (0); else GCToOSInterface::Sleep (5); } else GCToOSInterface::Sleep (5); } // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, // or it has no Thread object, in order to force a task to yield, or to triger a GC. // It is important that the thread is going to wait for GC. Otherwise the thread // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. if (bToggleGC) { #ifdef _DEBUG // In debug builds, all enter_spin_lock operations go through this code. If a GC has // started, it is important to block until the GC thread calls set_gc_done (since it is // guaranteed to have cleared g_TrapReturningThreads by this point). This avoids livelock // conditions which can otherwise occur if threads are allowed to spin in this function // (and therefore starve the GC thread) between the point when the GC thread sets the // WaitForGC event and the point when the GC thread clears g_TrapReturningThreads. if (gc_heap::gc_started) { gc_heap::wait_for_gc_done(); } #endif // _DEBUG GCToEEInterface::DisablePreemptiveGC(); } else if (g_fSuspensionPending > 0) { g_theGCHeap->WaitUntilGCComplete(); } } inline static void safe_switch_to_thread() { bool cooperative_mode = gc_heap::enable_preemptive(); GCToOSInterface::YieldThread(0); gc_heap::disable_preemptive(cooperative_mode); } #define check_msl_status(msg, size) if (msl_status == msl_retry_different_heap) \ { \ dprintf (5555, ("h%d RETRY %s(%Id)", heap_number, msg, size)); \ return a_state_retry_allocate; \ } static const int32_t lock_free = -1; static const int32_t lock_taken = 0; static const int32_t lock_decommissioned = 1; // If our heap got decommissioned, we need to try an existing heap. //inline bool gc_heap::should_move_heap (GCSpinLock* msl) { #ifdef MULTIPLE_HEAPS if (msl->lock == lock_decommissioned) { dprintf (5555, ("heap#%d got decommissioned! need to retry", heap_number)); } return (msl->lock == lock_decommissioned); #else //MULTIPLE_HEAPS return false; #endif //MULTIPLE_HEAPS } // All the places where we could be stopped because there was a suspension should call should_move_heap to check if we need to return // so we can try another heap or we can continue the allocation on the same heap. enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) { do { #ifdef DYNAMIC_HEAP_COUNT uint64_t start = GetHighPrecisionTimeStamp(); #endif //DYNAMIC_HEAP_COUNT unsigned int i = 0; while (VolatileLoad (&msl->lock) != lock_free) { if (should_move_heap (msl)) { return msl_retry_different_heap; } if ((++i & 7) && !IsGCInProgress ()) { if (g_num_processors > 1) { #ifndef MULTIPLE_HEAPS int spin_count = 32 * yp_spin_count_unit; #else //!MULTIPLE_HEAPS int spin_count = yp_spin_count_unit; #endif //!MULTIPLE_HEAPS for (int j = 0; j < spin_count; j++) { if (VolatileLoad (&msl->lock) == lock_free || IsGCInProgress ()) break; // give the HT neighbor a chance to run YieldProcessor (); } if (VolatileLoad (&msl->lock) != lock_free && !IsGCInProgress ()) { #ifdef DYNAMIC_HEAP_COUNT start -= GetHighPrecisionTimeStamp(); #endif //DYNAMIC_HEAP_COUNT safe_switch_to_thread (); #ifdef DYNAMIC_HEAP_COUNT start += GetHighPrecisionTimeStamp(); #endif //DYNAMIC_HEAP_COUNT } } else { safe_switch_to_thread (); } } else { #ifdef DYNAMIC_HEAP_COUNT start -= GetHighPrecisionTimeStamp(); #endif //DYNAMIC_HEAP_COUNT WaitLongerNoInstru (i); #ifdef DYNAMIC_HEAP_COUNT start += GetHighPrecisionTimeStamp(); #endif //DYNAMIC_HEAP_COUNT } } #ifdef DYNAMIC_HEAP_COUNT uint64_t end = GetHighPrecisionTimeStamp(); Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start); dprintf (3, ("h%d wait for msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time)); #endif //DYNAMIC_HEAP_COUNT } while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free); return msl_entered; } inline enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl) { if (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) == lock_free) return msl_entered; return enter_spin_lock_msl_helper (msl); } // // We need the following methods to have volatile arguments, so that they can accept // raw pointers in addition to the results of the & operator on Volatile. // this will never be used for the more_space_lock_xxx, which is why // "lock_decommissioned" cannot happen. inline static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) { retry: if (Interlocked::CompareExchange(lock, lock_taken, lock_free) != lock_free) { unsigned int i = 0; while (VolatileLoad(lock) != lock_free) { // will never be used for more_space_lock_xxx assert (VolatileLoad(lock) != lock_decommissioned); if ((++i & 7) && !IsGCInProgress()) { if (g_num_processors > 1) { #ifndef MULTIPLE_HEAPS int spin_count = 32 * yp_spin_count_unit; #else //!MULTIPLE_HEAPS int spin_count = yp_spin_count_unit; #endif //!MULTIPLE_HEAPS for (int j = 0; j < spin_count; j++) { if (VolatileLoad(lock) == lock_free || IsGCInProgress()) break; YieldProcessor(); // indicate to the processor that we are spinning } if (VolatileLoad(lock) != lock_free && !IsGCInProgress()) { safe_switch_to_thread(); } } else { safe_switch_to_thread(); } } else { WaitLongerNoInstru(i); } } goto retry; } } inline static BOOL try_enter_spin_lock_noinstru(RAW_KEYWORD(volatile) int32_t* lock) { return (Interlocked::CompareExchange(&*lock, lock_taken, lock_free) == lock_free); } inline static void leave_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) { VolatileStore((int32_t*)lock, lock_free); } #ifdef _DEBUG inline static void enter_spin_lock (GCSpinLock *pSpinLock) { enter_spin_lock_noinstru (&pSpinLock->lock); assert (pSpinLock->holding_thread == (Thread*)-1); pSpinLock->holding_thread = GCToEEInterface::GetThread(); } inline static BOOL try_enter_spin_lock(GCSpinLock *pSpinLock) { BOOL ret = try_enter_spin_lock_noinstru(&pSpinLock->lock); if (ret) pSpinLock->holding_thread = GCToEEInterface::GetThread(); return ret; } inline static void leave_spin_lock(GCSpinLock *pSpinLock) { bool gc_thread_p = GCToEEInterface::WasCurrentThreadCreatedByGC(); pSpinLock->released_by_gc_p = gc_thread_p; pSpinLock->holding_thread = (Thread*) -1; if (pSpinLock->lock != lock_free) leave_spin_lock_noinstru(&pSpinLock->lock); } #define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) \ _ASSERTE((pSpinLock)->holding_thread == GCToEEInterface::GetThread()); #define ASSERT_NOT_HOLDING_SPIN_LOCK(pSpinLock) \ _ASSERTE((pSpinLock)->holding_thread != GCToEEInterface::GetThread()); #else //_DEBUG //In the concurrent version, the Enable/DisablePreemptiveGC is optional because //the gc thread call WaitLonger. void WaitLonger (int i #ifdef SYNCHRONIZATION_STATS , GCSpinLock* spin_lock #endif //SYNCHRONIZATION_STATS ) { #ifdef SYNCHRONIZATION_STATS (spin_lock->num_wait_longer)++; #endif //SYNCHRONIZATION_STATS // every 8th attempt: bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); assert (bToggleGC); // if we're waiting for gc to finish, we should block immediately if (!gc_heap::gc_started) { #ifdef SYNCHRONIZATION_STATS (spin_lock->num_switch_thread_w)++; #endif //SYNCHRONIZATION_STATS if (g_num_processors > 1) { YieldProcessor(); // indicate to the processor that we are spinning if (i & 0x01f) GCToOSInterface::YieldThread (0); else GCToOSInterface::Sleep (5); } else GCToOSInterface::Sleep (5); } // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, // or it has no Thread object, in order to force a task to yield, or to triger a GC. // It is important that the thread is going to wait for GC. Otherwise the thread // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. if (gc_heap::gc_started) { gc_heap::wait_for_gc_done(); } if (bToggleGC) { #ifdef SYNCHRONIZATION_STATS (spin_lock->num_disable_preemptive_w)++; #endif //SYNCHRONIZATION_STATS GCToEEInterface::DisablePreemptiveGC(); } } inline static void enter_spin_lock (GCSpinLock* spin_lock) { retry: if (Interlocked::CompareExchange(&spin_lock->lock, lock_taken, lock_free) != lock_free) { unsigned int i = 0; while (spin_lock->lock != lock_free) { assert (spin_lock->lock != lock_decommissioned); if ((++i & 7) && !gc_heap::gc_started) { if (g_num_processors > 1) { #ifndef MULTIPLE_HEAPS int spin_count = 32 * yp_spin_count_unit; #else //!MULTIPLE_HEAPS int spin_count = yp_spin_count_unit; #endif //!MULTIPLE_HEAPS for (int j = 0; j < spin_count; j++) { if (spin_lock->lock == lock_free || gc_heap::gc_started) break; YieldProcessor(); // indicate to the processor that we are spinning } if (spin_lock->lock != lock_free && !gc_heap::gc_started) { #ifdef SYNCHRONIZATION_STATS (spin_lock->num_switch_thread)++; #endif //SYNCHRONIZATION_STATS bool cooperative_mode = gc_heap::enable_preemptive (); GCToOSInterface::YieldThread(0); gc_heap::disable_preemptive (cooperative_mode); } } else GCToOSInterface::YieldThread(0); } else { WaitLonger(i #ifdef SYNCHRONIZATION_STATS , spin_lock #endif //SYNCHRONIZATION_STATS ); } } goto retry; } } inline static BOOL try_enter_spin_lock(GCSpinLock* spin_lock) { return (Interlocked::CompareExchange(&spin_lock->lock, lock_taken, lock_free) == lock_free); } inline static void leave_spin_lock (GCSpinLock * spin_lock) { spin_lock->lock = lock_free; } #define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) #endif //_DEBUG bool gc_heap::enable_preemptive () { return GCToEEInterface::EnablePreemptiveGC(); } void gc_heap::disable_preemptive (bool restore_cooperative) { if (restore_cooperative) { GCToEEInterface::DisablePreemptiveGC(); } } typedef void ** PTR_PTR; inline void memclr ( uint8_t* mem, size_t size) { dprintf (3, ("MEMCLR: %p, %zd", mem, size)); assert ((size & (sizeof(PTR_PTR)-1)) == 0); assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); memset (mem, 0, size); } inline ptrdiff_t round_down (ptrdiff_t add, int pitch) { return ((add / pitch) * pitch); } #if defined(FEATURE_STRUCTALIGN) && defined(RESPECT_LARGE_ALIGNMENT) // FEATURE_STRUCTALIGN allows the compiler to dictate the alignment, // i.e, if a larger alignment matters or is beneficial, the compiler // generated info tells us so. RESPECT_LARGE_ALIGNMENT is just the // converse - it's a heuristic for the GC to use a larger alignment. #error FEATURE_STRUCTALIGN should imply !RESPECT_LARGE_ALIGNMENT #endif #if defined(FEATURE_STRUCTALIGN) && defined(FEATURE_LOH_COMPACTION) #error FEATURE_STRUCTALIGN and FEATURE_LOH_COMPACTION are mutually exclusive #endif // Returns true if two pointers have the same large (double than normal) alignment. inline BOOL same_large_alignment_p (uint8_t* p1, uint8_t* p2) { #ifdef RESPECT_LARGE_ALIGNMENT const size_t LARGE_ALIGNMENT_MASK = 2 * DATA_ALIGNMENT - 1; return ((((size_t)p1 ^ (size_t)p2) & LARGE_ALIGNMENT_MASK) == 0); #else UNREFERENCED_PARAMETER(p1); UNREFERENCED_PARAMETER(p2); return TRUE; #endif // RESPECT_LARGE_ALIGNMENT } // Determines the padding size required to fix large alignment during relocation. inline size_t switch_alignment_size (BOOL already_padded_p) { #ifndef RESPECT_LARGE_ALIGNMENT assert (!"Should not be called"); #endif // RESPECT_LARGE_ALIGNMENT if (already_padded_p) return DATA_ALIGNMENT; else return Align (min_obj_size) | DATA_ALIGNMENT; } #ifdef FEATURE_STRUCTALIGN void set_node_aligninfo (uint8_t *node, int requiredAlignment, ptrdiff_t pad); void clear_node_aligninfo (uint8_t *node); #else // FEATURE_STRUCTALIGN #define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1) void set_node_realigned (uint8_t* node); void clear_node_realigned(uint8_t* node); #endif // FEATURE_STRUCTALIGN inline size_t AlignQword (size_t nbytes) { #ifdef FEATURE_STRUCTALIGN // This function is used to align everything on the large object // heap to an 8-byte boundary, to reduce the number of unaligned // accesses to (say) arrays of doubles. With FEATURE_STRUCTALIGN, // the compiler dictates the optimal alignment instead of having // a heuristic in the GC. return Align (nbytes); #else // FEATURE_STRUCTALIGN return (nbytes + 7) & ~7; #endif // FEATURE_STRUCTALIGN } inline BOOL Aligned (size_t n) { return (n & ALIGNCONST) == 0; } #define OBJECT_ALIGNMENT_OFFSET (sizeof(MethodTable *)) #ifdef FEATURE_STRUCTALIGN #define MAX_STRUCTALIGN OS_PAGE_SIZE #else // FEATURE_STRUCTALIGN #define MAX_STRUCTALIGN 0 #endif // FEATURE_STRUCTALIGN #ifdef FEATURE_STRUCTALIGN inline ptrdiff_t AdjustmentForMinPadSize(ptrdiff_t pad, int requiredAlignment) { // The resulting alignpad must be either 0 or at least min_obj_size. // Note that by computing the following difference on unsigned types, // we can do the range check 0 < alignpad < min_obj_size with a // single conditional branch. if ((size_t)(pad - DATA_ALIGNMENT) < Align (min_obj_size) - DATA_ALIGNMENT) { return requiredAlignment; } return 0; } inline ptrdiff_t ComputeStructAlignPad (uint8_t* plug, int requiredAlignment, size_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET) { return StructAlign (plug, requiredAlignment, alignmentOffset) - plug; } BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment) { return StructAlign (ptr, requiredAlignment) == ptr; } inline ptrdiff_t ComputeMaxStructAlignPad (int requiredAlignment) { if (requiredAlignment == DATA_ALIGNMENT) return 0; // Since a non-zero alignment padding cannot be less than min_obj_size (so we can fit the // alignment padding object), the worst-case alignment padding is correspondingly larger // than the required alignment. return requiredAlignment + Align (min_obj_size) - DATA_ALIGNMENT; } inline ptrdiff_t ComputeMaxStructAlignPadLarge (int requiredAlignment) { if (requiredAlignment <= get_alignment_constant (TRUE)+1) return 0; // This is the same as ComputeMaxStructAlignPad, except that in addition to leaving space // for padding before the actual object, it also leaves space for filling a gap after the // actual object. This is needed on the large object heap, as the outer allocation functions // don't operate on an allocation context (which would have left space for the final gap). return requiredAlignment + Align (min_obj_size) * 2 - DATA_ALIGNMENT; } #else // FEATURE_STRUCTALIGN #define ComputeMaxStructAlignPad(requiredAlignment) 0 #define ComputeMaxStructAlignPadLarge(requiredAlignment) 0 #endif // FEATURE_STRUCTALIGN //CLR_SIZE is the max amount of bytes from gen0 that is set to 0 in one chunk #define CLR_SIZE ((size_t)(8*1024+32)) #define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN) // When we fit into the free list we need an extra of a min obj #define END_SPACE_AFTER_GC_FL (END_SPACE_AFTER_GC + Align (min_obj_size)) #if defined(BACKGROUND_GC) && !defined(USE_REGIONS) #define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE) #else #define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE) #endif //BACKGROUND_GC && !USE_REGIONS // This is always power of 2. #ifdef HOST_64BIT const size_t min_segment_size_hard_limit = 1024*1024*16; #else //HOST_64BIT const size_t min_segment_size_hard_limit = 1024*1024*4; #endif //HOST_64BIT #ifndef HOST_64BIT // Max size of heap hard limit (2^31) to be able to be aligned and rounded up on power of 2 and not overflow const size_t max_heap_hard_limit = (size_t)2 * (size_t)1024 * (size_t)1024 * (size_t)1024; #endif //!HOST_64BIT #ifdef SERVER_GC #ifdef HOST_64BIT #define INITIAL_ALLOC ((size_t)((size_t)4*1024*1024*1024)) #define LHEAP_ALLOC ((size_t)(1024*1024*256)) #else #define INITIAL_ALLOC ((size_t)(1024*1024*64)) #define LHEAP_ALLOC ((size_t)(1024*1024*32)) #endif // HOST_64BIT #else //SERVER_GC #ifdef HOST_64BIT #define INITIAL_ALLOC ((size_t)(1024*1024*256)) #define LHEAP_ALLOC ((size_t)(1024*1024*128)) #else #define INITIAL_ALLOC ((size_t)(1024*1024*16)) #define LHEAP_ALLOC ((size_t)(1024*1024*16)) #endif // HOST_64BIT #endif //SERVER_GC const size_t etw_allocation_tick = 100*1024; const size_t low_latency_alloc = 256*1024; const size_t fgn_check_quantum = 2*1024*1024; #ifdef MH_SC_MARK const int max_snoop_level = 128; #endif //MH_SC_MARK #ifdef CARD_BUNDLE //threshold of heap size to turn on card bundles. #define SH_TH_CARD_BUNDLE (40*1024*1024) #define MH_TH_CARD_BUNDLE (180*1024*1024) #endif //CARD_BUNDLE // min size to decommit to make the OS call worthwhile #define MIN_DECOMMIT_SIZE (100*OS_PAGE_SIZE) // max size to decommit per millisecond #define DECOMMIT_SIZE_PER_MILLISECOND (160*1024) // time in milliseconds between decommit steps #define DECOMMIT_TIME_STEP_MILLISECONDS (100) inline size_t align_on_page (size_t add) { return ((add + OS_PAGE_SIZE - 1) & ~((size_t)OS_PAGE_SIZE - 1)); } inline uint8_t* align_on_page (uint8_t* add) { return (uint8_t*)align_on_page ((size_t) add); } inline size_t align_lower_page (size_t add) { return (add & ~((size_t)OS_PAGE_SIZE - 1)); } inline uint8_t* align_lower_page (uint8_t* add) { return (uint8_t*)align_lower_page ((size_t)add); } inline size_t align_write_watch_lower_page (size_t add) { return (add & ~(WRITE_WATCH_UNIT_SIZE - 1)); } inline uint8_t* align_write_watch_lower_page (uint8_t* add) { return (uint8_t*)align_lower_page ((size_t)add); } inline BOOL power_of_two_p (size_t integer) { return !(integer & (integer-1)); } #ifdef USE_REGIONS void region_write_barrier_settings (WriteBarrierParameters* args, gc_heap::region_info* map_region_to_generation_skewed, uint8_t region_shr) { switch (GCConfig::GetGCWriteBarrier()) { default: case GCConfig::WRITE_BARRIER_DEFAULT: case GCConfig::WRITE_BARRIER_REGION_BIT: // bitwise region write barrier is the default now args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; args->region_shr = region_shr; args->region_use_bitwise_write_barrier = true; break; case GCConfig::WRITE_BARRIER_REGION_BYTE: // bytewise region write barrier args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; args->region_shr = region_shr; assert (args->region_use_bitwise_write_barrier == false); break; case GCConfig::WRITE_BARRIER_SERVER: // server write barrier // args should have been zero initialized assert (args->region_use_bitwise_write_barrier == false); assert (args->region_to_generation_table == nullptr); assert (args->region_shr == 0); break; } } #endif //USE_REGIONS void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high #ifdef USE_REGIONS , gc_heap::region_info* map_region_to_generation_skewed , uint8_t region_shr #endif //USE_REGIONS ) { #ifndef USE_REGIONS initGCShadow(); #endif WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::StompEphemeral; args.is_runtime_suspended = true; args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; #ifdef USE_REGIONS region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); #endif //USE_REGIONS GCToEEInterface::StompWriteBarrier(&args); } void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high #ifdef USE_REGIONS , gc_heap::region_info* map_region_to_generation_skewed , uint8_t region_shr #endif //USE_REGIONS ) { WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::Initialize; args.is_runtime_suspended = true; args.requires_upper_bounds_check = false; args.card_table = g_gc_card_table; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES args.card_bundle_table = g_gc_card_bundle_table; #endif args.lowest_address = g_gc_lowest_address; args.highest_address = g_gc_highest_address; args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; #ifdef USE_REGIONS region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); #endif //USE_REGIONS GCToEEInterface::StompWriteBarrier(&args); } //extract the low bits [0,low[ of a uint32_t #define lowbits(wrd, bits) ((wrd) & ((1 << (bits))-1)) //extract the high bits [high, 32] of a uint32_t #define highbits(wrd, bits) ((wrd) & ~((1 << (bits))-1)) class mark; class generation; class heap_segment; class CObjectHeader; class dynamic_data; class l_heap; class sorted_table; class c_synchronize; #ifdef FEATURE_PREMORTEM_FINALIZATION static HRESULT AllocateCFinalize(CFinalize **pCFinalize); #endif // FEATURE_PREMORTEM_FINALIZATION uint8_t* tree_search (uint8_t* tree, uint8_t* old_address); #ifdef USE_INTROSORT #define _sort introsort::sort #elif defined(USE_VXSORT) // in this case we have do_vxsort which takes an additional range that // all items to be sorted are contained in // so do not #define _sort #else //USE_INTROSORT #define _sort qsort1 void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); #endif //USE_INTROSORT void* virtual_alloc (size_t size); void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node = NUMA_NODE_UNDEFINED); /* per heap static initialization */ #if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) uint32_t* gc_heap::mark_array; #endif //BACKGROUND_GC && !MULTIPLE_HEAPS uint8_t** gc_heap::g_mark_list; uint8_t** gc_heap::g_mark_list_copy; size_t gc_heap::mark_list_size; size_t gc_heap::g_mark_list_total_size; bool gc_heap::mark_list_overflow; #ifdef USE_REGIONS uint8_t*** gc_heap::g_mark_list_piece; size_t gc_heap::g_mark_list_piece_size; size_t gc_heap::g_mark_list_piece_total_size; #endif //USE_REGIONS seg_mapping* seg_mapping_table; #ifdef FEATURE_BASICFREEZE sorted_table* gc_heap::seg_table; #endif //FEATURE_BASICFREEZE #ifdef MULTIPLE_HEAPS GCEvent gc_heap::ee_suspend_event; size_t gc_heap::min_gen0_balance_delta = 0; size_t gc_heap::min_balance_threshold = 0; #endif //MULTIPLE_HEAPS VOLATILE(BOOL) gc_heap::gc_started; #ifdef MULTIPLE_HEAPS GCEvent gc_heap::gc_start_event; bool gc_heap::gc_thread_no_affinitize_p = false; uintptr_t process_mask = 0; int gc_heap::n_heaps; // current number of heaps int gc_heap::n_max_heaps; // maximum number of heaps gc_heap** gc_heap::g_heaps; #if !defined(USE_REGIONS) || defined(_DEBUG) size_t* gc_heap::g_promoted; #endif //!USE_REGIONS || _DEBUG #ifdef MH_SC_MARK int* gc_heap::g_mark_stack_busy; #endif //MH_SC_MARK #ifdef BACKGROUND_GC size_t* gc_heap::g_bpromoted; #endif //BACKGROUND_GC BOOL gc_heap::gradual_decommit_in_progress_p = FALSE; size_t gc_heap::max_decommit_step_size = 0; #else //MULTIPLE_HEAPS #if !defined(USE_REGIONS) || defined(_DEBUG) size_t gc_heap::g_promoted; #endif //!USE_REGIONS || _DEBUG #ifdef BACKGROUND_GC size_t gc_heap::g_bpromoted; #endif //BACKGROUND_GC // this is just to have fewer #ifdefs in code shared between WKS and SVR // for filling out ScanContext structs const int n_heaps = 1; #endif //MULTIPLE_HEAPS size_t gc_heap::card_table_element_layout[total_bookkeeping_elements + 1]; uint8_t* gc_heap::bookkeeping_start = nullptr; #ifdef USE_REGIONS uint8_t* gc_heap::bookkeeping_covered_committed = nullptr; size_t gc_heap::bookkeeping_sizes[total_bookkeeping_elements]; #endif //USE_REGIONS size_t gc_heap::reserved_memory = 0; size_t gc_heap::reserved_memory_limit = 0; BOOL gc_heap::g_low_memory_status; static gc_reason gc_trigger_reason = reason_empty; gc_latency_level gc_heap::latency_level = latency_level_default; gc_mechanisms gc_heap::settings; gc_history_global gc_heap::gc_data_global; uint64_t gc_heap::gc_last_ephemeral_decommit_time = 0; CLRCriticalSection gc_heap::check_commit_cs; #ifdef COMMITTED_BYTES_SHADOW CLRCriticalSection gc_heap::decommit_lock; #endif //COMMITTED_BYTES_SHADOW size_t gc_heap::current_total_committed = 0; size_t gc_heap::committed_by_oh[recorded_committed_bucket_counts]; size_t gc_heap::current_total_committed_bookkeeping = 0; BOOL gc_heap::reset_mm_p = TRUE; #ifdef FEATURE_EVENT_TRACE bool gc_heap::informational_event_enabled_p = false; uint64_t* gc_heap::gc_time_info = 0; #ifdef BACKGROUND_GC uint64_t* gc_heap::bgc_time_info = 0; #endif //BACKGROUND_GC size_t gc_heap::physical_memory_from_config = 0; size_t gc_heap::gen0_min_budget_from_config = 0; size_t gc_heap::gen0_max_budget_from_config = 0; int gc_heap::high_mem_percent_from_config = 0; bool gc_heap::use_frozen_segments_p = false; #ifdef FEATURE_LOH_COMPACTION gc_heap::etw_loh_compact_info* gc_heap::loh_compact_info; #endif //FEATURE_LOH_COMPACTION #endif //FEATURE_EVENT_TRACE bool gc_heap::hard_limit_config_p = false; #if defined(SHORT_PLUGS) && !defined(USE_REGIONS) double gc_heap::short_plugs_pad_ratio = 0; #endif //SHORT_PLUGS && !USE_REGIONS int gc_heap::generation_skip_ratio_threshold = 0; int gc_heap::conserve_mem_setting = 0; bool gc_heap::spin_count_unit_config_p = false; uint64_t gc_heap::suspended_start_time = 0; uint64_t gc_heap::end_gc_time = 0; uint64_t gc_heap::total_suspended_time = 0; uint64_t gc_heap::process_start_time = 0; last_recorded_gc_info gc_heap::last_ephemeral_gc_info; last_recorded_gc_info gc_heap::last_full_blocking_gc_info; uint64_t gc_heap::last_alloc_reset_suspended_end_time = 0; size_t gc_heap::max_peak_heap_size = 0; VOLATILE(size_t) gc_heap::llc_size = 0; #ifdef BACKGROUND_GC last_recorded_gc_info gc_heap::last_bgc_info[2]; VOLATILE(bool) gc_heap::is_last_recorded_bgc = false; VOLATILE(int) gc_heap::last_bgc_info_index = 0; #endif //BACKGROUND_GC #ifdef DYNAMIC_HEAP_COUNT size_t gc_heap::hc_change_cancelled_count_prep = 0; #ifdef BACKGROUND_GC int gc_heap::bgc_th_creation_hist_index = 0; gc_heap::bgc_thread_creation_history gc_heap::bgc_th_creation_hist[max_bgc_thread_creation_count]; size_t gc_heap::bgc_th_count_created = 0; size_t gc_heap::bgc_th_count_created_th_existed = 0; size_t gc_heap::bgc_th_count_creation_failed = 0; size_t gc_heap::bgc_init_gc_index = 0; VOLATILE(short) gc_heap::bgc_init_n_heaps = 0; size_t gc_heap::hc_change_cancelled_count_bgc = 0; #endif //BACKGROUND_GC #endif //DYNAMIC_HEAP_COUNT #if defined(HOST_64BIT) #define MAX_ALLOWED_MEM_LOAD 85 // consider putting this in dynamic data - // we may want different values for workstation // and server GC. #define MIN_YOUNGEST_GEN_DESIRED (16*1024*1024) size_t gc_heap::youngest_gen_desired_th; #endif //HOST_64BIT uint64_t gc_heap::mem_one_percent = 0; uint32_t gc_heap::high_memory_load_th = 0; uint32_t gc_heap::m_high_memory_load_th; uint32_t gc_heap::v_high_memory_load_th; uint32_t gc_heap::almost_high_memory_load_th; bool gc_heap::is_restricted_physical_mem; uint64_t gc_heap::total_physical_mem = 0; uint64_t gc_heap::entry_available_physical_mem = 0; size_t gc_heap::heap_hard_limit = 0; size_t gc_heap::heap_hard_limit_oh[total_oh_count]; #ifdef USE_REGIONS size_t gc_heap::regions_range = 0; #endif //USE_REGIONS bool affinity_config_specified_p = false; #ifdef USE_REGIONS region_allocator global_region_allocator; uint8_t*(*initial_regions)[total_generation_count][2] = nullptr; size_t gc_heap::region_count = 0; gc_heap::region_info* gc_heap::map_region_to_generation = nullptr; gc_heap::region_info* gc_heap::map_region_to_generation_skewed = nullptr; #endif //USE_REGIONS #ifdef BACKGROUND_GC GCEvent gc_heap::bgc_start_event; gc_mechanisms gc_heap::saved_bgc_settings; gc_history_global gc_heap::bgc_data_global; GCEvent gc_heap::background_gc_done_event; GCEvent gc_heap::ee_proceed_event; bool gc_heap::gc_can_use_concurrent = false; bool gc_heap::temp_disable_concurrent_p = false; uint32_t gc_heap::cm_in_progress = FALSE; BOOL gc_heap::dont_restart_ee_p = FALSE; BOOL gc_heap::keep_bgc_threads_p = FALSE; GCEvent gc_heap::bgc_threads_sync_event; BOOL gc_heap::do_ephemeral_gc_p = FALSE; BOOL gc_heap::do_concurrent_p = FALSE; size_t gc_heap::ephemeral_fgc_counts[max_generation]; VOLATILE(c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free; VOLATILE(BOOL) gc_heap::gc_background_running = FALSE; #endif //BACKGROUND_GC #ifdef USE_REGIONS #ifdef MULTIPLE_HEAPS uint8_t* gc_heap::gc_low; uint8_t* gc_heap::gc_high; #endif //MULTIPLE_HEAPS VOLATILE(uint8_t*) gc_heap::ephemeral_low; VOLATILE(uint8_t*) gc_heap::ephemeral_high; #endif //USE_REGIONS #ifndef MULTIPLE_HEAPS #ifdef SPINLOCK_HISTORY int gc_heap::spinlock_info_index = 0; spinlock_info gc_heap::last_spinlock_info[max_saved_spinlock_info]; allocation_state gc_heap::current_uoh_alloc_state = (allocation_state)-1; #endif //SPINLOCK_HISTORY uint32_t gc_heap::fgn_maxgen_percent = 0; size_t gc_heap::fgn_last_alloc = 0; int gc_heap::generation_skip_ratio = 100; #ifdef FEATURE_CARD_MARKING_STEALING VOLATILE(size_t) gc_heap::n_eph_soh = 0; VOLATILE(size_t) gc_heap::n_gen_soh = 0; VOLATILE(size_t) gc_heap::n_eph_loh = 0; VOLATILE(size_t) gc_heap::n_gen_loh = 0; #endif //FEATURE_CARD_MARKING_STEALING uint64_t gc_heap::loh_alloc_since_cg = 0; BOOL gc_heap::elevation_requested = FALSE; BOOL gc_heap::last_gc_before_oom = FALSE; BOOL gc_heap::sufficient_gen0_space_p = FALSE; BOOL gc_heap::decide_promote_gen1_pins_p = TRUE; #ifdef BACKGROUND_GC uint8_t* gc_heap::background_saved_lowest_address = 0; uint8_t* gc_heap::background_saved_highest_address = 0; uint8_t* gc_heap::next_sweep_obj = 0; uint8_t* gc_heap::current_sweep_pos = 0; #ifdef DOUBLY_LINKED_FL heap_segment* gc_heap::current_sweep_seg = 0; #endif //DOUBLY_LINKED_FL exclusive_sync* gc_heap::bgc_alloc_lock; #endif //BACKGROUND_GC oom_history gc_heap::oom_info; int gc_heap::oomhist_index_per_heap = 0; oom_history gc_heap::oomhist_per_heap[max_oom_history_count]; fgm_history gc_heap::fgm_result; size_t gc_heap::allocated_since_last_gc[total_oh_count]; #ifndef USE_REGIONS BOOL gc_heap::ro_segments_in_range = FALSE; uint8_t* gc_heap::ephemeral_low; uint8_t* gc_heap::ephemeral_high; BOOL gc_heap::ephemeral_promotion; uint8_t* gc_heap::saved_ephemeral_plan_start[ephemeral_generation_count]; size_t gc_heap::saved_ephemeral_plan_start_size[ephemeral_generation_count]; #endif //!USE_REGIONS uint8_t* gc_heap::lowest_address; uint8_t* gc_heap::highest_address; short* gc_heap::brick_table; uint32_t* gc_heap::card_table; #ifdef CARD_BUNDLE uint32_t* gc_heap::card_bundle_table; #endif //CARD_BUNDLE uint8_t* gc_heap::gc_low = 0; uint8_t* gc_heap::gc_high = 0; #ifndef USE_REGIONS uint8_t* gc_heap::demotion_low; uint8_t* gc_heap::demotion_high; uint8_t* gc_heap::last_gen1_pin_end; #endif //!USE_REGIONS gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons; size_t gc_heap::etw_allocation_running_amount[total_oh_count]; uint64_t gc_heap::total_alloc_bytes_soh = 0; uint64_t gc_heap::total_alloc_bytes_uoh = 0; int gc_heap::gc_policy = 0; uint64_t gc_heap::allocation_running_time; size_t gc_heap::allocation_running_amount; heap_segment* gc_heap::ephemeral_heap_segment = 0; #ifdef USE_REGIONS #ifdef STRESS_REGIONS OBJECTHANDLE* gc_heap::pinning_handles_for_alloc = 0; int gc_heap::ph_index_per_heap = 0; int gc_heap::pinning_seg_interval = 2; size_t gc_heap::num_gen0_regions = 0; int gc_heap::sip_seg_interval = 0; int gc_heap::sip_seg_maxgen_interval = 0; size_t gc_heap::num_condemned_regions = 0; #endif //STRESS_REGIONS region_free_list gc_heap::free_regions[count_free_region_kinds]; int gc_heap::num_regions_freed_in_sweep = 0; int gc_heap::regions_per_gen[max_generation + 1]; int gc_heap::planned_regions_per_gen[max_generation + 1]; int gc_heap::sip_maxgen_regions_per_gen[max_generation + 1]; heap_segment* gc_heap::reserved_free_regions_sip[max_generation]; int gc_heap::new_gen0_regions_in_plns = 0; int gc_heap::new_regions_in_prr = 0; int gc_heap::new_regions_in_threading = 0; size_t gc_heap::end_gen0_region_space = 0; size_t gc_heap::end_gen0_region_committed_space = 0; size_t gc_heap::gen0_pinned_free_space = 0; bool gc_heap::gen0_large_chunk_found = false; size_t* gc_heap::survived_per_region = nullptr; size_t* gc_heap::old_card_survived_per_region = nullptr; #endif //USE_REGIONS BOOL gc_heap::blocking_collection = FALSE; heap_segment* gc_heap::freeable_uoh_segment = 0; uint64_t gc_heap::time_bgc_last = 0; size_t gc_heap::mark_stack_tos = 0; size_t gc_heap::mark_stack_bos = 0; size_t gc_heap::mark_stack_array_length = 0; mark* gc_heap::mark_stack_array = 0; #if defined (_DEBUG) && defined (VERIFY_HEAP) BOOL gc_heap::verify_pinned_queue_p = FALSE; #endif //_DEBUG && VERIFY_HEAP uint8_t* gc_heap::oldest_pinned_plug = 0; size_t gc_heap::num_pinned_objects = 0; #ifdef FEATURE_LOH_COMPACTION size_t gc_heap::loh_pinned_queue_tos = 0; size_t gc_heap::loh_pinned_queue_bos = 0; size_t gc_heap::loh_pinned_queue_length = 0; mark* gc_heap::loh_pinned_queue = 0; BOOL gc_heap::loh_compacted_p = FALSE; #endif //FEATURE_LOH_COMPACTION #ifdef BACKGROUND_GC EEThreadId gc_heap::bgc_thread_id; uint8_t* gc_heap::background_written_addresses [array_size+2]; heap_segment* gc_heap::freeable_soh_segment = 0; size_t gc_heap::bgc_overflow_count = 0; size_t gc_heap::bgc_begin_uoh_size[uoh_generation_count] = {}; size_t gc_heap::bgc_uoh_current_size[uoh_generation_count] = {}; size_t gc_heap::end_uoh_size[uoh_generation_count] = {}; size_t gc_heap::uoh_a_no_bgc[uoh_generation_count] = {}; size_t gc_heap::uoh_a_bgc_marking[uoh_generation_count] = {}; size_t gc_heap::uoh_a_bgc_planning[uoh_generation_count] = {}; #ifdef BGC_SERVO_TUNING size_t gc_heap::bgc_maxgen_end_fl_size = 0; #endif //BGC_SERVO_TUNING size_t gc_heap::background_soh_size_end_mark = 0; size_t gc_heap::background_soh_alloc_count = 0; uint8_t** gc_heap::background_mark_stack_tos = 0; uint8_t** gc_heap::background_mark_stack_array = 0; size_t gc_heap::background_mark_stack_array_length = 0; BOOL gc_heap::processed_eph_overflow_p = FALSE; #ifdef USE_REGIONS BOOL gc_heap::background_overflow_p = FALSE; #else //USE_REGIONS uint8_t* gc_heap::background_min_overflow_address =0; uint8_t* gc_heap::background_max_overflow_address =0; uint8_t* gc_heap::background_min_soh_overflow_address =0; uint8_t* gc_heap::background_max_soh_overflow_address =0; heap_segment* gc_heap::saved_overflow_ephemeral_seg = 0; heap_segment* gc_heap::saved_sweep_ephemeral_seg = 0; uint8_t* gc_heap::saved_sweep_ephemeral_start = 0; #endif //USE_REGIONS Thread* gc_heap::bgc_thread = 0; uint8_t** gc_heap::c_mark_list = 0; size_t gc_heap::c_mark_list_length = 0; size_t gc_heap::c_mark_list_index = 0; gc_history_per_heap gc_heap::bgc_data_per_heap; BOOL gc_heap::bgc_thread_running; CLRCriticalSection gc_heap::bgc_threads_timeout_cs; #endif //BACKGROUND_GC uint8_t** gc_heap::mark_list; uint8_t** gc_heap::mark_list_index; uint8_t** gc_heap::mark_list_end; #ifdef SNOOP_STATS snoop_stats_data gc_heap::snoop_stat; #endif //SNOOP_STATS uint8_t* gc_heap::min_overflow_address = MAX_PTR; uint8_t* gc_heap::max_overflow_address = 0; uint8_t* gc_heap::shigh = 0; uint8_t* gc_heap::slow = MAX_PTR; #ifndef USE_REGIONS size_t gc_heap::ordered_free_space_indices[MAX_NUM_BUCKETS]; size_t gc_heap::saved_ordered_free_space_indices[MAX_NUM_BUCKETS]; size_t gc_heap::ordered_plug_indices[MAX_NUM_BUCKETS]; size_t gc_heap::saved_ordered_plug_indices[MAX_NUM_BUCKETS]; BOOL gc_heap::ordered_plug_indices_init = FALSE; BOOL gc_heap::use_bestfit = FALSE; uint8_t* gc_heap::bestfit_first_pin = 0; BOOL gc_heap::commit_end_of_seg = FALSE; size_t gc_heap::max_free_space_items = 0; size_t gc_heap::free_space_buckets = 0; size_t gc_heap::free_space_items = 0; int gc_heap::trimmed_free_space_index = 0; size_t gc_heap::total_ephemeral_plugs = 0; seg_free_spaces* gc_heap::bestfit_seg = 0; size_t gc_heap::total_ephemeral_size = 0; #endif //!USE_REGIONS #ifdef HEAP_ANALYZE size_t gc_heap::internal_root_array_length = initial_internal_roots; uint8_t** gc_heap::internal_root_array = 0; size_t gc_heap::internal_root_array_index = 0; BOOL gc_heap::heap_analyze_success = TRUE; uint8_t* gc_heap::current_obj = 0; size_t gc_heap::current_obj_size = 0; #endif //HEAP_ANALYZE #ifdef GC_CONFIG_DRIVEN size_t gc_heap::interesting_data_per_gc[max_idp_count]; //size_t gc_heap::interesting_data_per_heap[max_idp_count]; //size_t gc_heap::interesting_mechanisms_per_heap[max_im_count]; #endif //GC_CONFIG_DRIVEN #endif //MULTIPLE_HEAPS no_gc_region_info gc_heap::current_no_gc_region_info; FinalizerWorkItem* gc_heap::finalizer_work; BOOL gc_heap::proceed_with_gc_p = FALSE; GCSpinLock gc_heap::gc_lock; #ifdef FEATURE_JAVAMARSHAL uint8_t** gc_heap::global_bridge_list; size_t gc_heap::num_global_bridge_objs; #endif //FEATURE_JAVAMARSHAL #ifdef BACKGROUND_GC uint64_t gc_heap::total_uoh_a_last_bgc = 0; #endif //BACKGROUND_GC #ifdef USE_REGIONS region_free_list gc_heap::global_regions_to_decommit[count_free_region_kinds]; region_free_list gc_heap::global_free_huge_regions; #else //USE_REGIONS size_t gc_heap::eph_gen_starts_size = 0; heap_segment* gc_heap::segment_standby_list; #endif //USE_REGIONS bool gc_heap::use_large_pages_p = 0; bool gc_heap::large_pages_emulation_mode_p = 0; bool gc_heap::never_decommit_p = 0; #ifdef HEAP_BALANCE_INSTRUMENTATION size_t gc_heap::last_gc_end_time_us = 0; #endif //HEAP_BALANCE_INSTRUMENTATION #ifdef USE_REGIONS bool gc_heap::enable_special_regions_p = false; #else //USE_REGIONS size_t gc_heap::min_segment_size = 0; size_t gc_heap::min_uoh_segment_size = 0; #endif //!USE_REGIONS size_t gc_heap::min_segment_size_shr = 0; size_t gc_heap::soh_segment_size = 0; size_t gc_heap::segment_info_size = 0; #ifdef GC_CONFIG_DRIVEN size_t gc_heap::compact_or_sweep_gcs[2]; #endif //GC_CONFIG_DRIVEN #ifdef FEATURE_LOH_COMPACTION BOOL gc_heap::loh_compaction_always_p = FALSE; gc_loh_compaction_mode gc_heap::loh_compaction_mode = loh_compaction_default; #endif //FEATURE_LOH_COMPACTION GCEvent gc_heap::full_gc_approach_event; GCEvent gc_heap::full_gc_end_event; uint32_t gc_heap::fgn_loh_percent = 0; #ifdef BACKGROUND_GC BOOL gc_heap::fgn_last_gc_was_concurrent = FALSE; #endif //BACKGROUND_GC VOLATILE(bool) gc_heap::full_gc_approach_event_set; size_t gc_heap::full_gc_counts[gc_type_max]; bool gc_heap::maxgen_size_inc_p = false; #ifndef USE_REGIONS BOOL gc_heap::should_expand_in_full_gc = FALSE; #endif //!USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default; gc_heap::dynamic_heap_count_data_t SVR::gc_heap::dynamic_heap_count_data; size_t gc_heap::current_total_soh_stable_size = 0; uint64_t gc_heap::last_suspended_end_time = 0; uint64_t gc_heap::change_heap_count_time = 0; uint64_t gc_heap::total_change_heap_count = 0; uint64_t gc_heap::total_change_heap_count_time = 0; size_t gc_heap::gc_index_full_gc_end = 0; uint64_t gc_heap::before_distribute_free_regions_time = 0; bool gc_heap::trigger_initial_gen2_p = false; #ifdef BACKGROUND_GC bool gc_heap::trigger_bgc_for_rethreading_p = false; int gc_heap::total_bgc_threads = 0; int gc_heap::last_bgc_n_heaps = 0; int gc_heap::last_total_bgc_threads = 0; #endif //BACKGROUND_GC #ifdef STRESS_DYNAMIC_HEAP_COUNT int gc_heap::heaps_in_this_gc = 0; int gc_heap::bgc_to_ngc2_ratio = 0; #endif //STRESS_DYNAMIC_HEAP_COUNT #endif // DYNAMIC_HEAP_COUNT // Provisional mode related stuff. bool gc_heap::provisional_mode_triggered = false; bool gc_heap::pm_trigger_full_gc = false; size_t gc_heap::provisional_triggered_gc_count = 0; size_t gc_heap::provisional_off_gc_count = 0; size_t gc_heap::num_provisional_triggered = 0; bool gc_heap::pm_stress_on = false; #ifdef HEAP_ANALYZE BOOL gc_heap::heap_analyze_enabled = FALSE; #endif //HEAP_ANALYZE #ifndef MULTIPLE_HEAPS alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST - 1]; alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST - 1]; alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST - 1]; #ifdef DOUBLY_LINKED_FL // size we removed with no undo; only for recording purpose size_t gc_heap::gen2_removed_no_undo = 0; size_t gc_heap::saved_pinned_plug_index = INVALID_SAVED_PINNED_PLUG_INDEX; #endif //DOUBLY_LINKED_FL #ifdef FEATURE_EVENT_TRACE etw_bucket_info gc_heap::bucket_info[NUM_GEN2_ALIST]; #endif //FEATURE_EVENT_TRACE dynamic_data gc_heap::dynamic_data_table [total_generation_count]; gc_history_per_heap gc_heap::gc_data_per_heap; size_t gc_heap::total_promoted_bytes = 0; size_t gc_heap::finalization_promoted_bytes = 0; size_t gc_heap::maxgen_pinned_compact_before_advance = 0; uint8_t* gc_heap::alloc_allocated = 0; size_t gc_heap::allocation_quantum = CLR_SIZE; GCSpinLock gc_heap::more_space_lock_soh; GCSpinLock gc_heap::more_space_lock_uoh; #ifdef BACKGROUND_GC VOLATILE(int32_t) gc_heap::uoh_alloc_thread_count = 0; #endif //BACKGROUND_GC #ifdef SYNCHRONIZATION_STATS unsigned int gc_heap::good_suspension = 0; unsigned int gc_heap::bad_suspension = 0; uint64_t gc_heap::total_msl_acquire = 0; unsigned int gc_heap::num_msl_acquired = 0; unsigned int gc_heap::num_high_msl_acquire = 0; unsigned int gc_heap::num_low_msl_acquire = 0; #endif //SYNCHRONIZATION_STATS size_t gc_heap::alloc_contexts_used = 0; size_t gc_heap::soh_allocation_no_gc = 0; size_t gc_heap::loh_allocation_no_gc = 0; bool gc_heap::no_gc_oom_p = false; heap_segment* gc_heap::saved_loh_segment_no_gc = 0; #endif //MULTIPLE_HEAPS #ifndef MULTIPLE_HEAPS BOOL gc_heap::gen0_bricks_cleared = FALSE; int gc_heap::gen0_must_clear_bricks = 0; #ifdef FEATURE_PREMORTEM_FINALIZATION CFinalize* gc_heap::finalize_queue = 0; #endif // FEATURE_PREMORTEM_FINALIZATION #ifdef FEATURE_CARD_MARKING_STEALING VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh; VOLATILE(bool) gc_heap::card_mark_done_soh; VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh; VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh; VOLATILE(bool) gc_heap::card_mark_done_uoh; #endif // FEATURE_CARD_MARKING_STEALING generation gc_heap::generation_table [total_generation_count]; size_t gc_heap::interesting_data_per_heap[max_idp_count]; size_t gc_heap::compact_reasons_per_heap[max_compact_reasons_count]; size_t gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count]; size_t gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count]; mark_queue_t gc_heap::mark_queue; #ifdef USE_REGIONS bool gc_heap::special_sweep_p = false; #endif //USE_REGIONS int gc_heap::loh_pinned_queue_decay = LOH_PIN_DECAY; #endif // MULTIPLE_HEAPS /* end of per heap static initialization */ #ifdef USE_REGIONS const size_t uninitialized_end_gen0_region_space = (size_t)(-1); #endif //USE_REGIONS // budget smoothing size_t gc_heap::smoothed_desired_total[total_generation_count]; /* end of static initialization */ // This is for methods that need to iterate through all SOH heap segments/regions. inline int get_start_generation_index() { #ifdef USE_REGIONS return 0; #else return max_generation; #endif //USE_REGIONS } inline int get_stop_generation_index (int condemned_gen_number) { #ifdef USE_REGIONS return 0; #else return condemned_gen_number; #endif //USE_REGIONS } void gen_to_condemn_tuning::print (int heap_num) { #ifdef DT_LOG dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition)); dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header)); gc_condemn_reason_gen r_gen; for (int i = 0; i < gcrg_max; i++) { r_gen = (gc_condemn_reason_gen)(i); str_reasons_gen[i * 2] = get_gen_char (get_gen (r_gen)); } dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_gen)); dprintf (DT_LOG_0, ("%s", record_condemn_reasons_condition_header)); gc_condemn_reason_condition r_condition; for (int i = 0; i < gcrc_max; i++) { r_condition = (gc_condemn_reason_condition)(i); str_reasons_condition[i * 2] = get_condition_char (get_condition (r_condition)); } dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_condition)); #else UNREFERENCED_PARAMETER(heap_num); #endif //DT_LOG } void gc_generation_data::print (int heap_num, int gen_num) { #if defined(SIMPLE_DPRINTF) && defined(DT_LOG) dprintf (DT_LOG_0, ("[%2d]gen%d beg %zd fl %zd fo %zd end %zd fl %zd fo %zd in %zd p %zd np %zd alloc %zd", heap_num, gen_num, size_before, free_list_space_before, free_obj_space_before, size_after, free_list_space_after, free_obj_space_after, in, pinned_surv, npinned_surv, new_allocation)); #else UNREFERENCED_PARAMETER(heap_num); UNREFERENCED_PARAMETER(gen_num); #endif //SIMPLE_DPRINTF && DT_LOG } void gc_history_per_heap::set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value) { uint32_t* mechanism = &mechanisms[mechanism_per_heap]; *mechanism = 0; *mechanism |= mechanism_mask; *mechanism |= (1 << value); #ifdef DT_LOG gc_mechanism_descr* descr = &gc_mechanisms_descr[mechanism_per_heap]; dprintf (DT_LOG_0, ("setting %s: %s", descr->name, (descr->descr)[value])); #endif //DT_LOG } void gc_history_per_heap::print() { #if defined(SIMPLE_DPRINTF) && defined(DT_LOG) for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++) { gen_data[i].print (heap_index, i); } dprintf (DT_LOG_0, ("fla %zd flr %zd esa %zd ca %zd pa %zd paa %zd, rfle %d, ec %zd", maxgen_size_info.free_list_allocated, maxgen_size_info.free_list_rejected, maxgen_size_info.end_seg_allocated, maxgen_size_info.condemned_allocated, maxgen_size_info.pinned_allocated, maxgen_size_info.pinned_allocated_advance, maxgen_size_info.running_free_list_efficiency, extra_gen0_committed)); int mechanism = 0; gc_mechanism_descr* descr = 0; for (int i = 0; i < max_mechanism_per_heap; i++) { mechanism = get_mechanism ((gc_mechanism_per_heap)i); if (mechanism >= 0) { descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i]; dprintf (DT_LOG_0, ("[%2d]%s%s", heap_index, descr->name, (descr->descr)[mechanism])); } } #endif //SIMPLE_DPRINTF && DT_LOG } void gc_history_global::print() { #ifdef DT_LOG char str_settings[64]; memset (str_settings, '|', sizeof (char) * 64); str_settings[max_global_mechanisms_count*2] = 0; for (int i = 0; i < max_global_mechanisms_count; i++) { str_settings[i * 2] = (get_mechanism_p ((gc_global_mechanism_p)i) ? 'Y' : 'N'); } dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|")); dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings)); dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %zd(%d), memload %d", condemned_generation, str_gc_reasons[reason], str_gc_pause_modes[pause_mode], final_youngest_desired, gen0_reduction_count, mem_pressure)); #endif //DT_LOG } uint32_t limit_time_to_uint32 (uint64_t time) { time = min (time, (uint64_t)UINT32_MAX); return (uint32_t)time; } inline BOOL in_range_for_segment(uint8_t* add, heap_segment* seg) { return ((add >= heap_segment_mem (seg)) && (add < heap_segment_reserved (seg))); } #ifdef FEATURE_BASICFREEZE // The array we allocate is organized as follows: // 0th element is the address of the last array we allocated. // starting from the 1st element are the segment addresses, that's // what buckets() returns. struct bk { uint8_t* add; size_t val; }; class sorted_table { private: ptrdiff_t size; ptrdiff_t count; bk* slots; bk* buckets() { return (slots + 1); } uint8_t*& last_slot (bk* arr) { return arr[0].add; } bk* old_slots; public: static sorted_table* make_sorted_table (); BOOL insert (uint8_t* add, size_t val);; size_t lookup (uint8_t*& add); void remove (uint8_t* add); void clear (); void delete_sorted_table(); void delete_old_slots(); void enqueue_old_slot(bk* sl); BOOL ensure_space_for_insert(); }; sorted_table* sorted_table::make_sorted_table () { size_t size = 400; // allocate one more bk to store the older slot address. sorted_table* res = (sorted_table*)new (nothrow) char [sizeof (sorted_table) + (size + 1) * sizeof (bk)]; if (!res) return 0; res->size = size; res->slots = (bk*)(res + 1); res->old_slots = 0; res->clear(); return res; } void sorted_table::delete_sorted_table() { if (slots != (bk*)(this+1)) { delete[] slots; } delete_old_slots(); } void sorted_table::delete_old_slots() { uint8_t* sl = (uint8_t*)old_slots; while (sl) { uint8_t* dsl = sl; sl = last_slot ((bk*)sl); delete[] dsl; } old_slots = 0; } void sorted_table::enqueue_old_slot(bk* sl) { last_slot (sl) = (uint8_t*)old_slots; old_slots = sl; } inline size_t sorted_table::lookup (uint8_t*& add) { ptrdiff_t high = (count-1); ptrdiff_t low = 0; ptrdiff_t ti; ptrdiff_t mid; bk* buck = buckets(); while (low <= high) { mid = ((low + high)/2); ti = mid; if (buck[ti].add > add) { if ((ti > 0) && (buck[ti-1].add <= add)) { add = buck[ti-1].add; return buck[ti - 1].val; } high = mid - 1; } else { if (buck[ti+1].add > add) { add = buck[ti].add; return buck[ti].val; } low = mid + 1; } } add = 0; return 0; } BOOL sorted_table::ensure_space_for_insert() { if (count == size) { size = (size * 3)/2; assert((size * sizeof (bk)) > 0); bk* res = (bk*)new (nothrow) char [(size + 1) * sizeof (bk)]; assert (res); if (!res) return FALSE; last_slot (res) = 0; memcpy (((bk*)res + 1), buckets(), count * sizeof (bk)); bk* last_old_slots = slots; slots = res; if (last_old_slots != (bk*)(this + 1)) enqueue_old_slot (last_old_slots); } return TRUE; } BOOL sorted_table::insert (uint8_t* add, size_t val) { //grow if no more room assert (count < size); //insert sorted ptrdiff_t high = (count-1); ptrdiff_t low = 0; ptrdiff_t ti; ptrdiff_t mid; bk* buck = buckets(); while (low <= high) { mid = ((low + high)/2); ti = mid; if (buck[ti].add > add) { if ((ti == 0) || (buck[ti-1].add <= add)) { // found insertion point for (ptrdiff_t k = count; k > ti;k--) { buck [k] = buck [k-1]; } buck[ti].add = add; buck[ti].val = val; count++; return TRUE; } high = mid - 1; } else { if (buck[ti+1].add > add) { //found the insertion point for (ptrdiff_t k = count; k > ti+1;k--) { buck [k] = buck [k-1]; } buck[ti+1].add = add; buck[ti+1].val = val; count++; return TRUE; } low = mid + 1; } } assert (0); return TRUE; } void sorted_table::remove (uint8_t* add) { ptrdiff_t high = (count-1); ptrdiff_t low = 0; ptrdiff_t ti; ptrdiff_t mid; bk* buck = buckets(); while (low <= high) { mid = ((low + high)/2); ti = mid; if (buck[ti].add > add) { if (buck[ti-1].add <= add) { for (ptrdiff_t k = ti; k < count; k++) buck[k-1] = buck[k]; count--; return; } high = mid - 1; } else { if (buck[ti+1].add > add) { for (ptrdiff_t k = ti+1; k < count; k++) buck[k-1] = buck[k]; count--; return; } low = mid + 1; } } assert (0); } void sorted_table::clear() { count = 1; buckets()[0].add = MAX_PTR; } #endif //FEATURE_BASICFREEZE #ifdef USE_REGIONS inline size_t get_skewed_basic_region_index_for_address (uint8_t* address) { assert ((g_gc_lowest_address <= address) && (address <= g_gc_highest_address)); size_t skewed_basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; return skewed_basic_region_index; } inline size_t get_basic_region_index_for_address (uint8_t* address) { size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (address); return (skewed_basic_region_index - get_skewed_basic_region_index_for_address (g_gc_lowest_address)); } // Go from a random address to its region info. The random address could be // in one of the basic regions of a larger region so we need to check for that. inline heap_segment* get_region_info_for_address (uint8_t* address) { size_t basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; heap_segment* basic_region_info_entry = (heap_segment*)&seg_mapping_table[basic_region_index]; ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated (basic_region_info_entry); if (first_field < 0) { basic_region_index += first_field; } return ((heap_segment*)(&seg_mapping_table[basic_region_index])); } // Go from the physical start of a region to its region info. inline heap_segment* get_region_info (uint8_t* region_start) { size_t region_index = (size_t)region_start >> gc_heap::min_segment_size_shr; heap_segment* region_info_entry = (heap_segment*)&seg_mapping_table[region_index]; dprintf (REGIONS_LOG, ("region info for region %p is at %zd, %zx (alloc: %p)", region_start, region_index, (size_t)region_info_entry, heap_segment_allocated (region_info_entry))); return (heap_segment*)&seg_mapping_table[region_index]; } // Go from the actual region info to its region start. inline uint8_t* get_region_start (heap_segment* region_info) { uint8_t* obj_start = heap_segment_mem (region_info); return (obj_start - sizeof (aligned_plug_and_gap)); } inline size_t get_region_size (heap_segment* region_info) { return (size_t)(heap_segment_reserved (region_info) - get_region_start (region_info)); } inline size_t get_region_committed_size (heap_segment* region) { uint8_t* start = get_region_start (region); uint8_t* committed = heap_segment_committed (region); return committed - start; } inline bool is_free_region (heap_segment* region) { return (heap_segment_allocated (region) == nullptr); } #endif //USE_REGIONS inline uint8_t* align_on_segment (uint8_t* add) { return (uint8_t*)((size_t)(add + (((size_t)1 << gc_heap::min_segment_size_shr) - 1)) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); } inline uint8_t* align_lower_segment (uint8_t* add) { return (uint8_t*)((size_t)(add) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); } #ifdef FEATURE_BASICFREEZE inline size_t ro_seg_begin_index (heap_segment* seg) { #ifdef USE_REGIONS size_t begin_index = (size_t)heap_segment_mem (seg) >> gc_heap::min_segment_size_shr; #else size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr; #endif //USE_REGIONS begin_index = max (begin_index, (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr); return begin_index; } inline size_t ro_seg_end_index (heap_segment* seg) { size_t end_index = (size_t)(heap_segment_reserved (seg) - 1) >> gc_heap::min_segment_size_shr; end_index = min (end_index, (size_t)g_gc_highest_address >> gc_heap::min_segment_size_shr); return end_index; } heap_segment* ro_segment_lookup (uint8_t* o) { uint8_t* ro_seg_start = o; heap_segment* seg = (heap_segment*)gc_heap::seg_table->lookup (ro_seg_start); if (ro_seg_start && in_range_for_segment (o, seg)) return seg; else return 0; } #endif //FEATURE_BASICFREEZE #ifdef MULTIPLE_HEAPS inline gc_heap* seg_mapping_table_heap_of_worker (uint8_t* o) { size_t index = (size_t)o >> gc_heap::min_segment_size_shr; seg_mapping* entry = &seg_mapping_table[index]; #ifdef USE_REGIONS gc_heap* hp = heap_segment_heap ((heap_segment*)entry); #else gc_heap* hp = ((o > entry->boundary) ? entry->h1 : entry->h0); dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, h0: %p, seg0: %p, h1: %p, seg1: %p", o, index, (entry->boundary + 1), (uint8_t*)(entry->h0), (uint8_t*)(entry->seg0), (uint8_t*)(entry->h1), (uint8_t*)(entry->seg1))); #ifdef _DEBUG heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); #ifdef FEATURE_BASICFREEZE if ((size_t)seg & ro_in_entry) seg = (heap_segment*)((size_t)seg & ~ro_in_entry); #endif //FEATURE_BASICFREEZE #ifdef TRACE_GC if (seg) { if (in_range_for_segment (o, seg)) { dprintf (2, ("obj %p belongs to segment %p(-%p)", o, seg, (uint8_t*)heap_segment_allocated (seg))); } else { dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg", seg, (uint8_t*)heap_segment_allocated (seg), o)); } } else { dprintf (2, ("could not find obj %p in any existing segments", o)); } #endif //TRACE_GC #endif //_DEBUG #endif //USE_REGIONS return hp; } #endif //MULTIPLE_HEAPS // Only returns a valid seg if we can actually find o on the seg. heap_segment* seg_mapping_table_segment_of (uint8_t* o) { #ifdef FEATURE_BASICFREEZE if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) return ro_segment_lookup (o); #endif //FEATURE_BASICFREEZE size_t index = (size_t)o >> gc_heap::min_segment_size_shr; seg_mapping* entry = &seg_mapping_table[index]; #ifdef USE_REGIONS // REGIONS TODO: I think we could simplify this to having the same info for each // basic entry in a large region so we can get it right away instead of having to go // back some entries. ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated ((heap_segment*)entry); if (first_field == 0) { dprintf (REGIONS_LOG, ("asked for seg for %p, in a freed region mem: %p, committed %p", o, heap_segment_mem ((heap_segment*)entry), heap_segment_committed ((heap_segment*)entry))); return 0; } // Regions are never going to intersect an ro seg, so this can never be ro_in_entry. assert (first_field != 0); assert (first_field != ro_in_entry); if (first_field < 0) { index += first_field; } heap_segment* seg = (heap_segment*)&seg_mapping_table[index]; #else //USE_REGIONS dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, seg0: %p, seg1: %p", o, index, (entry->boundary + 1), (uint8_t*)(entry->seg0), (uint8_t*)(entry->seg1))); heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); #ifdef FEATURE_BASICFREEZE if ((size_t)seg & ro_in_entry) seg = (heap_segment*)((size_t)seg & ~ro_in_entry); #endif //FEATURE_BASICFREEZE #endif //USE_REGIONS if (seg) { if (in_range_for_segment (o, seg)) { dprintf (2, ("obj %p belongs to segment %p(-%p)", o, (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg))); } else { dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg, setting it to 0", (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg), o)); seg = 0; } } else { dprintf (2, ("could not find obj %p in any existing segments", o)); } #ifdef FEATURE_BASICFREEZE // TODO: This was originally written assuming that the seg_mapping_table would always contain entries for ro // segments whenever the ro segment falls into the [g_gc_lowest_address,g_gc_highest_address) range. I.e., it had an // extra "&& (size_t)(entry->seg1) & ro_in_entry" expression. However, at the moment, grow_brick_card_table does // not correctly go through the ro segments and add them back to the seg_mapping_table when the [lowest,highest) // range changes. We should probably go ahead and modify grow_brick_card_table and put back the // "&& (size_t)(entry->seg1) & ro_in_entry" here. if (!seg) { seg = ro_segment_lookup (o); if (seg && !in_range_for_segment (o, seg)) seg = 0; } #endif //FEATURE_BASICFREEZE return seg; } size_t gcard_of ( uint8_t*); #define GC_MARKED (size_t)0x1 #ifdef DOUBLY_LINKED_FL // This bit indicates that we'll need to set the bgc mark bit for this object during an FGC. // We only do this when we decide to compact. #define BGC_MARKED_BY_FGC (size_t)0x2 #define MAKE_FREE_OBJ_IN_COMPACT (size_t)0x4 #define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED|BGC_MARKED_BY_FGC|MAKE_FREE_OBJ_IN_COMPACT) #else //DOUBLY_LINKED_FL #define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED) #endif //!DOUBLY_LINKED_FL #ifdef HOST_64BIT #define SPECIAL_HEADER_BITS (0x7) #else #define SPECIAL_HEADER_BITS (0x3) #endif #define slot(i, j) ((uint8_t**)(i))[(j)+1] #define free_object_base_size (plug_skew + sizeof(ArrayBase)) #define free_list_slot(x) ((uint8_t**)(x))[2] #define free_list_undo(x) ((uint8_t**)(x))[-1] #define UNDO_EMPTY ((uint8_t*)1) #ifdef DOUBLY_LINKED_FL #define free_list_prev(x) ((uint8_t**)(x))[3] #define PREV_EMPTY ((uint8_t*)1) void check_and_clear_in_free_list (uint8_t* o, size_t size) { if (size >= min_free_list) { free_list_prev (o) = PREV_EMPTY; } } #endif //DOUBLY_LINKED_FL class CObjectHeader : public Object { public: #if defined(FEATURE_NATIVEAOT) || defined(BUILD_AS_STANDALONE) // The GC expects the following methods that are provided by the Object class in the CLR but not provided // by NativeAOT's version of Object. uint32_t GetNumComponents() { return ((ArrayBase *)this)->GetNumComponents(); } void Validate(BOOL bDeep=TRUE, BOOL bVerifyNextHeader = FALSE, BOOL bVerifySyncBlock = FALSE) { // declaration of extra parameters just so the call site would need no #ifdefs UNREFERENCED_PARAMETER(bVerifyNextHeader); UNREFERENCED_PARAMETER(bVerifySyncBlock); MethodTable * pMT = GetMethodTable(); _ASSERTE(pMT->SanityCheck()); bool noRangeChecks = (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_NO_RANGE_CHECKS) == GCConfig::HEAPVERIFY_NO_RANGE_CHECKS; BOOL fSmallObjectHeapPtr = FALSE, fLargeObjectHeapPtr = FALSE; if (!noRangeChecks) { fSmallObjectHeapPtr = g_theGCHeap->IsHeapPointer(this, TRUE); if (!fSmallObjectHeapPtr) fLargeObjectHeapPtr = g_theGCHeap->IsHeapPointer(this); _ASSERTE(fSmallObjectHeapPtr || fLargeObjectHeapPtr); } #ifdef FEATURE_STRUCTALIGN _ASSERTE(IsStructAligned((uint8_t *)this, GetMethodTable()->GetBaseAlignment())); #endif // FEATURE_STRUCTALIGN #if defined(FEATURE_64BIT_ALIGNMENT) && !defined(FEATURE_NATIVEAOT) if (pMT->RequiresAlign8()) { _ASSERTE((((size_t)this) & 0x7) == (pMT->IsValueType() ? 4U : 0U)); } #endif // FEATURE_64BIT_ALIGNMENT #ifdef VERIFY_HEAP if (bDeep && (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)) g_theGCHeap->ValidateObjectMember(this); #endif if (fSmallObjectHeapPtr) { #ifdef FEATURE_BASICFREEZE _ASSERTE(!g_theGCHeap->IsLargeObject(this) || g_theGCHeap->IsInFrozenSegment(this)); #else _ASSERTE(!g_theGCHeap->IsLargeObject(this)); #endif } } void ValidateHeap(BOOL bDeep) { Validate(bDeep); } #endif //FEATURE_NATIVEAOT || BUILD_AS_STANDALONE ///// // // Header Status Information // MethodTable *GetMethodTable() const { return( (MethodTable *) (((size_t) RawGetMethodTable()) & (~SPECIAL_HEADER_BITS))); } void SetMarked() { _ASSERTE(RawGetMethodTable()); RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | GC_MARKED)); } BOOL IsMarked() const { return !!(((size_t)RawGetMethodTable()) & GC_MARKED); } void SetPinned() { assert (!(gc_heap::settings.concurrent)); GetHeader()->SetGCBit(); } BOOL IsPinned() const { return !!((((CObjectHeader*)this)->GetHeader()->GetBits()) & BIT_SBLK_GC_RESERVE); } // Now we set more bits should actually only clear the mark bit void ClearMarked() { #ifdef DOUBLY_LINKED_FL RawSetMethodTable ((MethodTable *)(((size_t) RawGetMethodTable()) & (~GC_MARKED))); #else RawSetMethodTable (GetMethodTable()); #endif //DOUBLY_LINKED_FL } #ifdef DOUBLY_LINKED_FL void SetBGCMarkBit() { RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | BGC_MARKED_BY_FGC)); } BOOL IsBGCMarkBitSet() const { return !!(((size_t)RawGetMethodTable()) & BGC_MARKED_BY_FGC); } void ClearBGCMarkBit() { RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~BGC_MARKED_BY_FGC))); } void SetFreeObjInCompactBit() { RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | MAKE_FREE_OBJ_IN_COMPACT)); } BOOL IsFreeObjInCompactBitSet() const { return !!(((size_t)RawGetMethodTable()) & MAKE_FREE_OBJ_IN_COMPACT); } void ClearFreeObjInCompactBit() { #ifdef _DEBUG // check this looks like an object, but do NOT validate pointers to other objects // as these may not be valid yet - we are calling this during compact_phase Validate(FALSE); #endif //_DEBUG RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~MAKE_FREE_OBJ_IN_COMPACT))); } #endif //DOUBLY_LINKED_FL size_t ClearSpecialBits() { size_t special_bits = ((size_t)RawGetMethodTable()) & SPECIAL_HEADER_BITS; if (special_bits != 0) { assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) & ~(SPECIAL_HEADER_BITS))); } return special_bits; } void SetSpecialBits (size_t special_bits) { assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); if (special_bits != 0) { RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) | special_bits)); } } CGCDesc *GetSlotMap () { assert (GetMethodTable()->ContainsGCPointers()); return CGCDesc::GetCGCDescFromMT(GetMethodTable()); } void SetFree(size_t size) { assert (size >= free_object_base_size); assert (g_gc_pFreeObjectMethodTable->GetBaseSize() == free_object_base_size); assert (g_gc_pFreeObjectMethodTable->RawGetComponentSize() == 1); RawSetMethodTable( g_gc_pFreeObjectMethodTable ); size_t* numComponentsPtr = (size_t*) &((uint8_t*) this)[ArrayBase::GetOffsetOfNumComponents()]; *numComponentsPtr = size - free_object_base_size; #ifdef VERIFY_HEAP //This introduces a bug in the free list management. //((void**) this)[-1] = 0; // clear the sync block, assert (*numComponentsPtr >= 0); if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) { memset (((uint8_t*)this)+sizeof(ArrayBase), 0xcc, *numComponentsPtr); #ifdef DOUBLY_LINKED_FL // However, in this case we can't leave the Next field uncleared because no one will clear it // so it remains 0xcc and that's not good for verification if (*numComponentsPtr > 0) { free_list_slot (this) = 0; } #endif //DOUBLY_LINKED_FL } #endif //VERIFY_HEAP #ifdef DOUBLY_LINKED_FL // For background GC, we need to distinguish between a free object that's not on the free list // and one that is. So we always set its prev to PREV_EMPTY to indicate that it's a free // object that's not on the free list. If it should be on the free list, it will be set to the // appropriate non zero value. check_and_clear_in_free_list ((uint8_t*)this, size); #endif //DOUBLY_LINKED_FL } void UnsetFree() { size_t size = free_object_base_size - plug_skew; // since we only need to clear 2 ptr size, we do it manually PTR_PTR m = (PTR_PTR) this; for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) *(m++) = 0; } BOOL IsFree () const { return (GetMethodTable() == g_gc_pFreeObjectMethodTable); } #ifdef FEATURE_STRUCTALIGN int GetRequiredAlignment () const { return GetMethodTable()->GetRequiredAlignment(); } #endif // FEATURE_STRUCTALIGN BOOL ContainsGCPointers() const { return GetMethodTable()->ContainsGCPointers(); } #ifdef COLLECTIBLE_CLASS BOOL Collectible() const { return GetMethodTable()->Collectible(); } FORCEINLINE BOOL ContainsGCPointersOrCollectible() const { MethodTable *pMethodTable = GetMethodTable(); return (pMethodTable->ContainsGCPointers() || pMethodTable->Collectible()); } #endif //COLLECTIBLE_CLASS Object* GetObjectBase() const { return (Object*) this; } }; #define header(i) ((CObjectHeader*)(i)) #define method_table(o) ((CObjectHeader*)(o))->GetMethodTable() #ifdef DOUBLY_LINKED_FL inline BOOL is_on_free_list (uint8_t* o, size_t size) { if (size >= min_free_list) { if (header(o)->GetMethodTable() == g_gc_pFreeObjectMethodTable) { return (free_list_prev (o) != PREV_EMPTY); } } return FALSE; } inline void set_plug_bgc_mark_bit (uint8_t* node) { header(node)->SetBGCMarkBit(); } inline BOOL is_plug_bgc_mark_bit_set (uint8_t* node) { return header(node)->IsBGCMarkBitSet(); } inline void clear_plug_bgc_mark_bit (uint8_t* node) { header(node)->ClearBGCMarkBit(); } inline void set_free_obj_in_compact_bit (uint8_t* node) { header(node)->SetFreeObjInCompactBit(); } inline BOOL is_free_obj_in_compact_bit_set (uint8_t* node) { return header(node)->IsFreeObjInCompactBitSet(); } inline void clear_free_obj_in_compact_bit (uint8_t* node) { header(node)->ClearFreeObjInCompactBit(); } #endif //DOUBLY_LINKED_FL #ifdef SHORT_PLUGS inline void set_plug_padded (uint8_t* node) { header(node)->SetMarked(); } inline void clear_plug_padded (uint8_t* node) { header(node)->ClearMarked(); } inline BOOL is_plug_padded (uint8_t* node) { return header(node)->IsMarked(); } #else //SHORT_PLUGS inline void set_plug_padded (uint8_t* node){} inline void clear_plug_padded (uint8_t* node){} inline BOOL is_plug_padded (uint8_t* node){return FALSE;} #endif //SHORT_PLUGS inline size_t unused_array_size(uint8_t * p) { assert(((CObjectHeader*)p)->IsFree()); size_t* numComponentsPtr = (size_t*)(p + ArrayBase::GetOffsetOfNumComponents()); return free_object_base_size + *numComponentsPtr; } inline heap_segment* heap_segment_non_sip (heap_segment* ns) { #ifdef USE_REGIONS if ((ns == 0) || !heap_segment_swept_in_plan (ns)) { return ns; } else { do { if (heap_segment_swept_in_plan (ns)) { dprintf (REGIONS_LOG, ("region %p->%p SIP", heap_segment_mem (ns), heap_segment_allocated (ns))); } ns = heap_segment_next (ns); } while ((ns != 0) && heap_segment_swept_in_plan (ns)); return ns; } #else //USE_REGIONS return ns; #endif //USE_REGIONS } inline heap_segment* heap_segment_next_non_sip (heap_segment* seg) { heap_segment* ns = heap_segment_next (seg); #ifdef USE_REGIONS return heap_segment_non_sip (ns); #else return ns; #endif //USE_REGIONS } heap_segment* heap_segment_rw (heap_segment* ns) { if ((ns == 0) || !heap_segment_read_only_p (ns)) { return ns; } else { do { ns = heap_segment_next (ns); } while ((ns != 0) && heap_segment_read_only_p (ns)); return ns; } } //returns the next non ro segment. heap_segment* heap_segment_next_rw (heap_segment* seg) { heap_segment* ns = heap_segment_next (seg); return heap_segment_rw (ns); } // returns the segment before seg. heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg) { assert (begin != 0); heap_segment* prev = begin; heap_segment* current = heap_segment_next_rw (begin); while (current && current != seg) { prev = current; current = heap_segment_next_rw (current); } if (current == seg) { return prev; } else { return 0; } } // returns the segment before seg. heap_segment* heap_segment_in_range (heap_segment* ns) { if ((ns == 0) || heap_segment_in_range_p (ns)) { return ns; } else { do { ns = heap_segment_next (ns); } while ((ns != 0) && !heap_segment_in_range_p (ns)); return ns; } } heap_segment* heap_segment_next_in_range (heap_segment* seg) { heap_segment* ns = heap_segment_next (seg); return heap_segment_in_range (ns); } struct imemory_data { uint8_t* memory_base; }; struct numa_reserved_block { uint8_t* memory_base; size_t block_size; numa_reserved_block() : memory_base(nullptr), block_size(0) { } }; struct initial_memory_details { imemory_data *initial_memory; imemory_data *initial_normal_heap; // points into initial_memory_array imemory_data *initial_large_heap; // points into initial_memory_array imemory_data *initial_pinned_heap; // points into initial_memory_array size_t block_size_normal; size_t block_size_large; size_t block_size_pinned; int block_count; // # of blocks in each int current_block_normal; int current_block_large; int current_block_pinned; enum { ALLATONCE = 1, EACH_GENERATION, EACH_BLOCK, ALLATONCE_SEPARATED_POH, EACH_NUMA_NODE }; size_t allocation_pattern; size_t block_size(int i) { switch (i / block_count) { case 0: return block_size_normal; case 1: return block_size_large; case 2: return block_size_pinned; default: UNREACHABLE(); } }; void* get_initial_memory (int gen, int h_number) { switch (gen) { case soh_gen0: case soh_gen1: case soh_gen2: return initial_normal_heap[h_number].memory_base; case loh_generation: return initial_large_heap[h_number].memory_base; case poh_generation: return initial_pinned_heap[h_number].memory_base; default: UNREACHABLE(); } }; size_t get_initial_size (int gen) { switch (gen) { case soh_gen0: case soh_gen1: case soh_gen2: return block_size_normal; case loh_generation: return block_size_large; case poh_generation: return block_size_pinned; default: UNREACHABLE(); } }; int numa_reserved_block_count; numa_reserved_block* numa_reserved_block_table; }; initial_memory_details memory_details; heap_segment* make_initial_segment (int gen, int h_number, gc_heap* hp) { void* mem = memory_details.get_initial_memory (gen, h_number); size_t size = memory_details.get_initial_size (gen); heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size, hp, gen); return res; } void* virtual_alloc (size_t size) { return virtual_alloc(size, false); } void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node) { size_t requested_size = size; if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) { gc_heap::reserved_memory_limit = gc_heap::reserved_memory_limit + requested_size; if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) { return 0; } } uint32_t flags = VirtualReserveFlags::None; #ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP if (virtual_alloc_hardware_write_watch) { flags = VirtualReserveFlags::WriteWatch; } #endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP void* prgmem = use_large_pages_p ? GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size, numa_node) : GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags, numa_node); void *aligned_mem = prgmem; // We don't want (prgmem + size) to be right at the end of the address space // because we'd have to worry about that everytime we do (address + size). // We also want to make sure that we leave loh_size_threshold at the end // so we allocate a small object we don't need to worry about overflow there // when we do alloc_ptr+size. if (prgmem) { uint8_t* end_mem = (uint8_t*)prgmem + requested_size; if ((end_mem == 0) || ((size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC)) { GCToOSInterface::VirtualRelease (prgmem, requested_size); dprintf (2, ("Virtual Alloc size %zd returned memory right against 4GB [%zx, %zx[ - discarding", requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); prgmem = 0; aligned_mem = 0; } } if (prgmem) { gc_heap::reserved_memory += requested_size; } dprintf (2, ("Virtual Alloc size %zd: [%zx, %zx[", requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); return aligned_mem; } static size_t get_valid_segment_size (BOOL large_seg=FALSE) { size_t seg_size, initial_seg_size; if (!large_seg) { initial_seg_size = INITIAL_ALLOC; seg_size = static_cast(GCConfig::GetSegmentSize()); } else { initial_seg_size = LHEAP_ALLOC; seg_size = static_cast(GCConfig::GetSegmentSize()) / 2; } #ifdef MULTIPLE_HEAPS #ifdef HOST_64BIT if (!large_seg) #endif // HOST_64BIT { if (g_num_processors > 4) initial_seg_size /= 2; if (g_num_processors > 8) initial_seg_size /= 2; } #endif //MULTIPLE_HEAPS // if seg_size is small but not 0 (0 is default if config not set) // then set the segment to the minimum size if (!g_theGCHeap->IsValidSegmentSize(seg_size)) { // if requested size is between 1 byte and 4MB, use min if ((seg_size >> 1) && !(seg_size >> 22)) seg_size = 1024*1024*4; else seg_size = initial_seg_size; } #ifdef HOST_64BIT seg_size = round_up_power2 (seg_size); #else seg_size = round_down_power2 (seg_size); #endif // HOST_64BIT return (seg_size); } #ifndef USE_REGIONS void gc_heap::compute_new_ephemeral_size() { int eph_gen_max = max_generation - 1 - (settings.promotion ? 1 : 0); size_t padding_size = 0; for (int i = 0; i <= eph_gen_max; i++) { dynamic_data* dd = dynamic_data_of (i); total_ephemeral_size += (dd_survived_size (dd) - dd_pinned_survived_size (dd)); #ifdef RESPECT_LARGE_ALIGNMENT total_ephemeral_size += dd_num_npinned_plugs (dd) * switch_alignment_size (FALSE); #endif //RESPECT_LARGE_ALIGNMENT #ifdef FEATURE_STRUCTALIGN total_ephemeral_size += dd_num_npinned_plugs (dd) * MAX_STRUCTALIGN; #endif //FEATURE_STRUCTALIGN #ifdef SHORT_PLUGS padding_size += dd_padding_size (dd); #endif //SHORT_PLUGS } total_ephemeral_size += eph_gen_starts_size; #ifdef RESPECT_LARGE_ALIGNMENT size_t planned_ephemeral_size = heap_segment_plan_allocated (ephemeral_heap_segment) - generation_plan_allocation_start (generation_of (max_generation-1)); total_ephemeral_size = min (total_ephemeral_size, planned_ephemeral_size); #endif //RESPECT_LARGE_ALIGNMENT #ifdef SHORT_PLUGS total_ephemeral_size = Align ((size_t)((double)total_ephemeral_size * short_plugs_pad_ratio) + 1); total_ephemeral_size += Align (DESIRED_PLUG_LENGTH); #endif //SHORT_PLUGS dprintf (3, ("total ephemeral size is %zx, padding %zx(%zx)", total_ephemeral_size, padding_size, (total_ephemeral_size - padding_size))); } heap_segment* gc_heap::soh_get_segment_to_expand() { size_t size = soh_segment_size; ordered_plug_indices_init = FALSE; use_bestfit = FALSE; //compute the size of the new ephemeral heap segment. compute_new_ephemeral_size(); if ((settings.pause_mode != pause_low_latency) && (settings.pause_mode != pause_no_gc) #ifdef BACKGROUND_GC && (!gc_heap::background_running_p()) #endif //BACKGROUND_GC ) { assert (settings.condemned_generation <= max_generation); allocator* gen_alloc = ((settings.condemned_generation == max_generation) ? nullptr : generation_allocator (generation_of (max_generation))); dprintf (2, ("(gen%d)soh_get_segment_to_expand", settings.condemned_generation)); // try to find one in the gen 2 segment list, search backwards because the first segments // tend to be more compact than the later ones. heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); _ASSERTE(fseg != NULL); #ifdef SEG_REUSE_STATS int try_reuse = 0; #endif //SEG_REUSE_STATS heap_segment* seg = ephemeral_heap_segment; while ((seg = heap_segment_prev_rw (fseg, seg)) && (seg != fseg)) { #ifdef SEG_REUSE_STATS try_reuse++; #endif //SEG_REUSE_STATS if (can_expand_into_p (seg, size/3, total_ephemeral_size, gen_alloc)) { get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (use_bestfit ? expand_reuse_bestfit : expand_reuse_normal)); if (settings.condemned_generation == max_generation) { if (use_bestfit) { build_ordered_free_spaces (seg); dprintf (GTC_LOG, ("can use best fit")); } #ifdef SEG_REUSE_STATS dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse", settings.condemned_generation, try_reuse)); #endif //SEG_REUSE_STATS dprintf (GTC_LOG, ("max_gen: Found existing segment to expand into %zx", (size_t)seg)); return seg; } else { #ifdef SEG_REUSE_STATS dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse - returning", settings.condemned_generation, try_reuse)); #endif //SEG_REUSE_STATS dprintf (GTC_LOG, ("max_gen-1: Found existing segment to expand into %zx", (size_t)seg)); // If we return 0 here, the allocator will think since we are short on end // of seg we need to trigger a full compacting GC. So if sustained low latency // is set we should acquire a new seg instead, that way we wouldn't be short. // The real solution, of course, is to actually implement seg reuse in gen1. if (settings.pause_mode != pause_sustained_low_latency) { dprintf (GTC_LOG, ("max_gen-1: SustainedLowLatency is set, acquire a new seg")); get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_next_full_gc); return 0; } } } } } heap_segment* result = get_segment (size, gc_oh_num::soh); if(result) { #ifdef BACKGROUND_GC if (current_c_gc_state == c_gc_state_planning) { // When we expand heap during bgc sweep, we set the seg to be swept so // we'll always look at cards for objects on the new segment. result->flags |= heap_segment_flags_swept; } #endif //BACKGROUND_GC FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(result), (size_t)(heap_segment_reserved (result) - heap_segment_mem(result)), gc_etw_segment_small_object_heap); } get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (result ? expand_new_seg : expand_no_memory)); if (result == 0) { dprintf (2, ("h%d: failed to allocate a new segment!", heap_number)); } else { #ifdef MULTIPLE_HEAPS heap_segment_heap (result) = this; #endif //MULTIPLE_HEAPS } dprintf (GTC_LOG, ("(gen%d)creating new segment %p", settings.condemned_generation, result)); return result; } //returns 0 in case of allocation failure heap_segment* gc_heap::get_segment (size_t size, gc_oh_num oh) { assert(oh != gc_oh_num::unknown); BOOL uoh_p = (oh == gc_oh_num::loh) || (oh == gc_oh_num::poh); if (heap_hard_limit) return NULL; heap_segment* result = 0; if (segment_standby_list != 0) { result = segment_standby_list; heap_segment* last = 0; while (result) { size_t hs = (size_t)(heap_segment_reserved (result) - (uint8_t*)result); if ((hs >= size) && ((hs / 2) < size)) { dprintf (2, ("Hoarded segment %zx found", (size_t) result)); if (last) { heap_segment_next (last) = heap_segment_next (result); } else { segment_standby_list = heap_segment_next (result); } break; } else { last = result; result = heap_segment_next (result); } } } if (result) { init_heap_segment (result, __this); #ifdef BACKGROUND_GC if (is_bgc_in_progress()) { dprintf (GC_TABLE_LOG, ("hoarded seg %p, mark_array is %p", result, mark_array)); if (!commit_mark_array_new_seg (__this, result)) { dprintf (GC_TABLE_LOG, ("failed to commit mark array for hoarded seg")); // If we can't use it we need to thread it back. if (segment_standby_list != 0) { heap_segment_next (result) = segment_standby_list; segment_standby_list = result; } else { segment_standby_list = result; } result = 0; } } #endif //BACKGROUND_GC if (result) seg_mapping_table_add_segment (result, __this); } if (!result) { void* mem = virtual_alloc (size); if (!mem) { fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p); return 0; } result = make_heap_segment ((uint8_t*)mem, size, __this, (oh + max_generation)); if (result) { uint8_t* start; uint8_t* end; if (mem < g_gc_lowest_address) { start = (uint8_t*)mem; } else { start = (uint8_t*)g_gc_lowest_address; } if (((uint8_t*)mem + size) > g_gc_highest_address) { end = (uint8_t*)mem + size; } else { end = (uint8_t*)g_gc_highest_address; } if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, uoh_p) != 0) { // release_segment needs the flags to decrement the proper bucket size_t flags = 0; if (oh == poh) { flags = heap_segment_flags_poh; } else if (oh == loh) { flags = heap_segment_flags_loh; } result->flags |= flags; release_segment (result); return 0; } } else { fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p); virtual_free (mem, size); } if (result) { seg_mapping_table_add_segment (result, __this); } } #ifdef BACKGROUND_GC if (result) { ::record_changed_seg ((uint8_t*)result, heap_segment_reserved (result), settings.gc_index, current_bgc_state, seg_added); bgc_verify_mark_array_cleared (result); } #endif //BACKGROUND_GC dprintf (GC_TABLE_LOG, ("h%d: new seg: %p-%p (%zd)", heap_number, result, ((uint8_t*)result + size), size)); return result; } #endif //!USE_REGIONS #ifdef MULTIPLE_HEAPS #ifdef HOST_X86 #ifdef _MSC_VER #pragma warning(disable:4035) static ptrdiff_t get_cycle_count() { __asm rdtsc } #pragma warning(default:4035) #elif defined(__GNUC__) static ptrdiff_t get_cycle_count() { ptrdiff_t cycles; ptrdiff_t cyclesHi; __asm__ __volatile__ ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); return cycles; } #else //_MSC_VER #error Unknown compiler #endif //_MSC_VER #elif defined(TARGET_AMD64) #ifdef _MSC_VER extern "C" uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) static ptrdiff_t get_cycle_count() { return (ptrdiff_t)__rdtsc(); } #elif defined(__GNUC__) static ptrdiff_t get_cycle_count() { ptrdiff_t cycles; ptrdiff_t cyclesHi; __asm__ __volatile__ ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); return (cyclesHi << 32) | cycles; } #else // _MSC_VER extern "C" ptrdiff_t get_cycle_count(void); #endif // _MSC_VER #elif defined(TARGET_LOONGARCH64) static ptrdiff_t get_cycle_count() { ////FIXME: TODO for LOONGARCH64: //ptrdiff_t cycle; __asm__ volatile ("break 0 \n"); return 0; } #else static ptrdiff_t get_cycle_count() { // @ARMTODO, @ARM64TODO, @WASMTODO: cycle counter is not exposed to user mode. For now (until we can show this // makes a difference on the configurations on which we'll run) just return 0. This will result in // all buffer access times being reported as equal in access_time(). return 0; } #endif //TARGET_X86 // We may not be on contiguous numa nodes so need to store // the node index as well. struct node_heap_count { int node_no; int heap_count; }; class heap_select { heap_select() {} public: static uint8_t* sniff_buffer; static unsigned n_sniff_buffers; static unsigned cur_sniff_index; static uint16_t *proc_no_to_heap_no; static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_HEAPS]; static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_HEAPS]; static uint16_t *numa_node_to_heap_map; #ifdef HEAP_BALANCE_INSTRUMENTATION // Note this is the total numa nodes GC heaps are on. There might be // more on the machine if GC threads aren't using all of them. static uint16_t total_numa_nodes; static node_heap_count heaps_on_node[MAX_SUPPORTED_NODES]; #endif static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers) { ptrdiff_t start_cycles = get_cycle_count(); uint8_t sniff = sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE]; assert (sniff == 0); ptrdiff_t elapsed_cycles = get_cycle_count() - start_cycles; // add sniff here just to defeat the optimizer elapsed_cycles += sniff; return (int) elapsed_cycles; } public: static BOOL init(int n_heaps) { assert (sniff_buffer == NULL && n_sniff_buffers == 0); uint32_t maxCpuCount = GCToOSInterface::GetMaxProcessorCount(); proc_no_to_heap_no = new (nothrow) uint16_t[maxCpuCount]; if (proc_no_to_heap_no == NULL) { return FALSE; } memset(proc_no_to_heap_no, 0, maxCpuCount*sizeof(uint16_t)); if (!GCToOSInterface::CanGetCurrentProcessorNumber()) { n_sniff_buffers = n_heaps*2+1; size_t n_cache_lines = 1 + n_heaps * n_sniff_buffers + 1; size_t sniff_buf_size = n_cache_lines * HS_CACHE_LINE_SIZE; if (sniff_buf_size / HS_CACHE_LINE_SIZE != n_cache_lines) // check for overlow { return FALSE; } sniff_buffer = new (nothrow) uint8_t[sniff_buf_size]; if (sniff_buffer == 0) return FALSE; memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t)); } bool do_numa = GCToOSInterface::CanEnableGCNumaAware(); // we want to assign heap indices such that there is a contiguous // range of heap numbers for each numa node // we do this in two passes: // 1. gather processor numbers and numa node numbers for all heaps // 2. assign heap numbers for each numa node // Pass 1: gather processor numbers and numa node numbers uint16_t proc_no[MAX_SUPPORTED_HEAPS]; uint16_t node_no[MAX_SUPPORTED_HEAPS]; uint16_t max_node_no = 0; uint16_t heap_num; for (heap_num = 0; heap_num < n_heaps; heap_num++) { if (!GCToOSInterface::GetProcessorForHeap (heap_num, &proc_no[heap_num], &node_no[heap_num])) break; assert(proc_no[heap_num] < GCToOSInterface::GetMaxProcessorCount()); if (!do_numa || node_no[heap_num] == NUMA_NODE_UNDEFINED) node_no[heap_num] = 0; max_node_no = max(max_node_no, node_no[heap_num]); } // Pass 2: assign heap numbers by numa node int cur_heap_no = 0; for (uint16_t cur_node_no = 0; cur_node_no <= max_node_no; cur_node_no++) { for (int i = 0; i < heap_num; i++) { if (node_no[i] != cur_node_no) continue; // we found a heap on cur_node_no heap_no_to_proc_no[cur_heap_no] = proc_no[i]; heap_no_to_numa_node[cur_heap_no] = cur_node_no; cur_heap_no++; } } return TRUE; } static void init_cpu_mapping(int heap_number) { if (GCToOSInterface::CanGetCurrentProcessorNumber()) { uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); proc_no_to_heap_no[proc_no] = (uint16_t)heap_number; } } static void mark_heap(int heap_number) { if (GCToOSInterface::CanGetCurrentProcessorNumber()) return; for (unsigned sniff_index = 0; sniff_index < n_sniff_buffers; sniff_index++) sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1; } static int select_heap(alloc_context* acontext) { #ifndef TRACE_GC UNREFERENCED_PARAMETER(acontext); // only referenced by dprintf #endif //TRACE_GC if (GCToOSInterface::CanGetCurrentProcessorNumber()) { uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); int adjusted_heap = proc_no_to_heap_no[proc_no]; // with dynamic heap count, need to make sure the value is in range. if (adjusted_heap >= gc_heap::n_heaps) { adjusted_heap %= gc_heap::n_heaps; } return adjusted_heap; } unsigned sniff_index = Interlocked::Increment(&cur_sniff_index); sniff_index %= n_sniff_buffers; int best_heap = 0; int best_access_time = 1000*1000*1000; int second_best_access_time = best_access_time; uint8_t *l_sniff_buffer = sniff_buffer; unsigned l_n_sniff_buffers = n_sniff_buffers; for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++) { int this_access_time = access_time(l_sniff_buffer, heap_number, sniff_index, l_n_sniff_buffers); if (this_access_time < best_access_time) { second_best_access_time = best_access_time; best_access_time = this_access_time; best_heap = heap_number; } else if (this_access_time < second_best_access_time) { second_best_access_time = this_access_time; } } if (best_access_time*2 < second_best_access_time) { sniff_buffer[(1 + best_heap*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1; dprintf (3, ("select_heap yields crisp %d for context %p\n", best_heap, (void *)acontext)); } else { dprintf (3, ("select_heap yields vague %d for context %p\n", best_heap, (void *)acontext )); } return best_heap; } static bool can_find_heap_fast() { return GCToOSInterface::CanGetCurrentProcessorNumber(); } static uint16_t find_proc_no_from_heap_no(int heap_number) { return heap_no_to_proc_no[heap_number]; } static uint16_t find_numa_node_from_heap_no(int heap_number) { return heap_no_to_numa_node[heap_number]; } static bool init_numa_node_to_heap_map(int nheaps) { // Called right after GCHeap::Init() for each heap uint32_t maxCpuCount = GCToOSInterface::GetMaxProcessorCount(); // The upper limit of the numa node numbers is maxCpuCount - 1 since in the worst case each processor could be on a different NUMA node. // We add +1 here to make it easier to calculate the heap number range for the last NUMA node. numa_node_to_heap_map = new (nothrow) uint16_t[maxCpuCount + 1]; if (numa_node_to_heap_map == nullptr) { return false; } memset(numa_node_to_heap_map, 0, (maxCpuCount + 1)*sizeof(uint16_t)); // For each NUMA node used by the heaps, the // numa_node_to_heap_map[numa_node] is set to the first heap number on that node and // numa_node_to_heap_map[numa_node + 1] is set to the first heap number not on that node // Set the start of the heap number range for the first NUMA node numa_node_to_heap_map[heap_no_to_numa_node[0]] = 0; #ifdef HEAP_BALANCE_INSTRUMENTATION total_numa_nodes = 0; memset (heaps_on_node, 0, sizeof (heaps_on_node)); heaps_on_node[0].node_no = heap_no_to_numa_node[0]; heaps_on_node[0].heap_count = 1; #endif //HEAP_BALANCE_INSTRUMENTATION for (int i=1; i < nheaps; i++) { if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1]) { #ifdef HEAP_BALANCE_INSTRUMENTATION total_numa_nodes++; heaps_on_node[total_numa_nodes].node_no = heap_no_to_numa_node[i]; #endif assert(heap_no_to_numa_node[i-1] < maxCpuCount); assert(heap_no_to_numa_node[i] < maxCpuCount); // Set the end of the heap number range for the previous NUMA node numa_node_to_heap_map[heap_no_to_numa_node[i-1] + 1] = // Set the start of the heap number range for the current NUMA node numa_node_to_heap_map[heap_no_to_numa_node[i]] = (uint16_t)i; } #ifdef HEAP_BALANCE_INSTRUMENTATION (heaps_on_node[total_numa_nodes].heap_count)++; #endif } assert(heap_no_to_numa_node[nheaps-1] < maxCpuCount); // Set the end of the heap range for the last NUMA node numa_node_to_heap_map[heap_no_to_numa_node[nheaps-1] + 1] = (uint16_t)nheaps; //mark the end with nheaps #ifdef HEAP_BALANCE_INSTRUMENTATION total_numa_nodes++; #endif return true; } static bool get_info_proc (int index, uint16_t* proc_no, uint16_t* node_no, int* start_heap, int* end_heap) { if (!GCToOSInterface::GetProcessorForHeap ((uint16_t)index, proc_no, node_no)) return false; if (*node_no == NUMA_NODE_UNDEFINED) *node_no = 0; *start_heap = (int)numa_node_to_heap_map[*node_no]; *end_heap = (int)(numa_node_to_heap_map[*node_no + 1]); return true; } static void distribute_other_procs (bool distribute_all_p) { if (affinity_config_specified_p) return; if (distribute_all_p) { uint16_t current_heap_no_on_node[MAX_SUPPORTED_HEAPS]; memset (current_heap_no_on_node, 0, sizeof (current_heap_no_on_node)); uint16_t current_heap_no = 0; uint16_t proc_no = 0; uint16_t node_no = 0; for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++) { int start_heap, end_heap; if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap)) break; // This indicates there are heaps on this node if ((end_heap - start_heap) > 0) { proc_no_to_heap_no[proc_no] = (current_heap_no_on_node[node_no] % (uint16_t)(end_heap - start_heap)) + (uint16_t)start_heap; (current_heap_no_on_node[node_no])++; } else { proc_no_to_heap_no[proc_no] = current_heap_no % gc_heap::n_heaps; (current_heap_no)++; } } } else { // This is for scenarios where GCHeapCount is specified as something like // (g_num_active_processors - 2) to allow less randomization to the Server GC threads. // In this case we want to assign the right heaps to those procs, ie if they share // the same numa node we want to assign local heaps to those procs. Otherwise we // let the heap balancing mechanism take over for now. uint16_t proc_no = 0; uint16_t node_no = 0; int current_node_no = -1; int current_heap_on_node = -1; for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++) { int start_heap, end_heap; if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap)) break; if ((end_heap - start_heap) > 0) { if (node_no == current_node_no) { // We already iterated through all heaps on this node, don't add more procs to these // heaps. if (current_heap_on_node >= end_heap) { continue; } } else { current_node_no = node_no; current_heap_on_node = start_heap; } proc_no_to_heap_no[proc_no] = (uint16_t)current_heap_on_node; current_heap_on_node++; } } } } static void get_heap_range_for_heap(int hn, int* start, int* end) { uint16_t numa_node = heap_no_to_numa_node[hn]; *start = (int)numa_node_to_heap_map[numa_node]; *end = (int)(numa_node_to_heap_map[numa_node+1]); #ifdef HEAP_BALANCE_INSTRUMENTATION dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end)); #endif //HEAP_BALANCE_INSTRUMENTATION } }; uint8_t* heap_select::sniff_buffer; unsigned heap_select::n_sniff_buffers; unsigned heap_select::cur_sniff_index; uint16_t* heap_select::proc_no_to_heap_no; uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_HEAPS]; uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_HEAPS]; uint16_t* heap_select::numa_node_to_heap_map; #ifdef HEAP_BALANCE_INSTRUMENTATION uint16_t heap_select::total_numa_nodes; node_heap_count heap_select::heaps_on_node[MAX_SUPPORTED_NODES]; #endif #ifdef HEAP_BALANCE_INSTRUMENTATION // This records info we use to look at effect of different strategies // for heap balancing. struct heap_balance_info { uint64_t timestamp; // This also encodes when we detect the thread runs on // different proc during a balance attempt. Sometimes // I observe this happens multiple times during one attempt! // If this happens, I just record the last proc we observe // and set MSB. int tid; // This records the final alloc_heap for the thread. // // This also encodes the reason why we needed to set_home_heap // in balance_heaps. // If we set it because the home heap is not the same as the proc, // we set MSB. // // If we set ideal proc, we set the 2nd MSB. int alloc_heap; int ideal_proc_no; }; // This means inbetween each GC we can log at most this many entries per proc. // This is usually enough. Most of the time we only need to log something every 128k // of allocations in balance_heaps and gen0 budget is <= 200mb. #define default_max_hb_heap_balance_info 4096 struct heap_balance_info_proc { int count; int index; heap_balance_info hb_info[default_max_hb_heap_balance_info]; }; struct heap_balance_info_numa { heap_balance_info_proc* hb_info_procs; }; uint64_t start_raw_ts = 0; bool cpu_group_enabled_p = false; uint32_t procs_per_numa_node = 0; uint16_t total_numa_nodes_on_machine = 0; uint32_t procs_per_cpu_group = 0; uint16_t total_cpu_groups_on_machine = 0; // Note this is still on one of the numa nodes, so we'll incur a remote access // no matter what. heap_balance_info_numa* hb_info_numa_nodes = NULL; // TODO: This doesn't work for multiple nodes per CPU group yet. int get_proc_index_numa (int proc_no, int* numa_no) { if (total_numa_nodes_on_machine == 1) { *numa_no = 0; return proc_no; } else { if (cpu_group_enabled_p) { // see vm\gcenv.os.cpp GroupProcNo implementation. *numa_no = proc_no >> 6; return (proc_no % 64); } else { *numa_no = proc_no / procs_per_numa_node; return (proc_no % procs_per_numa_node); } } } const int hb_log_buffer_size = 4096; static char hb_log_buffer[hb_log_buffer_size]; int last_hb_recorded_gc_index = -1; #endif //HEAP_BALANCE_INSTRUMENTATION void set_thread_affinity_for_heap (int heap_number, uint16_t proc_no) { if (!GCToOSInterface::SetThreadAffinity (proc_no)) { dprintf (1, ("Failed to set thread affinity for GC thread %d on proc #%d", heap_number, proc_no)); } } #endif //MULTIPLE_HEAPS class mark { public: uint8_t* first; size_t len; // If we want to save space we can have a pool of plug_and_gap's instead of // always having 2 allocated for each pinned plug. gap_reloc_pair saved_pre_plug; // If we decide to not compact, we need to restore the original values. gap_reloc_pair saved_pre_plug_reloc; gap_reloc_pair saved_post_plug; // Supposedly Pinned objects cannot have references but we are seeing some from pinvoke // frames. Also if it's an artificially pinned plug created by us, it can certainly // have references. // We know these cases will be rare so we can optimize this to be only allocated on demand. gap_reloc_pair saved_post_plug_reloc; // We need to calculate this after we are done with plan phase and before compact // phase because compact phase will change the bricks so relocate_address will no // longer work. uint8_t* saved_pre_plug_info_reloc_start; // We need to save this because we will have no way to calculate it, unlike the // pre plug info start which is right before this plug. uint8_t* saved_post_plug_info_start; #ifdef SHORT_PLUGS uint8_t* allocation_context_start_region; #endif //SHORT_PLUGS // How the bits in these bytes are organized: // MSB --> LSB // bit to indicate whether it's a short obj | 3 bits for refs in this short obj | 2 unused bits | bit to indicate if it's collectible | last bit // last bit indicates if there's pre or post info associated with this plug. If it's not set all other bits will be 0. BOOL saved_pre_p; BOOL saved_post_p; #ifdef _DEBUG // We are seeing this is getting corrupted for a PP with a NP after. // Save it when we first set it and make sure it doesn't change. gap_reloc_pair saved_post_plug_debug; #endif //_DEBUG size_t get_max_short_bits() { return (sizeof (gap_reloc_pair) / sizeof (uint8_t*)); } // pre bits size_t get_pre_short_start_bit () { return (sizeof (saved_pre_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); } BOOL pre_short_p() { return (saved_pre_p & (1 << (sizeof (saved_pre_p) * 8 - 1))); } void set_pre_short() { saved_pre_p |= (1 << (sizeof (saved_pre_p) * 8 - 1)); } void set_pre_short_bit (size_t bit) { saved_pre_p |= 1 << (get_pre_short_start_bit() + bit); } BOOL pre_short_bit_p (size_t bit) { return (saved_pre_p & (1 << (get_pre_short_start_bit() + bit))); } #ifdef COLLECTIBLE_CLASS void set_pre_short_collectible() { saved_pre_p |= 2; } BOOL pre_short_collectible_p() { return (saved_pre_p & 2); } #endif //COLLECTIBLE_CLASS // post bits size_t get_post_short_start_bit () { return (sizeof (saved_post_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); } BOOL post_short_p() { return (saved_post_p & (1 << (sizeof (saved_post_p) * 8 - 1))); } void set_post_short() { saved_post_p |= (1 << (sizeof (saved_post_p) * 8 - 1)); } void set_post_short_bit (size_t bit) { saved_post_p |= 1 << (get_post_short_start_bit() + bit); } BOOL post_short_bit_p (size_t bit) { return (saved_post_p & (1 << (get_post_short_start_bit() + bit))); } #ifdef COLLECTIBLE_CLASS void set_post_short_collectible() { saved_post_p |= 2; } BOOL post_short_collectible_p() { return (saved_post_p & 2); } #endif //COLLECTIBLE_CLASS uint8_t* get_plug_address() { return first; } BOOL has_pre_plug_info() { return saved_pre_p; } BOOL has_post_plug_info() { return saved_post_p; } gap_reloc_pair* get_pre_plug_reloc_info() { return &saved_pre_plug_reloc; } gap_reloc_pair* get_post_plug_reloc_info() { return &saved_post_plug_reloc; } void set_pre_plug_info_reloc_start (uint8_t* reloc) { saved_pre_plug_info_reloc_start = reloc; } uint8_t* get_post_plug_info_start() { return saved_post_plug_info_start; } // We need to temporarily recover the shortened plugs for compact phase so we can // copy over the whole plug and their related info (mark bits/cards). But we will // need to set the artificial gap back so compact phase can keep reading the plug info. // We also need to recover the saved info because we'll need to recover it later. // // So we would call swap_p*_plug_and_saved once to recover the object info; then call // it again to recover the artificial gap. void swap_pre_plug_and_saved() { gap_reloc_pair temp; memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); saved_pre_plug_reloc = temp; } void swap_post_plug_and_saved() { gap_reloc_pair temp; memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); saved_post_plug_reloc = temp; } void swap_pre_plug_and_saved_for_profiler() { gap_reloc_pair temp; memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug)); saved_pre_plug = temp; } void swap_post_plug_and_saved_for_profiler() { gap_reloc_pair temp; memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); saved_post_plug = temp; } // We should think about whether it's really necessary to have to copy back the pre plug // info since it was already copied during compacting plugs. But if a plug doesn't move // by >= 3 ptr size (the size of gap_reloc_pair), it means we'd have to recover pre plug info. size_t recover_plug_info() { // We need to calculate the size for sweep case in order to correctly record the // free_obj_space - sweep would've made these artificial gaps into free objects and // we would need to deduct the size because now we are writing into those free objects. size_t recovered_sweep_size = 0; if (saved_pre_p) { if (gc_heap::settings.compaction) { dprintf (3, ("%p: REC Pre: %p-%p", first, &saved_pre_plug_reloc, saved_pre_plug_info_reloc_start)); memcpy (saved_pre_plug_info_reloc_start, &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); } else { dprintf (3, ("%p: REC Pre: %p-%p", first, &saved_pre_plug, (first - sizeof (plug_and_gap)))); memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug)); recovered_sweep_size += sizeof (saved_pre_plug); } } if (saved_post_p) { if (gc_heap::settings.compaction) { dprintf (3, ("%p: REC Post: %p-%p", first, &saved_post_plug_reloc, saved_post_plug_info_start)); memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); } else { dprintf (3, ("%p: REC Post: %p-%p", first, &saved_post_plug, saved_post_plug_info_start)); memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); recovered_sweep_size += sizeof (saved_post_plug); } } return recovered_sweep_size; } }; void gc_mechanisms::init_mechanisms() { condemned_generation = 0; promotion = FALSE;//TRUE; compaction = TRUE; #ifdef FEATURE_LOH_COMPACTION loh_compaction = gc_heap::loh_compaction_requested(); #else loh_compaction = FALSE; #endif //FEATURE_LOH_COMPACTION heap_expansion = FALSE; concurrent = FALSE; demotion = FALSE; elevation_reduced = FALSE; found_finalizers = FALSE; #ifdef BACKGROUND_GC background_p = gc_heap::background_running_p() != FALSE; #endif //BACKGROUND_GC entry_memory_load = 0; entry_available_physical_mem = 0; exit_memory_load = 0; #ifdef STRESS_HEAP stress_induced = FALSE; #endif // STRESS_HEAP } void gc_mechanisms::first_init() { gc_index = 0; gen0_reduction_count = 0; should_lock_elevation = FALSE; elevation_locked_count = 0; reason = reason_empty; #ifdef BACKGROUND_GC pause_mode = gc_heap::gc_can_use_concurrent ? pause_interactive : pause_batch; #ifdef _DEBUG int debug_pause_mode = static_cast(GCConfig::GetLatencyMode()); if (debug_pause_mode >= 0) { assert (debug_pause_mode <= pause_sustained_low_latency); pause_mode = (gc_pause_mode)debug_pause_mode; } #endif //_DEBUG #else //BACKGROUND_GC pause_mode = pause_batch; #endif //BACKGROUND_GC init_mechanisms(); } void gc_mechanisms::record (gc_history_global* history) { #ifdef MULTIPLE_HEAPS history->num_heaps = gc_heap::n_heaps; #else history->num_heaps = 1; #endif //MULTIPLE_HEAPS history->condemned_generation = condemned_generation; history->gen0_reduction_count = gen0_reduction_count; history->reason = reason; history->pause_mode = (int)pause_mode; history->mem_pressure = entry_memory_load; history->global_mechanisms_p = 0; // start setting the boolean values. if (concurrent) history->set_mechanism_p (global_concurrent); if (compaction) history->set_mechanism_p (global_compaction); if (promotion) history->set_mechanism_p (global_promotion); if (demotion) history->set_mechanism_p (global_demotion); if (card_bundles) history->set_mechanism_p (global_card_bundles); if (elevation_reduced) history->set_mechanism_p (global_elevation); } /********************************** called at the beginning of GC to fix the allocated size to what is really allocated, or to turn the free area into an unused object It needs to be called after all of the other allocation contexts have been fixed since it relies on alloc_allocated. ********************************/ inline BOOL grow_mark_stack (mark*& m, size_t& len, size_t init_len) { size_t new_size = max (init_len, 2*len); mark* tmp = new (nothrow) mark [new_size]; if (tmp) { memcpy (tmp, m, len * sizeof (mark)); delete[] m; m = tmp; len = new_size; return TRUE; } else { dprintf (1, ("Failed to allocate %zd bytes for mark stack", (len * sizeof (mark)))); return FALSE; } } inline uint8_t* pinned_plug (mark* m) { return m->first; } inline size_t& pinned_len (mark* m) { return m->len; } inline void set_new_pin_info (mark* m, uint8_t* pin_free_space_start) { m->len = pinned_plug (m) - pin_free_space_start; #ifdef SHORT_PLUGS m->allocation_context_start_region = pin_free_space_start; #endif //SHORT_PLUGS } #ifdef SHORT_PLUGS inline uint8_t*& pin_allocation_context_start_region (mark* m) { return m->allocation_context_start_region; } uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry) { uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info()); uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - (pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap))); //dprintf (2, ("detected a very short plug: %zx before PP %zx, pad %zx", // old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); dprintf (2, ("EP: %p(%p), %p", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); return plug_start_in_saved; } #endif //SHORT_PLUGS #ifdef CARD_BUNDLE // The card bundle keeps track of groups of card words. static const size_t card_bundle_word_width = 32; // How do we express the fact that 32 bits (card_word_width) is one uint32_t? static const size_t card_bundle_size = (size_t)(GC_PAGE_SIZE / (sizeof(uint32_t)*card_bundle_word_width)); inline size_t card_bundle_word (size_t cardb) { return cardb / card_bundle_word_width; } inline uint32_t card_bundle_bit (size_t cardb) { return (uint32_t)(cardb % card_bundle_word_width); } size_t align_cardw_on_bundle (size_t cardw) { return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 )); } // Get the card bundle representing a card word size_t cardw_card_bundle (size_t cardw) { return cardw / card_bundle_size; } // Get the first card word in a card bundle size_t card_bundle_cardw (size_t cardb) { return cardb * card_bundle_size; } // Takes a pointer to a card bundle table and an address, and returns a pointer that represents // where a theoretical card bundle table that represents every address (starting from 0) would // start if the bundle word representing the address were to be located at the pointer passed in. // The returned 'translated' pointer makes it convenient/fast to calculate where the card bundle // for a given address is using a simple shift operation on the address. uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address) { // The number of bytes of heap memory represented by a card bundle word const size_t heap_bytes_for_bundle_word = card_size * card_word_width * card_bundle_size * card_bundle_word_width; // Each card bundle word is 32 bits return (uint32_t*)((uint8_t*)cb - (((size_t)lowest_address / heap_bytes_for_bundle_word) * sizeof (uint32_t))); } #endif // CARD_BUNDLE #if defined (HOST_64BIT) #define brick_size ((size_t)4096) #else #define brick_size ((size_t)2048) #endif //HOST_64BIT inline uint8_t* align_on_brick (uint8_t* add) { return (uint8_t*)((size_t)(add + brick_size - 1) & ~(brick_size - 1)); } inline uint8_t* align_lower_brick (uint8_t* add) { return (uint8_t*)(((size_t)add) & ~(brick_size - 1)); } size_t size_brick_of (uint8_t* from, uint8_t* end) { assert (((size_t)from & (brick_size-1)) == 0); assert (((size_t)end & (brick_size-1)) == 0); return ((end - from) / brick_size) * sizeof (short); } inline uint8_t* align_on_card (uint8_t* add) { return (uint8_t*)((size_t)(add + card_size - 1) & ~(card_size - 1 )); } inline uint8_t* align_on_card_word (uint8_t* add) { return (uint8_t*) ((size_t)(add + (card_size*card_word_width)-1) & ~(card_size*card_word_width - 1)); } inline uint8_t* align_lower_card (uint8_t* add) { return (uint8_t*)((size_t)add & ~(card_size-1)); } // Returns the number of DWORDs in the card table that cover the // range of addresses [from, end[. size_t count_card_of (uint8_t* from, uint8_t* end) { return card_word (gcard_of (end - 1)) - card_word (gcard_of (from)) + 1; } // Returns the number of bytes to allocate for a card table // that covers the range of addresses [from, end[. size_t size_card_of (uint8_t* from, uint8_t* end) { return count_card_of (from, end) * sizeof(uint32_t); } // We don't store seg_mapping_table in card_table_info because there's only always one view. class card_table_info { public: unsigned recount; size_t size; uint32_t* next_card_table; uint8_t* lowest_address; uint8_t* highest_address; short* brick_table; #ifdef CARD_BUNDLE uint32_t* card_bundle_table; #endif //CARD_BUNDLE // mark_array is always at the end of the data structure because we // want to be able to make one commit call for everything before it. #ifdef BACKGROUND_GC uint32_t* mark_array; #endif //BACKGROUND_GC }; static_assert(offsetof(dac_card_table_info, size) == offsetof(card_table_info, size), "DAC card_table_info layout mismatch"); static_assert(offsetof(dac_card_table_info, next_card_table) == offsetof(card_table_info, next_card_table), "DAC card_table_info layout mismatch"); //These are accessors on untranslated cardtable inline unsigned& card_table_refcount (uint32_t* c_table) { return *(unsigned*)((char*)c_table - sizeof (card_table_info)); } inline uint8_t*& card_table_lowest_address (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->lowest_address; } uint32_t* translate_card_table (uint32_t* ct) { return (uint32_t*)((uint8_t*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(uint32_t)); } inline uint8_t*& card_table_highest_address (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->highest_address; } inline short*& card_table_brick_table (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->brick_table; } #ifdef CARD_BUNDLE inline uint32_t*& card_table_card_bundle_table (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->card_bundle_table; } #endif //CARD_BUNDLE #ifdef BACKGROUND_GC inline uint32_t*& card_table_mark_array (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->mark_array; } #ifdef HOST_64BIT #define mark_bit_pitch ((size_t)16) #else #define mark_bit_pitch ((size_t)8) #endif // HOST_64BIT #define mark_word_width ((size_t)32) #define mark_word_size (mark_word_width * mark_bit_pitch) inline uint8_t* align_on_mark_bit (uint8_t* add) { return (uint8_t*)((size_t)(add + (mark_bit_pitch - 1)) & ~(mark_bit_pitch - 1)); } inline uint8_t* align_lower_mark_bit (uint8_t* add) { return (uint8_t*)((size_t)(add) & ~(mark_bit_pitch - 1)); } inline BOOL is_aligned_on_mark_word (uint8_t* add) { return ((size_t)add == ((size_t)(add) & ~(mark_word_size - 1))); } inline uint8_t* align_on_mark_word (uint8_t* add) { return (uint8_t*)((size_t)(add + mark_word_size - 1) & ~(mark_word_size - 1)); } inline uint8_t* align_lower_mark_word (uint8_t* add) { return (uint8_t*)((size_t)(add) & ~(mark_word_size - 1)); } inline size_t mark_bit_of (uint8_t* add) { return ((size_t)add / mark_bit_pitch); } inline unsigned int mark_bit_bit (size_t mark_bit) { return (unsigned int)(mark_bit % mark_word_width); } inline size_t mark_bit_word (size_t mark_bit) { return (mark_bit / mark_word_width); } inline size_t mark_word_of (uint8_t* add) { return ((size_t)add) / mark_word_size; } uint8_t* mark_word_address (size_t wd) { return (uint8_t*)(wd*mark_word_size); } uint8_t* mark_bit_address (size_t mark_bit) { return (uint8_t*)(mark_bit*mark_bit_pitch); } inline size_t mark_bit_bit_of (uint8_t* add) { return (((size_t)add / mark_bit_pitch) % mark_word_width); } size_t size_mark_array_of (uint8_t* from, uint8_t* end) { assert (((size_t)from & ((mark_word_size)-1)) == 0); assert (((size_t)end & ((mark_word_size)-1)) == 0); return sizeof (uint32_t)*(((end - from) / mark_word_size)); } //In order to eliminate the lowest_address in the mark array //computations (mark_word_of, etc) mark_array is offset // according to the lowest_address. uint32_t* translate_mark_array (uint32_t* ma) { return (uint32_t*)((uint8_t*)ma - size_mark_array_of (0, g_gc_lowest_address)); } #endif //BACKGROUND_GC //These work on untranslated card tables inline uint32_t*& card_table_next (uint32_t* c_table) { // NOTE: The dac takes a dependency on card_table_info being right before c_table. // It's 100% ok to change this implementation detail as long as a matching change // is made to DacGCBookkeepingEnumerator::Init in daccess.cpp. return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->next_card_table; } inline size_t& card_table_size (uint32_t* c_table) { return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->size; } void own_card_table (uint32_t* c_table) { card_table_refcount (c_table) += 1; } void destroy_card_table (uint32_t* c_table); void delete_next_card_table (uint32_t* c_table) { uint32_t* n_table = card_table_next (c_table); if (n_table) { if (card_table_next (n_table)) { delete_next_card_table (n_table); } if (card_table_refcount (n_table) == 0) { destroy_card_table (n_table); card_table_next (c_table) = 0; } } } void release_card_table (uint32_t* c_table) { assert (card_table_refcount (c_table) >0); card_table_refcount (c_table) -= 1; if (card_table_refcount (c_table) == 0) { delete_next_card_table (c_table); if (card_table_next (c_table) == 0) { destroy_card_table (c_table); // sever the link from the parent if (&g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))] == c_table) { g_gc_card_table = 0; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES g_gc_card_bundle_table = 0; #endif #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP SoftwareWriteWatch::StaticClose(); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } else { uint32_t* p_table = &g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))]; if (p_table) { while (p_table && (card_table_next (p_table) != c_table)) p_table = card_table_next (p_table); card_table_next (p_table) = 0; } } } } } void destroy_card_table (uint32_t* c_table) { // delete (uint32_t*)&card_table_refcount(c_table); size_t size = card_table_size(c_table); gc_heap::destroy_card_table_helper (c_table); GCToOSInterface::VirtualRelease (&card_table_refcount(c_table), size); dprintf (2, ("Table Virtual Free : %zx", (size_t)&card_table_refcount(c_table))); } uint8_t** make_mark_list (size_t size) { uint8_t** mark_list = new (nothrow) uint8_t* [size]; return mark_list; } #define swap(a,b){uint8_t* t; t = a; a = b; b = t;} #ifndef USE_INTROSORT void qsort1( uint8_t* *low, uint8_t* *high, unsigned int depth) { if (((low + 16) >= high) || (depth > 100)) { //insertion sort uint8_t **i, **j; for (i = low+1; i <= high; i++) { uint8_t* val = *i; for (j=i;j >low && val<*(j-1);j--) { *j=*(j-1); } *j=val; } } else { uint8_t *pivot, **left, **right; //sort low middle and high if (*(low+((high-low)/2)) < *low) swap (*(low+((high-low)/2)), *low); if (*high < *low) swap (*low, *high); if (*high < *(low+((high-low)/2))) swap (*(low+((high-low)/2)), *high); swap (*(low+((high-low)/2)), *(high-1)); pivot = *(high-1); left = low; right = high-1; while (1) { while (*(--right) > pivot); while (*(++left) < pivot); if (left < right) { swap(*left, *right); } else break; } swap (*left, *(high-1)); qsort1(low, left-1, depth+1); qsort1(left+1, high, depth+1); } } #endif //USE_INTROSORT #ifdef USE_VXSORT static void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high) { // above this threshold, using AVX2 for sorting will likely pay off // despite possible downclocking on some devices const ptrdiff_t AVX2_THRESHOLD_SIZE = 8 * 1024; // above this threshold, using AVX512F for sorting will likely pay off // despite possible downclocking on current devices const ptrdiff_t AVX512F_THRESHOLD_SIZE = 128 * 1024; // above this threshold, using NEON for sorting will likely pay off const ptrdiff_t NEON_THRESHOLD_SIZE = 1024; if (item_count <= 1) return; #if defined(TARGET_AMD64) if (IsSupportedInstructionSet (InstructionSet::AVX2) && (item_count > AVX2_THRESHOLD_SIZE)) { dprintf(3, ("Sorting mark lists")); // use AVX512F only if the list is large enough to pay for downclocking impact if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) { do_vxsort_avx512 (item_array, &item_array[item_count - 1], range_low, range_high); } else { do_vxsort_avx2 (item_array, &item_array[item_count - 1], range_low, range_high); } } #elif defined(TARGET_ARM64) if (IsSupportedInstructionSet (InstructionSet::NEON) && (item_count > NEON_THRESHOLD_SIZE)) { dprintf(3, ("Sorting mark lists")); do_vxsort_neon (item_array, &item_array[item_count - 1], range_low, range_high); } #endif else { dprintf (3, ("Sorting mark lists")); introsort::sort (item_array, &item_array[item_count - 1], 0); } #ifdef _DEBUG // check the array is sorted for (ptrdiff_t i = 0; i < item_count - 1; i++) { assert (item_array[i] <= item_array[i + 1]); } // check that the ends of the array are indeed in range // together with the above this implies all elements are in range assert ((range_low <= item_array[0]) && (item_array[item_count - 1] <= range_high)); #endif } #endif //USE_VXSORT #ifdef MULTIPLE_HEAPS #ifdef _DEBUG #if !defined(_MSC_VER) #if !defined(__cdecl) #if defined(__i386__) #define __cdecl __attribute__((cdecl)) #else #define __cdecl #endif #endif #endif #endif // _DEBUG #else #ifdef USE_REGIONS #endif //USE_REGIONS #endif //MULTIPLE_HEAPS #ifndef USE_REGIONS class seg_free_spaces { struct seg_free_space { BOOL is_plug; void* start; }; struct free_space_bucket { seg_free_space* free_space; ptrdiff_t count_add; // Assigned when we first construct the array. ptrdiff_t count_fit; // How many items left when we are fitting plugs. }; void move_bucket (int old_power2, int new_power2) { // PREFAST warning 22015: old_power2 could be negative assert (old_power2 >= 0); assert (old_power2 >= new_power2); if (old_power2 == new_power2) { return; } seg_free_space* src_index = free_space_buckets[old_power2].free_space; for (int i = old_power2; i > new_power2; i--) { seg_free_space** dest = &(free_space_buckets[i].free_space); (*dest)++; seg_free_space* dest_index = free_space_buckets[i - 1].free_space; if (i > (new_power2 + 1)) { seg_free_space temp = *src_index; *src_index = *dest_index; *dest_index = temp; } src_index = dest_index; } free_space_buckets[old_power2].count_fit--; free_space_buckets[new_power2].count_fit++; } #ifdef _DEBUG void dump_free_space (seg_free_space* item) { uint8_t* addr = 0; size_t len = 0; if (item->is_plug) { mark* m = (mark*)(item->start); len = pinned_len (m); addr = pinned_plug (m) - len; } else { heap_segment* seg = (heap_segment*)(item->start); addr = heap_segment_plan_allocated (seg); len = heap_segment_committed (seg) - addr; } dprintf (SEG_REUSE_LOG_1, ("[%d]0x%p %zd", heap_num, addr, len)); } void dump() { seg_free_space* item = NULL; int i = 0; dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------\nnow the free spaces look like:", heap_num)); for (i = 0; i < (free_space_bucket_count - 1); i++) { dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); item = free_space_buckets[i].free_space; while (item < free_space_buckets[i + 1].free_space) { dump_free_space (item); item++; } dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); } dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); item = free_space_buckets[i].free_space; while (item <= &seg_free_space_array[free_space_item_count - 1]) { dump_free_space (item); item++; } dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); } #endif //_DEBUG free_space_bucket* free_space_buckets; seg_free_space* seg_free_space_array; ptrdiff_t free_space_bucket_count; ptrdiff_t free_space_item_count; int base_power2; int heap_num; #ifdef _DEBUG BOOL has_end_of_seg; #endif //_DEBUG public: seg_free_spaces (int h_number) { heap_num = h_number; } BOOL alloc () { size_t total_prealloc_size = MAX_NUM_BUCKETS * sizeof (free_space_bucket) + MAX_NUM_FREE_SPACES * sizeof (seg_free_space); free_space_buckets = (free_space_bucket*) new (nothrow) uint8_t[total_prealloc_size]; return (!!free_space_buckets); } // We take the ordered free space array we got from the 1st pass, // and feed the portion that we decided to use to this method, ie, // the largest item_count free spaces. void add_buckets (int base, size_t* ordered_free_spaces, int bucket_count, size_t item_count) { assert (free_space_buckets); assert (item_count <= (size_t)MAX_PTR); free_space_bucket_count = bucket_count; free_space_item_count = item_count; base_power2 = base; #ifdef _DEBUG has_end_of_seg = FALSE; #endif //_DEBUG ptrdiff_t total_item_count = 0; ptrdiff_t i = 0; seg_free_space_array = (seg_free_space*)(free_space_buckets + free_space_bucket_count); for (i = 0; i < (ptrdiff_t)item_count; i++) { seg_free_space_array[i].start = 0; seg_free_space_array[i].is_plug = FALSE; } for (i = 0; i < bucket_count; i++) { free_space_buckets[i].count_add = ordered_free_spaces[i]; free_space_buckets[i].count_fit = ordered_free_spaces[i]; free_space_buckets[i].free_space = &seg_free_space_array[total_item_count]; total_item_count += free_space_buckets[i].count_add; } assert (total_item_count == (ptrdiff_t)item_count); } // If we are adding a free space before a plug we pass the // mark stack position so we can update the length; we could // also be adding the free space after the last plug in which // case start is the segment which we'll need to update the // heap_segment_plan_allocated. void add (void* start, BOOL plug_p, BOOL first_p) { size_t size = (plug_p ? pinned_len ((mark*)start) : (heap_segment_committed ((heap_segment*)start) - heap_segment_plan_allocated ((heap_segment*)start))); if (plug_p) { dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space before plug: %zd", heap_num, size)); } else { dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space at end of seg: %zd", heap_num, size)); #ifdef _DEBUG has_end_of_seg = TRUE; #endif //_DEBUG } if (first_p) { size_t eph_gen_starts = gc_heap::eph_gen_starts_size; size -= eph_gen_starts; if (plug_p) { mark* m = (mark*)(start); pinned_len (m) -= eph_gen_starts; } else { heap_segment* seg = (heap_segment*)start; heap_segment_plan_allocated (seg) += eph_gen_starts; } } int bucket_power2 = index_of_highest_set_bit (size); if (bucket_power2 < base_power2) { return; } free_space_bucket* bucket = &free_space_buckets[bucket_power2 - base_power2]; seg_free_space* bucket_free_space = bucket->free_space; assert (plug_p || (!plug_p && bucket->count_add)); if (bucket->count_add == 0) { dprintf (SEG_REUSE_LOG_1, ("[%d]Already have enough of 2^%d", heap_num, bucket_power2)); return; } ptrdiff_t index = bucket->count_add - 1; dprintf (SEG_REUSE_LOG_1, ("[%d]Building free spaces: adding %p; len: %zd (2^%d)", heap_num, (plug_p ? (pinned_plug ((mark*)start) - pinned_len ((mark*)start)) : heap_segment_plan_allocated ((heap_segment*)start)), size, bucket_power2)); if (plug_p) { bucket_free_space[index].is_plug = TRUE; } bucket_free_space[index].start = start; bucket->count_add--; } #ifdef _DEBUG // Do a consistency check after all free spaces are added. void check() { ptrdiff_t i = 0; int end_of_seg_count = 0; for (i = 0; i < free_space_item_count; i++) { assert (seg_free_space_array[i].start); if (!(seg_free_space_array[i].is_plug)) { end_of_seg_count++; } } if (has_end_of_seg) { assert (end_of_seg_count == 1); } else { assert (end_of_seg_count == 0); } for (i = 0; i < free_space_bucket_count; i++) { assert (free_space_buckets[i].count_add == 0); } } #endif //_DEBUG uint8_t* fit (uint8_t* old_loc, size_t plug_size REQD_ALIGN_AND_OFFSET_DCL) { if (old_loc) { #ifdef SHORT_PLUGS assert (!is_plug_padded (old_loc)); #endif //SHORT_PLUGS assert (!node_realigned (old_loc)); } size_t saved_plug_size = plug_size; #ifdef FEATURE_STRUCTALIGN // BARTOKTODO (4841): this code path is disabled (see can_fit_all_blocks_p) until we take alignment requirements into account _ASSERTE(requiredAlignment == DATA_ALIGNMENT && false); #endif // FEATURE_STRUCTALIGN size_t plug_size_to_fit = plug_size; // best fit is only done for gen1 to gen2 and we do not pad in gen2. // however we must account for requirements of large alignment. // which may result in realignment padding. #ifdef RESPECT_LARGE_ALIGNMENT plug_size_to_fit += switch_alignment_size(FALSE); #endif //RESPECT_LARGE_ALIGNMENT int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size))); ptrdiff_t i; uint8_t* new_address = 0; if (plug_power2 < base_power2) { plug_power2 = base_power2; } int chosen_power2 = plug_power2 - base_power2; retry: for (i = chosen_power2; i < free_space_bucket_count; i++) { if (free_space_buckets[i].count_fit != 0) { break; } chosen_power2++; } dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting plug len %zd (2^%d) using 2^%d free space", heap_num, plug_size, plug_power2, (chosen_power2 + base_power2))); assert (i < free_space_bucket_count); seg_free_space* bucket_free_space = free_space_buckets[chosen_power2].free_space; ptrdiff_t free_space_count = free_space_buckets[chosen_power2].count_fit; size_t new_free_space_size = 0; BOOL can_fit = FALSE; size_t pad = 0; for (i = 0; i < free_space_count; i++) { size_t free_space_size = 0; pad = 0; if (bucket_free_space[i].is_plug) { mark* m = (mark*)(bucket_free_space[i].start); uint8_t* plug_free_space_start = pinned_plug (m) - pinned_len (m); if (!((old_loc == 0) || same_large_alignment_p (old_loc, plug_free_space_start))) { pad = switch_alignment_size (FALSE); } plug_size = saved_plug_size + pad; free_space_size = pinned_len (m); new_address = pinned_plug (m) - pinned_len (m); if (free_space_size >= (plug_size + Align (min_obj_size)) || free_space_size == plug_size) { new_free_space_size = free_space_size - plug_size; pinned_len (m) = new_free_space_size; #ifdef SIMPLE_DPRINTF dprintf (SEG_REUSE_LOG_0, ("[%d]FP: 0x%p->0x%p(%zx)(%zx), [0x%p (2^%d) -> [0x%p (2^%d)", heap_num, old_loc, new_address, (plug_size - pad), pad, pinned_plug (m), index_of_highest_set_bit (free_space_size), (pinned_plug (m) - pinned_len (m)), index_of_highest_set_bit (new_free_space_size))); #endif //SIMPLE_DPRINTF if (pad != 0) { set_node_realigned (old_loc); } can_fit = TRUE; } } else { heap_segment* seg = (heap_segment*)(bucket_free_space[i].start); free_space_size = heap_segment_committed (seg) - heap_segment_plan_allocated (seg); if (!((old_loc == 0) || same_large_alignment_p (old_loc, heap_segment_plan_allocated (seg)))) { pad = switch_alignment_size (FALSE); } plug_size = saved_plug_size + pad; if (free_space_size >= (plug_size + Align (min_obj_size)) || free_space_size == plug_size) { new_address = heap_segment_plan_allocated (seg); new_free_space_size = free_space_size - plug_size; heap_segment_plan_allocated (seg) = new_address + plug_size; #ifdef SIMPLE_DPRINTF dprintf (SEG_REUSE_LOG_0, ("[%d]FS: 0x%p-> 0x%p(%zd) (2^%d) -> 0x%p (2^%d)", heap_num, old_loc, new_address, (plug_size - pad), index_of_highest_set_bit (free_space_size), heap_segment_plan_allocated (seg), index_of_highest_set_bit (new_free_space_size))); #endif //SIMPLE_DPRINTF if (pad != 0) set_node_realigned (old_loc); can_fit = TRUE; } } if (can_fit) { break; } } if (!can_fit) { assert (chosen_power2 == 0); chosen_power2 = 1; goto retry; } new_address += pad; assert ((chosen_power2 && (i == 0)) || ((!chosen_power2) && (i < free_space_count))); int new_bucket_power2 = index_of_highest_set_bit (new_free_space_size); if (new_bucket_power2 < base_power2) { new_bucket_power2 = base_power2; } move_bucket (chosen_power2, new_bucket_power2 - base_power2); //dump(); return new_address; } void cleanup () { if (free_space_buckets) { delete [] free_space_buckets; } if (seg_free_space_array) { delete [] seg_free_space_array; } } }; #endif //!USE_REGIONS #define marked(i) header(i)->IsMarked() #define set_marked(i) header(i)->SetMarked() #define clear_marked(i) header(i)->ClearMarked() #define pinned(i) header(i)->IsPinned() #define set_pinned(i) header(i)->SetPinned() #define clear_pinned(i) header(i)->GetHeader()->ClrGCBit(); inline size_t my_get_size (Object* ob) { MethodTable* mT = header(ob)->GetMethodTable(); return (mT->GetBaseSize() + (mT->HasComponentSize() ? ((size_t)((CObjectHeader*)ob)->GetNumComponents() * mT->RawGetComponentSize()) : 0)); } #define size(i) my_get_size (header(i)) #define contain_pointers(i) header(i)->ContainsGCPointers() #ifdef COLLECTIBLE_CLASS #define contain_pointers_or_collectible(i) header(i)->ContainsGCPointersOrCollectible() #define get_class_object(i) GCToEEInterface::GetLoaderAllocatorObjectForGC((Object *)i) #define is_collectible(i) method_table(i)->Collectible() #else //COLLECTIBLE_CLASS #define contain_pointers_or_collectible(i) header(i)->ContainsGCPointers() #endif //COLLECTIBLE_CLASS #ifdef USE_REGIONS static GCSpinLock write_barrier_spin_lock; #endif //USE_REGIONS #ifdef WRITE_WATCH uint8_t* g_addresses [array_size+2]; // to get around the bug in GetWriteWatch #ifdef BACKGROUND_GC const size_t ww_reset_quantum = 128*1024*1024; #endif //BACKGROUND_GC #endif //WRITE_WATCH #ifdef BACKGROUND_GC void gc_heap::restart_vm() { //assert (generation_allocation_pointer (youngest_generation) == 0); dprintf (3, ("Restarting EE")); STRESS_LOG0(LF_GC, LL_INFO10000, "Concurrent GC: Restarting EE\n"); ee_proceed_event.Set(); } #endif //BACKGROUND_GC void gc_heap::suspend_EE () { dprintf (2, ("suspend_EE")); GCToEEInterface::SuspendEE (SUSPEND_FOR_GC_PREP); } void gc_heap::restart_EE () { dprintf (2, ("restart_EE")); GCToEEInterface::RestartEE (FALSE); } //Initializes PER_HEAP_ISOLATED data members. int gc_heap::init_semi_shared() { int ret = 0; #ifdef BGC_SERVO_TUNING uint32_t current_memory_load = 0; uint32_t sweep_flr_goal = 0; uint32_t sweep_flr_goal_loh = 0; #endif //BGC_SERVO_TUNING #ifndef USE_REGIONS // This is used for heap expansion - it's to fix exactly the start for gen 0 // through (max_generation-1). When we expand the heap we allocate all these // gen starts at the beginning of the new ephemeral seg. eph_gen_starts_size = (Align (min_obj_size)) * max_generation; #endif //!USE_REGIONS #ifdef MULTIPLE_HEAPS mark_list_size = min ((size_t)100*1024, max ((size_t)8192, soh_segment_size/(2*10*32))); #ifdef DYNAMIC_HEAP_COUNT if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) { // we'll actually start with one heap in this case g_mark_list_total_size = mark_list_size; } else #endif //DYNAMIC_HEAP_COUNT { g_mark_list_total_size = mark_list_size*n_heaps; } g_mark_list = make_mark_list (g_mark_list_total_size); min_balance_threshold = alloc_quantum_balance_units * CLR_SIZE * 2; g_mark_list_copy = make_mark_list (g_mark_list_total_size); if (!g_mark_list_copy) { goto cleanup; } #else //MULTIPLE_HEAPS mark_list_size = min((size_t)100*1024, max ((size_t)8192, soh_segment_size/(64*32))); g_mark_list_total_size = mark_list_size; g_mark_list = make_mark_list (mark_list_size); #endif //MULTIPLE_HEAPS dprintf (3, ("mark_list_size: %zd", mark_list_size)); if (!g_mark_list) { goto cleanup; } #ifdef MULTIPLE_HEAPS // gradual decommit: set size to some reasonable value per time interval max_decommit_step_size = ((DECOMMIT_SIZE_PER_MILLISECOND * DECOMMIT_TIME_STEP_MILLISECONDS) / n_heaps); // but do at least MIN_DECOMMIT_SIZE per step to make the OS call worthwhile max_decommit_step_size = max (max_decommit_step_size, MIN_DECOMMIT_SIZE); #endif //MULTIPLE_HEAPS #ifdef FEATURE_BASICFREEZE seg_table = sorted_table::make_sorted_table(); if (!seg_table) goto cleanup; #endif //FEATURE_BASICFREEZE #ifndef USE_REGIONS segment_standby_list = 0; #endif //USE_REGIONS if (!full_gc_approach_event.CreateManualEventNoThrow(FALSE)) { goto cleanup; } if (!full_gc_end_event.CreateManualEventNoThrow(FALSE)) { goto cleanup; } fgn_loh_percent = 0; full_gc_approach_event_set = false; memset (full_gc_counts, 0, sizeof (full_gc_counts)); #ifndef USE_REGIONS should_expand_in_full_gc = FALSE; #endif //!USE_REGIONS #ifdef FEATURE_LOH_COMPACTION loh_compaction_always_p = GCConfig::GetLOHCompactionMode() != 0; loh_compaction_mode = loh_compaction_default; #endif //FEATURE_LOH_COMPACTION #ifdef BGC_SERVO_TUNING memset (bgc_tuning::gen_calc, 0, sizeof (bgc_tuning::gen_calc)); memset (bgc_tuning::gen_stats, 0, sizeof (bgc_tuning::gen_stats)); memset (bgc_tuning::current_bgc_end_data, 0, sizeof (bgc_tuning::current_bgc_end_data)); // for the outer loop - the ML (memory load) loop bgc_tuning::enable_fl_tuning = (GCConfig::GetBGCFLTuningEnabled() != 0); bgc_tuning::memory_load_goal = (uint32_t)GCConfig::GetBGCMemGoal(); bgc_tuning::memory_load_goal_slack = (uint32_t)GCConfig::GetBGCMemGoalSlack(); bgc_tuning::ml_kp = (double)GCConfig::GetBGCMLkp() / 1000.0; bgc_tuning::ml_ki = (double)GCConfig::GetBGCMLki() / 1000.0; bgc_tuning::ratio_correction_step = (double)GCConfig::GetBGCG2RatioStep() / 100.0; // for the inner loop - the alloc loop which calculates the allocated bytes in gen2 before // triggering the next BGC. bgc_tuning::above_goal_kp = (double)GCConfig::GetBGCFLkp() / 1000000.0; bgc_tuning::enable_ki = (GCConfig::GetBGCFLEnableKi() != 0); bgc_tuning::above_goal_ki = (double)GCConfig::GetBGCFLki() / 1000000.0; bgc_tuning::enable_kd = (GCConfig::GetBGCFLEnableKd() != 0); bgc_tuning::above_goal_kd = (double)GCConfig::GetBGCFLkd() / 100.0; bgc_tuning::enable_smooth = (GCConfig::GetBGCFLEnableSmooth() != 0); bgc_tuning::num_gen1s_smooth_factor = (double)GCConfig::GetBGCFLSmoothFactor() / 100.0; bgc_tuning::enable_tbh = (GCConfig::GetBGCFLEnableTBH() != 0); bgc_tuning::enable_ff = (GCConfig::GetBGCFLEnableFF() != 0); bgc_tuning::above_goal_ff = (double)GCConfig::GetBGCFLff() / 100.0; bgc_tuning::enable_gradual_d = (GCConfig::GetBGCFLGradualD() != 0); sweep_flr_goal = (uint32_t)GCConfig::GetBGCFLSweepGoal(); sweep_flr_goal_loh = (uint32_t)GCConfig::GetBGCFLSweepGoalLOH(); bgc_tuning::gen_calc[0].sweep_flr_goal = ((sweep_flr_goal == 0) ? 20.0 : (double)sweep_flr_goal); bgc_tuning::gen_calc[1].sweep_flr_goal = ((sweep_flr_goal_loh == 0) ? 20.0 : (double)sweep_flr_goal_loh); bgc_tuning::available_memory_goal = (uint64_t)((double)gc_heap::total_physical_mem * (double)(100 - bgc_tuning::memory_load_goal) / 100); get_memory_info (¤t_memory_load); dprintf (BGC_TUNING_LOG, ("BTL tuning %s!!!", (bgc_tuning::enable_fl_tuning ? "enabled" : "disabled"))); #ifdef SIMPLE_DPRINTF dprintf (BGC_TUNING_LOG, ("BTL tuning parameters: mem goal: %d%%(%zd), +/-%d%%, gen2 correction factor: %.2f, sweep flr goal: %d%%, smooth factor: %.3f(%s), TBH: %s, FF: %.3f(%s), ml: kp %.5f, ki %.10f", bgc_tuning::memory_load_goal, bgc_tuning::available_memory_goal, bgc_tuning::memory_load_goal_slack, bgc_tuning::ratio_correction_step, (int)bgc_tuning::gen_calc[0].sweep_flr_goal, bgc_tuning::num_gen1s_smooth_factor, (bgc_tuning::enable_smooth ? "enabled" : "disabled"), (bgc_tuning::enable_tbh ? "enabled" : "disabled"), bgc_tuning::above_goal_ff, (bgc_tuning::enable_ff ? "enabled" : "disabled"), bgc_tuning::ml_kp, bgc_tuning::ml_ki)); dprintf (BGC_TUNING_LOG, ("BTL tuning parameters: kp: %.5f, ki: %.5f (%s), kd: %.3f (kd-%s, gd-%s), ff: %.3f", bgc_tuning::above_goal_kp, bgc_tuning::above_goal_ki, (bgc_tuning::enable_ki ? "enabled" : "disabled"), bgc_tuning::above_goal_kd, (bgc_tuning::enable_kd ? "enabled" : "disabled"), (bgc_tuning::enable_gradual_d ? "enabled" : "disabled"), bgc_tuning::above_goal_ff)); #endif //SIMPLE_DPRINTF if (bgc_tuning::enable_fl_tuning && (current_memory_load < bgc_tuning::memory_load_goal)) { uint32_t distance_to_goal = bgc_tuning::memory_load_goal - current_memory_load; bgc_tuning::stepping_interval = max (distance_to_goal / 10, 1u); bgc_tuning::last_stepping_mem_load = current_memory_load; bgc_tuning::last_stepping_bgc_count = 0; dprintf (BGC_TUNING_LOG, ("current ml: %d, %d to goal, interval: %d", current_memory_load, distance_to_goal, bgc_tuning::stepping_interval)); } else { dprintf (BGC_TUNING_LOG, ("current ml: %d, >= goal: %d, disable stepping", current_memory_load, bgc_tuning::memory_load_goal)); bgc_tuning::use_stepping_trigger_p = false; } #endif //BGC_SERVO_TUNING #ifdef BACKGROUND_GC memset (ephemeral_fgc_counts, 0, sizeof (ephemeral_fgc_counts)); bgc_alloc_spin_count = static_cast(GCConfig::GetBGCSpinCount()); bgc_alloc_spin = static_cast(GCConfig::GetBGCSpin()); { int number_bgc_threads = get_num_heaps(); if (!create_bgc_threads_support (number_bgc_threads)) { goto cleanup; } } #endif //BACKGROUND_GC memset (¤t_no_gc_region_info, 0, sizeof (current_no_gc_region_info)); #ifdef GC_CONFIG_DRIVEN compact_or_sweep_gcs[0] = 0; compact_or_sweep_gcs[1] = 0; #endif //GC_CONFIG_DRIVEN #if defined(SHORT_PLUGS) && !defined(USE_REGIONS) short_plugs_pad_ratio = (double)DESIRED_PLUG_LENGTH / (double)(DESIRED_PLUG_LENGTH - Align (min_obj_size)); #endif //SHORT_PLUGS && !USE_REGIONS generation_skip_ratio_threshold = (int)GCConfig::GetGCLowSkipRatio(); #ifdef FEATURE_EVENT_TRACE gc_time_info = new (nothrow) uint64_t[max_compact_time_type]; if (!gc_time_info) { goto cleanup; } #ifdef BACKGROUND_GC bgc_time_info = new (nothrow) uint64_t[max_bgc_time_type]; if (!bgc_time_info) { goto cleanup; } #endif //BACKGROUND_GC #ifdef FEATURE_LOH_COMPACTION loh_compact_info = new (nothrow) etw_loh_compact_info [get_num_heaps()]; if (!loh_compact_info) { goto cleanup; } #endif //FEATURE_LOH_COMPACTION #endif //FEATURE_EVENT_TRACE reset_mm_p = TRUE; ret = 1; cleanup: if (!ret) { if (full_gc_approach_event.IsValid()) { full_gc_approach_event.CloseEvent(); } if (full_gc_end_event.IsValid()) { full_gc_end_event.CloseEvent(); } } return ret; } uint32_t gc_heap::wait_for_gc_done(int32_t timeOut) { bool cooperative_mode = enable_preemptive (); uint32_t dwWaitResult = NOERROR; gc_heap* wait_heap = NULL; while (gc_heap::gc_started) { #ifdef MULTIPLE_HEAPS wait_heap = g_heaps[heap_select::select_heap(NULL)]; dprintf(2, ("waiting for the gc_done_event on heap %d", wait_heap->heap_number)); #endif // MULTIPLE_HEAPS dwWaitResult = wait_heap->gc_done_event.Wait(timeOut, FALSE); } disable_preemptive (cooperative_mode); return dwWaitResult; } void gc_heap::set_gc_done() { enter_gc_done_event_lock(); if (!gc_done_event_set) { gc_done_event_set = true; dprintf (2, ("heap %d: setting gc_done_event", heap_number)); gc_done_event.Set(); } exit_gc_done_event_lock(); } void gc_heap::reset_gc_done() { enter_gc_done_event_lock(); if (gc_done_event_set) { gc_done_event_set = false; dprintf (2, ("heap %d: resetting gc_done_event", heap_number)); gc_done_event.Reset(); } exit_gc_done_event_lock(); } void gc_heap::enter_gc_done_event_lock() { uint32_t dwSwitchCount = 0; retry: if (Interlocked::CompareExchange(&gc_done_event_lock, 0, -1) >= 0) { while (gc_done_event_lock >= 0) { if (g_num_processors > 1) { int spin_count = yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (gc_done_event_lock < 0) break; YieldProcessor(); // indicate to the processor that we are spinning } if (gc_done_event_lock >= 0) GCToOSInterface::YieldThread(++dwSwitchCount); } else GCToOSInterface::YieldThread(++dwSwitchCount); } goto retry; } } void gc_heap::exit_gc_done_event_lock() { gc_done_event_lock = -1; } #ifndef MULTIPLE_HEAPS #ifdef RECORD_LOH_STATE int gc_heap::loh_state_index = 0; gc_heap::loh_state_info gc_heap::last_loh_states[max_saved_loh_states]; #endif //RECORD_LOH_STATE VOLATILE(int32_t) gc_heap::gc_done_event_lock; VOLATILE(bool) gc_heap::gc_done_event_set; GCEvent gc_heap::gc_done_event; #endif //!MULTIPLE_HEAPS VOLATILE(bool) gc_heap::internal_gc_done; int gc_heap::init_gc_heap (int h_number) { #ifdef MULTIPLE_HEAPS #ifdef _DEBUG memset (committed_by_oh_per_heap, 0, sizeof (committed_by_oh_per_heap)); #endif //_DEBUG g_heaps [h_number] = this; time_bgc_last = 0; #ifdef SPINLOCK_HISTORY spinlock_info_index = 0; memset (last_spinlock_info, 0, sizeof(last_spinlock_info)); #endif //SPINLOCK_HISTORY // initialize per heap members. #ifndef USE_REGIONS ephemeral_low = (uint8_t*)1; ephemeral_high = MAX_PTR; #endif //!USE_REGIONS gc_low = 0; gc_high = 0; ephemeral_heap_segment = 0; oomhist_index_per_heap = 0; freeable_uoh_segment = 0; condemned_generation_num = 0; blocking_collection = FALSE; generation_skip_ratio = 100; #ifdef FEATURE_CARD_MARKING_STEALING n_eph_soh = 0; n_gen_soh = 0; n_eph_loh = 0; n_gen_loh = 0; #endif //FEATURE_CARD_MARKING_STEALING mark_stack_tos = 0; mark_stack_bos = 0; mark_stack_array_length = 0; mark_stack_array = 0; #if defined (_DEBUG) && defined (VERIFY_HEAP) verify_pinned_queue_p = FALSE; #endif // _DEBUG && VERIFY_HEAP #ifdef FEATURE_LOH_COMPACTION loh_pinned_queue_tos = 0; loh_pinned_queue_bos = 0; loh_pinned_queue_length = 0; loh_pinned_queue_decay = LOH_PIN_DECAY; loh_pinned_queue = 0; #endif //FEATURE_LOH_COMPACTION min_overflow_address = MAX_PTR; max_overflow_address = 0; gen0_bricks_cleared = FALSE; gen0_must_clear_bricks = 0; allocation_quantum = CLR_SIZE; more_space_lock_soh = gc_lock; more_space_lock_uoh = gc_lock; loh_alloc_since_cg = 0; #ifndef USE_REGIONS new_heap_segment = NULL; ro_segments_in_range = FALSE; #endif //!USE_REGIONS gen0_allocated_after_gc_p = false; #ifdef RECORD_LOH_STATE loh_state_index = 0; #endif //RECORD_LOH_STATE #ifdef USE_REGIONS new_gen0_regions_in_plns = 0; new_regions_in_prr = 0; new_regions_in_threading = 0; special_sweep_p = false; #endif //USE_REGIONS #endif //MULTIPLE_HEAPS #ifdef MULTIPLE_HEAPS if (h_number > n_heaps) { assert (!"Number of heaps exceeded"); return 0; } heap_number = h_number; #endif //MULTIPLE_HEAPS memset (etw_allocation_running_amount, 0, sizeof (etw_allocation_running_amount)); memset (allocated_since_last_gc, 0, sizeof (allocated_since_last_gc)); memset (&oom_info, 0, sizeof (oom_info)); memset (&fgm_result, 0, sizeof (fgm_result)); memset (oomhist_per_heap, 0, sizeof (oomhist_per_heap)); if (!gc_done_event.CreateManualEventNoThrow(FALSE)) { return 0; } gc_done_event_lock = -1; gc_done_event_set = false; #ifdef DYNAMIC_HEAP_COUNT hchist_index_per_heap = 0; memset (hchist_per_heap, 0, sizeof (hchist_per_heap)); #ifdef BACKGROUND_GC bgc_hchist_index_per_heap = 0; memset (bgc_hchist_per_heap, 0, sizeof (bgc_hchist_per_heap)); #endif //BACKGROUND_GC if (h_number != 0) { if (!gc_idle_thread_event.CreateAutoEventNoThrow (FALSE)) { return 0; } #ifdef BACKGROUND_GC if (!bgc_idle_thread_event.CreateAutoEventNoThrow (FALSE)) { return 0; } #endif //BACKGROUND_GC dprintf (9999, ("creating idle events for h%d", h_number)); } #endif //DYNAMIC_HEAP_COUNT if (!init_dynamic_data()) { return 0; } uint32_t* ct = &g_gc_card_table [card_word (card_of (g_gc_lowest_address))]; own_card_table (ct); card_table = translate_card_table (ct); brick_table = card_table_brick_table (ct); highest_address = card_table_highest_address (ct); lowest_address = card_table_lowest_address (ct); #ifdef CARD_BUNDLE card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct), g_gc_lowest_address); assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_gc_lowest_address))))] == card_table_card_bundle_table (ct)); #endif //CARD_BUNDLE #ifdef BACKGROUND_GC background_saved_highest_address = nullptr; background_saved_lowest_address = nullptr; if (gc_can_use_concurrent) mark_array = translate_mark_array (card_table_mark_array (&g_gc_card_table[card_word (card_of (g_gc_lowest_address))])); else mark_array = NULL; #endif //BACKGROUND_GC #ifdef USE_REGIONS #ifdef STRESS_REGIONS // Handle table APIs expect coop so we temporarily switch to coop. disable_preemptive (true); pinning_handles_for_alloc = new (nothrow) (OBJECTHANDLE[PINNING_HANDLE_INITIAL_LENGTH]); for (int i = 0; i < PINNING_HANDLE_INITIAL_LENGTH; i++) { pinning_handles_for_alloc[i] = g_gcGlobalHandleStore->CreateHandleOfType (0, HNDTYPE_PINNED); } enable_preemptive(); ph_index_per_heap = 0; pinning_seg_interval = 2; num_gen0_regions = 0; sip_seg_interval = 2; sip_seg_maxgen_interval = 3; num_condemned_regions = 0; #endif //STRESS_REGIONS end_gen0_region_space = 0; end_gen0_region_committed_space = 0; gen0_pinned_free_space = 0; gen0_large_chunk_found = false; // REGIONS PERF TODO: we should really allocate the POH regions together just so that // they wouldn't prevent us from coalescing free regions to form a large virtual address // range. if (!initial_make_soh_regions (__this) || !initial_make_uoh_regions (loh_generation, __this) || !initial_make_uoh_regions (poh_generation, __this)) { return 0; } #else //USE_REGIONS heap_segment* seg = make_initial_segment (soh_gen0, h_number, __this); if (!seg) return 0; FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(seg), (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)), gc_etw_segment_small_object_heap); seg_mapping_table_add_segment (seg, __this); #ifdef MULTIPLE_HEAPS assert (heap_segment_heap (seg) == __this); #endif //MULTIPLE_HEAPS uint8_t* start = heap_segment_mem (seg); for (int i = max_generation; i >= 0; i--) { make_generation (i, seg, start); start += Align (min_obj_size); } heap_segment_allocated (seg) = start; alloc_allocated = start; heap_segment_used (seg) = start - plug_skew; ephemeral_heap_segment = seg; // Create segments for the large and pinned generations heap_segment* lseg = make_initial_segment(loh_generation, h_number, __this); if (!lseg) return 0; lseg->flags |= heap_segment_flags_loh; FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(lseg), (size_t)(heap_segment_reserved (lseg) - heap_segment_mem(lseg)), gc_etw_segment_large_object_heap); heap_segment* pseg = make_initial_segment (poh_generation, h_number, __this); if (!pseg) return 0; pseg->flags |= heap_segment_flags_poh; FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(pseg), (size_t)(heap_segment_reserved (pseg) - heap_segment_mem(pseg)), gc_etw_segment_pinned_object_heap); seg_mapping_table_add_segment (lseg, __this); seg_mapping_table_add_segment (pseg, __this); make_generation (loh_generation, lseg, heap_segment_mem (lseg)); make_generation (poh_generation, pseg, heap_segment_mem (pseg)); heap_segment_allocated (lseg) = heap_segment_mem (lseg) + Align (min_obj_size, get_alignment_constant (FALSE)); heap_segment_used (lseg) = heap_segment_allocated (lseg) - plug_skew; heap_segment_allocated (pseg) = heap_segment_mem (pseg) + Align (min_obj_size, get_alignment_constant (FALSE)); heap_segment_used (pseg) = heap_segment_allocated (pseg) - plug_skew; for (int gen_num = 0; gen_num < total_generation_count; gen_num++) { generation* gen = generation_of (gen_num); make_unused_array (generation_allocation_start (gen), Align (min_obj_size)); } #ifdef MULTIPLE_HEAPS assert (heap_segment_heap (lseg) == __this); assert (heap_segment_heap (pseg) == __this); #endif //MULTIPLE_HEAPS #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS //initialize the alloc context heap generation_alloc_context (generation_of (soh_gen0))->set_alloc_heap(vm_heap); generation_alloc_context (generation_of (loh_generation))->set_alloc_heap(vm_heap); generation_alloc_context (generation_of (poh_generation))->set_alloc_heap(vm_heap); #endif //MULTIPLE_HEAPS generation_of (max_generation)->free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST_BITS, gen2_alloc_list, max_generation); generation_of (loh_generation)->free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST_BITS, loh_alloc_list); generation_of (poh_generation)->free_list_allocator = allocator(NUM_POH_ALIST, BASE_POH_ALIST_BITS, poh_alloc_list); total_alloc_bytes_soh = 0; total_alloc_bytes_uoh = 0; //needs to be done after the dynamic data has been initialized #ifdef MULTIPLE_HEAPS #ifdef STRESS_DYNAMIC_HEAP_COUNT uoh_msl_before_gc_p = false; #endif //STRESS_DYNAMIC_HEAP_COUNT #else //MULTIPLE_HEAPS allocation_running_amount = dd_min_size (dynamic_data_of (0)); #endif //!MULTIPLE_HEAPS fgn_maxgen_percent = 0; fgn_last_alloc = dd_min_size (dynamic_data_of (0)); mark* arr = new (nothrow) (mark [MARK_STACK_INITIAL_LENGTH]); if (!arr) return 0; make_mark_stack(arr); #ifdef BACKGROUND_GC for (int i = uoh_start_generation; i < total_generation_count; i++) { uoh_a_no_bgc[i - uoh_start_generation] = 0; uoh_a_bgc_marking[i - uoh_start_generation] = 0; uoh_a_bgc_planning[i - uoh_start_generation] = 0; } #ifdef BGC_SERVO_TUNING bgc_maxgen_end_fl_size = 0; #endif //BGC_SERVO_TUNING freeable_soh_segment = 0; gchist_index_per_heap = 0; if (gc_can_use_concurrent) { uint8_t** b_arr = new (nothrow) (uint8_t * [MARK_STACK_INITIAL_LENGTH]); if (!b_arr) return 0; make_background_mark_stack(b_arr); } #endif //BACKGROUND_GC #ifndef USE_REGIONS ephemeral_low = generation_allocation_start(generation_of(max_generation - 1)); ephemeral_high = heap_segment_reserved(ephemeral_heap_segment); #endif //!USE_REGIONS if (heap_number == 0) { stomp_write_barrier_initialize( #if defined(USE_REGIONS) ephemeral_low, ephemeral_high, map_region_to_generation_skewed, (uint8_t)min_segment_size_shr #elif defined(MULTIPLE_HEAPS) reinterpret_cast(1), reinterpret_cast(~0) #else ephemeral_low, ephemeral_high #endif //MULTIPLE_HEAPS || USE_REGIONS ); } #ifdef MULTIPLE_HEAPS if (!create_gc_thread ()) return 0; #endif //MULTIPLE_HEAPS #ifdef FEATURE_PREMORTEM_FINALIZATION HRESULT hr = AllocateCFinalize(&finalize_queue); if (FAILED(hr)) return 0; #endif // FEATURE_PREMORTEM_FINALIZATION #ifdef USE_REGIONS #ifdef MULTIPLE_HEAPS min_fl_list = 0; num_fl_items_rethreaded_stage2 = 0; free_list_space_per_heap = nullptr; #endif //MULTIPLE_HEAPS #else //USE_REGIONS max_free_space_items = MAX_NUM_FREE_SPACES; bestfit_seg = new (nothrow) seg_free_spaces (heap_number); if (!bestfit_seg) { return 0; } if (!bestfit_seg->alloc()) { return 0; } #endif //USE_REGIONS last_gc_before_oom = FALSE; sufficient_gen0_space_p = FALSE; #ifdef MULTIPLE_HEAPS #ifdef HEAP_ANALYZE heap_analyze_success = TRUE; internal_root_array = 0; internal_root_array_index = 0; internal_root_array_length = initial_internal_roots; current_obj = 0; current_obj_size = 0; #endif //HEAP_ANALYZE #endif // MULTIPLE_HEAPS #ifdef BACKGROUND_GC bgc_thread_id.Clear(); if (!create_bgc_thread_support()) { return 0; } bgc_alloc_lock = new (nothrow) exclusive_sync; if (!bgc_alloc_lock) { return 0; } bgc_alloc_lock->init(); bgc_thread_running = 0; bgc_thread = 0; bgc_threads_timeout_cs.Initialize(); current_bgc_state = bgc_not_in_process; background_soh_alloc_count = 0; bgc_overflow_count = 0; for (int i = uoh_start_generation; i < total_generation_count; i++) { end_uoh_size[i - uoh_start_generation] = dd_min_size (dynamic_data_of (i)); } current_sweep_pos = 0; #ifdef DOUBLY_LINKED_FL current_sweep_seg = 0; #endif //DOUBLY_LINKED_FL #endif //BACKGROUND_GC #ifdef GC_CONFIG_DRIVEN memset(interesting_data_per_heap, 0, sizeof (interesting_data_per_heap)); memset(compact_reasons_per_heap, 0, sizeof (compact_reasons_per_heap)); memset(expand_mechanisms_per_heap, 0, sizeof (expand_mechanisms_per_heap)); memset(interesting_mechanism_bits_per_heap, 0, sizeof (interesting_mechanism_bits_per_heap)); #endif //GC_CONFIG_DRIVEN return 1; } void gc_heap::destroy_semi_shared() { //TODO: will need to move this to per heap //#ifdef BACKGROUND_GC // if (c_mark_list) // delete c_mark_list; //#endif //BACKGROUND_GC if (g_mark_list) delete[] g_mark_list; #ifdef FEATURE_BASICFREEZE //destroy the segment map seg_table->delete_sorted_table(); delete[] (char*)seg_table; #endif //FEATURE_BASICFREEZE } void gc_heap::self_destroy() { #ifdef BACKGROUND_GC kill_gc_thread(); #endif //BACKGROUND_GC if (gc_done_event.IsValid()) { gc_done_event.CloseEvent(); } // destroy every segment for (int i = get_start_generation_index(); i < total_generation_count; i++) { heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i))); _ASSERTE(seg != NULL); while (seg) { heap_segment* next_seg = heap_segment_next_rw (seg); delete_heap_segment (seg); seg = next_seg; } } // get rid of the card table release_card_table (card_table); // destroy the mark stack delete[] mark_stack_array; #ifdef FEATURE_PREMORTEM_FINALIZATION if (finalize_queue) delete finalize_queue; #endif // FEATURE_PREMORTEM_FINALIZATION } void gc_heap::destroy_gc_heap(gc_heap* heap) { heap->self_destroy(); delete heap; } enum { CORINFO_EXCEPTION_GC = 0xE0004743 // 'GC' }; #define mark_stack_empty_p() (mark_stack_base == mark_stack_tos) #ifdef USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT // check that the fields of a decommissioned heap have their expected values, // i.e. were not inadvertently modified #define DECOMMISSIONED_VALUE 0xdec0dec0dec0dec0 static const size_t DECOMMISSIONED_SIZE_T = DECOMMISSIONED_VALUE; static const ptrdiff_t DECOMMISSIONED_PTRDIFF_T = (ptrdiff_t)DECOMMISSIONED_VALUE; static const ptrdiff_t DECOMMISSIONED_UINT64_T = (uint64_t)DECOMMISSIONED_VALUE; static uint8_t* const DECOMMISSIONED_UINT8_T_P = (uint8_t*)DECOMMISSIONED_VALUE; static uint8_t** const DECOMMISSIONED_UINT8_T_PP = (uint8_t**)DECOMMISSIONED_VALUE; static PTR_heap_segment const DECOMMISSIONED_REGION_P = (PTR_heap_segment)DECOMMISSIONED_VALUE; static mark* const DECOMMISSIONED_MARK_P = (mark*)DECOMMISSIONED_VALUE; static const BOOL DECOMMISSIONED_BOOL = 0xdec0dec0; static const BOOL DECOMMISSIONED_INT = (int)0xdec0dec0; static const float DECOMMISSIONED_FLOAT = (float)DECOMMISSIONED_VALUE; static const ptrdiff_t UNINITIALIZED_VALUE = 0xbaadbaadbaadbaad; float log_with_base (float x, float base) { assert (x > base); return (float)(log(x) / log(base)); } float mean (float* arr, int size) { float sum = 0.0; for (int i = 0; i < size; i++) { sum += arr[i]; } return (sum / size); } // Change it to a desired number if you want to print. int max_times_to_print_tcp = 0; // Return the slope, and the average values in the avg arg. float gc_heap::dynamic_heap_count_data_t::slope (float* y, int n, float* avg) { assert (n > 0); if (n == 1) { dprintf (6666, ("only 1 tcp: %.3f, no slope", y[0])); *avg = y[0]; return 0.0; } int sum_x = 0; for (int i = 0; i < n; i++) { sum_x += i; if (max_times_to_print_tcp >= 0) { dprintf (6666, ("%.3f, ", y[i])); } } float avg_x = (float)sum_x / n; float avg_y = mean (y, n); *avg = avg_y; float numerator = 0.0; float denominator = 0.0; for (int i = 0; i < n; ++i) { numerator += ((float)i - avg_x) * (y[i] - avg_y); denominator += ((float)i - avg_x) * (i - avg_x); } max_times_to_print_tcp--; return (numerator / denominator); } #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS #ifdef GC_CONFIG_DRIVEN #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}} #else //GC_CONFIG_DRIVEN #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}} #endif //GC_CONFIG_DRIVEN #define m_boundary_fullgc(o) {} #else //MULTIPLE_HEAPS #ifdef GC_CONFIG_DRIVEN #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;} #else #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}if (slow > o) slow = o; if (shigh < o) shigh = o;} #endif //GC_CONFIG_DRIVEN #define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;} #endif //MULTIPLE_HEAPS #ifdef USE_REGIONS inline bool is_in_heap_range (uint8_t* o) { #ifdef FEATURE_BASICFREEZE // we may have frozen objects in read only segments // outside of the reserved address range of the gc heap assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) || (o == nullptr) || (ro_segment_lookup (o) != nullptr)); return ((g_gc_lowest_address <= o) && (o < g_gc_highest_address)); #else //FEATURE_BASICFREEZE // without frozen objects, every non-null pointer must be // within the heap assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address)); return (o != nullptr); #endif //FEATURE_BASICFREEZE } #endif //USE_REGIONS #define new_start() {if (ppstop <= start) {break;} else {parm = start}} #define ignore_start 0 #define use_start 1 #define go_through_object(mt,o,size,parm,start,start_useful,limit,exp) \ { \ CGCDesc* map = CGCDesc::GetCGCDescFromMT((MethodTable*)(mt)); \ CGCDescSeries* cur = map->GetHighestSeries(); \ ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries(); \ \ if (cnt >= 0) \ { \ CGCDescSeries* last = map->GetLowestSeries(); \ uint8_t** parm = 0; \ do \ { \ assert (parm <= (uint8_t**)((o) + cur->GetSeriesOffset())); \ parm = (uint8_t**)((o) + cur->GetSeriesOffset()); \ uint8_t** ppstop = \ (uint8_t**)((uint8_t*)parm + cur->GetSeriesSize() + (size));\ if (!start_useful || (uint8_t*)ppstop > (start)) \ { \ if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);\ while (parm < ppstop) \ { \ {exp} \ parm++; \ } \ } \ cur--; \ \ } while (cur >= last); \ } \ else \ { \ /* Handle the repeating case - array of valuetypes */ \ uint8_t** parm = (uint8_t**)((o) + cur->startoffset); \ if (start_useful && start > (uint8_t*)parm) \ { \ ptrdiff_t cs = mt->RawGetComponentSize(); \ parm = (uint8_t**)((uint8_t*)parm + (((start) - (uint8_t*)parm)/cs)*cs); \ } \ while ((uint8_t*)parm < ((o)+(size)-plug_skew)) \ { \ for (ptrdiff_t __i = 0; __i > cnt; __i--) \ { \ HALF_SIZE_T skip = (cur->val_serie + __i)->skip; \ HALF_SIZE_T nptrs = (cur->val_serie + __i)->nptrs; \ uint8_t** ppstop = parm + nptrs; \ if (!start_useful || (uint8_t*)ppstop > (start)) \ { \ if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start); \ do \ { \ {exp} \ parm++; \ } while (parm < ppstop); \ } \ parm = (uint8_t**)((uint8_t*)ppstop + skip); \ } \ } \ } \ } #define go_through_object_nostart(mt,o,size,parm,exp) {go_through_object(mt,o,size,parm,o,ignore_start,(o + size),exp); } // 1 thing to note about this macro: // 1) you can use *parm safely but in general you don't want to use parm // because for the collectible types it's not an address on the managed heap. #ifndef COLLECTIBLE_CLASS #define go_through_object_cl(mt,o,size,parm,exp) \ { \ if (header(o)->ContainsGCPointers()) \ { \ go_through_object_nostart(mt,o,size,parm,exp); \ } \ } #else //COLLECTIBLE_CLASS #define go_through_object_cl(mt,o,size,parm,exp) \ { \ if (header(o)->Collectible()) \ { \ uint8_t* class_obj = get_class_object (o); \ uint8_t** parm = &class_obj; \ do {exp} while (false); \ } \ if (header(o)->ContainsGCPointers()) \ { \ go_through_object_nostart(mt,o,size,parm,exp); \ } \ } #endif //COLLECTIBLE_CLASS // enable on processors known to have a useful prefetch instruction #if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) #define PREFETCH #endif #ifdef PREFETCH inline void Prefetch(void* addr) { #ifdef TARGET_WINDOWS #if defined(TARGET_AMD64) || defined(TARGET_X86) #ifndef _MM_HINT_T0 #define _MM_HINT_T0 1 #endif _mm_prefetch((const char*)addr, _MM_HINT_T0); #elif defined(TARGET_ARM64) __prefetch((const char*)addr); #endif //defined(TARGET_AMD64) || defined(TARGET_X86) #elif defined(TARGET_UNIX) __builtin_prefetch(addr); #else //!(TARGET_WINDOWS || TARGET_UNIX) UNREFERENCED_PARAMETER(addr); #endif //TARGET_WINDOWS } #else //PREFETCH inline void Prefetch (void* addr) { UNREFERENCED_PARAMETER(addr); } #endif //PREFETCH #define stolen 2 #define partial 1 #define partial_object 3 inline BOOL stolen_p (uint8_t* r) { return (((size_t)r&2) && !((size_t)r&1)); } inline BOOL ready_p (uint8_t* r) { return ((size_t)r != 1); } inline BOOL partial_p (uint8_t* r) { return (((size_t)r&1) && !((size_t)r&2)); } inline BOOL straight_ref_p (uint8_t* r) { return (!stolen_p (r) && !partial_p (r)); } inline BOOL partial_object_p (uint8_t* r) { return (((size_t)r & partial_object) == partial_object); } #ifdef MULTIPLE_HEAPS static VOLATILE(BOOL) s_fUnpromotedHandles = FALSE; static VOLATILE(BOOL) s_fUnscannedPromotions = FALSE; static VOLATILE(BOOL) s_fScanRequired; #else //MULTIPLE_HEAPS #endif //MULTIPLE_HEAPS #ifdef FEATURE_STRUCTALIGN // // The word with left child, right child, and align info is laid out as follows: // // | upper short word | lower short word | // |<------------> <----->|<------------> <----->| // | left child info hi| right child info lo| // x86: | 10 bits 6 bits| 10 bits 6 bits| // // where left/right child are signed values and concat(info hi, info lo) is unsigned. // // The "align info" encodes two numbers: the required alignment (a power of two) // and the misalignment (the number of machine words the destination address needs // to be adjusted by to provide alignment - so this number is always smaller than // the required alignment). Thus, the two can be represented as the "logical or" // of the two numbers. Note that the actual pad is computed from the misalignment // by adding the alignment iff the misalignment is non-zero and less than min_obj_size. // // The number of bits in a brick. #if defined (TARGET_AMD64) #define brick_bits (12) #else #define brick_bits (11) #endif //TARGET_AMD64 static_assert(brick_size == (1 << brick_bits)); // The number of bits needed to represent the offset to a child node. // "brick_bits + 1" allows us to represent a signed offset within a brick. #define child_bits (brick_bits + 1 - LOG2_PTRSIZE) // The number of bits in each of the pad hi, pad lo fields. #define pad_bits (sizeof(short) * 8 - child_bits) #define child_from_short(w) (((signed short)(w) / (1 << (pad_bits - LOG2_PTRSIZE))) & ~((1 << LOG2_PTRSIZE) - 1)) #define pad_mask ((1 << pad_bits) - 1) #define pad_from_short(w) ((size_t)(w) & pad_mask) #else // FEATURE_STRUCTALIGN #define child_from_short(w) (w) #endif // FEATURE_STRUCTALIGN inline short node_left_child(uint8_t* node) { return child_from_short(((plug_and_pair*)node)[-1].m_pair.left); } inline void set_node_left_child(uint8_t* node, ptrdiff_t val) { assert (val > -(ptrdiff_t)brick_size); assert (val < (ptrdiff_t)brick_size); assert (Aligned (val)); #ifdef FEATURE_STRUCTALIGN size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.left); ((plug_and_pair*)node)[-1].m_pair.left = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; #else // FEATURE_STRUCTALIGN ((plug_and_pair*)node)[-1].m_pair.left = (short)val; #endif // FEATURE_STRUCTALIGN assert (node_left_child (node) == val); } inline short node_right_child(uint8_t* node) { return child_from_short(((plug_and_pair*)node)[-1].m_pair.right); } inline void set_node_right_child(uint8_t* node, ptrdiff_t val) { assert (val > -(ptrdiff_t)brick_size); assert (val < (ptrdiff_t)brick_size); assert (Aligned (val)); #ifdef FEATURE_STRUCTALIGN size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.right); ((plug_and_pair*)node)[-1].m_pair.right = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; #else // FEATURE_STRUCTALIGN ((plug_and_pair*)node)[-1].m_pair.right = (short)val; #endif // FEATURE_STRUCTALIGN assert (node_right_child (node) == val); } #ifdef FEATURE_STRUCTALIGN void node_aligninfo (uint8_t* node, int& requiredAlignment, ptrdiff_t& pad) { // Extract the single-number aligninfo from the fields. short left = ((plug_and_pair*)node)[-1].m_pair.left; short right = ((plug_and_pair*)node)[-1].m_pair.right; ptrdiff_t pad_shifted = (pad_from_short(left) << pad_bits) | pad_from_short(right); ptrdiff_t aligninfo = pad_shifted * DATA_ALIGNMENT; // Replicate the topmost bit into all lower bits. ptrdiff_t x = aligninfo; x |= x >> 8; x |= x >> 4; x |= x >> 2; x |= x >> 1; // Clear all bits but the highest. requiredAlignment = (int)(x ^ (x >> 1)); pad = aligninfo - requiredAlignment; pad += AdjustmentForMinPadSize(pad, requiredAlignment); } inline ptrdiff_t node_alignpad (uint8_t* node) { int requiredAlignment; ptrdiff_t alignpad; node_aligninfo (node, requiredAlignment, alignpad); return alignpad; } void clear_node_aligninfo (uint8_t* node) { ((plug_and_pair*)node)[-1].m_pair.left &= ~0 << pad_bits; ((plug_and_pair*)node)[-1].m_pair.right &= ~0 << pad_bits; } void set_node_aligninfo (uint8_t* node, int requiredAlignment, ptrdiff_t pad) { // Encode the alignment requirement and alignment offset as a single number // as described above. ptrdiff_t aligninfo = (size_t)requiredAlignment + (pad & (requiredAlignment-1)); assert (Aligned (aligninfo)); ptrdiff_t aligninfo_shifted = aligninfo / DATA_ALIGNMENT; assert (aligninfo_shifted < (1 << (pad_bits + pad_bits))); ptrdiff_t hi = aligninfo_shifted >> pad_bits; assert (pad_from_short(((plug_and_gap*)node)[-1].m_pair.left) == 0); ((plug_and_pair*)node)[-1].m_pair.left |= hi; ptrdiff_t lo = aligninfo_shifted & pad_mask; assert (pad_from_short(((plug_and_gap*)node)[-1].m_pair.right) == 0); ((plug_and_pair*)node)[-1].m_pair.right |= lo; #ifdef _DEBUG int requiredAlignment2; ptrdiff_t pad2; node_aligninfo (node, requiredAlignment2, pad2); assert (requiredAlignment == requiredAlignment2); assert (pad == pad2); #endif // _DEBUG } #endif // FEATURE_STRUCTALIGN inline void loh_set_node_relocation_distance(uint8_t* node, ptrdiff_t val) { ptrdiff_t* place = &(((loh_obj_and_pad*)node)[-1].reloc); *place = val; } inline ptrdiff_t loh_node_relocation_distance(uint8_t* node) { return (((loh_obj_and_pad*)node)[-1].reloc); } inline ptrdiff_t node_relocation_distance (uint8_t* node) { return (((plug_and_reloc*)(node))[-1].reloc & ~3); } inline void set_node_relocation_distance(uint8_t* node, ptrdiff_t val) { assert (val == (val & ~3)); ptrdiff_t* place = &(((plug_and_reloc*)node)[-1].reloc); //clear the left bit and the relocation field *place &= 1; *place |= val; } #define node_left_p(node) (((plug_and_reloc*)(node))[-1].reloc & 2) #define set_node_left(node) ((plug_and_reloc*)(node))[-1].reloc |= 2; #ifndef FEATURE_STRUCTALIGN void set_node_realigned(uint8_t* node) { ((plug_and_reloc*)(node))[-1].reloc |= 1; } void clear_node_realigned(uint8_t* node) { #ifdef RESPECT_LARGE_ALIGNMENT ((plug_and_reloc*)(node))[-1].reloc &= ~1; #else UNREFERENCED_PARAMETER(node); #endif //RESPECT_LARGE_ALIGNMENT } #endif // FEATURE_STRUCTALIGN inline size_t node_gap_size (uint8_t* node) { return ((plug_and_gap *)node)[-1].gap; } void set_gap_size (uint8_t* node, size_t size) { assert (Aligned (size)); // clear the 2 uint32_t used by the node. ((plug_and_gap *)node)[-1].reloc = 0; ((plug_and_gap *)node)[-1].lr =0; ((plug_and_gap *)node)[-1].gap = size; assert ((size == 0 )||(size >= sizeof(plug_and_reloc))); } /***************************** Called after compact phase to fix all generation gaps ********************************/ #ifdef USE_REGIONS void heap_segment::thread_free_obj (uint8_t* obj, size_t s) { //dprintf (REGIONS_LOG, ("threading SIP free obj %zx-%zx(%zd)", obj, (obj + s), s)); if (s >= min_free_list) { free_list_slot (obj) = 0; if (free_list_head) { assert (free_list_tail); free_list_slot (free_list_tail) = obj; } else { free_list_head = obj; } free_list_tail = obj; free_list_size += s; } else { free_obj_size += s; } } #endif //USE_REGIONS inline uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) { uint8_t* candidate = 0; int cn; while (1) { if (tree < old_address) { if ((cn = node_right_child (tree)) != 0) { assert (candidate < tree); candidate = tree; tree = tree + cn; Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); continue; } else break; } else if (tree > old_address) { if ((cn = node_left_child (tree)) != 0) { tree = tree + cn; Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); continue; } else break; } else break; } if (tree <= old_address) return tree; else if (candidate) return candidate; else return tree; } #ifdef MULTIPLE_HEAPS #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable:4702) // C4702: unreachable code: gc_thread_function may not return #endif //_MSC_VER void gc_heap::gc_thread_stub (void* arg) { gc_heap* heap = (gc_heap*)arg; if (!gc_thread_no_affinitize_p) { // We are about to set affinity for GC threads. It is a good place to set up NUMA and // CPU groups because the process mask, processor number, and group number are all // readily available. set_thread_affinity_for_heap (heap->heap_number, heap_select::find_proc_no_from_heap_no (heap->heap_number)); } // server GC threads run at a higher priority than normal. GCToOSInterface::BoostThreadPriority(); void* tmp = _alloca (256*heap->heap_number); heap->gc_thread_function(); } #ifdef _MSC_VER #pragma warning(pop) #endif //_MSC_VER #endif //MULTIPLE_HEAPS #ifdef BACKGROUND_GC #ifdef MULTIPLE_HEAPS void gc_heap::bgc_suspend_EE () { for (int i = 0; i < n_heaps; i++) { gc_heap::g_heaps[i]->reset_gc_done(); } gc_started = TRUE; dprintf (2, ("bgc_suspend_EE")); GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP); gc_started = FALSE; for (int i = 0; i < n_heaps; i++) { gc_heap::g_heaps[i]->set_gc_done(); } } #else void gc_heap::bgc_suspend_EE () { reset_gc_done(); gc_started = TRUE; dprintf (2, ("bgc_suspend_EE")); GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP); gc_started = FALSE; set_gc_done(); } #endif //MULTIPLE_HEAPS #ifdef BGC_SERVO_TUNING #endif //BGC_SERVO_TUNING #endif //BACKGROUND_GC //because of heap expansion, computing end is complicated. uint8_t* compute_next_end (heap_segment* seg, uint8_t* low) { if ((low >= heap_segment_mem (seg)) && (low < heap_segment_allocated (seg))) return low; else return heap_segment_allocated (seg); } #ifdef FEATURE_CARD_MARKING_STEALING bool card_marking_enumerator::move_next(heap_segment* seg, uint8_t*& low, uint8_t*& high) { if (segment == nullptr) return false; uint32_t chunk_index = old_chunk_index; old_chunk_index = INVALID_CHUNK_INDEX; if (chunk_index == INVALID_CHUNK_INDEX) chunk_index = Interlocked::Increment((volatile int32_t *)chunk_index_counter); while (true) { uint32_t chunk_index_within_seg = chunk_index - segment_start_chunk_index; uint8_t* start = heap_segment_mem(segment); uint8_t* end = compute_next_end(segment, gc_low); uint8_t* aligned_start = (uint8_t*)((size_t)start & ~(CARD_MARKING_STEALING_GRANULARITY - 1)); size_t seg_size = end - aligned_start; uint32_t chunk_count_within_seg = (uint32_t)((seg_size + (CARD_MARKING_STEALING_GRANULARITY - 1)) / CARD_MARKING_STEALING_GRANULARITY); if (chunk_index_within_seg < chunk_count_within_seg) { if (seg == segment) { low = (chunk_index_within_seg == 0) ? start : (aligned_start + (size_t)chunk_index_within_seg * CARD_MARKING_STEALING_GRANULARITY); high = (chunk_index_within_seg + 1 == chunk_count_within_seg) ? end : (aligned_start + (size_t)(chunk_index_within_seg + 1) * CARD_MARKING_STEALING_GRANULARITY); chunk_high = high; dprintf (3, ("cme:mn ci: %u, low: %p, high: %p", chunk_index, low, high)); return true; } else { // we found the correct segment, but it's not the segment our caller is in // our caller should still be in one of the previous segments #ifdef _DEBUG for (heap_segment* cur_seg = seg; cur_seg != segment; cur_seg = heap_segment_next_in_range(cur_seg)) { assert(cur_seg); } #endif //_DEBUG // keep the chunk index for later old_chunk_index = chunk_index; dprintf (3, ("cme:mn oci: %u, seg mismatch seg: %p, segment: %p", old_chunk_index, heap_segment_mem (segment), heap_segment_mem (seg))); return false; } } segment = heap_segment_next_in_range(segment); segment_start_chunk_index += chunk_count_within_seg; if (segment == nullptr) { // keep the chunk index for later old_chunk_index = chunk_index; dprintf (3, ("cme:mn oci: %u no more segments", old_chunk_index)); return false; } } } #endif // FEATURE_CARD_MARKING_STEALING //----------------------------------------------------------------------------- // // VM Specific support // //----------------------------------------------------------------------------- //Static member variables. VOLATILE(BOOL) GCHeap::GcInProgress = FALSE; GCEvent *GCHeap::WaitForGCEvent = NULL; unsigned GCHeap::GcCondemnedGeneration = 0; size_t GCHeap::totalSurvivedSize = 0; #ifdef FEATURE_PREMORTEM_FINALIZATION CFinalize* GCHeap::m_Finalize = 0; BOOL GCHeap::GcCollectClasses = FALSE; VOLATILE(int32_t) GCHeap::m_GCFLock = 0; #ifndef FEATURE_NATIVEAOT // NativeAOT forces relocation a different way #ifdef STRESS_HEAP #ifndef MULTIPLE_HEAPS OBJECTHANDLE GCHeap::m_StressObjs[NUM_HEAP_STRESS_OBJS]; int GCHeap::m_CurStressObj = 0; #endif // !MULTIPLE_HEAPS #endif // STRESS_HEAP #endif // FEATURE_NATIVEAOT #endif //FEATURE_PREMORTEM_FINALIZATION #ifdef VERIFY_HEAP void gc_heap::verify_free_lists () { for (int gen_num = 0; gen_num < total_generation_count; gen_num++) { dprintf (3, ("Verifying free list for gen:%d", gen_num)); allocator* gen_alloc = generation_allocator (generation_of (gen_num)); size_t sz = gen_alloc->first_bucket_size(); bool verify_undo_slot = (gen_num != 0) && (gen_num <= max_generation) && !gen_alloc->discard_if_no_fit_p(); for (unsigned int a_l_number = 0; a_l_number < gen_alloc->number_of_buckets(); a_l_number++) { uint8_t* free_list = gen_alloc->alloc_list_head_of (a_l_number); uint8_t* prev = 0; while (free_list) { if (!((CObjectHeader*)free_list)->IsFree()) { dprintf (1, ("Verifiying Heap: curr free list item %zx isn't a free object", (size_t)free_list)); FATAL_GC_ERROR(); } if (((a_l_number < (gen_alloc->number_of_buckets()-1))&& (unused_array_size (free_list) >= sz)) || ((a_l_number != 0) && (unused_array_size (free_list) < sz/2))) { dprintf (1, ("Verifiying Heap: curr free list item %zx isn't in the right bucket", (size_t)free_list)); FATAL_GC_ERROR(); } if (verify_undo_slot && (free_list_undo (free_list) != UNDO_EMPTY)) { dprintf (1, ("Verifiying Heap: curr free list item %zx has non empty undo slot", (size_t)free_list)); FATAL_GC_ERROR(); } if ((gen_num <= max_generation) && (object_gennum (free_list)!= gen_num)) { dprintf (1, ("Verifiying Heap: curr free list item %zx is in the wrong generation free list", (size_t)free_list)); FATAL_GC_ERROR(); } #ifdef DOUBLY_LINKED_FL uint8_t* prev_free_item = free_list_prev (free_list); if (gen_num == max_generation) { if (prev_free_item != prev) { dprintf (1, ("%p prev should be: %p, actual: %p", free_list, prev_free_item, prev)); FATAL_GC_ERROR(); } } #endif //DOUBLY_LINKED_FL #if defined(USE_REGIONS) && defined(MULTIPLE_HEAPS) heap_segment* region = region_of (free_list); if ((region->heap != this) && ((gen_num != max_generation) || (!trigger_bgc_for_rethreading_p))) { // The logic in change_heap_count depends on the coming BGC (or blocking gen 2) to rebuild the gen 2 free list. // In that case, before the rebuild happens, the gen2 free list is expected to contain free list items that do not belong to the right heap. dprintf (1, ("curr free item %p should be on heap %d, but actually is on heap %d: %d", free_list, this->heap_number, region->heap->heap_number)); FATAL_GC_ERROR(); } #endif //USE_REGIONS && MULTIPLE_HEAPS prev = free_list; free_list = free_list_slot (free_list); } //verify the sanity of the tail uint8_t* tail = gen_alloc->alloc_list_tail_of (a_l_number); if (!((tail == 0) || (tail == prev))) { dprintf (1, ("Verifying Heap: tail of free list is not correct, tail %p, prev %p", tail, prev)); FATAL_GC_ERROR(); } if (tail == 0) { uint8_t* head = gen_alloc->alloc_list_head_of (a_l_number); if ((head != 0) && (free_list_slot (head) != 0)) { dprintf (1, ("Verifying Heap: head of free list is not correct, head %p -> %p", head, free_list_slot (head))); FATAL_GC_ERROR(); } } sz *=2; } } } #endif //VERIFY_HEAP // Wait until a garbage collection is complete // returns NOERROR if wait was OK, other error code if failure. // WARNING: This will not undo the must complete state. If you are // in a must complete when you call this, you'd better know what you're // doing. #ifdef FEATURE_PREMORTEM_FINALIZATION static HRESULT AllocateCFinalize(CFinalize **pCFinalize) { *pCFinalize = new (nothrow) CFinalize(); if (*pCFinalize == NULL || !(*pCFinalize)->Initialize()) return E_OUTOFMEMORY; return S_OK; } #endif // FEATURE_PREMORTEM_FINALIZATION #ifndef FEATURE_NATIVEAOT // NativeAOT forces relocation a different way #ifdef STRESS_HEAP void StressHeapDummy (); #endif // STRESS_HEAP #endif // !FEATURE_NATIVEAOT #ifdef FEATURE_PREMORTEM_FINALIZATION #define REGISTER_FOR_FINALIZATION(_object, _size) \ hp->finalize_queue->RegisterForFinalization (0, (_object), (_size)) #else // FEATURE_PREMORTEM_FINALIZATION #define REGISTER_FOR_FINALIZATION(_object, _size) true #endif // FEATURE_PREMORTEM_FINALIZATION #define CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register) do { \ if ((_object) == NULL || ((_register) && !REGISTER_FOR_FINALIZATION(_object, _size))) \ { \ STRESS_LOG_OOM_STACK(_size); \ return NULL; \ } \ } while (false) #if defined(WRITE_BARRIER_CHECK) && !defined (SERVER_GC) // This code is designed to catch the failure to update the write barrier // The way it works is to copy the whole heap right after every GC. The write // barrier code has been modified so that it updates the shadow as well as the // real GC heap. Before doing the next GC, we walk the heap, looking for pointers // that were updated in the real heap, but not the shadow. A mismatch indicates // an error. The offending code can be found by breaking after the correct GC, // and then placing a data breakpoint on the Heap location that was updated without // going through the write barrier. // Called at process shutdown void deleteGCShadow() { if (g_GCShadow != 0) GCToOSInterface::VirtualRelease (g_GCShadow, g_GCShadowEnd - g_GCShadow); g_GCShadow = 0; g_GCShadowEnd = 0; } // Called at startup and right after a GC, get a snapshot of the GC Heap void initGCShadow() { if (!(GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_BARRIERCHECK)) return; uint8_t* highest = nullptr; #ifdef USE_REGIONS highest = global_region_allocator.get_left_used_unsafe(); #else highest = g_gc_highest_address; #endif size_t len = g_gc_highest_address - g_gc_lowest_address; size_t commit_len = highest - g_gc_lowest_address; if (len > (size_t)(g_GCShadowEnd - g_GCShadow)) { deleteGCShadow(); g_GCShadowEnd = g_GCShadow = (uint8_t *)GCToOSInterface::VirtualReserve (len, 0, VirtualReserveFlags::None); if (g_GCShadow == NULL || !GCToOSInterface::VirtualCommit (g_GCShadow, commit_len)) { _ASSERTE(!"Not enough memory to run HeapVerify level 2"); // If after the assert we decide to allow the program to continue // running we need to be in a state that will not trigger any // additional AVs while we fail to allocate a shadow segment, i.e. // ensure calls to updateGCShadow() checkGCWriteBarrier() don't AV deleteGCShadow(); return; } g_GCShadowEnd += commit_len; } // save the value of g_gc_lowest_address at this time. If this value changes before // the next call to checkGCWriteBarrier() it means we extended the heap (with a // large object segment most probably), and the whole shadow segment is inconsistent. g_shadow_lowest_address = g_gc_lowest_address; //****** Copy the whole GC heap ****** // // NOTE: This is the one situation where the combination of heap_segment_rw(gen_start_segment()) // can produce a NULL result. This is because the initialization has not completed. // for (int i = get_start_generation_index(); i < total_generation_count; i++) { generation* gen = gc_heap::generation_of (i); heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); ptrdiff_t delta = g_GCShadow - g_gc_lowest_address; while (seg) { // Copy the segment uint8_t* start = heap_segment_mem (seg); uint8_t* end = heap_segment_allocated (seg); memcpy (start + delta, start, end - start); seg = heap_segment_next_rw (seg); } } } #define INVALIDGCVALUE (void*)((size_t)0xcccccccd) // test to see if 'ptr' was only updated via the write barrier. inline void testGCShadow(Object** ptr) { Object** shadow = (Object**) &g_GCShadow[((uint8_t*) ptr - g_gc_lowest_address)]; if (*ptr != 0 && (uint8_t*) shadow < g_GCShadowEnd && *ptr != *shadow) { // If you get this assertion, someone updated a GC pointer in the heap without // using the write barrier. To find out who, check the value of // dd_collection_count (dynamic_data_of (0)). Also // note the value of 'ptr'. Rerun the App that the previous GC just occurred. // Then put a data breakpoint for the value of 'ptr' Then check every write // to pointer between the two GCs. The last one is not using the write barrier. // If the memory of interest does not exist at system startup, // you need to set the data breakpoint right after the memory gets committed // Set a breakpoint at the end of grow_heap_segment, and put the value of 'ptr' // in the memory window. run until the memory gets mapped. Then you can set // your breakpoint // Note a recent change, we've identified race conditions when updating the gc shadow. // Throughout the runtime, code will update an address in the gc heap, then erect the // write barrier, which calls updateGCShadow. With an app that pounds one heap location // from multiple threads, you can hit this assert even though all involved are using the // write barrier properly. Thusly, we detect the race and set this location to INVALIDGCVALUE. // TODO: the code in jithelp.asm doesn't call updateGCShadow, and hasn't been // TODO: fixed to detect the race. We've only seen this race from VolatileWritePtr, // TODO: so elect not to fix jithelp.asm at this time. It should be done if we start hitting // TODO: erroneous asserts in here. if(*shadow!=INVALIDGCVALUE) { #ifdef FEATURE_BASICFREEZE // Write barriers for stores of references to frozen objects may be optimized away. if (!g_theGCHeap->IsInFrozenSegment (*ptr)) #endif // FEATURE_BASICFREEZE { _ASSERTE(!"Pointer updated without using write barrier"); } } /* else { printf("saw a INVALIDGCVALUE. (just to let you know)\n"); } */ } } void testGCShadowHelper (uint8_t* x) { size_t s = size (x); if (contain_pointers (x)) { go_through_object_nostart (method_table(x), x, s, oo, { testGCShadow((Object**) oo); }); } } // Walk the whole heap, looking for pointers that were not updated with the write barrier. void checkGCWriteBarrier() { // g_shadow_lowest_address != g_gc_lowest_address means the GC heap was extended by a segment // and the GC shadow segment did not track that change! if (g_GCShadowEnd <= g_GCShadow || g_shadow_lowest_address != g_gc_lowest_address) { // No shadow heap, nothing to check. return; } for (int i = get_start_generation_index(); i < total_generation_count; i++) { int alignment = get_alignment_constant(i <= max_generation); { generation* gen = gc_heap::generation_of (i); heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); _ASSERTE(seg != NULL); while(seg) { uint8_t* x = heap_segment_mem (seg); while (x < heap_segment_allocated (seg)) { size_t s = size (x); testGCShadowHelper (x); x = x + Align (s, alignment); } seg = heap_segment_next_rw (seg); } } } } #endif //WRITE_BARRIER_CHECK && !SERVER_GC #ifdef GC_DESCRIPTOR extern "C" { struct ContractDescriptor; extern ContractDescriptor GCContractDescriptorWKS; #if FEATURE_SVR_GC extern ContractDescriptor GCContractDescriptorSVR; #endif // FEATURE_SVR_GC } #endif // GC_DESCRIPTOR void PopulateDacVars(GcDacVars *gcDacVars) { bool v2 = gcDacVars->minor_version_number >= 2; bool v4 = gcDacVars->minor_version_number >= 4; bool v6 = gcDacVars->minor_version_number >= 6; bool v8 = gcDacVars->minor_version_number >= 8; #define DEFINE_FIELD(field_name, field_type) offsetof(CLASS_NAME, field_name), #define DEFINE_DPTR_FIELD(field_name, field_type) offsetof(CLASS_NAME, field_name), #define DEFINE_ARRAY_FIELD(field_name, field_type, array_length) offsetof(CLASS_NAME, field_name), #define DEFINE_MISSING_FIELD(field_name) -1, #ifdef MULTIPLE_HEAPS static int gc_heap_field_offsets[] = { #define CLASS_NAME gc_heap #include "dac_gcheap_fields.h" #undef CLASS_NAME }; #endif //MULTIPLE_HEAPS static int generation_field_offsets[] = { #define CLASS_NAME generation #include "dac_generation_fields.h" #undef CLASS_NAME #undef DEFINE_MISSING_FIELD #undef DEFINE_ARRAY_FIELD #undef DEFINE_DPTR_FIELD #undef DEFINE_FIELD }; assert(gcDacVars != nullptr); // Note: These version numbers do not need to be checked in the .Net dac/SOS because // we always match the compiled dac and GC to the version used. NativeAOT's SOS may // work differently than .Net SOS. When making breaking changes here you may need to // find NativeAOT's equivalent of SOS_BREAKING_CHANGE_VERSION and increment it. gcDacVars->major_version_number = 2; gcDacVars->minor_version_number = 8; if (v2) { gcDacVars->total_bookkeeping_elements = total_bookkeeping_elements; gcDacVars->card_table_info_size = sizeof(card_table_info); } g_build_variant = 0; #ifdef USE_REGIONS g_build_variant |= build_variant_use_region; if (v2) { gcDacVars->count_free_region_kinds = count_free_region_kinds; gcDacVars->global_regions_to_decommit = reinterpret_cast(&gc_heap::global_regions_to_decommit); gcDacVars->global_free_huge_regions = reinterpret_cast(&gc_heap::global_free_huge_regions); } #endif //USE_REGIONS #ifdef BACKGROUND_GC g_build_variant |= build_variant_background_gc; #endif //BACKGROUND_GC #ifdef DYNAMIC_HEAP_COUNT g_build_variant |= build_variant_dynamic_heap_count; #endif //DYNAMIC_HEAP_COUNT gcDacVars->built_with_svr = &g_built_with_svr_gc; gcDacVars->build_variant = &g_build_variant; gcDacVars->gc_structures_invalid_cnt = const_cast(&GCScan::m_GcStructuresInvalidCnt); gcDacVars->generation_size = sizeof(generation); gcDacVars->total_generation_count = total_generation_count; gcDacVars->max_gen = &g_max_generation; #ifdef BACKGROUND_GC gcDacVars->current_c_gc_state = const_cast(&gc_heap::current_c_gc_state); #else //BACKGROUND_GC gcDacVars->current_c_gc_state = 0; #endif //BACKGROUND_GC #ifndef MULTIPLE_HEAPS gcDacVars->ephemeral_heap_segment = reinterpret_cast(&gc_heap::ephemeral_heap_segment); #ifdef USE_REGIONS if (v2) { gcDacVars->free_regions = reinterpret_cast(&gc_heap::free_regions); } #endif #ifdef BACKGROUND_GC gcDacVars->mark_array = &gc_heap::mark_array; gcDacVars->background_saved_lowest_address = &gc_heap::background_saved_lowest_address; gcDacVars->background_saved_highest_address = &gc_heap::background_saved_highest_address; if (v2) { gcDacVars->freeable_soh_segment = reinterpret_cast(&gc_heap::freeable_soh_segment); gcDacVars->freeable_uoh_segment = reinterpret_cast(&gc_heap::freeable_uoh_segment); } gcDacVars->next_sweep_obj = &gc_heap::next_sweep_obj; #ifdef USE_REGIONS gcDacVars->saved_sweep_ephemeral_seg = 0; gcDacVars->saved_sweep_ephemeral_start = 0; #else gcDacVars->saved_sweep_ephemeral_seg = reinterpret_cast(&gc_heap::saved_sweep_ephemeral_seg); gcDacVars->saved_sweep_ephemeral_start = &gc_heap::saved_sweep_ephemeral_start; #endif //USE_REGIONS #else //BACKGROUND_GC gcDacVars->mark_array = 0; gcDacVars->background_saved_lowest_address = 0; gcDacVars->background_saved_highest_address = 0; if (v2) { gcDacVars->freeable_soh_segment = 0; gcDacVars->freeable_uoh_segment = 0; } gcDacVars->next_sweep_obj = 0; gcDacVars->saved_sweep_ephemeral_seg = 0; gcDacVars->saved_sweep_ephemeral_start = 0; #endif //BACKGROUND_GC gcDacVars->alloc_allocated = &gc_heap::alloc_allocated; gcDacVars->oom_info = &gc_heap::oom_info; gcDacVars->finalize_queue = reinterpret_cast(&gc_heap::finalize_queue); gcDacVars->generation_table = reinterpret_cast(&gc_heap::generation_table); #ifdef GC_CONFIG_DRIVEN gcDacVars->gc_global_mechanisms = reinterpret_cast(&gc_global_mechanisms); gcDacVars->interesting_data_per_heap = reinterpret_cast(&gc_heap::interesting_data_per_heap); gcDacVars->compact_reasons_per_heap = reinterpret_cast(&gc_heap::compact_reasons_per_heap); gcDacVars->expand_mechanisms_per_heap = reinterpret_cast(&gc_heap::expand_mechanisms_per_heap); gcDacVars->interesting_mechanism_bits_per_heap = reinterpret_cast(&gc_heap::interesting_mechanism_bits_per_heap); #endif // GC_CONFIG_DRIVEN #ifdef HEAP_ANALYZE gcDacVars->internal_root_array = &gc_heap::internal_root_array; gcDacVars->internal_root_array_index = &gc_heap::internal_root_array_index; gcDacVars->heap_analyze_success = &gc_heap::heap_analyze_success; #endif // HEAP_ANALYZE #else gcDacVars->n_heaps = &gc_heap::n_heaps; gcDacVars->g_heaps = reinterpret_cast(&gc_heap::g_heaps); gcDacVars->gc_heap_field_offsets = reinterpret_cast(&gc_heap_field_offsets); #endif // MULTIPLE_HEAPS gcDacVars->generation_field_offsets = reinterpret_cast(&generation_field_offsets); if (v2) { gcDacVars->bookkeeping_start = &gc_heap::bookkeeping_start; } if (v4) { #ifdef DYNAMIC_HEAP_COUNT gcDacVars->dynamic_adaptation_mode = &gc_heap::dynamic_adaptation_mode; #else gcDacVars->dynamic_adaptation_mode = nullptr; #endif //DYNAMIC_HEAP_COUNT } if (v6) { #ifdef GC_DESCRIPTOR #ifdef MULTIPLE_HEAPS gcDacVars->gc_descriptor = (void*)&GCContractDescriptorSVR; #else // MULTIPLE_HEAPS gcDacVars->gc_descriptor = (void*)&GCContractDescriptorWKS; #endif // MULTIPLE_HEAPS #endif // GC_DESCRIPTOR } if (v8) { #ifdef MULTIPLE_HEAPS gcDacVars->g_totalCpuCount = &::g_totalCpuCount; #endif // MULTIPLE_HEAPS } } inline BOOL gc_heap::ephemeral_pointer_p (uint8_t* o) { #ifdef USE_REGIONS int gen_num = object_gennum ((uint8_t*)o); assert (gen_num >= 0); return (gen_num < max_generation); #else return ((o >= ephemeral_low) && (o < ephemeral_high)); #endif //USE_REGIONS } // This needs to check the range that's covered by bookkeeping because find_object will // need to look at the brick table. inline bool gc_heap::is_in_find_object_range (uint8_t* o) { if (o == nullptr) { return false; } #if defined(USE_REGIONS) && defined(FEATURE_CONSERVATIVE_GC) return ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); #else //USE_REGIONS && FEATURE_CONSERVATIVE_GC if ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)) { #ifdef USE_REGIONS assert ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); #endif //USE_REGIONS return true; } else { return false; } #endif //USE_REGIONS && FEATURE_CONSERVATIVE_GC } #ifdef USE_REGIONS // This assumes o is guaranteed to be in a region. inline bool gc_heap::is_in_condemned_gc (uint8_t* o) { assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)); int condemned_gen = settings.condemned_generation; if (condemned_gen < max_generation) { int gen = get_region_gen_num (o); if (gen > condemned_gen) { return false; } } return true; } #endif //USE_REGIONS #if defined (_MSC_VER) && defined (TARGET_X86) #pragma optimize("y", on) // Small critical routines, don't put in EBP frame #endif //_MSC_VER && TARGET_X86 // return the generation number of an object. // It is assumed that the object is valid. // Note that this will return max_generation for UOH objects int gc_heap::object_gennum (uint8_t* o) { #ifdef USE_REGIONS return get_region_gen_num (o); #else if (in_range_for_segment (o, ephemeral_heap_segment) && (o >= generation_allocation_start (generation_of (max_generation - 1)))) { // in an ephemeral generation. for ( int i = 0; i < max_generation-1; i++) { if ((o >= generation_allocation_start (generation_of (i)))) return i; } return max_generation-1; } else { return max_generation; } #endif //USE_REGIONS } int gc_heap::object_gennum_plan (uint8_t* o) { #ifdef USE_REGIONS return get_region_plan_gen_num (o); #else if (in_range_for_segment (o, ephemeral_heap_segment)) { for (int i = 0; i < ephemeral_generation_count; i++) { uint8_t* plan_start = generation_plan_allocation_start (generation_of (i)); if (plan_start && (o >= plan_start)) { return i; } } } return max_generation; #endif //USE_REGIONS } #if defined(_MSC_VER) && defined(TARGET_X86) #pragma optimize("", on) // Go back to command line default optimizations #endif //_MSC_VER && TARGET_X86 heap_segment* gc_heap::find_segment (uint8_t* interior, BOOL small_segment_only_p) { heap_segment* seg = seg_mapping_table_segment_of (interior); if (seg) { if (small_segment_only_p && heap_segment_uoh_p (seg)) return 0; } return seg; } #ifdef MULTIPLE_HEAPS gc_heap* seg_mapping_table_heap_of (uint8_t* o) { if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) return 0; return seg_mapping_table_heap_of_worker (o); } #endif //MULTIPLE_HEAPS #ifdef MULTIPLE_HEAPS gc_heap* seg_mapping_table_heap_of_gc (uint8_t* o) { #ifdef FEATURE_BASICFREEZE if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) return 0; #endif //FEATURE_BASICFREEZE return seg_mapping_table_heap_of_worker (o); } #endif //MULTIPLE_HEAPS #if !defined(_DEBUG) && !defined(__GNUC__) inline // This causes link errors if global optimization is off #endif //!_DEBUG && !__GNUC__ gc_heap* gc_heap::heap_of (uint8_t* o) { #ifdef MULTIPLE_HEAPS if (o == 0) return g_heaps [0]; gc_heap* hp = seg_mapping_table_heap_of (o); return (hp ? hp : g_heaps[0]); #else //MULTIPLE_HEAPS UNREFERENCED_PARAMETER(o); return __this; #endif //MULTIPLE_HEAPS } inline gc_heap* gc_heap::heap_of_gc (uint8_t* o) { #ifdef MULTIPLE_HEAPS if (o == 0) return g_heaps [0]; gc_heap* hp = seg_mapping_table_heap_of_gc (o); return (hp ? hp : g_heaps[0]); #else //MULTIPLE_HEAPS UNREFERENCED_PARAMETER(o); return __this; #endif //MULTIPLE_HEAPS } // will find all heap objects (large and small) // // Callers of this method need to guarantee the interior pointer is within the heap range. // // If you need it to be stricter, eg if you only want to find an object in ephemeral range, // you should make sure interior is within that range before calling this method. uint8_t* gc_heap::find_object (uint8_t* interior) { assert (interior != 0); if (!gen0_bricks_cleared) { #ifdef MULTIPLE_HEAPS assert (!"Should have already been done in server GC"); #endif //MULTIPLE_HEAPS clear_gen0_bricks(); } //indicate that in the future this needs to be done during allocation gen0_must_clear_bricks = FFIND_DECAY; int brick_entry = get_brick_entry(brick_of (interior)); if (brick_entry == 0) { // this is a pointer to a UOH object heap_segment* seg = find_segment (interior, FALSE); if (seg) { #ifdef FEATURE_CONSERVATIVE_GC if (interior >= heap_segment_allocated(seg)) return 0; #endif // If interior falls within the first free object at the beginning of a generation, // we don't have brick entry for it, and we may incorrectly treat it as on large object heap. int align_const = get_alignment_constant (heap_segment_read_only_p (seg) #ifdef FEATURE_CONSERVATIVE_GC || (GCConfig::GetConservativeGC() && !heap_segment_uoh_p (seg)) #endif ); assert (interior < heap_segment_allocated (seg)); uint8_t* o = heap_segment_mem (seg); while (o < heap_segment_allocated (seg)) { uint8_t* next_o = o + Align (size (o), align_const); assert (next_o > o); if ((o <= interior) && (interior < next_o)) return o; o = next_o; } return 0; } else { return 0; } } else { heap_segment* seg = find_segment (interior, TRUE); if (seg) { #ifdef FEATURE_CONSERVATIVE_GC if (interior >= heap_segment_allocated (seg)) return 0; #else assert (interior < heap_segment_allocated (seg)); #endif uint8_t* o = find_first_object (interior, heap_segment_mem (seg)); return o; } else return 0; } } size_t gc_heap::get_generation_start_size (int gen_number) { #ifdef USE_REGIONS return 0; #else return Align (size (generation_allocation_start (generation_of (gen_number))), get_alignment_constant (gen_number <= max_generation)); #endif //!USE_REGIONS } inline int gc_heap::get_num_heaps() { #ifdef MULTIPLE_HEAPS return n_heaps; #else return 1; #endif //MULTIPLE_HEAPS } void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_bounds_check) { WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::StompResize; args.is_runtime_suspended = is_runtime_suspended; args.requires_upper_bounds_check = requires_upper_bounds_check; args.card_table = g_gc_card_table; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES args.card_bundle_table = g_gc_card_bundle_table; #endif args.lowest_address = g_gc_lowest_address; args.highest_address = g_gc_highest_address; #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP if (SoftwareWriteWatch::IsEnabledForGCHeap()) { args.write_watch_table = g_gc_sw_ww_table; } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP GCToEEInterface::StompWriteBarrier(&args); } // Category-specific gc_heap method files #include "region_allocator.cpp" #include "region_free_list.cpp" #include "finalization.cpp" #include "interface.cpp" #include "allocation.cpp" #include "mark_phase.cpp" #include "plan_phase.cpp" #include "relocate_compact.cpp" #include "sweep.cpp" #include "background.cpp" #include "regions_segments.cpp" #include "card_table.cpp" #include "memory.cpp" #include "diagnostics.cpp" #include "dynamic_tuning.cpp" #include "no_gc.cpp" #include "dynamic_heap_count.cpp" #include "init.cpp" #include "collect.cpp" }