1. 设计臆测
1.1 驱逐总体流程
1.2 基于观察者模式的观测
在驱逐管理器中,基于观察者模式实现了对特殊资源驱逐管理的支持。比如内存资源,在某些情况下即使当前的资源水位没有达到阈值,也可能会触发OOM killer;这个时候,对应的ThresholdNotifier也会尝试进行状态同步,从而确定是否要主动驱逐一些Pod(目前这个特性应该还没开)。同理,如果这个资源有专属的阈值监测,则在后续驱逐Pod的时候,如果是由该资源触发的驱逐,则优先级会被降低。
2. 核心数据结构
2.1 阈值与信号
2.1.1 Signal
// Signal names a resource signal that an eviction threshold can be
// evaluated against.
type Signal string

const (
	// Other signals are omitted in this excerpt.

	// SignalMemoryAvailable is the signal name for available memory.
	SignalMemoryAvailable Signal = "memory.available"
	// SignalNodeFsAvailable is the signal name for available space on the node filesystem.
	SignalNodeFsAvailable Signal = "nodefs.available"
)
2.1.2 Threshold
type Threshold struct { Signal Signal Operator ThresholdOperator Value ThresholdValue GracePeriod time.Duration MinReclaim *ThresholdValue }
3. 驱逐核心流程
3.1 资源准备阶段
资源准备阶段,主要是为后面的决策做准备,首先要获取当前所有活跃的pod信息(后面从中挑选可以驱逐的Pod), 然后还要获取当前的统计信息、阈值
// Start from the configured thresholds.
thresholds := m.config.Thresholds
// Fetch the currently active pods; eviction candidates are picked from them later.
activePods := podFunc()
updateStats := true
// Fetch the stats summary.
// NOTE(review): handling of err is not shown in this excerpt.
summary, err := m.summaryProvider.Get(updateStats)
3.2 周期性阈值监测
// Refresh the threshold notifiers at most once per notifierRefreshInterval.
if m.clock.Since(m.thresholdsLastUpdated) > notifierRefreshInterval {
	m.thresholdsLastUpdated = m.clock.Now()
	for _, notifier := range m.thresholdNotifiers {
		// Push the latest summary to the notifier; a failure is only logged
		// so the remaining notifiers are still updated.
		if err := notifier.UpdateThreshold(summary); err != nil {
			klog.Warningf("eviction manager: failed to update %s: %v", notifier.Description(), err)
		}
	}
}
3.3 获取观测状态
// Build the per-signal observations and the pod stats lookup from the summary.
observations, statsFunc := makeSignalObservations(summary)
func makeSignalObservations(summary *statsapi.Summary) (signalObservations, statsFunc) { statsFunc := cachedStatsFunc(summary.Pods) result := signalObservations{} if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil { result[evictionapi.SignalMemoryAvailable] = signalObservation{ available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI), capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI), time: memory.Time, } } // 省略大部分信息 return result, statsFunc }
3.4 获取被触发的阈值
通过当前的观测状态和之前配置的所有阈值,来获取那些被触发的阈值。这里最后一个参数false表示当前是第一次检测,先不要强制使用最小回收(MinReclaim)。
// Keep only the thresholds met by the current observations; min-reclaim is not enforced here (false).
thresholds = thresholdsMet(thresholds, observations, false)
3.5 阈值检测实现
func thresholdsMet(thresholds []evictionapi.Threshold, observations signalObservations, enforceMinReclaim bool) []evictionapi.Threshold { results := []evictionapi.Threshold{} for i := range thresholds { threshold := thresholds[i] // 获取当前阈值信号的观测状态 observed, found := observations[threshold.Signal] if !found { klog.Warningf("eviction manager: no observation found for eviction signal %v", threshold.Signal) continue } // 确定是否达到阈值 thresholdMet := false // 计算 quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity) // 如果指定了enforceMinReclaim,则比较相对于值-minreclaim if enforceMinReclaim && threshold.MinReclaim != nil { // 强制最小回收,其实就是在之前阈值的基础上,在加上最小回收的资源 quantity.Add(*evictionapi.GetThresholdQuantity(*threshold.MinReclaim, observed.capacity)) } thresholdResult := quantity.Cmp(*observed.available) switch threshold.Operator { case evictionapi.OpLessThan: thresholdMet = thresholdResult > 0 } if thresholdMet { results = append(results, threshold) } } return results }
3.6 前后阈值合并
if len(m.thresholdsMet) > 0 {
	// Re-check the previously met thresholds with min-reclaim enforced; those
	// still met are not yet resolved and are merged into the current set.
	thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
	thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
}
3.7 NodeConditions
// Compute the node conditions from the current thresholds.
nodeConditions := nodeConditions(thresholds)
if len(nodeConditions) > 0 {
	klog.V(3).Infof("eviction manager: node conditions - observed: %v", nodeConditions)
}

// Update the last-observed record for the node conditions, then recompute the
// condition set from that record and the pressure transition period.
nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)
nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)
if len(nodeConditions) > 0 {
	klog.V(3).Infof("eviction manager: node conditions - transition period not met: %v", nodeConditions)
}
3.8 保存内部状态
// update internal state m.Lock() m.nodeConditions = nodeConditions m.thresholdsFirstObservedAt = thresholdsFirstObservedAt m.nodeConditionsLastObservedAt = nodeConditionsLastObservedAt m.thresholdsMet = thresholds // determine the set of thresholds whose stats have been updated since the last sync thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations) debugLogThresholdsWithObservation("thresholds - updated stats", thresholds, observations) m.lastObservations = observations m.Unlock()
3.9 本地临时存储驱逐策略
// When the LocalStorageCapacityIsolation feature gate is enabled, try local-storage eviction first and return early if it evicted any pods.
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { if evictedPods := m.localStorageEviction(summary, activePods); len(evictedPods) > 0 { return evictedPods } }
3.10 最小化驱逐阈值驱逐策略
// Pick the threshold (and its resource) to reclaim; nothing to do if none is found.
thresholdToReclaim, resourceToReclaim, foundAny := getReclaimableThreshold(thresholds)
if !foundAny {
	return nil
}
3.11 优先级排序
// rank the thresholds by eviction priority sort.Sort(byEvictionPriority(thresholds)) rank, ok := m.signalToRankFunc[thresholdToReclaim.Signal] if !ok { klog.Errorf("eviction manager: no ranking function for signal %s", thresholdToReclaim.Signal) return nil } // the only candidates viable for eviction are those pods that had anything running. if len(activePods) == 0 { klog.Errorf("eviction manager: eviction thresholds have been met, but no pods are active to evict") return nil } // 对指定资源的要逐出的正在运行的pod进行排序 rank(activePods, statsFunc)
3.12 尝试驱逐Pod
// Walk the ranked pods and return as soon as one eviction succeeds.
for i := range activePods {
	pod := activePods[i]

	// The grace period override stays 0 for a hard eviction threshold;
	// otherwise the configured maximum pod grace period is used.
	gracePeriodOverride := int64(0)
	if !isHardEvictionThreshold(thresholdToReclaim) {
		gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
	}

	message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)

	// Evict the pod; on success, record the metric and return it.
	if m.evictPod(pod, gracePeriodOverride, message, annotations) {
		metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc()
		return []*v1.Pod{pod}
	}
}
3.13 内存资源优先级排序算法
3.13.1 内存超量对比
func exceedMemoryRequests(stats statsFunc) cmpFunc { return func(p1, p2 *v1.Pod) int { p1Stats, p1Found := stats(p1) p2Stats, p2Found := stats(p2) if !p1Found || !p2Found { return cmpBool(!p1Found, !p2Found) } p1Memory := memoryUsage(p1Stats.Memory) p2Memory := memoryUsage(p2Stats.Memory) p1ExceedsRequests := p1Memory.Cmp(v1resource.GetResourceRequestQuantity(p1, v1.ResourceMemory)) == 1 p2ExceedsRequests := p2Memory.Cmp(v1resource.GetResourceRequestQuantity(p2, v1.ResourceMemory)) == 1 // prioritize evicting the pod which exceeds its requests return cmpBool(p1ExceedsRequests, p2ExceedsRequests) } }
3.13.2 资源实际超量计算
func memory(stats statsFunc) cmpFunc { return func(p1, p2 *v1.Pod) int { p1Stats, p1Found := stats(p1) p2Stats, p2Found := stats(p2) if !p1Found || !p2Found { // prioritize evicting the pod for which no stats were found return cmpBool(!p1Found, !p2Found) } // adjust p1, p2 usage relative to the request (if any) p1Memory := memoryUsage(p1Stats.Memory) p1Request := v1resource.GetResourceRequestQuantity(p1, v1.ResourceMemory) p1Memory.Sub(p1Request) p2Memory := memoryUsage(p2Stats.Memory) p2Request := v1resource.GetResourceRequestQuantity(p2, v1.ResourceMemory) p2Memory.Sub(p2Request) // prioritize evicting the pod which has the larger consumption of memory return p2Memory.Cmp(*p1Memory) } }
3.13.3 优先级策略对比
func priority(p1, p2 *v1.Pod) int { priority1 := pod.GetPodPriority(p1) priority2 := pod.GetPodPriority(p2) if priority1 == priority2 { return 0 } if priority1 > priority2 { return 1 } return -1 }
k8s源码阅读电子书地址: https://www.yuque.com/baxiaoshi/tyado3
以上所述就是小编给大家介绍的《图解kubernetes Kubelet驱逐管理关键实现》,希望对大家有所帮助,如果大家有任何疑问请给我留言,小编会及时回复大家的。在此也非常感谢大家对 码农网 的支持!
【英】Douglas G. McIlwraith(道格拉斯 G. 麦基尔雷思)、【美】Haralambos Marmanis(哈若拉玛 玛若曼尼斯)、【美】Dmitry Babenko(德米特里•巴邦科) / 达观数据、陈运文 等 / 电子工业出版社 / 2017-7 / 69.00
机器学习一直是人工智能研究领域的重要方向,而在大数据时代,来自Web 的数据采集、挖掘、应用技术又越来越受到瞩目,并创造着巨大的价值。本书是有关Web数据挖掘和机器学习技术的一本知名的著作,第2 版进一步加入了本领域最新的研究内容和应用案例,介绍了统计学、结构建模、推荐系统、数据分类、点击预测、深度学习、效果评估、数据采集等众多方面的内容。《智能Web算法(第2版)》内容翔实、案例生动,有很高的阅......一起来看看 《智能Web算法(第2版)》 这本书的介绍吧!