const (
	// The minimum node score that may be returned by a ScorePlugin (after NormalizeScores).
	MinNodeScore int64 = 0

	// The maximum node score that may be returned by a ScorePlugin (after NormalizeScores).
	MaxNodeScore int64 = 100

	// Names of the pipeline stages.
	SortStage        = "Sort"
	SampleNodesStage = "SampleNodes"
	PreFilterStage   = "PreFilter"
	FilterStage      = "Filter"
	PreScoreStage    = "PreScore"
	ScoreStage       = "Score"
	ReserveStage     = "Reserve"

	SamplingStrategyStage = "SamplingStrategy" // Sampling pipeline only

	// Binding Pipeline stages
	CheckConflictsStage = "CheckConflicts"
)
func IsSuccessStatus(status Status) bool
Returns true if the status represents a Success, otherwise false. A nil status also represents a Success.
func ReadTypedStateData[T StateData](schedCtx SchedulingContext, key string) (T, bool, error)
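Convenience function to read StateData from a SchedulingContext and cast it to a specific type. Returns the data as type T, a boolean indicating whether the key was found, and an error (presumably set if the stored data is not of type T; confirm against the implementation).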
func StatusCodeAsString(status Status) string
Returns the string version of the specified Status' statusCode. This also works if status is nil (which also represents a Success status).
Represents an instance of the Polaris ClusterAgent Binding Pipeline, encompassing all its stages.
A Binding Pipeline executes on a single goroutine and there is only a single pod traversing the pipeline at a time.
// BindingPipeline represents an instance of the Polaris ClusterAgent binding pipeline,
// encompassing all its stages.
type BindingPipeline interface {
	// Runs the binding pipeline stages and, if all stages succeed, commits the scheduling decision to the cluster.
	// Depending on the outcome of the pipeline, the queuedPod is either committed or removed from the queue.
	CommitSchedulingDecision(ctx SchedulingContext, schedDecision *client.ClusterSchedulingDecision, queuedPod client.PodQueuedOnNode) (*client.CommitSchedulingDecisionSuccess, Status)
}
Contains plugin instances for a single BindingPipeline instance.
If a plugin ties into multiple stages, the same plugin instance is used for all of them.
// BindingPipelinePlugins contains the plugin instances for a single BindingPipeline instance.
type BindingPipelinePlugins struct {
	// The plugins configured for the CheckConflicts stage.
	CheckConflicts []CheckConflictsPlugin
}
Used to instantiate binding plugins.
// BindingPluginsFactory is used to instantiate binding plugins.
type BindingPluginsFactory interface {
	// Creates a new set of instances of the plugins configured for the binding Pipeline.
	NewBindingPipelinePlugins(clusterAgentServices ClusterAgentServices) (*BindingPipelinePlugins, error)
}
A CheckConflictsPlugin is used by the binding pipeline to check if committing this scheduling decision would cause any conflicts (e.g., overprovisioning of resources).
// CheckConflictsPlugin is a binding pipeline plugin that checks whether committing
// a scheduling decision would cause any conflicts.
type CheckConflictsPlugin interface {
	Plugin

	// Checks if committing this scheduling decision would cause any conflicts (e.g., overprovisioning of resources).
	// All CheckConflicts plugins must return a success status for the binding to proceed.
	CheckForConflicts(ctx SchedulingContext, decision *SchedulingDecision) Status
}
Defines a factory function for creating plugins for the PolarisClusterAgent, i.e., sampling and binding pipeline plugins.
// ClusterAgentPluginFactoryFunc is a PluginFactoryFunc whose owner services type is ClusterAgentServices.
type ClusterAgentPluginFactoryFunc PluginFactoryFunc[ClusterAgentServices]
Provides ClusterAgent plugins access to services that they may use.
// ClusterAgentServices provides ClusterAgent plugins access to services that they may use.
type ClusterAgentServices interface {
	// Gets the config used by this ClusterAgent.
	Config() *config.ClusterAgentConfig

	// Gets the LocalClusterClient used by this ClusterAgent.
	ClusterClient() client.LocalClusterClient

	// The nodes cache used by this ClusterAgent.
	NodesCache() client.NodesCache

	// Gets the logger used by this ClusterAgent.
	Logger() *logr.Logger
}
Represents an instance of the Polaris Scheduler Decision Pipeline, encompassing all stages from PreFilter until (including) Reserve.
A Decision Pipeline executes on a single goroutine and there is only a single pod traversing the pipeline at a time.
// DecisionPipeline represents an instance of the Polaris Scheduler Decision Pipeline.
type DecisionPipeline interface {
	// Executes the Decision Pipeline and returns a sorted list of SchedulingDecisions, i.e., the commit candidates, and a Status.
	// The SchedulingDecisions are nil in case the pod could not be scheduled or if an error occurred.
	DecideCommitCandidates(podInfo *SampledPodInfo, commitCandidatesCount int) ([]*SchedulingDecision, Status)
}
Contains plugin instances for a single DecisionPipeline instance.
If a plugin ties into multiple stages, e.g., PreFilter, Filter, and Score, the same plugin instance is used for all of them.
// DecisionPipelinePlugins contains the plugin instances for a single DecisionPipeline instance,
// grouped by the stage they tie into.
type DecisionPipelinePlugins struct {
	// The plugins configured for the PreFilter stage.
	PreFilter []PreFilterPlugin

	// The plugins configured for the Filter stage.
	Filter []FilterPlugin

	// The plugins configured for the PreScore stage.
	PreScore []PreScorePlugin

	// The plugins configured for the Score stage, each combined with its ScoreExtensions and weight.
	Score []*ScorePluginWithExtensions

	// The plugins configured for the Reserve stage.
	Reserve []ReservePlugin
}
A FilterPlugin determines if a particular node is suitable for hosting a pod. FilterPlugins are called after the PreFilter stage. At the beginning of the Filter stage, all nodes from the SampleNodes stage are used. This list may be reduced by every FilterPlugin. Once a node is deemed to be unsuitable to host a pod, it is not passed to any other FilterPlugin.
// FilterPlugin determines if a particular node is suitable for hosting a pod.
type FilterPlugin interface {
	Plugin

	// Filter is called to determine if the pod described by podInfo can be hosted on the node described by NodeInfo.
	// The podInfo and nodeInfo objects must be treated as immutable.
	//
	// Returns a "Success" Status if the node can host the pod, an "Unschedulable" Status if this is not the case,
	// or an "InternalError" Status if an unexpected error occurred during evaluation.
	Filter(ctx SchedulingContext, podInfo *PodInfo, nodeInfo *NodeInfo) Status
}
Describes a pod that has just been received and that is added to the channel of a PodSource.
// IncomingPod describes a pod that has just been received and that is added to the channel of a PodSource.
type IncomingPod struct {
	// The pod that should be scheduled.
	Pod *core.Pod

	// The timestamp, when the pod was received.
	ReceivedAt time.Time
}
NodeInfo stores a ClusterNode and additional scheduling-relevant information about it.
// NodeInfo stores a ClusterNode and additional scheduling-relevant information about it.
type NodeInfo struct {
	// The Node described by this NodeInfo.
	Node *client.ClusterNode `json:"node" yaml:"node"`

	// The accumulated score computed by the Score plugins of the sampling pipeline.
	// This is nil if no sampling score plugins are configured.
	SamplingScore *SamplingScore

	// The name of the cluster that the node is part of.
	ClusterName string `json:"clusterName" yaml:"clusterName"`
}
func NewNodeInfo(clusterName string, node *client.ClusterNode) *NodeInfo
Creates a new NodeInfo object and computes its resources.
NodeScore describes the score of a particular node.
// NodeScore describes the score of a particular node.
type NodeScore struct {
	// The node that the score belongs to.
	Node *NodeInfo

	// The score assigned to the node.
	Score int64
}
Plugin is the parent interface for all Polaris pipeline plugins. In the Polaris Distributed Scheduling Framework, there are three pipelines:
The Polaris scheduling pipeline runs as part of the Polaris Scheduler. It is the main pipeline of the Scheduling Framework and is responsible for sorting the incoming pods and, for each pod, obtaining node samples from all clusters (using the sampling pipeline in the Cluster Agents) and deciding on which node to schedule the pod. The scheduling pipeline consists of the following stages:
The stages from PreFilter up to (including) Reserve are called the "Decision Pipeline". For Decision Pipeline plugins it is common to tie into multiple stages of the pipeline. For each pipeline instance only a single instance of each plugin will be created, even if it ties into multiple stages (e.g., PreFilter, Filter, and Score).
Multiple Decision Pipeline instances may execute in parallel, but each instance will execute on a single goroutine and only be traversed by a single pod at a time.
The Polaris sampling pipeline runs as part of the Polaris Cluster Agent. It is responsible for collecting a set of nodes (the sample), which are capable of running the pod, for which they are requested. The sampling pipeline consists of the following stages:
The Polaris binding pipeline runs as part of the Polaris Cluster Agent. It is responsible for detecting any conflicts that a scheduling decision may create on the target node (e.g., overprovisioning of resources). The binding pipeline consists of the following stages:
ToDo: Extend binding pipeline with stages that allow provisioning of a volume (this may require multiple stages, e.g., before creating the pod in the orchestrator and after creating the pod).
// Plugin is the parent interface for all Polaris pipeline plugins.
type Plugin interface {
	// Gets the name of this plugin.
	//
	// The returned name must be usable as a URI component.
	Name() string
}
Defines a factory function for creating Polaris plugins with a generic owner services type.
// PluginFactoryFunc defines a factory function for creating Polaris plugins with a generic owner services type.
// It receives the plugin's configuration and the owner services and returns the created Plugin or an error.
type PluginFactoryFunc[O PolarisPluginOwnerServices] func(pluginConfig config.PluginConfig, ownerServices O) (Plugin, error)
Contains the factory functions for all available plugins. The generic type parameter O defines the owner services type of the created plugins (i.e., PolarisScheduler or PolarisNodeSampler).
type PluginsRegistry[O PolarisPluginOwnerServices] struct { // contains filtered or unexported fields }
func NewPluginsRegistry[O PolarisPluginOwnerServices](factories map[string]PluginFactoryFunc[O]) *PluginsRegistry[O]
func (pr *PluginsRegistry[O]) GetPluginFactory(pluginName string) PluginFactoryFunc[O]
Returns the PluginFactoryFunc for the specified plugin name or nil, if no factory is registered for this name.
PodInfo stores a Pod and additional pre-computed scheduling-relevant information about it.
type PodInfo struct { // The Pod to be scheduled. Pod *core.Pod `json:"pod" yaml:"pod"` // The number of times, we had to retry scheduling this pod after committing the scheduling decision had failed. SchedulingRetryCount int }
Supplies new pods that need to be scheduled to the scheduling pipeline.
// PodSource supplies new pods that need to be scheduled to the scheduling pipeline.
type PodSource interface {
	// Returns a channel that emits the incoming pods that need to be scheduled.
	IncomingPods() chan *IncomingPod
}
Main service that is responsible for sampling nodes.
This service is responsible for managing the REST interface and the nodes watch.
// PolarisNodeSampler is the main service that is responsible for sampling nodes.
// It embeds ClusterAgentServices, so it also exposes those services.
type PolarisNodeSampler interface {
	ClusterAgentServices

	// Starts the node sampler service.
	//
	// Note that, depending on the actual implementation, the REST interface may need to be started by
	// the caller after Start() returns.
	//
	// The context can be used to stop the sampler.
	// Returns nil if the sampler has started successfully.
	Start(ctx context.Context) error

	// Gets the sampling strategies available in this sampler.
	SamplingStrategies() []SamplingStrategyPlugin
}
Interface for services provided by the owner of a Polaris plugin.
// PolarisPluginOwnerServices is the interface for services provided by the owner of a Polaris plugin.
//
// It places no requirements on the implementing type; it is used as the type constraint for
// the owner services type parameter of PluginFactoryFunc and PluginsRegistry.
type PolarisPluginOwnerServices any
Provides access to the polaris-scheduler instance.
// PolarisScheduler provides access to the polaris-scheduler instance.
type PolarisScheduler interface {
	// Gets the scheduler configuration.
	Config() *config.SchedulerConfig

	// Gets the ClusterClientsManager for communicating with the node clusters.
	ClusterClientsManager() client.ClusterClientsManager

	// Starts the scheduling goroutines and then returns nil
	// or an error, if any occurred.
	Start(ctx context.Context) error

	// Stops the scheduling goroutines.
	Stop() error

	// Gets the logger used by this scheduler.
	Logger() *logr.Logger

	// Returns true if the scheduling process has been started.
	IsActive() bool

	// Returns the number of queued pods.
	PodsInQueueCount() int

	// Returns the number of pods, for which nodes are currently being sampled.
	PodsInNodeSamplingCount() int

	// Returns the number of pods, for which nodes have been sampled, and which are
	// now waiting to enter the decision pipeline.
	PodsWaitingForDecisionPipelineCount() int

	// Returns the number of pods currently in the decision pipeline.
	PodsInDecisionPipelineCount() int
}
A PreFilterPlugin is called once per Pod and can be used to pre-compute information that will be needed by a FilterPlugin. PreFilterPlugins are called after nodes have been sampled.
// PreFilterPlugin is called once per Pod to pre-compute information needed by a FilterPlugin.
type PreFilterPlugin interface {
	Plugin

	// PreFilter is called once per Pod and can be used to pre-compute information that will be needed by a FilterPlugin.
	// The podInfo object must be treated as immutable.
	//
	// All PreFilterPlugins must return Success, otherwise the pod is marked as Unschedulable.
	PreFilter(ctx SchedulingContext, podInfo *PodInfo) Status
}
A PreScorePlugin is called once per Pod, after the Filter stage has completed, and can be used to pre-compute information that will be needed by a ScorePlugin.
// PreScorePlugin is called once per Pod to pre-compute information needed by a ScorePlugin.
type PreScorePlugin interface {
	Plugin

	// PreScore is called once per Pod and can be used to pre-compute information that will be needed by a ScorePlugin.
	// eligibleNodes contains all nodes that have been deemed suitable to host the pod by the Filter stage plugins.
	// The podInfo and eligibleNodes objects must be treated as immutable.
	//
	// All PreScorePlugins must return Success, otherwise the pod is marked as Unschedulable.
	PreScore(ctx SchedulingContext, podInfo *PodInfo, eligibleNodes []*NodeInfo) Status
}
Represents information about a queued pod.
// QueuedPodInfo represents information about a queued pod.
// It embeds *PodInfo and adds the pod's SchedulingContext.
type QueuedPodInfo struct {
	*PodInfo

	// The SchedulingContext of this queued pod.
	Ctx SchedulingContext
}
func NewQueuedPodInfo(pod *core.Pod, ctx SchedulingContext, schedulingRetryCount int) *QueuedPodInfo
Creates a new QueuedPodInfo from a pod.
func (q *QueuedPodInfo) GetKey() string
Returns a key that can be used to identify this pod in a map. The key is generated according to the following scheme: "<namespace>.<name>"
A ReservePlugin is called after the scheduling pipeline has chosen the final target node after the Score stage. It may be used to update 3rd party data structures.
ToDo: ReservePlugin_MultiBind - Modify ReservePlugin to account for the new MultiBinding mechanism. Currently the ReserveStage is disabled.
// ReservePlugin is called after the scheduling pipeline has chosen the final target node after the Score stage.
type ReservePlugin interface {
	Plugin

	// Reserve is called after the scheduling pipeline has chosen the final target node after the Score stage.
	// It may be used to update 3rd party data structures.
	// If any ReservePlugin returns a non Success Status, the pod will not be scheduled to that node and
	// Unreserve will be called on all ReservePlugins.
	// The podInfo object must be treated as immutable.
	Reserve(ctx SchedulingContext, podInfo *PodInfo, targetNode *NodeInfo) Status

	// Unreserve is called if an error occurs during the Reserve stage or if another ReservePlugin rejects the pod.
	// It may be used to update 3rd party data structures.
	// This method must be idempotent and may be called by the scheduling pipeline even if Reserve() was not
	// previously called.
	// The podInfo object must be treated as immutable.
	Unreserve(ctx SchedulingContext, podInfo *PodInfo, targetNode *NodeInfo)
}
A SampleNodesPlugin is used to obtain a sample of nodes from the entire supercluster as hosting candidates for the pod. This plugin is called when a pod enters the scheduling pipeline.
// SampleNodesPlugin is used to obtain a sample of nodes from the entire supercluster
// as hosting candidates for a pod.
type SampleNodesPlugin interface {
	Plugin

	// Samples nodes across the entire supercluster to act as hosting candidates for the pod.
	// The podInfo object must be treated as immutable.
	//
	// Returns an array of NodeInfos that describe the sampled nodes and a Status.
	SampleNodes(ctx SchedulingContext, podInfo *PodInfo) ([]*NodeInfo, Status)
}
Represents information about a pod, for which nodes have already been sampled, and which is, thus, ready for entering the Decision Pipeline.
// SampledPodInfo represents information about a pod, for which nodes have already been sampled.
// It embeds *QueuedPodInfo and adds the sampled nodes.
type SampledPodInfo struct {
	*QueuedPodInfo

	// The nodes that have been sampled for this pod.
	SampledNodes []*NodeInfo
}
Represents an instance of the Polaris ClusterAgent Sampling Pipeline, encompassing all its stages.
A Sampling Pipeline executes on a single goroutine and there is only a single pod traversing the pipeline at a time.
The SamplingStrategyPlugin instances are shared across all pipelines and, thus, must be thread-safe. All other plugins execute only on this pipeline's goroutine. The "singleton approach" for SamplingStrategyPlugins is needed to avoid sampling the same nodes in multiple pipeline instances, i.e., if there were multiple instances of a deterministic SamplingStrategyPlugin (e.g., Round-Robin sampling), each instance would return the same nodes upon the first invocation.
// SamplingPipeline represents an instance of the Polaris ClusterAgent Sampling Pipeline,
// encompassing all its stages.
type SamplingPipeline interface {
	// Executes the sampling pipeline for the specified pod using the specified sampling strategy.
	// The number of nodes to be sampled is specified as basis points (bp) of the total number of nodes.
	//
	// Returns the sampled nodes and a Status.
	SampleNodes(ctx SchedulingContext, samplingStrategy SamplingStrategyPlugin, podInfo *PodInfo, nodesToSampleBp int) ([]*NodeInfo, Status)
}
Contains plugin instances for a single SamplingPipeline instance.
If a plugin ties into multiple stages, e.g., PreFilter, Filter, and Score, the same plugin instance is used for all of them.
// SamplingPipelinePlugins contains the plugin instances for a single SamplingPipeline instance,
// grouped by the stage they tie into.
type SamplingPipelinePlugins struct {
	// The plugins configured for the PreFilter stage.
	PreFilter []PreFilterPlugin

	// The plugins configured for the Filter stage.
	Filter []FilterPlugin

	// The plugins configured for the PreScore stage.
	PreScore []PreScorePlugin

	// The plugins configured for the Score stage, each combined with its ScoreExtensions and weight.
	Score []*ScorePluginWithExtensions
}
Used to instantiate sampling plugins.
// SamplingPluginsFactory is used to instantiate sampling plugins.
type SamplingPluginsFactory interface {
	// Creates instances of all configured SamplingStrategyPlugins.
	NewSamplingStrategiesPlugins(clusterAgentServices ClusterAgentServices) ([]SamplingStrategyPlugin, error)

	// Creates a new set of instances of the plugins configured for the Sampling Pipeline.
	NewSamplingPipelinePlugins(clusterAgentServices ClusterAgentServices) (*SamplingPipelinePlugins, error)
}
SamplingScore describes the accumulated score from all sampling score plugins.
// SamplingScore describes the accumulated score from all sampling score plugins.
type SamplingScore struct {
	// The accumulated score of all sampling score plugins.
	AccumulatedScore int64 `json:"accumulatedScore" yaml:"accumulatedScore"`

	// The number of score plugins that contributed to the accumulated score.
	ScorePluginsCount int `json:"scorePluginsCount" yaml:"scorePluginsCount"`
}
Encapsulates a node sampling strategy in the sampling pipeline.
A SamplingStrategyPlugin must be thread-safe, because a single instance is shared across all sampling pipelines. This "singleton approach" is needed to avoid sampling the same nodes in multiple pipeline instances, i.e., if there were multiple instances of a deterministic SamplingStrategyPlugin (e.g., Round-Robin sampling), each instance would return the same nodes upon the first invocation.
// SamplingStrategyPlugin encapsulates a node sampling strategy in the sampling pipeline.
type SamplingStrategyPlugin interface {
	Plugin

	// Returns the name of the sampling strategy in a URI component compatible form.
	StrategyName() string

	// Executes the sampling strategy and returns a sample of nodes and a status.
	// The podInfo object must be treated as immutable.
	//
	// Important: This method may be called concurrently on multiple goroutines, so its implementation must be thread-safe.
	SampleNodes(ctx SchedulingContext, podInfo *PodInfo, sampleSize int) ([]*NodeInfo, Status)
}
SchedulingContext is used to carry state information between stages of the scheduling pipeline. All plugins can access the information in the SchedulingContext - they are all assumed to be trusted.
// SchedulingContext is used to carry state information between stages of the scheduling pipeline.
type SchedulingContext interface {
	// Gets the context.Context that this SchedulingContext is associated with.
	//
	// This may change between plugin executions, so it should always be read directly
	// from the SchedulingContext.
	//
	// This method is thread-safe.
	Context() context.Context

	// Reads state data from the SchedulingContext.
	// Returns the StateData stored under the given key and a boolean indicating if the key was found.
	//
	// This method is thread-safe.
	Read(key string) (StateData, bool)

	// Writes state data to the SchedulingContext and stores it under the given key.
	//
	// This method is thread-safe.
	Write(key string, data StateData)
}
func NewSchedulingContext(ctx context.Context) SchedulingContext
Creates a new SchedulingContext
Represents a scheduling decision made by the Decision Pipeline.
// SchedulingDecision represents a scheduling decision made by the Decision Pipeline.
type SchedulingDecision struct {
	// The pod, for which the node has been selected.
	Pod *PodInfo

	// The node that has been selected for the pod.
	//
	// Binding pipeline plugins can assume that this NodeInfo has been updated at the beginning
	// of the binding pipeline.
	TargetNode *NodeInfo
}
Defines a factory function for creating Polaris scheduling pipeline plugins.
// SchedulingPluginFactoryFunc is a PluginFactoryFunc whose owner services type is PolarisScheduler.
type SchedulingPluginFactoryFunc PluginFactoryFunc[PolarisScheduler]
Used to instantiate scheduler plugins
// SchedulingPluginsFactory is used to instantiate scheduler plugins.
type SchedulingPluginsFactory interface {
	// Creates a new instance of the configured SortPlugin.
	NewSortPlugin(scheduler PolarisScheduler) (SortPlugin, error)

	// Creates a new instance of the configured SampleNodesPlugin.
	NewSampleNodesPlugin(scheduler PolarisScheduler) (SampleNodesPlugin, error)

	// Creates a new set of instances of the plugins configured for the Decision Pipeline.
	NewDecisionPipelinePlugins(scheduler PolarisScheduler) (*DecisionPipelinePlugins, error)
}
Allows defining optional actions supported by a ScorePlugin
// ScoreExtensions allows defining optional actions supported by a ScorePlugin.
type ScoreExtensions interface {
	// Called to normalize the node scores returned by the associated ScorePlugin to a range between MinNodeScore and MaxNodeScore.
	// This method should update the scores list (without changing the order or the number of elements) and return a Success Status.
	// The podInfo object must be treated as immutable.
	NormalizeScores(ctx SchedulingContext, podInfo *PodInfo, scores []NodeScore) Status
}
A ScorePlugin has to assign a score to every node that came out of the Filter stage. The scores from all ScorePlugins are accumulated by the scheduling pipeline and used to rank the eligible nodes.
The node with the highest score is assigned to host the pod. If multiple nodes have the same high score, a random node is picked from this set of winners.
// ScorePlugin has to assign a score to every node that came out of the Filter stage.
type ScorePlugin interface {
	Plugin

	// Score needs to compute a score for the node that describes "how suitable" it is to host the pod.
	// These scores are used to rank the nodes.
	// All ScorePlugins must return a Success Status, otherwise the pod is rejected.
	// The podInfo and nodeInfo objects must be treated as immutable.
	Score(ctx SchedulingContext, podInfo *PodInfo, nodeInfo *NodeInfo) (int64, Status)

	// Returns the ScoreExtensions, if they are implemented by this plugin, otherwise nil.
	ScoreExtensions() ScoreExtensions
}
Combines a ScorePlugin with its ScoreExtensions.
// ScorePluginWithExtensions combines a ScorePlugin with its ScoreExtensions and its weight.
type ScorePluginWithExtensions struct {
	// The actual score plugin instance.
	ScorePlugin

	// The ScoreExtensions supplied by the ScorePlugin or nil, if the plugin does not have any.
	ScoreExtensions

	// The weight assigned to this score plugin.
	Weight int32
}
A SortPlugin is used to establish the order, in which incoming pods will be handled by the scheduling pipeline.
// SortPlugin is used to establish the order, in which incoming pods will be handled by the scheduling pipeline.
type SortPlugin interface {
	Plugin

	// Less returns true if podA should be scheduled before podB.
	// Otherwise, it returns false.
	Less(podA *QueuedPodInfo, podB *QueuedPodInfo) bool
}
Generic interface for the state data that can be stored in a SchedulingContext.
// StateData is the generic interface for the state data that can be stored in a SchedulingContext.
//
// It places no requirements on the stored value; any type may be stored.
type StateData any
Reports the result of a pod's journey through the Polaris scheduling pipeline. A nil Status is also considered a success.
// Status reports the result of a pod's journey through the Polaris scheduling pipeline.
// A nil Status is also considered a success.
type Status interface {
	// Gets the StatusCode.
	Code() StatusCode

	// Gets the StatusCode as a string.
	CodeAsString() string

	// Gets the error that occurred, if any, otherwise returns nil.
	Error() error

	// Gets array of reasons for the current status.
	// This may also be nil.
	Reasons() []string

	// Gets the plugin that has caused scheduling to fail.
	// This is set by the framework and is nil, if all plugins returned success.
	FailedPlugin() Plugin

	// Gets the stage of the scheduling pipeline that caused scheduling to fail.
	// This is set by the framework and is an empty string, if all plugins returned success.
	FailedStage() string

	// Sets the plugin that has caused the scheduling pipeline to fail, together with the stage in which it failed.
	// This should be done by the scheduling pipeline only.
	SetFailedPlugin(plugin Plugin, stage string)

	// Gets the reasons for the current status as a single string.
	Message() string
}
func NewInternalErrorStatus(err error) Status
func NewStatus(code StatusCode, reasons ...string) Status
func NewSuccessStatus() Status
Describes a polaris-scheduler Status.
// StatusCode describes a polaris-scheduler Status.
type StatusCode int
const (
	// Success means that the plugin has executed correctly and deemed the pod to be schedulable.
	// A nil Status is also considered as a Success.
	Success StatusCode = iota

	// InternalError designates an internal plugin error, such as unexpected input, etc.
	// This should NOT be used when a pod is deemed to be unschedulable.
	InternalError

	// Unschedulable means that the plugin cannot find a node (within the plugin's scope) to place the pod.
	// The Reasons array should be set to the reason for the unschedulability.
	Unschedulable
)