Skip to content

Commit

Permalink
fix: make deleting existing jobs more robust
Browse files Browse the repository at this point in the history
Current logic always attempts to delete an existing job, whether one
exists or not. The new logic checks for an existing job first, and uses
an improved implementation that should be slightly more robust. The
timeout waiting for the job to be deleted gets extended to 120 seconds.
  • Loading branch information
thomasjo committed Jul 20, 2020
1 parent 9ee6d73 commit 694b932
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
32 changes: 23 additions & 9 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,10 @@ var runCmd = &cobra.Command{
// TODO: Reconsider this? Many reasons to avoid this; should be challenged.
k8s.OverrideJobSpec(job)

fmt.Println("Deleting existing job...")
err = kubectx.DeleteJob(job.Name)
if err != nil {
if err := deletePreviousJob(job); err != nil {
return fmt.Errorf("unable to delete previous job: %w", err)
}

if err := waitUntilDeleted(job); err != nil {
return err
}

// Try to create the job using retry.
// This handles scenarios where an existing job is still being terminated, etc.
fmt.Println("Creating job...")
Expand All @@ -80,7 +74,6 @@ var runCmd = &cobra.Command{
}

if req == nil {
// TODO: Inform user we did not get any logs?
return fmt.Errorf("unable to get logs: request not returned (nil)")
}

Expand Down Expand Up @@ -112,8 +105,29 @@ func init() {
flags.BoolVarP(&follow, "follow", "f", false, "wait for job to start, then stream logs")
}

func deletePreviousJob(job *batchv1.Job) error {
oldJob, err := kubectx.GetJob(job.Name)
if err != nil {
return fmt.Errorf("unable to get previous job: %w", err)
}

if oldJob != nil {
fmt.Println("Deleting previous job...")
err = kubectx.DeleteJob(oldJob.Name)
if err != nil {
return err
}

if err := waitUntilDeleted(oldJob); err != nil {
return err
}
}

return nil
}

func waitUntilDeleted(job *batchv1.Job) error {
err := wait.Poll(100*time.Millisecond, 30*time.Second, func() (bool, error) {
err := wait.Poll(100*time.Millisecond, 120*time.Second, func() (bool, error) {
oldJob, err := kubectx.GetJob(job.Name)
if err != nil {
return false, err
Expand Down
6 changes: 5 additions & 1 deletion internal/k8s/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,11 @@ func (kubectx *KubeContext) ListJobs() (*batchv1.JobList, error) {
func (kubectx *KubeContext) GetJob(name string) (*batchv1.Job, error) {
getOptions := metav1.GetOptions{}
job, err := kubectx.Client.BatchV1().Jobs(kubectx.Namespace).Get(name, getOptions)
if err != nil && !apierrors.IsNotFound(err) {
if err != nil {
if apierrors.IsNotFound(err) {
return nil, nil
}

return nil, err
}

Expand Down

0 comments on commit 694b932

Please sign in to comment.