From ea3aa60dcc0eaa65bb9a8d30faee446e34a90df3 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 23 Nov 2021 22:09:06 -0800 Subject: [PATCH] More improvements to output for CircleCI (#9201) Summary: More follow-up to https://github.com/facebook/rocksdb/issues/9193 + https://github.com/facebook/rocksdb/issues/9188 * Even though we need to print ETA updates to avoid hitting the 10min timeout, we need to avoid printing an update if there's no actual progress, so that hung tests will timeout after 10 min rather than 5 hours. * When there is a hung test, it's really annoying to track down which test is hung, so if no progress is observed for 1 minute, we run ps once to show what is running. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9201 Test Plan: manual and CircleCI Reviewed By: jay-zhuang Differential Revision: D32612028 Pulled By: pdillinger fbshipit-source-id: 00f8ea70fc5fec9ede28ff74287d90fc73854aad --- build_tools/gnu_parallel | 44 +++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/build_tools/gnu_parallel b/build_tools/gnu_parallel index 06a560fcf..6b7849b0e 100755 --- a/build_tools/gnu_parallel +++ b/build_tools/gnu_parallel @@ -1842,7 +1842,6 @@ sub start_another_job { } $opt::min_progress_interval = 0; -$opt::progress_sep = "\r"; sub init_progress { # Uses: @@ -1852,7 +1851,6 @@ sub init_progress { $|=1; if (not $Global::is_terminal) { $opt::min_progress_interval = 30; - $opt::progress_sep = "\n"; } if($opt::bar) { return("",""); @@ -1878,7 +1876,9 @@ sub drain_job_queue { } my $last_header=""; my $sleep = 0.2; + my $last_left = 1000000000; my $last_progress_time = 0; + my $ps_reported = 0; do { while($Global::total_running > 0) { debug($Global::total_running, "==", scalar @@ -1889,15 +1889,38 @@ sub drain_job_queue { close $job->fh(0,"w"); } } - if($opt::progress and (time() - $last_progress_time) >= $opt::min_progress_interval) { - $last_progress_time = time(); + # When not connected to terminal, assume CI (e.g. CircleCI). In + # that case we want occasional progress output to prevent abort + # due to timeout with no output, but we also need to stop sending + # progress output if there has been no actual progress, so that + # the job can time out appropriately (CirecleCI: 10m) in case of + # a hung test. But without special output, it is extremely + # annoying to diagnose which test is hung, so we add that using + # `ps` below. + if($opt::progress and + ($Global::is_terminal or (time() - $last_progress_time) >= 30)) { my %progress = progress(); if($last_header ne $progress{'header'}) { print $Global::original_stderr "\n", $progress{'header'}, "\n"; $last_header = $progress{'header'}; } - print $Global::original_stderr $opt::progress_sep,$progress{'status'}; - flush $Global::original_stderr; + if ($Global::is_terminal) { + print $Global::original_stderr "\r",$progress{'status'}; + } + if ($last_left > $Global::left) { + if (not $Global::is_terminal) { + print $Global::original_stderr $progress{'status'},"\n"; + } + $last_progress_time = time(); + $ps_reported = 0; + } elsif (not $ps_reported and (time() - $last_progress_time) >= 60) { + # No progress in at least 60 seconds: run ps + print $Global::original_stderr "\n"; + system("ps", "-wf"); + $ps_reported = 1; + } + $last_left = $Global::left; + flush $Global::original_stderr; } if($Global::total_running < $Global::max_jobs_running and not $Global::JobQueue->empty()) { @@ -1964,10 +1987,11 @@ sub progress { my $eta = ""; my ($status,$header)=("",""); if($opt::eta) { - my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = - compute_eta(); - $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", - $this_eta, $left, $avgtime); + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + $Global::left = $left; } my $termcols = terminal_columns(); my @workers = sort keys %Global::host;