Skip to content

Commit

Permalink
#Fix Windows silent cleanup issue
Browse files Browse the repository at this point in the history
# Why is this change needed?
Customers are affected by the "Agent occasionally stops working" issue.

# How does it address the issue?
The issue is caused by Windows silently cleaning up the %WinDir%\Temp folder. This change moves the certificate location to %PROGRAMDATA%.

# How was this tested?
bb release
+ windows integration tests

cr https://code.amazon.com/reviews/CR-20016442
  • Loading branch information
Alena Kastsiukavets authored and Helen1987 committed Feb 27, 2020
1 parent 940e774 commit 93602fa
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 26 deletions.
3 changes: 2 additions & 1 deletion bin/update
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,8 @@ EOF
end

def setup_windows_certificates
cert_dir = File.expand_path(File.join(File.dirname(__FILE__), '..\certs'))
app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
cert_dir = File.expand_path(File.join(app_root_folder, 'certs'))
@log.info("Setting up windows certificates from cert directory #{cert_dir}")
ENV['AWS_SSL_CA_DIRECTORY'] = File.join(cert_dir, 'ca-bundle.crt')
ENV['SSL_CERT_FILE'] = File.join(cert_dir, 'ca-bundle.crt')
Expand Down
3 changes: 2 additions & 1 deletion features/step_definitions/step_constants.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ def self.current_aws_account
end

def self.configure_windows_certificate
cert_dir = File.expand_path(File.join(File.dirname(__FILE__), '..\..\certs'))
app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
cert_dir = File.expand_path(File.join(app_root_folder, 'certs'))
Aws.config[:ssl_ca_bundle] = File.join(cert_dir, 'windows-ca-bundle.crt')
ENV['AWS_SSL_CA_DIRECTORY'] = File.join(cert_dir, 'windows-ca-bundle.crt')
ENV['SSL_CERT_FILE'] = File.join(cert_dir, 'windows-ca-bundle.crt')
Expand Down
3 changes: 2 additions & 1 deletion lib/aws/codedeploy/local/deployer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def initialize(configuration_file_location = CONF_DEFAULT_LOCATION)
end

def self.configure_windows_certificate
cert_dir = File.expand_path(File.join(File.dirname(__FILE__), '..\..\..\..\certs'))
app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
cert_dir = File.join(app_root_folder, 'certs')
Aws.config[:ssl_ca_bundle] = File.join(cert_dir, 'windows-ca-bundle.crt')
ENV['AWS_SSL_CA_DIRECTORY'] = File.join(cert_dir, 'windows-ca-bundle.crt')
ENV['SSL_CERT_FILE'] = File.join(cert_dir, 'windows-ca-bundle.crt')
Expand Down
8 changes: 5 additions & 3 deletions lib/instance_agent/agent/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ def run
perform
@error_count = 0
rescue Seahorse::Client::NetworkingError => e
log(:error, "Failed to execute the command. Your certificates might have been deleted" )
# TODO: verify error message is "certificate verify failed"
raise e
log(:error, "Network error: " + e.inspect)
if e.message.include? "certificate verify failed"
log(:error, "Failed to execute the command. Your certificates might have been deleted" )
end
@error_count = @error_count.to_i + 1
rescue Aws::Errors::MissingCredentialsError
log(:error, "Missing credentials - please check if this instance was started with an IAM instance profile")
@error_count = @error_count.to_i + 1
Expand Down
29 changes: 9 additions & 20 deletions lib/winagent.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def initialize
@app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
InstanceAgent::Platform.util = InstanceAgent::WindowsUtil

cert_dir = File.expand_path(File.join(File.dirname(__FILE__), '..\certs'))
cert_dir = File.join(@app_root_folder, 'certs')
Aws.config[:ssl_ca_bundle] = File.join(cert_dir, 'ca-bundle.crt')
ENV['AWS_SSL_CA_DIRECTORY'] = File.join(cert_dir, 'ca-bundle.crt')
ENV['SSL_CERT_FILE'] = File.join(cert_dir, 'ca-bundle.crt')
Expand All @@ -34,10 +34,9 @@ def description

def service_main
read_config
@attempt_count = 0
log(:info, 'started')
shutdown_flag = false
while running? && !shutdown_flag
shutdown_flag = false
while running? && !shutdown_flag
with_error_handling do
# Initialize the poller only once
begin
Expand All @@ -61,15 +60,15 @@ def service_stop
log(:info, 'stopping the agent')
@polling_mutex.synchronize do
@runner.graceful_shutdown
log(:info, 'agent exiting now')
log(:info, 'command execution threads shutdown, agent exiting now')
end
end

def log(severity, message)
raise ArgumentError, "Unknown severity #{severity.inspect}" unless InstanceAgent::Log::SEVERITIES.include?(severity.to_s)
InstanceAgent::Log.send(severity.to_sym, "#{description}: #{message}")
raise ArgumentError, "Unknown severity #{severity.inspect}" unless InstanceAgent::Log::SEVERITIES.include?(severity.to_s)
InstanceAgent::Log.send(severity.to_sym, "#{description}: #{message}")
end

def expand_conf_path(key)
tmp = InstanceAgent::Config.config[key.to_sym]
InstanceAgent::Config.config(key.to_sym => File.join(ENV['PROGRAMDATA'], tmp)) unless Pathname.new(tmp).absolute?
Expand All @@ -78,7 +77,7 @@ def expand_conf_path(key)
def read_config
default_config = File.join(@app_root_folder, "conf.yml")
InstanceAgent::Config.config({:config_file => default_config,
:on_premises_config_file => File.join(default_root, "conf.onpremises.yml")})
:on_premises_config_file => File.join(@app_root_folder, "conf.onpremises.yml")})
InstanceAgent::Config.load_config

expand_conf_path(:root_dir)
Expand All @@ -89,23 +88,13 @@ def read_config

def with_error_handling
yield
rescue Seahorse::Client::NetworkingError => e
@attempt_count = @attempt_count + 1
if @attempt_count > 3
log(:error, "Failed to recover after certificate issue:" + e.inspect)
exit
end
log(:error, "Custom:" + e.inspect)
# try to copy certs from application root folder
@certs_backup_folder = File.join(@app_root_folder, "certs/.")
FileUtils.cp_r(@certs_backup_folder, @cert_dir)
rescue SocketError => e
log(:info, "#{description}: failed to run as the connection failed! #{e.class} - #{e.message} - #{e.backtrace.join("\n")}")
sleep InstanceAgent::Config.config[:wait_after_connection_problem]
rescue Exception => e
if (e.message.to_s.match(/throttle/i) || e.message.to_s.match(/rateexceeded/i) rescue false)
log(:error, "#{description}: ran into throttling - waiting for #{InstanceAgent::Config.config[:wait_after_throttle_error]}s until retrying")
sleep InstanceAgent::Config.config[:wait_after_throttle_error]
sleep InstanceAgent::Config.config[:wait_after_throttle_error]
else
log(:error, "#{description}: error during start or run: #{e.class} - #{e.message} - #{e.backtrace.join("\n")}")
sleep 5
Expand Down
23 changes: 23 additions & 0 deletions test/instance_agent/agent/base_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,28 @@ class InstanceAgentBaseTest < InstanceAgentTestCase
assert_nothing_raised { @base.run }
end
end

context 'when certificates are lost' do

should 'continue to execute' do
CERT_VERIFICATION_FAILED_MESSAGE = 'SSL_connect returned=1 errno=0 state=error: certificate verify failed'
@base.stubs(:perform).raises(Seahorse::Client::NetworkingError.new(Exception.new(CERT_VERIFICATION_FAILED_MESSAGE)))

@base.run
end

end

context 'when network error is thrown' do

should 'continue to execute' do
@base.stubs(:perform).raises(Seahorse::Client::NetworkingError.new(Exception.new('random message')))

@base.run
end

end

end

end
44 changes: 44 additions & 0 deletions test/instance_agent/plugins/windows/winagent_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
require 'test_helper'

# Minimal stand-in for the Daemon base class, defined before the service
# wrapper is required so the wrapper's superclass resolves in tests.
class Daemon
  class << self
    # Accepts any positional arguments and an optional block, ignores all of
    # them, and returns false.
    def mainloop(*_args, &_block)
      false
    end
  end

  # Always reports the daemon as running.
  def running?
    true
  end
end

require 'wrapper/test_wrapper_winagent'

# Exercises InstanceAgentService#service_main with ENV, the agent config,
# the logger initialiser and the command poller runner replaced by mocks.
class WinAgentTestClass < InstanceAgentTestCase
context 'Win agent shell try to start agent' do

setup do
# Every ENV lookup is mocked to return an empty string; at least one lookup is required.
ENV.expects(:[]).at_least_once.returns("")

# The command poller hands back a mock runner instead of building a real one.
@fake_runner = mock()
InstanceAgent::Plugins::CodeDeployPlugin::CommandPoller.stubs(:runner).returns(@fake_runner)

# Logger initialisation is stubbed to return a bare mock.
logger_mock = mock()
::ProcessManager::Log.stubs(:init).returns(logger_mock)

# Loading the config must be triggered. Config lookups are mocked twice: one
# expectation for :wait_between_runs (at most 5 calls, returns "0") and one
# matching any key (at least one call, returns "").
# NOTE(review): which expectation answers :wait_between_runs depends on Mocha's
# matching order for overlapping expectations - confirm before relying on it.
InstanceAgent::Config.expects(:load_config)
InstanceAgent::Config.config.expects(:[]).with(:wait_between_runs).at_most(5).returns("0")
InstanceAgent::Config.config.expects(:[]).at_least_once.returns("")
end

should 'starts succesfully' do
# The runner's run is stubbed; FileUtils.cp_r and graceful_shutdown must never be called.
@fake_runner.stubs(:run).times(2)
FileUtils.expects(:cp_r).never
@fake_runner.expects(:graceful_shutdown).never

agent = InstanceAgentService.new
# running? must be asked exactly three times, answering true, true, false in that order.
agent.expects(:running?).times(3).returns(true, true, false)

agent.service_main
end

end
end
105 changes: 105 additions & 0 deletions test/wrapper/test_wrapper_winagent.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# We can't test cross-platform, so references to Windows libraries are removed for tests; see https://github.com/chefspec/chefspec/issues/405
#require 'win32/daemon'
require 'core_ext'
require 'aws-sdk-core'
require 'process_manager'

# There's something strange about how Ocra handles implicit requires.
# We have to explicitly require everything in advance or we'll get uninitialized constant failures.
require 'instance_agent/agent/base'
require 'instance_agent/config'
require 'instance_agent/log'
require 'instance_agent/platform'
require 'instance_agent/platform/windows_util'
require 'instance_agent/plugins/codedeploy/register_plugin'
require 'pathname'

#include Win32

# Service wrapper around the CodeDeploy command poller. It subclasses Daemon,
# points the AWS SDK at the CA bundle under %PROGRAMDATA%, and keeps running
# the poller until the daemon stops or a SystemExit is seen.
class InstanceAgentService < Daemon

  # Records the application root (%PROGRAMDATA%/Amazon/CodeDeploy), selects
  # the Windows platform utilities, registers the CA bundle with the AWS SDK
  # and the process environment, and creates the mutex guarding the poller.
  def initialize
    @app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
    InstanceAgent::Platform.util = InstanceAgent::WindowsUtil
    cert_dir = File.join(@app_root_folder, 'certs')
    Aws.config[:ssl_ca_bundle] = File.join(cert_dir, 'ca-bundle.crt')
    ['AWS_SSL_CA_DIRECTORY', 'SSL_CERT_FILE'].each do |variable|
      ENV[variable] = File.join(cert_dir, 'ca-bundle.crt')
    end
    @polling_mutex = Mutex.new
  end

  # Human-readable service name; also used as the prefix of every log line.
  def description
    "CodeDeploy Instance Agent Service"
  end

  # Main loop: loads the configuration, then repeatedly runs the poller while
  # running? is true and no stop has been requested. A SystemExit raised while
  # polling triggers service_stop and ends the loop, after which the process
  # is terminated with exit!.
  def service_main
    read_config
    log(:info, 'started')
    stop_requested = false
    while running? && !stop_requested
      with_error_handling do
        begin
          @polling_mutex.synchronize do
            # The runner is created lazily on the first pass and reused afterwards.
            @runner ||= InstanceAgent::Plugins::CodeDeployPlugin::CommandPoller.runner
            @runner.run
          end
        rescue SystemExit
          service_stop
          stop_requested = true
        end
        sleep InstanceAgent::Config.config[:wait_between_runs].to_i
      end
    end
    exit! if stop_requested
  end

  # Asks the runner to shut down gracefully while holding the polling mutex.
  # NOTE(review): assumes @runner was already created by service_main - confirm.
  def service_stop
    log(:info, 'stopping the agent')
    @polling_mutex.synchronize do
      @runner.graceful_shutdown
      log(:info, 'command execution threads shutdown, agent exiting now')
    end
  end

  # Writes "<description>: <message>" through InstanceAgent::Log at the given
  # severity. Raises ArgumentError when the severity is not listed in
  # InstanceAgent::Log::SEVERITIES.
  def log(severity, message)
    unless InstanceAgent::Log::SEVERITIES.include?(severity.to_s)
      raise ArgumentError, "Unknown severity #{severity.inspect}"
    end
    InstanceAgent::Log.send(severity.to_sym, "#{description}: #{message}")
  end

  # Rewrites the config entry for +key+ to live under %PROGRAMDATA% when the
  # configured value is not already an absolute path.
  def expand_conf_path(key)
    configured = InstanceAgent::Config.config[key.to_sym]
    return if Pathname.new(configured).absolute?
    InstanceAgent::Config.config(key.to_sym => File.join(ENV['PROGRAMDATA'], configured))
  end

  # Registers the default and on-premises config file locations, loads the
  # config, makes root_dir and log_dir absolute, and initialises the log file.
  def read_config
    file_locations = {
      :config_file => File.join(@app_root_folder, "conf.yml"),
      :on_premises_config_file => File.join(@app_root_folder, "conf.onpremises.yml")
    }
    InstanceAgent::Config.config(file_locations)
    InstanceAgent::Config.load_config

    [:root_dir, :log_dir].each { |key| expand_conf_path(key) }

    log_file = File.join(InstanceAgent::Config.config[:log_dir], "codedeploy-agent-log.txt")
    InstanceAgent::Log.init(log_file)
  end

  # Yields to the block and turns any failure into a log line plus a pause:
  # connection failures wait :wait_after_connection_problem, throttling errors
  # wait :wait_after_throttle_error, and everything else waits five seconds.
  def with_error_handling
    yield
  rescue SocketError => e
    log(:info, "#{description}: failed to run as the connection failed! #{e.class} - #{e.message} - #{e.backtrace.join("\n")}")
    sleep InstanceAgent::Config.config[:wait_after_connection_problem]
  rescue Exception => e
    # Any error raised while inspecting the message counts as "not throttled".
    throttled = begin
      [/throttle/i, /rateexceeded/i].any? { |pattern| e.message.to_s.match(pattern) }
    rescue StandardError
      false
    end

    if throttled
      log(:error, "#{description}: ran into throttling - waiting for #{InstanceAgent::Config.config[:wait_after_throttle_error]}s until retrying")
      sleep InstanceAgent::Config.config[:wait_after_throttle_error]
    else
      log(:error, "#{description}: error during start or run: #{e.class} - #{e.message} - #{e.backtrace.join("\n")}")
      sleep 5
    end
  end
end

# Invoke InstanceAgentService.mainloop as soon as this file is loaded, unless
# the Ocra constant is defined.
# NOTE(review): presumably Ocra is only defined while the executable is being
# packaged, so the service is not started during packaging - confirm.
InstanceAgentService.mainloop unless defined?(Ocra)

0 comments on commit 93602fa

Please sign in to comment.