From 4355797d40bce670a275290b9f3eb29e0f460534 Mon Sep 17 00:00:00 2001 From: Marco Dinis Date: Wed, 16 Oct 2024 12:01:52 +0100 Subject: [PATCH] Add debugging steps for DiscoverEC2 User Task issues This PR adds a new field when getting a User Task. Description contains a markdown document that should be displayed to the user which helps them fix the issue with the task. --- .../ec2-ssm-agent-connection-lost.md | 25 +++++++++++++++++++ .../ec2-ssm-agent-not-registered.md | 25 +++++++++++++++++++ .../ec2-ssm-invocation-failure.md | 19 ++++++++++++++ .../descriptions/ec2-ssm-script-failure.md | 3 +++ .../descriptions/ec2-ssm-unsupported-os.md | 3 +++ api/types/usertasks/object.go | 25 ++++++++++++++++--- api/types/usertasks/object_test.go | 6 +++++ lib/web/ui/usertask.go | 6 ++++- 8 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 api/types/usertasks/descriptions/ec2-ssm-agent-connection-lost.md create mode 100644 api/types/usertasks/descriptions/ec2-ssm-agent-not-registered.md create mode 100644 api/types/usertasks/descriptions/ec2-ssm-invocation-failure.md create mode 100644 api/types/usertasks/descriptions/ec2-ssm-script-failure.md create mode 100644 api/types/usertasks/descriptions/ec2-ssm-unsupported-os.md diff --git a/api/types/usertasks/descriptions/ec2-ssm-agent-connection-lost.md b/api/types/usertasks/descriptions/ec2-ssm-agent-connection-lost.md new file mode 100644 index 000000000000..d261a5986faa --- /dev/null +++ b/api/types/usertasks/descriptions/ec2-ssm-agent-connection-lost.md @@ -0,0 +1,25 @@ +Auto enrolling EC2 instances requires the SSM Agent to be installed and running on them. +Some instances appear to have lost connection to Amazon Systems Manager. + +You can see which instances lost connection using the [SSM Fleet Manager](https://console.aws.amazon.com/systems-manager/fleet-manager/managed-nodes). 
+ +The most common issues for instances losing connection are: + +**SSM Agent is not running** + +Ensure the SSM Agent is running in the instance and is not reporting any error. +Please check the instructions [here](https://docs.aws.amazon.com/systems-manager/latest/userguide/ssm-agent-status-and-restart.html). + +**SSM Agent can't reach the Amazon Systems Manager service** + +Ensure the instance's security groups allow outbound connections to Amazon Systems Manager endpoints. +Allowing outbound traffic on port 443 is enough for the agent to connect to AWS. + +**Instance is missing IAM policy** + +The SSM Agent requires the `AmazonSSMManagedInstanceCore` managed policy. +Ensure the instance has an IAM Profile and that it includes the above policy. +For more information please refer to [this page](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-getting-started-instance-profile.html). + +After following the steps above, you can mark the task as resolved. +Teleport will try to auto-enroll these instances again. \ No newline at end of file diff --git a/api/types/usertasks/descriptions/ec2-ssm-agent-not-registered.md b/api/types/usertasks/descriptions/ec2-ssm-agent-not-registered.md new file mode 100644 index 000000000000..276e2e146a0f --- /dev/null +++ b/api/types/usertasks/descriptions/ec2-ssm-agent-not-registered.md @@ -0,0 +1,25 @@ +Auto enrolling EC2 instances requires the SSM Agent to be installed and running on them. +Some instances failed to connect to Amazon Systems Manager. + +You can see which instances were able to connect by opening the [SSM Fleet Manager](https://console.aws.amazon.com/systems-manager/fleet-manager/managed-nodes). + +The most common issues for instances not being visible are: + +**SSM Agent is not running** + +Ensure the SSM Agent is installed and running in the instance. +Please check the instructions [here](https://docs.aws.amazon.com/systems-manager/latest/userguide/ssm-agent-status-and-restart.html). 
+ +**SSM Agent can't reach the Amazon Systems Manager service** + +Ensure the instance's security groups allow outbound connections to Amazon Systems Manager endpoints. +Allowing outbound traffic on port 443 is enough for the agent to connect to AWS. + +**Instance is missing IAM policy** + +The SSM Agent requires the `AmazonSSMManagedInstanceCore` managed policy. +Ensure the instance has an IAM Profile and that it includes the above policy. +For more information please refer to [this page](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-getting-started-instance-profile.html). + +After following the steps above, you can mark the task as resolved. +Teleport will try to auto-enroll these instances again. \ No newline at end of file diff --git a/api/types/usertasks/descriptions/ec2-ssm-invocation-failure.md b/api/types/usertasks/descriptions/ec2-ssm-invocation-failure.md new file mode 100644 index 000000000000..d46087d30b66 --- /dev/null +++ b/api/types/usertasks/descriptions/ec2-ssm-invocation-failure.md @@ -0,0 +1,19 @@ +Teleport failed to access the SSM Agent to auto enroll the instance. +Some instances failed to communicate with the AWS Systems Manager service to execute the install script. + +This usually happens for one of the following reasons: + +**Missing policies** + +The IAM Role used by the integration might be missing some required permissions. +Ensure the following actions are allowed in the IAM Role used by the integration: +- `ec2:DescribeInstances` +- `ssm:DescribeInstanceInformation` +- `ssm:GetCommandInvocation` +- `ssm:ListCommandInvocations` +- `ssm:SendCommand` + +**SSM Document is invalid** + +Teleport uses an SSM Document to run an installation script. +If the document is changed or removed, it might no longer work. 
\ No newline at end of file diff --git a/api/types/usertasks/descriptions/ec2-ssm-script-failure.md b/api/types/usertasks/descriptions/ec2-ssm-script-failure.md new file mode 100644 index 000000000000..18dc28e0ab12 --- /dev/null +++ b/api/types/usertasks/descriptions/ec2-ssm-script-failure.md @@ -0,0 +1,3 @@ +Teleport was able to reach the SSM Agent inside the EC2 instance, but the install script returned an error. + +You can click the Invocation URL below to get further details on why the script failed. \ No newline at end of file diff --git a/api/types/usertasks/descriptions/ec2-ssm-unsupported-os.md b/api/types/usertasks/descriptions/ec2-ssm-unsupported-os.md new file mode 100644 index 000000000000..5e92a376d5d3 --- /dev/null +++ b/api/types/usertasks/descriptions/ec2-ssm-unsupported-os.md @@ -0,0 +1,3 @@ +Auto enrolling EC2 instances requires a compatible Operating System. + +Teleport only supports Linux instances when auto-enrolling them into the cluster. \ No newline at end of file diff --git a/api/types/usertasks/object.go b/api/types/usertasks/object.go index 72a71fe05d25..c2582b6dc6fc 100644 --- a/api/types/usertasks/object.go +++ b/api/types/usertasks/object.go @@ -19,7 +19,9 @@ package usertasks import ( + "embed" "encoding/binary" + "fmt" "slices" "time" @@ -117,8 +119,23 @@ const ( AutoDiscoverEC2IssueSSMInvocationFailure = "ec2-ssm-invocation-failure" ) -// discoverEC2IssueTypes is a list of issue types that can occur when trying to auto enroll EC2 instances. -var discoverEC2IssueTypes = []string{ +//go:embed descriptions/*.md +var descriptionsFS embed.FS + +// DescriptionForDiscoverEC2Issue returns the description of the issue and fixing steps. +// The returned string contains a markdown document. +// If issue type is not recognized or doesn't have a specific description, then an empty string is returned. 
+func DescriptionForDiscoverEC2Issue(issueType string) string { + filename := fmt.Sprintf("descriptions/%s.md", issueType) + bs, err := descriptionsFS.ReadFile(filename) + if err != nil { + return "" + } + return string(bs) +} + +// DiscoverEC2IssueTypes is a list of issue types that can occur when trying to auto enroll EC2 instances. +var DiscoverEC2IssueTypes = []string{ AutoDiscoverEC2IssueSSMInstanceNotRegistered, AutoDiscoverEC2IssueSSMInstanceConnectionLost, AutoDiscoverEC2IssueSSMInstanceUnsupportedOS, @@ -184,8 +201,8 @@ func validateDiscoverEC2TaskType(ut *usertasksv1.UserTask) error { ) } - if !slices.Contains(discoverEC2IssueTypes, ut.GetSpec().IssueType) { - return trace.BadParameter("invalid issue type state, allowed values: %v", discoverEC2IssueTypes) + if !slices.Contains(DiscoverEC2IssueTypes, ut.GetSpec().IssueType) { + return trace.BadParameter("invalid issue type state, allowed values: %v", DiscoverEC2IssueTypes) } if len(ut.Spec.DiscoverEc2.Instances) == 0 { diff --git a/api/types/usertasks/object_test.go b/api/types/usertasks/object_test.go index d5fa6c43744f..156af98ac59d 100644 --- a/api/types/usertasks/object_test.go +++ b/api/types/usertasks/object_test.go @@ -269,3 +269,9 @@ func TestNewDiscoverEC2UserTask(t *testing.T) { }) } } + +func TestAllDescriptions(t *testing.T) { + for _, issueType := range usertasks.DiscoverEC2IssueTypes { + require.NotEmpty(t, usertasks.DescriptionForDiscoverEC2Issue(issueType), "issue type %q is missing descriptions/%s.md file", issueType, issueType) + } +} diff --git a/lib/web/ui/usertask.go b/lib/web/ui/usertask.go index 603867a66b01..b6ee2a0c6657 100644 --- a/lib/web/ui/usertask.go +++ b/lib/web/ui/usertask.go @@ -22,6 +22,7 @@ import ( "github.com/gravitational/trace" usertasksv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/usertasks/v1" + "github.com/gravitational/teleport/api/types/usertasks" ) // UserTask describes UserTask fields. 
@@ -41,8 +42,10 @@ type UserTask struct { // UserTaskDetail contains all the details for a User Task. type UserTaskDetail struct { - // UserTask has the basic fields that all taks include. + // UserTask has the basic fields that all tasks include. UserTask + // Description is a markdown document that explains the issue and how to fix it. + Description string `json:"description,omitempty"` // DiscoverEC2 contains the task details for the DiscoverEC2 tasks. DiscoverEC2 *usertasksv1.DiscoverEC2 `json:"discoverEc2,omitempty"` } @@ -87,6 +90,7 @@ func MakeUserTasks(uts []*usertasksv1.UserTask) []UserTask { func MakeDetailedUserTask(ut *usertasksv1.UserTask) UserTaskDetail { return UserTaskDetail{ UserTask: MakeUserTask(ut), + Description: usertasks.DescriptionForDiscoverEC2Issue(ut.GetSpec().GetIssueType()), DiscoverEC2: ut.GetSpec().GetDiscoverEc2(), } }