Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardize on ARN for ID with ec2 instances, keypairs, nics, securitygroups, subnets , volumes #1147

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 40 additions & 24 deletions cartography/intel/aws/ec2/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.intel.aws.ec2.util import get_botocore_config
from cartography.intel.aws.util.arns import build_arn
from cartography.models.aws.ec2.instances import EC2InstanceSchema
from cartography.models.aws.ec2.keypairs import EC2KeyPairSchema
from cartography.models.aws.ec2.networkinterfaces import EC2NetworkInterfaceSchema
Expand All @@ -19,6 +20,7 @@
from cartography.models.aws.ec2.subnets import EC2SubnetSchema
from cartography.models.aws.ec2.volumes import EBSVolumeSchema
from cartography.util import aws_handle_regions
from cartography.util import run_cleanup_job
from cartography.util import timeit

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -65,10 +67,12 @@ def transform_ec2_instances(reservations: List[Dict[str, Any]], region: str, cur
})
for instance in reservation['Instances']:
instance_id = instance['InstanceId']
instance_arn = build_arn('ec2', current_aws_account_id, 'instance', instance_id, region)
launch_time = instance.get("LaunchTime")
launch_time_unix = str(time.mktime(launch_time.timetuple())) if launch_time else None
instance_list.append(
{
'Arn': instance_arn,
'InstanceId': instance_id,
'ReservationId': reservation_id,
'PublicDnsName': instance.get("PublicDnsName"),
Expand Down Expand Up @@ -97,51 +101,63 @@ def transform_ec2_instances(reservations: List[Dict[str, Any]], region: str, cur
if subnet_id:
subnet_list.append(
{
'Arn': build_arn('ec2', current_aws_account_id, 'subnet', subnet_id, region),
'SubnetId': subnet_id,
'InstanceId': instance_id,
'InstanceArn': instance_arn,
},
)

if instance.get("KeyName"):
key_name = instance["KeyName"]
key_pair_arn = f'arn:aws:ec2:{region}:{current_aws_account_id}:key-pair/{key_name}'
keypair_list.append({
'KeyPairArn': key_pair_arn,
'KeyName': key_name,
'InstanceId': instance_id,
})

if instance.get("SecurityGroups"):
for group in instance["SecurityGroups"]:
sg_list.append(
{
'GroupId': group['GroupId'],
'InstanceId': instance_id,
},
)

for network_interface in instance['NetworkInterfaces']:
for security_group in network_interface['Groups']:
network_interface_id = network_interface['NetworkInterfaceId']
network_interface_list.append({
'NetworkInterfaceId': network_interface['NetworkInterfaceId'],
'Arn': build_arn(
'ec2',
current_aws_account_id,
'network-interface',
network_interface_id,
region,
),
'NetworkInterfaceId': network_interface_id,
'Status': network_interface['Status'],
'MacAddress': network_interface['MacAddress'],
'Description': network_interface['Description'],
'PrivateDnsName': network_interface['PrivateDnsName'],
'PrivateIpAddress': network_interface['PrivateIpAddress'],
'InstanceId': instance_id,
'InstanceArn': instance_arn,
# Match using the subnet_id and not the arn so that we allow for subnet_id to be None.
'SubnetId': subnet_id,
'GroupId': security_group['GroupId'],
})

if instance.get("SecurityGroups"):
for group in instance["SecurityGroups"]:
group_id = group['GroupId']
sg_list.append(
{
'Arn': build_arn('ec2', current_aws_account_id, 'security-group', group_id, region),
'GroupId': group_id,
'InstanceArn': instance_arn,
},
)

if instance.get("KeyName"):
key_name = instance["KeyName"]
keypair_list.append({
'Arn': build_arn('ec2', current_aws_account_id, 'key-pair', key_name, region),
'KeyName': key_name,
'InstanceArn': instance_arn,
})

if 'BlockDeviceMappings' in instance and len(instance['BlockDeviceMappings']) > 0:
for mapping in instance['BlockDeviceMappings']:
if 'VolumeId' in mapping['Ebs']:
volume_id = mapping['Ebs']['VolumeId']
instance_ebs_volumes_list.append({
'InstanceId': instance_id,
'VolumeId': mapping['Ebs']['VolumeId'],
'Arn': build_arn('ec2', current_aws_account_id, 'volume', volume_id, region),
'InstanceArn': instance_arn,
'VolumeId': volume_id,
'DeleteOnTermination': mapping['Ebs']['DeleteOnTermination'],
# 'SnapshotId': mapping['Ebs']['SnapshotId'], # TODO check on this
})

return Ec2Data(
Expand Down
120 changes: 55 additions & 65 deletions cartography/intel/aws/ec2/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

from cartography.graph.job import GraphJob
from cartography.models.aws.ec2.volumes import EBSVolumeSchema
from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.intel.aws.util.arns import build_arn
from cartography.models.aws.ec2.volumes import EBSVolumeSchema
from cartography.util import aws_handle_regions
from cartography.util import timeit

Expand All @@ -16,7 +20,7 @@

@timeit
@aws_handle_regions
def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict[str, Any]]:
client = boto3_session.client('ec2', region_name=region)
paginator = client.get_paginator('describe_volumes')
volumes: List[Dict] = []
Expand All @@ -26,90 +30,76 @@ def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict]


def transform_volumes(volumes: List[Dict[str, Any]], region: str, current_aws_account_id: str) -> List[Dict[str, Any]]:
result = []
for volume in volumes:
volume['VolumeArn'] = f"arn:aws:ec2:{region}:{current_aws_account_id}:volume/{volume['VolumeId']}"
volume['CreateTime'] = str(volume['CreateTime'])
return volumes
attachments = volume.get('Attachments', [])
active_attachments = [a for a in attachments if a['State'] == 'attached']

volume_id = volume['VolumeId']
raw_vol = ({
'Arn': build_arn('ec2', current_aws_account_id, 'volume', volume_id, region),
'AvailabilityZone': volume['AvailabilityZone'],
'CreateTime': volume['CreateTime'],
'Encrypted': volume['Encrypted'],
'Size': volume['Size'],
'State': volume['State'],
'OutpostArn': volume['OutpostArn'],
'SnapshotId': volume['SnapshotId'],
'Iops': volume['Iops'],
'FastRestored': volume['FastRestored'],
'MultiAttachEnabled': volume['MultiAttachEnabled'],
'VolumeType': volume['VolumeType'],
'VolumeId': volume_id,
'KmsKeyId': volume['KmsKeyId'],
})

if not active_attachments:
result.append(raw_vol)
continue

for attachment in active_attachments:
vol_with_attachment = raw_vol.copy()
vol_with_attachment['InstanceId'] = attachment['InstanceId']
result.append(vol_with_attachment)

return result


@timeit
def load_volumes(
neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
neo4j_session: neo4j.Session,
ebs_data: List[Dict[str, Any]],
region: str,
current_aws_account_id: str,
update_tag: int,
) -> None:
ingest_volumes = """
UNWIND $volumes_list as volume
MERGE (vol:EBSVolume{id: volume.VolumeId})
ON CREATE SET vol.firstseen = timestamp()
SET vol.arn = volume.VolumeArn,
vol.lastupdated = $update_tag,
vol.availabilityzone = volume.AvailabilityZone,
vol.createtime = volume.CreateTime,
vol.encrypted = volume.Encrypted,
vol.size = volume.Size,
vol.state = volume.State,
vol.outpostarn = volume.OutpostArn,
vol.snapshotid = volume.SnapshotId,
vol.iops = volume.Iops,
vol.fastrestored = volume.FastRestored,
vol.multiattachenabled = volume.MultiAttachEnabled,
vol.type = volume.VolumeType,
vol.kmskeyid = volume.KmsKeyId,
vol.region=$Region
WITH vol
MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
MERGE (aa)-[r:RESOURCE]->(vol)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $update_tag
"""

neo4j_session.run(
ingest_volumes,
volumes_list=data,
AWS_ACCOUNT_ID=current_aws_account_id,
load(
neo4j_session,
EBSVolumeSchema(),
ebs_data,
Region=region,
update_tag=update_tag,
AWS_ID=current_aws_account_id,
lastupdated=update_tag,
)


def load_volume_relationships(
neo4j_session: neo4j.Session,
volumes: List[Dict[str, Any]],
aws_update_tag: int,
) -> None:
add_relationship_query = """
MATCH (volume:EBSVolume{arn: $VolumeArn})
WITH volume
MATCH (instance:EC2Instance{instanceid: $InstanceId})
MERGE (volume)-[r:ATTACHED_TO_EC2_INSTANCE]->(instance)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
"""
for volume in volumes:
for attachment in volume.get('Attachments', []):
if attachment['State'] != 'attached':
continue
neo4j_session.run(
add_relationship_query,
VolumeArn=volume['VolumeArn'],
InstanceId=attachment['InstanceId'],
aws_update_tag=aws_update_tag,
)


@timeit
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None:
GraphJob.from_node_schema(EBSVolumeSchema(), common_job_parameters).run(neo4j_session)


@timeit
def sync_ebs_volumes(
neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str],
current_aws_account_id: str, update_tag: int, common_job_parameters: Dict,
neo4j_session: neo4j.Session,
boto3_session: boto3.session.Session,
regions: List[str],
current_aws_account_id: str,
update_tag: int,
common_job_parameters: Dict[str, Any],
) -> None:
for region in regions:
logger.debug("Syncing volumes for region '%s' in account '%s'.", region, current_aws_account_id)
data = get_volumes(boto3_session, region)
transformed_data = transform_volumes(data, region, current_aws_account_id)
load_volumes(neo4j_session, transformed_data, region, current_aws_account_id, update_tag)
load_volume_relationships(neo4j_session, transformed_data, update_tag)
cleanup_volumes(neo4j_session, common_job_parameters)
10 changes: 5 additions & 5 deletions cartography/intel/aws/resourcegroupstaggingapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,17 @@ def get_short_id_from_lb2_arn(alb_arn: str) -> str:
TAG_RESOURCE_TYPE_MAPPINGS: Dict = {
'autoscaling:autoScalingGroup': {'label': 'AutoScalingGroup', 'property': 'arn'},
'dynamodb:table': {'label': 'DynamoDBTable', 'property': 'id'},
'ec2:instance': {'label': 'EC2Instance', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:instance': {'label': 'EC2Instance', 'property': 'id'},
'ec2:internet-gateway': {'label': 'AWSInternetGateway', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:key-pair': {'label': 'EC2KeyPair', 'property': 'id'},
'ec2:network-interface': {'label': 'NetworkInterface', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:network-interface': {'label': 'NetworkInterface', 'property': 'id'},
'ecr:repository': {'label': 'ECRRepository', 'property': 'id'},
'ec2:security-group': {'label': 'EC2SecurityGroup', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:subnet': {'label': 'EC2Subnet', 'property': 'subnetid', 'id_func': get_short_id_from_ec2_arn},
'ec2:security-group': {'label': 'EC2SecurityGroup', 'property': 'id'},
'ec2:subnet': {'label': 'EC2Subnet', 'property': 'id'},
'ec2:transit-gateway': {'label': 'AWSTransitGateway', 'property': 'id'},
'ec2:transit-gateway-attachment': {'label': 'AWSTransitGatewayAttachment', 'property': 'id'},
'ec2:vpc': {'label': 'AWSVpc', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:volume': {'label': 'EBSVolume', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:volume': {'label': 'EBSVolume', 'property': 'id'},
'ec2:elastic-ip-address': {'label': 'ElasticIPAddress', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ecs:cluster': {'label': 'ECSCluster', 'property': 'id'},
'ecs:container': {'label': 'ECSContainer', 'property': 'id'},
Expand Down
8 changes: 4 additions & 4 deletions cartography/intel/aws/ssm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ def get_instance_ids(neo4j_session: neo4j.Session, region: str, current_aws_acco
get_instances_query = """
MATCH (:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(i:EC2Instance)
WHERE i.region = $Region
RETURN i.id
RETURN i.instanceid
"""
results = neo4j_session.run(get_instances_query, AWS_ACCOUNT_ID=current_aws_account_id, Region=region)
instance_ids = []
for r in results:
instance_ids.append(r['i.id'])
instance_ids.append(r['i.instanceid'])
return instance_ids


Expand Down Expand Up @@ -105,7 +105,7 @@ def load_instance_information(
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
WITH i
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{id: i.instance_id})
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{instanceid: i.instance_id})
MERGE (ec2_instance)-[r2:HAS_INFORMATION]->(i)
ON CREATE SET r2.firstseen = timestamp()
SET r2.lastupdated = $aws_update_tag
Expand Down Expand Up @@ -153,7 +153,7 @@ def load_instance_patches(
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
WITH p
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{id: p.instance_id})
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{instanceid: p.instance_id})
MERGE (ec2_instance)-[r2:HAS_PATCH]->(p)
ON CREATE SET r2.firstseen = timestamp()
SET r2.lastupdated = $aws_update_tag
Expand Down
18 changes: 18 additions & 0 deletions cartography/intel/aws/util/arns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Optional


def build_arn(
resource: str,
account: str,
typename: str,
name: str,
region: Optional[str] = None,
partition: Optional[str] = None,
) -> str:
if not partition:
# TODO: support partitions from others. Please file an issue on this if needed, would love to hear from you
partition = 'aws'
if not region:
# Some resources are present in all regions, e.g. IAM policies
region = ""
return f"arn:{partition}:{resource}:{region}:{account}:{typename}/{name}"
6 changes: 3 additions & 3 deletions cartography/models/aws/ec2/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

@dataclass(frozen=True)
class EC2InstanceNodeProperties(CartographyNodeProperties):
# TODO arn: PropertyRef = PropertyRef('Arn', extra_index=True)
id: PropertyRef = PropertyRef('InstanceId')
arn: PropertyRef = PropertyRef('Arn', extra_index=True)
id: PropertyRef = PropertyRef('Arn')
instanceid: PropertyRef = PropertyRef('InstanceId', extra_index=True)
publicdnsname: PropertyRef = PropertyRef('PublicDnsName', extra_index=True)
privateipaddress: PropertyRef = PropertyRef('PrivateIpAddress')
Expand Down Expand Up @@ -64,7 +64,7 @@ class EC2InstanceToEC2ReservationRelProperties(CartographyRelProperties):
class EC2InstanceToEC2Reservation(CartographyRelSchema):
target_node_label: str = 'EC2Reservation'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'reservationid': PropertyRef('ReservationId')},
{'id': PropertyRef('ReservationId')},
)
direction: LinkDirection = LinkDirection.OUTWARD
rel_label: str = "MEMBER_OF_EC2_RESERVATION"
Expand Down
8 changes: 4 additions & 4 deletions cartography/models/aws/ec2/keypairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@

@dataclass(frozen=True)
class EC2KeyPairNodeProperties(CartographyNodeProperties):
id: PropertyRef = PropertyRef('KeyPairArn')
arn: PropertyRef = PropertyRef('KeyPairArn', extra_index=True)
keyname: PropertyRef = PropertyRef('KeyName')
id: PropertyRef = PropertyRef('Arn')
arn: PropertyRef = PropertyRef('Arn', extra_index=True)
keyname: PropertyRef = PropertyRef('KeyName', extra_index=True)
region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)

Expand Down Expand Up @@ -45,7 +45,7 @@ class EC2KeyPairToEC2InstanceRelProperties(CartographyRelProperties):
class EC2KeyPairToEC2Instance(CartographyRelSchema):
target_node_label: str = 'EC2Instance'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'id': PropertyRef('InstanceId')},
{'id': PropertyRef('InstanceArn')},
)
direction: LinkDirection = LinkDirection.OUTWARD
rel_label: str = "SSH_LOGIN_TO"
Expand Down
Loading