"""FastAPI service that runs Qwen-VL image + text inference.

An uploaded image is resized/normalised, saved as a tensor file, pushed
through the ViT engine via ``vit_process`` and the resulting embeddings are
handed to ``QWenInfer.qwen_infer`` together with the prompt text.
"""
import contextlib
import logging
import os
from typing import List, Optional

import torch
import torchvision.transforms as transforms
import uvicorn
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from PIL import Image

from run import QWenInfer, vit_process  # Adjust import paths as necessary

logger = logging.getLogger(__name__)

app = FastAPI()

# Directory to save pre-processed images as tensors
TENSOR_DIR = "./tensor_images"
os.makedirs(TENSOR_DIR, exist_ok=True)

# Directory for the raw uploads. It must exist before the first request,
# otherwise opening the temp file in `infer` raises FileNotFoundError.
temp_dir = "./tempfiles"
os.makedirs(temp_dir, exist_ok=True)

# Configuration for model and inference
vit_engine_dir = "./plan"
qwen_infer = QWenInfer(
    tokenizer_dir="./Qwen-VL-Chat",
    qwen_engine_dir="./trt_engines/Qwen-VL-7B-Chat-int4-gptq",
    log_level="info",
    output_csv=None,
    output_npy=None,
    num_beams=1,
)
qwen_infer.qwen_model_init()

# Built once at import time: resize to 448x448, convert to a tensor, then
# normalise each channel with the mean/std below.
_IMAGE_TRANSFORM = transforms.Compose([
    transforms.Resize((448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def _safe_filename(filename):
    """Return only the final path component of a client-supplied filename.

    The name comes straight from the multipart upload, so directory parts
    (``../``, absolute paths, Windows separators) are stripped before it is
    joined onto a server-side directory.

    Raises:
        ValueError: if nothing usable is left (empty, ``.`` or ``..``).
    """
    name = os.path.basename((filename or "").replace("\\", "/"))
    if name in ("", ".", ".."):
        raise ValueError("uploaded file has no usable filename")
    return name


def load_and_transform_image(image_file: UploadFile):
    """Load an uploaded image, transform it, and save it as a tensor file.

    Args:
        image_file: the upload; its ``file`` stream is read and its
            ``filename`` (directory parts removed) names the output file.

    Returns:
        Path of the saved tensor, ``<TENSOR_DIR>/<filename>.pt``.

    Raises:
        ValueError: if the upload has no usable filename.
    """
    tensor_file_path = os.path.join(
        TENSOR_DIR, f"{_safe_filename(image_file.filename)}.pt"
    )

    # The caller may already have read the upload to the end; rewind so the
    # image is decoded from the first byte.
    image_file.file.seek(0)
    image = Image.open(image_file.file).convert("RGB")

    image_tensor = _IMAGE_TRANSFORM(image).unsqueeze(0)  # Add batch dimension
    logger.debug("image_tensor %s", image_tensor.shape)

    # Save the tensor for inference
    torch.save(image_tensor, tensor_file_path)
    return tensor_file_path


@app.post("/infer/")
async def infer(
    image: UploadFile = File(...),
    input_text: str = Form(...),
    max_new_tokens: int = Form(1024),
    history: Optional[List[str]] = Form(None),
):
    """Run one image + text inference request.

    Args:
        image: uploaded image file.
        input_text: prompt text passed to the model.
        max_new_tokens: forwarded unchanged to ``qwen_infer.qwen_infer``.
        history: accepted from the form but currently not forwarded (see the
            note in the body).

    Returns:
        ``{"output_text": <model output>}``.

    Raises:
        HTTPException: 400 when the upload has no usable filename, 500 for
            any other failure (the original error text is the detail).
    """
    temp_image_path = None
    transformed_img_path = None
    try:
        try:
            filename = _safe_filename(image.filename)
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=str(exc)) from exc

        # Keep a raw copy of the upload on disk for the duration of the request.
        temp_image_path = os.path.join(temp_dir, filename)
        contents = await image.read()
        with open(temp_image_path, "wb") as f:
            f.write(contents)
        logger.debug("input_text=%r temp_image_path=%s", input_text, temp_image_path)

        transformed_img_path = load_and_transform_image(image)
        images = [{"image": transformed_img_path}]
        stream = torch.cuda.current_stream().cuda_stream
        image_embeds = vit_process(images, vit_engine_dir, stream)

        # NOTE(review): the text entry is appended to the very list that is
        # passed as `images_path` below, exactly as before. Confirm that
        # `qwen_infer` expects the prompt inside that list.
        images.append({"text": input_text})

        # NOTE(review): the client-supplied `history` has always been replaced
        # by an empty list here. The format `qwen_infer` expects is not
        # visible from this file, so that behaviour is kept.
        output_text = qwen_infer.qwen_infer(
            input_vit=image_embeds,
            images_path=images,
            input_text=input_text,
            max_new_tokens=max_new_tokens,
            history=[],
        )
        logger.debug("output_text=%r", output_text)
        return {"output_text": output_text}
    except HTTPException:
        # Already a proper HTTP error; do not re-wrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Remove the per-request files on failure as well as on success.
        for path in (temp_image_path, transformed_img_path):
            if path is not None:
                with contextlib.suppress(FileNotFoundError):
                    os.remove(path)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=80)
# Internet gateway attached to the test VPC.
resource "aws_internet_gateway" "test_igw" {
  vpc_id = aws_vpc.test_vpc.id
}

# Route table sending all non-local traffic through the internet gateway.
resource "aws_route_table" "test_route_table" {
  vpc_id = aws_vpc.test_vpc.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.test_igw.id
  }
}

# One association per subnet instance.
resource "aws_route_table_association" "test_rta" {
  count          = length(aws_subnet.test_subnet)
  subnet_id      = aws_subnet.test_subnet[count.index].id
  route_table_id = aws_route_table.test_route_table.id
}

# WARNING: allows every protocol and port from and to 0.0.0.0/0.
# As the description says, this is for testing only.
resource "aws_security_group" "test_sg" {
  name        = "test-sg"
  description = "Security group for testing with all ports open"
  vpc_id      = aws_vpc.test_vpc.id

  ingress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

resource "aws_ecs_cluster" "test_cluster" {
  name = "test-cluster"
}

# Runs one copy of the app task on EC2 capacity in the test subnet.
resource "aws_ecs_service" "test_service" {
  name            = "test-service"
  cluster         = aws_ecs_cluster.test_cluster.id
  task_definition = aws_ecs_task_definition.app_task.arn
  desired_count   = 1
  launch_type     = "EC2"

  network_configuration {
    subnets         = [aws_subnet.test_subnet[0].id]
    security_groups = [aws_security_group.test_sg.id]
  }
}

# EFS file system mounted by the task as "efs-volume".
resource "aws_efs_file_system" "example" {
  creation_token = "my-product"

  tags = {
    Name = "MyProduct"
  }
}

resource "aws_efs_mount_target" "example" {
  file_system_id  = aws_efs_file_system.example.id
  subnet_id       = aws_subnet.test_subnet[0].id
  security_groups = [aws_security_group.test_sg.id]
}

# IAM Role for EC2 Instances (if not already defined)
resource "aws_iam_role" "ecs_instance_role" {
  name = "ecs_instance_role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Action = "sts:AssumeRole",
        Effect = "Allow",
        Principal = {
          Service = "ec2.amazonaws.com",
        },
        Sid = "",
      },
    ],
  })
}

# Attach the necessary policies to the ECS Instance Role
resource "aws_iam_role_policy_attachment" "ecs_instance_role_policy_attach" {
  role       = aws_iam_role.ecs_instance_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
}

resource "aws_iam_instance_profile" "app_instance_profile" {
  name = "app_instance_profile"
  role = aws_iam_role.ecs_instance_role.name
}

resource "aws_launch_template" "app_launch_template" {
  name_prefix   = "ecs-launch-template3"
  description   = "Launch Template for EC2 instances running the application"
  image_id      = "ami-0c574b811be1b656f"
  instance_type = "p3.2xlarge" # Choose an appropriate instance type

  # Security groups are set on network_interfaces below rather than through
  # vpc_security_group_ids; the provider documents the two as conflicting.

  # Specify the IAM Instance Profile if required
  iam_instance_profile {
    name = aws_iam_instance_profile.app_instance_profile.name
  }

  # NOTE(review): the heredoc body was lost in the copy this file was
  # recovered from; only "... >> /etc/ecs/ecs.config" survived. Reconstructed
  # as the usual cluster-registration line - confirm against the original.
  user_data = base64encode(<<-EOF
    #!/bin/bash
    echo ECS_CLUSTER=${aws_ecs_cluster.test_cluster.name} >> /etc/ecs/ecs.config
  EOF
  )

  # Ensure instances are placed in the VPC
  network_interfaces {
    security_groups             = [aws_security_group.test_sg.id]
    associate_public_ip_address = true
  }

  block_device_mappings {
    device_name = "/dev/xvda"

    ebs {
      volume_size           = 70
      delete_on_termination = true
      volume_type           = "gp2" # General Purpose SSD
    }
  }

  tag_specifications {
    resource_type = "instance"

    tags = {
      Name = "AppInstance"
    }
  }
}

resource "aws_iam_policy" "ecs_logging" {
  name        = "ecsLoggingPolicy"
  description = "IAM policy for ECS logging to CloudWatch Logs"

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Action = [
          "logs:CreateLogStream",
          "logs:PutLogEvents",
        ],
        Effect = "Allow",
        # Both actions are evaluated against log-stream ARNs
        # (<group-arn>:log-stream:<name>), which the bare group ARN does not
        # match. Listing both forms works whether or not the provider version
        # already appends ":*" to the exported ARN.
        Resource = [
          aws_cloudwatch_log_group.ecs_logs.arn,
          "${aws_cloudwatch_log_group.ecs_logs.arn}:*",
        ],
      },
    ],
  })
}

# Auto Scaling group (1 to 3 instances) built from the launch template.
resource "aws_autoscaling_group" "app_asg" {
  name_prefix       = "app-asg-"
  max_size          = 3
  min_size          = 1
  desired_capacity  = 1
  health_check_type = "EC2"

  launch_template {
    id      = aws_launch_template.app_launch_template.id
    version = "$Latest"
  }

  vpc_zone_identifier = [aws_subnet.test_subnet[0].id]

  tag {
    key                 = "Name"
    value               = "AppInstance"
    propagate_at_launch = true
  }
}

# Role assumed by ECS tasks. The task definition uses it as both the
# execution role and the task role.
resource "aws_iam_role" "ecs_task_execution_role" {
  name = "ecs_task_execution_role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Action = "sts:AssumeRole",
        Effect = "Allow",
        Principal = {
          Service = "ecs-tasks.amazonaws.com",
        },
        Sid = "",
      },
    ],
  })
}

# NOTE(review): grants object reads only and is attached (below) to the
# instance role, while the task's "aws s3 sync" container runs under the task
# role. Confirm which role the sync actually uses and whether s3:ListBucket
# on the bucket is needed as well.
resource "aws_iam_policy" "s3_access_policy" {
  name        = "s3AccessPolicy"
  description = "Policy to access specific S3 bucket"

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Action = [
          "s3:GetObject",
        ],
        Effect = "Allow",
        Resource = [
          "arn:aws:s3:::qwenvlchat/*",
        ],
      },
    ],
  })
}

resource "aws_iam_policy" "efs_permissions" {
  name        = "EFSPermissions"
  description = "Policy that allows EFS actions"

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect = "Allow",
        Action = [
          "elasticfilesystem:ClientMount",
          "elasticfilesystem:ClientWrite",
          "elasticfilesystem:Describe*",
          // Include any additional actions your task requires
        ],
        Resource = "*"
      },
    ],
  })
}

resource "aws_iam_role_policy_attachment" "s3_access_policy_attachment" {
  role       = aws_iam_role.ecs_instance_role.name
  policy_arn = aws_iam_policy.s3_access_policy.arn
}

resource "aws_iam_policy" "ecr_read_policy" {
  name        = "ecr_read_policy"
  path        = "/"
  description = "IAM policy for reading from ECR"

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Action = [
          "ecr:GetDownloadUrlForLayer",
          "ecr:BatchGetImage",
          "ecr:BatchCheckLayerAvailability",
        ],
        Effect   = "Allow",
        Resource = "*",
      },
    ],
  })
}

resource "aws_iam_policy" "ecr_policy" {
  name        = "ECRPolicy"
  path        = "/"
  description = "Allow ECS tasks to pull images from ECR"

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect   = "Allow",
        Action   = "ecr:GetAuthorizationToken",
        Resource = "*"
      },
      {
        Effect = "Allow",
        Action = [
          "ecr:BatchCheckLayerAvailability",
          "ecr:GetDownloadUrlForLayer",
          "ecr:BatchGetImage"
        ],
        Resource = "arn:aws:ecr:us-east-1:916723593639:repository/qwenlight"
      }
    ]
  })
}

resource "aws_iam_role_policy_attachment" "ecs_logging_attach" {
  role       = aws_iam_role.ecs_task_execution_role.name
  policy_arn = aws_iam_policy.ecs_logging.arn
}

# The next two use aws_iam_role_policy_attachment instead of
# aws_iam_policy_attachment. The latter manages a policy's attachments
# exclusively and would detach it from any role, user or group not listed.
resource "aws_iam_role_policy_attachment" "efs_permissions_attach" {
  role       = aws_iam_role.ecs_task_execution_role.name
  policy_arn = aws_iam_policy.efs_permissions.arn
}

resource "aws_iam_role_policy_attachment" "ecr_policy_attach" {
  role       = aws_iam_role.ecs_task_execution_role.name
  policy_arn = aws_iam_policy.ecr_policy.arn
}

resource "aws_iam_role_policy_attachment" "ecs_task_execution_role_policy_attach" {
  role       = aws_iam_role.ecs_task_execution_role.name
  policy_arn = aws_iam_policy.ecr_read_policy.arn
}

# Note: This script sets up the VPC, subnets, and security group. Ensure your ECS Task Definition and Service configurations align with this setup.
# Task with two containers sharing one EFS volume mounted at /qwenvl:
#   1. s3-sync-container copies s3://qwenvlchat onto the volume and exits.
#   2. qwenfastapi-container serves the API on port 80 with one GPU.
resource "aws_ecs_task_definition" "app_task" {
  family                   = "helloworld"
  network_mode             = "awsvpc"
  requires_compatibilities = ["EC2"]
  execution_role_arn       = aws_iam_role.ecs_task_execution_role.arn
  task_role_arn            = aws_iam_role.ecs_task_execution_role.arn
  cpu                      = "4096"  # task-level CPU units (1024 units = 1 vCPU)
  memory                   = "32768" # task-level memory in MiB

  volume {
    name = "efs-volume"

    efs_volume_configuration {
      file_system_id     = aws_efs_file_system.example.id
      root_directory     = "/"
      transit_encryption = "ENABLED"
    }
  }

  container_definitions = jsonencode([
    {
      name       = "s3-sync-container"
      image      = "amazon/aws-cli"
      entryPoint = ["sh", "-c"]
      command    = ["aws s3 sync s3://qwenvlchat /qwenvl"]
      mountPoints = [
        {
          sourceVolume  = "efs-volume"
          containerPath = "/qwenvl"
          readOnly      = false
        },
      ]
      # This container exits once the sync finishes. If it were essential,
      # ECS would stop the whole task, API container included, at that point.
      essential = false
      # ECS container definitions use camelCase keys; the snake_case
      # "log_configuration" / "log_driver" are not recognised.
      logConfiguration = {
        logDriver = "awslogs"
        options = {
          "awslogs-group"         = aws_cloudwatch_log_group.ecs_logs.name
          "awslogs-region"        = "us-east-1"
          "awslogs-stream-prefix" = "s3-sync-container"
        }
      }
    },
    {
      name      = "qwenfastapi-container"
      image     = "916723593639.dkr.ecr.us-east-1.amazonaws.com/qwenlight:latest"
      cpu       = 2048
      memory    = 16384
      essential = true
      # Start only after the sync container has exited with status 0, so the
      # data under /qwenvl is in place first.
      dependsOn = [
        {
          containerName = "s3-sync-container"
          condition     = "SUCCESS"
        },
      ]
      mountPoints = [
        {
          sourceVolume  = "efs-volume"
          containerPath = "/qwenvl"
          readOnly      = false
        },
      ]
      portMappings = [
        {
          containerPort = 80
          hostPort      = 80
          protocol      = "tcp"
        },
      ]
      resourceRequirements = [
        {
          type  = "GPU"
          value = "1"
        },
      ]
    },
  ])
}
count.index) @@ -92,16 +96,7 @@ resource "aws_iam_role" "ecs_instance_role" { }) } -# Attach the necessary policies to the ECS Instance Role -resource "aws_iam_role_policy_attachment" "ecs_instance_role_policy_attach" { - role = aws_iam_role.ecs_instance_role.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role" -} -resource "aws_iam_instance_profile" "app_instance_profile" { - name = "app_instance_profile" - role = aws_iam_role.ecs_instance_role.name -} resource "aws_launch_template" "app_launch_template" { @@ -230,6 +225,12 @@ resource "aws_iam_policy" "ecr_policy" { ] }) } + +# Attach the necessary policies to the ECS Instance Role +resource "aws_iam_role_policy_attachment" "ecs_instance_role_policy_attach" { + role = aws_iam_role.ecs_instance_role.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role" +} resource "aws_iam_policy_attachment" "ecr_policy_attach" { name = "ECRPolicyAttachment" roles = [aws_iam_role.ecs_task_execution_role.name] diff --git a/swarm_cloud.tf b/swarm_cloud.tf index cdae342..10907c8 100644 --- a/swarm_cloud.tf +++ b/swarm_cloud.tf @@ -101,7 +101,7 @@ resource "aws_launch_configuration" "model_api_conf" { git clone https://github.com/kyegomez/swarms-cloud.git cd swarms-cloud sudo docker build -t cogvlm . - sudo docker run -d -p 80:80 cogvlm + sudo docker run -d -p 8000:8000 cogvlm EOF lifecycle {