
Change VLM to use bigger train set
neosouwchuan authored and qitianshi committed Jun 2, 2024
1 parent 6d44437 commit 41419af
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions vlm/src/VLMManager.py
@@ -26,7 +26,7 @@ def __init__(self):
         print([f for f in os.listdir('.') if os.path.isfile(f)])
         self.clipmodel= torch.load(path.join(path.dirname(path.abspath(__file__)), "clip_ft_2.pt"))
         self.objects = ["cargo aircraft","light aircraft","commercial aircraft","drone","missile","helicopter","fighter jet","fighter plane"]
-        self.model = YOLOWorld(path.join(path.dirname(path.abspath(__file__)), "yoloworldbest2.pt")).to(self.device)
+        self.model = YOLOWorld(path.join(path.dirname(path.abspath(__file__)), "800allbest.pt")).to(self.device)
         for i in self.clipmodel.parameters():
             i.requires_grad=False
         for i in self.model.parameters():
@@ -51,18 +51,18 @@ def identify(self, imagebyte: bytes, caption: str):
         tokenizedtext = clip.tokenize([caption]).to(self.device)
         clipprob = []
         maxscore = 0
-        for chosenindex in possible:
+        for chosenindex in range(len(bboxlist)):
             bbox = bboxlist[chosenindex]
             bbox[0]*=1520
             bbox[1]*=870
             bbox[2]*=1520
             bbox[3]*=870
             deltax = bbox[2]-bbox[0]
             deltay = bbox[3]-bbox[1]
-            bbox[0]-=deltax/2
-            bbox[1]-=deltay/2
-            bbox[2]-=deltax/2
-            bbox[3]-=deltay/2
+            # bbox[0]-=deltax/2
+            # bbox[1]-=deltay/2
+            # bbox[2]-=deltax/2
+            # bbox[3]-=deltay/2
             croppedimage = inputimage.crop(bbox)
             croppedimage = self.clippreprocess(croppedimage).unsqueeze(0).to(self.device)
             logits_per_image, logits_per_text = self.clipmodel(croppedimage, tokenizedtext)
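For context, the changed loop scores every YOLO-World candidate box instead of a pre-filtered `possible` subset: each box's normalized coordinates are scaled to the 1520x870 source image, the crop is cut out, and the fine-tuned CLIP model rates it against the caption. With the center-to-corner shift commented out, the scaled coordinates are passed to crop() as corners directly. Below is a minimal self-contained sketch of that flow, not the repo's exact code: it assumes the openai/clip package API (clip.load, clip.tokenize) and PIL, uses a stock ViT-B/32 checkpoint as a stand-in for clip_ft_2.pt, and guesses the final max-score selection step, since the page truncates identify() after the CLIP call.

# Hedged sketch of the identify() flow shown in the diff above. Variable
# names (bboxlist, inputimage, tokenizedtext) mirror the diff; everything
# else is illustrative.
import io

import clip
import torch
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
# Stand-in for the repo's fine-tuned clip_ft_2.pt checkpoint.
clipmodel, clippreprocess = clip.load("ViT-B/32", device=device)

def best_box(imagebyte: bytes, caption: str, bboxlist):
    """Return the candidate box whose crop CLIP scores highest for the caption."""
    inputimage = Image.open(io.BytesIO(imagebyte))
    tokenizedtext = clip.tokenize([caption]).to(device)
    maxscore, best = 0.0, None
    for bbox in bboxlist:
        # Scale normalized coords to the 1520x870 image. After this commit the
        # center-to-corner shift is commented out, i.e. the boxes are treated
        # as (x1, y1, x2, y2) corners already.
        box = (bbox[0] * 1520, bbox[1] * 870, bbox[2] * 1520, bbox[3] * 870)
        croppedimage = clippreprocess(inputimage.crop(box)).unsqueeze(0).to(device)
        with torch.no_grad():
            logits_per_image, _ = clipmodel(croppedimage, tokenizedtext)
        score = logits_per_image.item()
        if score > maxscore:  # guessed bookkeeping; the diff cuts off here
            maxscore, best = score, box
    return best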
