Simple object detection
This commit is contained in:
parent
3a2ed7d4eb
commit
c56d7c86fc
|
@ -1,2 +1,3 @@
|
|||
.env
|
||||
config/
|
||||
using_yolov8.ipynb
|
|
@ -0,0 +1 @@
|
|||
3.10.5
|
BIN
environment.yml
BIN
environment.yml
Binary file not shown.
209
main.py
209
main.py
|
@ -1,209 +0,0 @@
|
|||
import datetime
|
||||
import face_recognition
|
||||
import cv2
|
||||
import numpy as np
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
import json
|
||||
import pathlib
|
||||
import requests
|
||||
import time
|
||||
|
||||
|
||||
load_dotenv()
|
||||
URL = os.getenv("URL")
|
||||
RUN_SCALE = os.getenv("RUN_SCALE")
|
||||
VIEW_SCALE = os.getenv("VIEW_SCALE")
|
||||
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
|
||||
# RUN_SCALE = 0.25
|
||||
# VIEW_SCALE = 0.75
|
||||
DISPLAY = False
|
||||
RUN_BY_COMPOSE = os.getenv("RUN_BY_COMPOSE")
|
||||
NTFY_URL = os.getenv("NTFY_URL")
|
||||
|
||||
|
||||
def find_face_from_name(name):
    """Look up a configured face by its display name.

    Scans ``config["faces"]`` in iteration order and returns the key of the
    first entry whose ``"name"`` value equals *name*, or ``None`` when no
    entry matches.
    """
    faces = config["faces"]
    matching_keys = (key for key in faces if faces[key]["name"] == name)
    return next(matching_keys, None)
|
||||
|
||||
|
||||
def write_config():
    """Persist the module-level ``config`` to ``config_path`` as JSON.

    The file is opened for writing (truncating any previous content) and the
    dictionary is dumped with a 4-space indent.
    """
    with config_path.open("w") as handle:
        json.dump(config, handle, indent=4)
|
||||
|
||||
|
||||
print("Hello, world!")

# Per-frame working state plus the known-face tables that are filled in
# once the configuration has been loaded.
face_locations = []
face_encodings = []
face_names = []
known_face_encodings = []
known_face_names = []
process_this_frame = True

# Default configuration. It is written to disk as an editable template when
# no usable config file exists yet.
config = {
    # When RUN_BY_COMPOSE is set the stream is reached through the "bridge"
    # host name, otherwise through localhost.
    "URL": "rtsp://localhost:8554/wyze_cam_name"
    if not RUN_BY_COMPOSE
    else "rtsp://bridge:8554/wyze_cam_name",
    "run_scale": "0.25",
    "view_scale": "0.75",
    "faces": {
        "example1": {"image": "config/example1.jpg", "last_seen": ""},
        "example2": {"image": "config/example2.jpg", "last_seen": ""},
    },
    "ntfy_url": "https://ntfy.sh/example",
    "display": True,
}
config_path = pathlib.Path("config/config.json")

# A missing file and a blank file are both treated as "no config yet";
# feeding an empty file to the JSON parser would raise instead.
existing_text = config_path.read_text() if config_path.exists() else ""
if existing_text.strip():
    config = json.loads(existing_text)
else:
    # The config directory may not exist on a fresh checkout, and opening a
    # file for writing does not create parent directories.
    config_path.parent.mkdir(parents=True, exist_ok=True)
    with open(config_path, "w") as config_file:
        json.dump(config, config_file, indent=4)
    print("Config file created, please edit it and restart the program")
    print("For relative paths, use the format config/example.jpg")
    # Equivalent to exit(), but does not rely on the `site` module's builtin.
    raise SystemExit
|
||||
|
||||
|
||||
# Merge environment overrides with the config file. For every setting a
# non-empty environment value wins and is mirrored into `config`; otherwise
# the value from the config file is used.
# The config keys are lower-case ("run_scale", "view_scale", "display"),
# matching the default template; only "URL" is upper-case there.
if URL:
    config["URL"] = URL
else:
    URL = config["URL"]

if RUN_SCALE:
    config["run_scale"] = RUN_SCALE
else:
    RUN_SCALE = config["run_scale"]
# Environment variables and the template both hold the scale as text, but
# it is used as a number later on (resize factors, coordinate scaling).
RUN_SCALE = float(RUN_SCALE)

if VIEW_SCALE:
    config["view_scale"] = VIEW_SCALE
else:
    VIEW_SCALE = config["view_scale"]
VIEW_SCALE = float(VIEW_SCALE)

if DISPLAY:
    config["display"] = DISPLAY
else:
    DISPLAY = config["display"]

if NTFY_URL:
    config["ntfy_url"] = NTFY_URL
else:
    NTFY_URL = config["ntfy_url"]

print(f"Current config: {config}")
|
||||
|
||||
# Build the parallel lists of known encodings and their config keys.
for face in config["faces"]:
    image_path = config["faces"][face]["image"]
    # Load the reference picture and compute the encodings of the faces in it.
    image = face_recognition.load_image_file(image_path)
    encodings = face_recognition.face_encodings(image)
    if not encodings:
        # Indexing an empty result would raise a bare IndexError; stop with a
        # message that names the offending image instead.
        raise SystemExit(
            f"No face could be found in {image_path} (configured for {face})"
        )
    # Only the first face found in the reference picture is used.
    face_encoding = encodings[0]
    known_face_encodings.append(face_encoding)
    # The config key (not a display name) identifies the face from here on.
    known_face_names.append(face)
|
||||
|
||||
# Open the configured stream.
video_capture = cv2.VideoCapture(URL)

# Request a one-frame buffer so reads return the most recent frame instead of
# a backlog; the trade-off is that playback may look choppy.
video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)

# Report the resolution of the opened stream.
frame_width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)
frame_height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
print(f"Video resolution: {frame_width}x{frame_height}")

print("Beginning video capture...")
|
||||
# Main loop: read a frame, recognise faces, notify about known faces that
# have not been reported recently, and optionally display the annotated view.
while True:
    # Grab a single frame of video
    ret, frame = video_capture.read()
    if not ret or frame is None:
        # A failed read yields no frame; resizing it would raise inside OpenCV.
        print("Failed to read a frame from the video stream, stopping")
        break

    # Detection runs on a (usually smaller) copy; a second copy is kept for display.
    run_frame = cv2.resize(frame, (0, 0), fx=RUN_SCALE, fy=RUN_SCALE)
    view_frame = cv2.resize(frame, (0, 0), fx=VIEW_SCALE, fy=VIEW_SCALE)

    # Convert from BGR (OpenCV) to RGB (face_recognition). cvtColor returns a
    # new contiguous array rather than a reversed-stride view of run_frame.
    # NOTE(review): the original author reported a silent crash in the call
    # below when running outside Docker Compose; the strided view may have
    # been the cause - confirm.
    rgb_run_frame = cv2.cvtColor(run_frame, cv2.COLOR_BGR2RGB)

    # Find all the faces and face encodings in the current frame of video.
    # Author's note: the "cnn" model is GPU accelerated, "hog" is CPU only.
    face_locations = face_recognition.face_locations(rgb_run_frame, model="hog")
    face_encodings = face_recognition.face_encodings(rgb_run_frame, face_locations)

    face_names = []
    for face_encoding in face_encodings:
        name = "Unknown"

        # With no known faces there is nothing to compare against, and
        # argmin of an empty distance array would raise.
        if known_face_encodings:
            matches = face_recognition.compare_faces(
                known_face_encodings, face_encoding
            )
            # Use the known face with the smallest distance to the new face
            face_distances = face_recognition.face_distance(
                known_face_encodings, face_encoding
            )
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
                now = datetime.datetime.now()
                last_seen = config["faces"][name]["last_seen"]

                # A face that has never been seen gets a timestamp 15 seconds
                # in the past so the 10 second check below fires right away.
                if last_seen == "":
                    print(f"{name} has been seen for the first time")
                    last_seen = (now - datetime.timedelta(seconds=15)).strftime(
                        DATETIME_FORMAT
                    )
                    config["faces"][name]["last_seen"] = last_seen
                    write_config()

                # Notify only if the face was last reported more than 10 seconds ago
                elapsed = now - datetime.datetime.strptime(last_seen, DATETIME_FORMAT)
                if elapsed > datetime.timedelta(seconds=10):
                    print(f"{name} has been seen")
                    print(f"Sending notification to {NTFY_URL}")
                    try:
                        requests.post(
                            NTFY_URL,
                            data=f'"{name}" has been seen',
                            headers={
                                "Title": "Face Detected",
                                "Priority": "default",
                                "Tags": "neutral_face",
                            },
                            # Without a timeout a stalled server would block
                            # the capture loop indefinitely.
                            timeout=10,
                        )
                    except requests.RequestException as err:
                        # A failed notification should not stop recognition.
                        print(f"Failed to send notification: {err}")
                    # Update the last seen time
                    config["faces"][name]["last_seen"] = now.strftime(DATETIME_FORMAT)
                    write_config()

        face_names.append(name)

    # Face locations were found on run_frame; this ratio maps them onto view_frame.
    scale = VIEW_SCALE / RUN_SCALE
    for (top, right, bottom, left), name in zip(face_locations, face_names):
        top = int(top * scale)
        right = int(right * scale)
        bottom = int(bottom * scale)
        left = int(left * scale)

        # Draw a box around the face
        cv2.rectangle(view_frame, (left, top), (right, bottom), (0, 0, 255), 2)

        # Draw a filled label with the name along the bottom edge of the box
        cv2.rectangle(
            view_frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED
        )
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(
            view_frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1
        )

    # Display the resulting image if the "display" setting is enabled
    if config["display"]:
        cv2.imshow("Scaled View", view_frame)

    # Hit 'q' on the keyboard to quit!
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Release the capture handle and close any windows
print("Releasing video capture")
video_capture.release()
cv2.destroyAllWindows()
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,58 @@
|
|||
[tool.poetry]
|
||||
name = "detect-it"
|
||||
version = "0.1.0"
|
||||
description = "Detect all the things"
|
||||
authors = ["slashtechno <77907286+slashtechno@users.noreply.github.com>"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
packages = [{include = "detect_it"}]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
python-dotenv = "^1.0.0"
|
||||
httpx = "^0.25.0"
|
||||
opencv-python = "^4.8.1.78"
|
||||
ultralytics = "^8.0.190"
|
||||
hjson = "^3.1.0"
|
||||
numpy = "^1.23.2"
|
||||
torch = [
|
||||
{ version = "^2.0.0+cu118", source = "torch_cu118", markers = "extra=='cuda'" },
|
||||
{ version = "^2.0.0+cpu", source = "torch_cpu", markers = "extra!='cuda'" },
|
||||
]
|
||||
torchaudio = [
|
||||
{ version = "^2.0.0+cu118", source = "torch_cu118", markers = "extra=='cuda'" },
|
||||
{ version = "^2.0.0+cpu", source = "torch_cpu", markers = "extra!='cuda'" },
|
||||
]
|
||||
torchvision = [
|
||||
{ version = "^0.15+cu118", source = "torch_cu118", markers = "extra=='cuda'" },
|
||||
{ version = "^0.15+cpu", source = "torch_cpu", markers = "extra!='cuda'" },
|
||||
]
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = "^23.9.1"
|
||||
ruff = "^0.0.291"
|
||||
ipykernel = "^6.25.2"
|
||||
|
||||
|
||||
[[tool.poetry.source]]
|
||||
name = "torch_cpu"
|
||||
url = "https://download.pytorch.org/whl/cpu"
|
||||
priority = "supplemental"
|
||||
|
||||
[[tool.poetry.source]]
|
||||
name = "torch_cu118"
|
||||
url = "https://download.pytorch.org/whl/cu118"
|
||||
priority = "supplemental"
|
||||
|
||||
[tool.poetry.extras]
|
||||
cuda = []
|
||||
|
||||
[[tool.poetry.source]]
|
||||
name = "PyPI"
|
||||
priority = "primary"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
# certifi @ file:///croot/certifi_1665076670883/work/certifi
|
||||
click==8.1.3
|
||||
dlib==19.24.0
|
||||
face-recognition==1.3.0
|
||||
face-recognition-models==0.3.0
|
||||
numpy==1.23.5
|
||||
opencv-python==4.6.0.66
|
||||
Pillow==9.3.0
|
||||
python-dotenv==0.21.0
|
||||
urllib3==1.26.13
|
||||
requests==2.31.0
|
|
@ -0,0 +1,129 @@
|
|||
# import face_recognition
|
||||
import cv2
|
||||
import numpy as np
|
||||
import dotenv
|
||||
from pathlib import Path
|
||||
import os
|
||||
import time
|
||||
# import hjson as json
|
||||
import torch
|
||||
from ultralytics import YOLO
|
||||
|
||||
import argparse
|
||||
|
||||
from .utils import notify, config_utils
|
||||
|
||||
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
|
||||
args = None
|
||||
|
||||
def _capture_source(value):
    """Convert a ``--capture-device`` value into a ``cv2.VideoCapture`` source.

    All-digit text becomes an integer device index; anything else (for
    example a stream URL) is returned unchanged as a string.
    """
    text = str(value).strip()
    return int(text) if text.isdigit() else text


def main():
    """Parse the options and run YOLOv8 detection on a video source.

    Loads a ``.env`` file when one exists in the working directory, stores the
    parsed command-line arguments in the module-level ``args``, then reads
    frames from the capture source, runs the model on a scaled copy of each
    frame and shows the annotated result in a window titled "View". The loop
    ends when ``q`` is pressed or a frame cannot be read.
    """
    global args

    if Path(".env").is_file():
        dotenv.load_dotenv()
        print("Loaded .env file")
    else:
        print("No .env file found")

    argparser = argparse.ArgumentParser(
        prog="Detect It",
        description="Detect it all!",
        epilog=":)",
    )

    # For every option a non-empty environment variable supplies the default.
    # argparse runs string defaults through `type`, so they are converted the
    # same way as values given on the command line.
    argparser.add_argument(
        "--run-scale",
        default=os.environ.get("RUN_SCALE") or 0.25,
        type=float,
        help="The scale to run the detection at, default is 0.25",
    )
    # TODO: a separate --view-scale option (env VIEW_SCALE, default 0.75) for
    # the displayed frame is not implemented yet.

    stream_source = argparser.add_mutually_exclusive_group()
    stream_source.add_argument(
        "--capture-device",
        default=os.environ.get("CAPTURE_DEVICE") or 0,
        # Accept either a numeric device index or a URL, as the help promises;
        # a plain int type would reject URLs.
        type=_capture_source,
        help="The capture device to use. Can also be a url.",
    )

    notification_services = argparser.add_argument_group("Notification Services")
    notification_services.add_argument(
        "--ntfy-url",
        default=os.environ.get("NTFY_URL") or None,
        type=str,
        help="The URL to send notifications to",
    )

    args = argparser.parse_args()

    # Check if a CUDA GPU is available. If it is, select it via torch. If not, use the CPU.
    # https://github.com/ultralytics/ultralytics/issues/3084#issuecomment-1732433168
    device = "0" if torch.cuda.is_available() else "cpu"
    if device == "0":
        torch.cuda.set_device(0)
        print("Set CUDA device")
    else:
        print("No CUDA device available, using CPU")

    model = YOLO("yolov8n.pt")

    video_capture = cv2.VideoCapture(args.capture_device)
    try:
        # Request a one-frame buffer so reads return the most recent frame
        # instead of a backlog; the trade-off is that video may look choppy.
        video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)

        # Print the resolution of the video
        print(
            f"Video resolution: {video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)}x{video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)}"  # noqa: E501
        )

        print("Beginning video capture...")
        while True:
            # Grab a single frame of video
            ret, frame = video_capture.read()
            if not ret or frame is None:
                # A failed read yields no frame; resizing it would raise inside OpenCV.
                print("Failed to read a frame from the capture device, stopping")
                break

            # Run detection on a scaled copy of the frame for faster processing
            run_frame = cv2.resize(frame, (0, 0), fx=args.run_scale, fy=args.run_scale)

            results = model(run_frame)
            for r in results:
                # plot() returns the frame with the detections drawn on it.
                # TODO: detections are drawn at run_scale size; scale them
                # back up once a separate view scale exists.
                im_array = r.plot()
                cv2.imshow("View", im_array)

            # Hit 'q' on the keyboard to quit!
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the capture handle and close windows even if the loop raised
        print("Releasing video capture")
        video_capture.release()
        cv2.destroyAllWindows()
|
||||
|
||||
main()
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
# def write_config():
|
||||
# with open(config_path, "w") as config_file:
|
||||
# json.dump(config, config_file, indent=4)
|
|
@ -0,0 +1,38 @@
|
|||
import datetime
|
||||
import httpx
|
||||
|
||||
|
||||
def construct_ntfy_headers(
    title: str = "Object/Person Detected",
    tag="rotating_light",  # https://docs.ntfy.sh/publish/#tags-emojis
    priority="default",  # https://docs.ntfy.sh/publish/#message-priority
) -> (dict):
    """Assemble the header mapping for an ntfy notification.

    Args:
        title: Value for the ``Title`` header.
        tag: Value for the ``Tags`` header.
        priority: Value for the ``Priority`` header.

    Returns:
        A new dict with the keys ``Title``, ``Priority`` and ``Tags``.
    """
    headers = dict(Title=title, Priority=priority, Tags=tag)
    return headers
|
||||
|
||||
def send_notification(
    data: str,
    headers: dict,
    url: str
):
    """POST a notification message to *url*.

    Args:
        data: Message text; it is sent UTF-8 encoded as the request body.
        headers: HTTP headers to send with the request.
        url: Address to post to.

    Raises:
        ValueError: If *url* or *data* is None.
    """
    if url is None or data is None:
        raise ValueError("url and data cannot be None")
    # Raw bytes belong in `content=`; httpx reserves `data=` for form fields
    # and deprecates passing bytes through it.
    httpx.post(url, content=data.encode('utf-8'), headers=headers)
|
||||
|
||||
def check_last_seen(last_seen: datetime.datetime, seconds: int = 15):
    '''
    Check if a time is older than a given number of seconds.

    Args:
        last_seen: The time to check; may also be "" or None to mean "never seen".
        seconds: Age threshold in seconds.

    Returns:
        True if last_seen is empty/None or lies more than `seconds` seconds
        before the current local time, otherwise False.
    '''
    # The "never seen" cases must be tested first: subtracting "" or None
    # from a datetime raises TypeError.
    if last_seen is None or last_seen == "":
        return True
    age = datetime.datetime.now() - last_seen
    return age > datetime.timedelta(seconds=seconds)
|
Loading…
Reference in New Issue