diff --git a/wyzely_detect/__main__.py b/wyzely_detect/__main__.py index 937172e..1b33493 100644 --- a/wyzely_detect/__main__.py +++ b/wyzely_detect/__main__.py @@ -1,17 +1,14 @@ # import face_recognition -import cv2 -import dotenv from pathlib import Path -import os + +import cv2 # import hjson as json import torch from ultralytics import YOLO -import argparse - -from .utils import notify -from .utils import utils +from .utils import notify, utils +from .utils.cli_args import argparser DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" args = None @@ -27,137 +24,6 @@ def main(): global args # RUN_BY_COMPOSE = os.getenv("RUN_BY_COMPOSE") # Replace this with code to check for gpu - if Path(".env").is_file(): - dotenv.load_dotenv() - print("Loaded .env file") - else: - print("No .env file found") - - # TODO: If possible, move the argparse stuff to a separate file - # It's taking up too many lines in this file - argparser = argparse.ArgumentParser( - prog="Wyzely Detect", - description="Recognize faces/objects in a video stream (from a webcam or a security camera) and send notifications to your devices", # noqa: E501 - epilog=":)", - ) - - # required='RUN_SCALE' not in os.environ, - - argparser.add_argument( - "--run-scale", - # Set it to the env RUN_SCALE if it isn't blank, otherwise set it to 0.25 - default=os.environ["RUN_SCALE"] - if "RUN_SCALE" in os.environ and os.environ["RUN_SCALE"] != "" - # else 0.25, - else 1, - type=float, - help="The scale to run the detection at, default is 0.25", - ) - argparser.add_argument( - "--view-scale", - # Set it to the env VIEW_SCALE if it isn't blank, otherwise set it to 0.75 - default=os.environ["VIEW_SCALE"] - if "VIEW_SCALE" in os.environ and os.environ["VIEW_SCALE"] != "" - # else 0.75, - else 1, - type=float, - help="The scale to view the detection at, default is 0.75", - ) - - argparser.add_argument( - "--no-display", - default=os.environ["NO_DISPLAY"] - if "NO_DISPLAY" in os.environ and os.environ["NO_DISPLAY"] != "" - else False, - 
action="store_true", - help="Don't display the video feed", - ) - - argparser.add_argument( - "--confidence-threshold", - default=os.environ["CONFIDENCE_THRESHOLD"] - if "CONFIDENCE_THRESHOLD" in os.environ - and os.environ["CONFIDENCE_THRESHOLD"] != "" - else 0.6, - type=float, - help="The confidence threshold to use", - ) - - argparser.add_argument( - "--faces-directory", - default=os.environ["FACES_DIRECTORY"] - if "FACES_DIRECTORY" in os.environ and os.environ["FACES_DIRECTORY"] != "" - else "faces", - type=str, - help="The directory to store the faces. Can either contain images or subdirectories with images, the latter being the preferred method", # noqa: E501 - ) - argparser.add_argument( - "--detect-object", - nargs="*", - default=[], - type=str, - help="The object(s) to detect. Must be something the model is trained to detect", - ) - - stream_source = argparser.add_mutually_exclusive_group() - stream_source.add_argument( - "--url", - default=os.environ["URL"] - if "URL" in os.environ and os.environ["URL"] != "" - else None, # noqa: E501 - type=str, - help="The URL of the stream to use", - ) - stream_source.add_argument( - "--capture-device", - default=os.environ["CAPTURE_DEVICE"] - if "CAPTURE_DEVICE" in os.environ and os.environ["CAPTURE_DEVICE"] != "" - else 0, # noqa: E501 - type=int, - help="The capture device to use. Can also be a url.", - ) - - # Defaults for the stuff here and down are already set in notify.py. - # Setting them here just means that argparse will display the default values as defualt - # TODO: Perhaps just remove the default parameter and just add to the help message that the default is set is x - # TODO: Make ntfy optional in ntfy.py. 
Currently, unless there is a local or LAN instance of ntfy, this can't run offline - notifcation_services = argparser.add_argument_group("Notification Services") - notifcation_services.add_argument( - "--ntfy-url", - default=os.environ["NTFY_URL"] - if "NTFY_URL" in os.environ and os.environ["NTFY_URL"] != "" - else "https://ntfy.sh/wyzely-detect", - type=str, - help="The URL to send notifications to", - ) - - timers = argparser.add_argument_group("Timers") - timers.add_argument( - "--detection-duration", - default=os.environ["DETECTION_DURATION"] - if "DETECTION_DURATION" in os.environ and os.environ["DETECTION_DURATION"] != "" - else 2, - type=int, - help="The duration (in seconds) that an object must be detected for before sending a notification", - ) - timers.add_argument( - "--detection-window", - default=os.environ["DETECTION_WINDOW"] - if "DETECTION_WINDOW" in os.environ and os.environ["DETECTION_WINDOW"] != "" - else 15, - type=int, - help="The time (seconds) before the detection duration resets", - ) - timers.add_argument( - "--notification-window", - default=os.environ["NOTIFICATION_WINDOW"] - if "NOTIFICATION_WINDOW" in os.environ - and os.environ["NOTIFICATION_WINDOW"] != "" - else 30, - type=int, - help="The time (seconds) before another notification can be sent", - ) - args = argparser.parse_args() # Check if a CUDA GPU is available. If it is, set it via torch. 
If not, set it to cpu @@ -175,8 +41,8 @@ def main(): # Depending on if the user wants to use a stream or a capture device, # Set the video capture to the appropriate source - if args.url: - video_capture = cv2.VideoCapture(args.url) + if args.rtsp_url is not None: + video_capture = cv2.VideoCapture(args.rtsp_url) else: video_capture = cv2.VideoCapture(args.capture_device) @@ -216,7 +82,10 @@ def main(): # May be better to check every iteration, but this also works if path_to_faces_exists: if face_details := utils.recognize_face( - path_to_directory=path_to_faces, run_frame=run_frame + path_to_directory=path_to_faces, + run_frame=run_frame, + min_confidence=args.face_confidence_threshold, + no_remove_representations=args.no_remove_representations, ): plot_boxes.append(face_details) objects_and_peoples = notify.thing_detected( @@ -265,7 +134,7 @@ def main(): # print("---") # Now do stuff (if conf > 0.5) - if conf < args.confidence_threshold or ( + if conf < args.object_confidence_threshold or ( class_id not in args.detect_object and args.detect_object != [] ): # If the confidence is too low diff --git a/wyzely_detect/utils/cli_args.py b/wyzely_detect/utils/cli_args.py new file mode 100644 index 0000000..dcf2766 --- /dev/null +++ b/wyzely_detect/utils/cli_args.py @@ -0,0 +1,167 @@ +import argparse +import os +import dotenv +from pathlib import Path + +argparser = None + + +def set_argparse(): + global argparser + + if Path(".env").is_file(): + dotenv.load_dotenv() + print("Loaded .env file") + else: + print("No .env file found") + + + # One important thing to consider is that most function parameters are optional and have a default value + # However, with argparse, those are never used since argparse always passes something, even if it's None + argparser = argparse.ArgumentParser( + prog="Wyzely Detect", + description="Recognize faces/objects in a video stream (from a webcam or a security camera) and send notifications to your devices", # noqa: E501 + epilog=":)", + 
) + + + video_options = argparser.add_argument_group("Video Options") + stream_source = video_options.add_mutually_exclusive_group() + stream_source.add_argument( + "--rtsp-url", + default=os.environ["RTSP_URL"] + if "RTSP_URL" in os.environ and os.environ["RTSP_URL"] != "" + else None, # noqa: E501 + type=str, + help="RTSP camera URL", + ) + stream_source.add_argument( + "--capture-device", + default=os.environ["CAPTURE_DEVICE"] + if "CAPTURE_DEVICE" in os.environ and os.environ["CAPTURE_DEVICE"] != "" + else 0, # noqa: E501 + type=int, + help="Capture device number", + ) + video_options.add_argument( + "--run-scale", + # Set it to the env RUN_SCALE if it isn't blank, otherwise set it to 1 + default=os.environ["RUN_SCALE"] + if "RUN_SCALE" in os.environ and os.environ["RUN_SCALE"] != "" + # else 0.25, + else 1, + type=float, + help="The scale to run the detection at, default is 0.25", + ) + video_options.add_argument( + "--view-scale", + # Set it to the env VIEW_SCALE if it isn't blank, otherwise set it to 1 + default=os.environ["VIEW_SCALE"] + if "VIEW_SCALE" in os.environ and os.environ["VIEW_SCALE"] != "" + # else 0.75, + else 1, + type=float, + help="The scale to view the detection at, default is 0.75", + ) + + video_options.add_argument( + "--no-display", + default=os.environ["NO_DISPLAY"] + if "NO_DISPLAY" in os.environ and os.environ["NO_DISPLAY"] != "" + else False, + action="store_true", + help="Don't display the video feed", + ) + + + notifcation_services = argparser.add_argument_group("Notification Services") + notifcation_services.add_argument( + "--ntfy-url", + default=os.environ["NTFY_URL"] + if "NTFY_URL" in os.environ and os.environ["NTFY_URL"] != "" + else None, + type=str, + help="The URL to send notifications to", + ) + + timers = argparser.add_argument_group("Timers") + timers.add_argument( + "--detection-duration", + default=os.environ["DETECTION_DURATION"] + if "DETECTION_DURATION" in os.environ and os.environ["DETECTION_DURATION"] != 
"" + else 2, + type=int, + help="The duration (in seconds) that an object must be detected for before sending a notification", + ) + timers.add_argument( + "--detection-window", + default=os.environ["DETECTION_WINDOW"] + if "DETECTION_WINDOW" in os.environ and os.environ["DETECTION_WINDOW"] != "" + else 15, + type=int, + help="The time (seconds) before the detection duration resets", + ) + timers.add_argument( + "--notification-window", + default=os.environ["NOTIFICATION_WINDOW"] + if "NOTIFICATION_WINDOW" in os.environ + and os.environ["NOTIFICATION_WINDOW"] != "" + else 30, + type=int, + help="The time (seconds) before another notification can be sent", + ) + + + face_recognition = argparser.add_argument_group("Face Recognition options") + face_recognition.add_argument( + "--faces-directory", + default=os.environ["FACES_DIRECTORY"] + if "FACES_DIRECTORY" in os.environ and os.environ["FACES_DIRECTORY"] != "" + else "faces", + type=str, + help="The directory to store the faces. Can either contain images or subdirectories with images, the latter being the preferred method", # noqa: E501 + ) + face_recognition.add_argument( + "--face-confidence-threshold", + default=os.environ["FACE_CONFIDENCE_THRESHOLD"] + if "FACE_CONFIDENCE_THRESHOLD" in os.environ + and os.environ["FACE_CONFIDENCE_THRESHOLD"] != "" + else 0.3, + type=float, + help="The confidence (currently cosine similarity) threshold to use for face recognition", + ) + face_recognition.add_argument( + "--no-remove-representations", + default=os.environ["NO_REMOVE_REPRESENTATIONS"] + if "NO_REMOVE_REPRESENTATIONS" in os.environ + and os.environ["NO_REMOVE_REPRESENTATIONS"] != "" + else False, + action="store_true", + help="Don't remove representations_.pkl at the start of the program. 
Greatly improves startup time, but doesn't take into account changes to the faces directory since it was created", # noqa: E501 + ) + + + + object_detection = argparser.add_argument_group("Object Detection options") + object_detection.add_argument( + "--detect-object", + nargs="*", + default=[], + type=str, + help="The object(s) to detect. Must be something the model is trained to detect", + ) + object_detection.add_argument( + "--object-confidence-threshold", + default=os.environ["OBJECT_CONFIDENCE_THRESHOLD"] + if "OBJECT_CONFIDENCE_THRESHOLD" in os.environ + and os.environ["OBJECT_CONFIDENCE_THRESHOLD"] != "" + else 0.6, + type=float, + help="The confidence threshold to use", + ) + + # return argparser + + +# This will run when this file is imported +set_argparse() diff --git a/wyzely_detect/utils/notify.py b/wyzely_detect/utils/notify.py index fce34de..d0c6b83 100644 --- a/wyzely_detect/utils/notify.py +++ b/wyzely_detect/utils/notify.py @@ -104,18 +104,23 @@ def thing_detected( ): respective_type[thing_name]["last_notification_time"] = time.time() print(f"Detected {thing_name} for {detection_duration} seconds") - headers = construct_ntfy_headers( - title=f"{thing_name} detected", - tag="rotating_light", - priority="default", - ) - send_notification( - data=f"{thing_name} detected for {detection_duration} seconds", - headers=headers, - url=ntfy_url, - ) - # Reset the detection duration - print("Just sent a notification - resetting detection duration") + if ntfy_url is None: + print( + "ntfy_url is None. Not sending notification. 
Set ntfy_url to send notifications" + ) + else: + headers = construct_ntfy_headers( + title=f"{thing_name} detected", + tag="rotating_light", + priority="default", + ) + send_notification( + data=f"{thing_name} detected for {detection_duration} seconds", + headers=headers, + url=ntfy_url, + ) + # Reset the detection duration + print("Just sent a notification - resetting detection duration") respective_type[thing_name]["detection_duration"] = 0 # Take the aliased objects_and_peoples and update the respective dictionary diff --git a/wyzely_detect/utils/utils.py b/wyzely_detect/utils/utils.py index f0114ca..d7ffddf 100644 --- a/wyzely_detect/utils/utils.py +++ b/wyzely_detect/utils/utils.py @@ -68,6 +68,8 @@ def recognize_face( path_to_directory: Path = Path("faces"), # opencv image run_frame: np.ndarray = None, + min_confidence: float = 0.3, + no_remove_representations: bool = False, ) -> np.ndarray: """ Accepts a path to a directory of images of faces to be used as a refference @@ -94,13 +96,16 @@ def recognize_face( global first_face_try # If it's the first time the function is being run, remove representations_arcface.pkl, if it exists - if first_face_try: + if first_face_try and not no_remove_representations: try: path_to_directory.joinpath("representations_arcface.pkl").unlink() print("Removing representations_arcface.pkl") except FileNotFoundError: print("representations_arcface.pkl does not exist") first_face_try = False + elif first_face_try and no_remove_representations: + print("Not attempting to remove representations_arcface.pkl") + first_face_try = False # face_dataframes is a vanilla list of dataframes # It seems face_dataframes is empty if the face database (directory) doesn't exist. 
Seems to work if it's empty though @@ -134,7 +139,7 @@ def recognize_face( # So we can just grab the path from there # iloc = Integer LOCation path_to_image = Path(df.iloc[-1]["identity"]) - # If the parent name is the same as the path to the database, then set label to the image name instead of the parent directory name + # If the parent name is the same as the path to the database, then set label to the image name instead of the parent name if path_to_image.parent == Path(path_to_directory): label = path_to_image.name else: @@ -149,15 +154,13 @@ def recognize_face( "y2": df.iloc[-1]["source_y"] + df.iloc[-1]["source_h"], } # After some brief testing, it seems positive matches are > 0.3 - distance = df.iloc[-1]["ArcFace_cosine"] - # TODO: Make this a CLI argument - if distance < 0.3: + cosine_similarity = df.iloc[-1]["ArcFace_cosine"] + if cosine_similarity < min_confidence: return None - # if 0.5 < distance < 0.7: # label = "Unknown" to_return = dict(label=label, **coordinates) print( - f"Confindence: {distance}, filname: {path_to_image.name}, to_return: {to_return}" + f"Cosine similarity: {cosine_similarity}, filname: {path_to_image.name}, to_return: {to_return}" ) return to_return