Human Detection using CCTV Footages #747

Merged · 1 commit · Jul 16, 2024
1 change: 1 addition & 0 deletions Human Detection using CCTV Footages/Dataset/README.md
@@ -0,0 +1 @@
The Human Detection dataset used to build this project was fetched from https://www.kaggle.com/datasets/jonathannield/cctv-human-pose-estimation-dataset. You can check out the dataset at that link.
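
For reproducibility, here is a minimal sketch of fetching the dataset with the official Kaggle API client; it assumes `pip install kaggle` and a configured `~/.kaggle/kaggle.json` token, neither of which ships with this project:

```python
# Minimal sketch: download the CCTV dataset via the Kaggle API client.
# Assumes `pip install kaggle` and a valid ~/.kaggle/kaggle.json token.
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()  # reads the kaggle.json credentials
api.dataset_download_files(
    'jonathannield/cctv-human-pose-estimation-dataset',
    path='Dataset',  # extract next to this README (hypothetical target)
    unzip=True,
)
```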
Binary file added Human Detection using CCTV Footages/Images/1.jpg
Binary file added Human Detection using CCTV Footages/Images/2.jpg
Binary file added Human Detection using CCTV Footages/Images/3.jpg
Binary file added Human Detection using CCTV Footages/Images/4.png
Binary file added Human Detection using CCTV Footages/Images/5.png
195 changes: 195 additions & 0 deletions Human Detection using CCTV Footages/Model/detect.py
@@ -0,0 +1,195 @@
import argparse
import time
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
    scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel


def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz, trace = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if trace:
        model = TracedModel(model, device, opt.img_size)

    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load weights first, then move to device; load_state_dict returns a
        # result object, not the model, so the calls cannot be chained
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Warmup
        if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
            old_img_b = img.shape[0]
            old_img_h = img.shape[2]
            old_img_w = img.shape[3]
            for i in range(3):
                model(img, augment=opt.augment)[0]

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        t2 = time_synchronized()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t3 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1)

            # Print time (inference + NMS)
            print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                    print(f"The image with the result is saved in: {save_path}")
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        #print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov7.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--no-trace', action='store_true', help="don't trace model")
    opt = parser.parse_args()
    print(opt)
    #check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov7.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
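
For reference, a typical invocation of the script above, using only flags defined in its argparse block, is `python detect.py --weights yolov7.pt --source inference/images --conf-thres 0.25 --save-txt`. Passing `--source 0` reads from a webcam, and an `rtsp://` URL streams directly from a CCTV camera.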
1 change: 1 addition & 0 deletions Human Detection using CCTV Footages/Model/yolo_model.ipynb

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions Human Detection using CCTV Footages/README.md
@@ -0,0 +1,77 @@
# HUMAN DETECTION USING CCTV FOOTAGES

![image.png](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSrLn9Zzwpzvpp2FLu0n8bdwOIR26fARzsV4A&usqp=CAU)

# GOAL
Detecting humans in CCTV images.

# DESCRIPTION
This is a tutorial demonstrating how to train a YOLOv4 people detector using [Darknet](https://github.com/AlexeyAB/darknet) and the Kaggle CCTV human dataset, which contains annotated images of people in various scenarios. YOLOv4 is a state-of-the-art object detection model known for its speed and accuracy, making it suitable for real-time applications such as surveillance and autonomous vehicles. Follow along to understand the process of training your own custom object detector for people in images and video streams.
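
For orientation, custom Darknet training is typically launched with a command of the form `./darknet detector train data/obj.data cfg/yolov4-custom.cfg yolov4.conv.137` (file names follow the AlexeyAB Darknet README; substitute your own `.data` and `.cfg` files).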


# DATASET
The dataset for this project is taken from Kaggle: https://www.kaggle.com/datasets/jonathannield/cctv-human-pose-estimation-dataset

The dataset contains images of humans captured by CCTV cameras installed in various places. The raw images are not directly suitable for training a YOLO model, so we first annotated the data in the format YOLO expects, using LabelImg.

You can refer to https://machinelearningknowledge.ai/train-custom-yolov4-model-for-object-detection-in-google-colab/ for custom data preparation.
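
To make the annotation format concrete, here is a minimal sketch, with hypothetical pixel values, of the conversion LabelImg performs when it writes a YOLO label line:

```python
# Minimal sketch: convert one pixel-space box to a YOLO label line.
# YOLO format: <class> <x_center> <y_center> <width> <height>, all
# normalized to [0, 1] by the image size. Values here are hypothetical.
def to_yolo_line(cls_id, x1, y1, x2, y2, img_w, img_h):
    x_c = (x1 + x2) / 2 / img_w
    y_c = (y1 + y2) / 2 / img_h
    w = (x2 - x1) / img_w
    h = (y2 - y1) / img_h
    return f"{cls_id} {x_c:.6f} {y_c:.6f} {w:.6f} {h:.6f}"

# A 1920x1080 CCTV frame with one person at pixels (400, 200)-(520, 560):
print(to_yolo_line(0, 400, 200, 520, 560, 1920, 1080))
# -> "0 0.239583 0.351852 0.062500 0.333333"
```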


# WHAT I HAVE DONE

The detection script runs a YOLO model, configurable through command-line arguments:
- It supports both image and video inputs, including live streams from webcams and common streaming protocols (RTSP, RTMP, HTTP).
- Detection results can be displayed, saved as images or videos, and exported as text files (a short sketch of reading these files back follows at the end of this section).
- It utilizes PyTorch for inference and CUDA for GPU acceleration when available.
- Training the model requires a GPU; if you do not have one, train on Google Colab instead. Refer to the yolo_model.ipynb file for the detailed training and testing procedure.

The overall workflow:
* [Setup](#setup)
* [Preparing training data](#preparing)
* [Training on a local PC](#training-locally)
* [Testing the custom-trained YOLOv4 model](#testing)
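
As a companion to the text-file export mentioned above, here is a minimal sketch (the run directory and file name are hypothetical) of reading a `--save-txt` label file back into pixel coordinates:

```python
# Minimal sketch: read a --save-txt label file back into pixel boxes.
# Each line is "<class> <x_c> <y_c> <w> <h> [conf]", normalized to [0, 1].
from pathlib import Path

def read_yolo_labels(txt_path, img_w, img_h):
    boxes = []
    for line in Path(txt_path).read_text().splitlines():
        cls_id, x_c, y_c, w, h, *rest = map(float, line.split())
        x1 = (x_c - w / 2) * img_w
        y1 = (y_c - h / 2) * img_h
        x2 = (x_c + w / 2) * img_w
        y2 = (y_c + h / 2) * img_h
        conf = rest[0] if rest else None  # present only with --save-conf
        boxes.append((int(cls_id), x1, y1, x2, y2, conf))
    return boxes

# Hypothetical run directory produced by detect.py:
print(read_yolo_labels('runs/detect/exp/labels/1.txt', 1920, 1080))
```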

# MODEL IMPLEMENTED
### DARKNET:-
Darknet, the neural network framework developed by Joseph Redmon, has been instrumental in the evolution of YOLO models and numerous other deep learning projects. Offering flexibility and efficiency, Darknet serves as the backbone for implementing cutting-edge algorithms like YOLOv4 and YOLOv7. Its modular architecture facilitates rapid prototyping and experimentation, enabling researchers and developers to push the boundaries of object detection and recognition. Moreover, Darknet's open-source nature fosters collaboration and innovation within the computer vision community, driving continual advancements in human detection and beyond.

### YOLOv4:-
You Only Look Once (YOLO) version 4 (YOLOv4) has been a significant advancement in human detection from CCTV footage, offering real-time processing capabilities and improved accuracy compared to its predecessors. YOLOv4 utilizes a single neural network to predict bounding boxes and class probabilities directly from full images in one evaluation. While it excels in detecting human figures swiftly and accurately, it falls short in discerning finer details such as facial features or subtle gestures. This limitation poses a challenge in scenarios where identifying specific individuals or analyzing intricate behaviors is crucial, highlighting the need for further refinement in detection algorithms.
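
To make the one-evaluation idea concrete, here is a minimal sketch of a single YOLOv4 forward pass through OpenCV's DNN module, keeping only the person class. The weight and config file names are the standard Darknet release names rather than files in this PR, and an OpenCV build with YOLOv4 support (4.4+) is assumed:

```python
# Minimal sketch: one forward pass of Darknet YOLOv4 via OpenCV DNN,
# keeping only "person" detections (class 0 in COCO). File names assumed.
import cv2
import numpy as np

net = cv2.dnn.readNetFromDarknet('yolov4.cfg', 'yolov4.weights')
img = cv2.imread('cctv_frame.jpg')  # hypothetical input frame
h, w = img.shape[:2]

blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
outputs = net.forward(net.getUnconnectedOutLayersNames())  # one pass, all scales

boxes, scores = [], []
for out in outputs:
    for det in out:  # det = [cx, cy, bw, bh, objectness, class scores...]
        class_scores = det[5:]
        if np.argmax(class_scores) == 0 and class_scores[0] > 0.25:  # person
            cx, cy, bw, bh = det[:4] * np.array([w, h, w, h])
            boxes.append([int(cx - bw / 2), int(cy - bh / 2), int(bw), int(bh)])
            scores.append(float(class_scores[0]))

# Non-maximum suppression collapses overlapping boxes into one per person.
for i in cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45):
    x, y, bw, bh = boxes[int(i)]
    cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 0), 2)
cv2.imwrite('detections.jpg', img)
```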

### YOLOv7:-
In contrast, YOLO version 7 (YOLOv7) represents the latest evolution in human detection technology, boasting enhanced performance and robustness over YOLOv4. With advancements in model architecture and training techniques, YOLOv7 demonstrates superior accuracy in identifying human subjects from CCTV feeds while also addressing some of the limitations of its predecessors. Its improved ability to capture finer details ensures better recognition of human attributes and behaviors, making it a preferred choice for applications requiring precise analysis and tracking in complex environments.

# LIBRARIES NEEDED
- opencv-python==4.1.1.26
- lxml
- tqdm
- tensorflow==2.3.0
- absl-py
- easydict
- matplotlib
- pillow
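
These are also pinned in the `requirements.txt` added by this PR, so `pip install -r requirements.txt` installs them in one step.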

# EDA RESULTS
#### Approach using YOLOv4
![image.png](https://miro.medium.com/max/785/1*f2diI7O28j2A875FwQPMJA.jpeg)

This model fails to perform well on distant and finer objects.

#### Approach using YOLOv7
![image.png](https://github.com/WongKinYiu/yolov7/raw/main/figure/performance.png)

This model performs much better than YOLOv4 on distant objects.

# COMPARING PERFORMANCE
The training loss is lower for YOLOv7 than for YOLOv4, so YOLOv7 is the more accurate model.
#### Performance of YOLOv4 vs. performance of YOLOv7
![alt text](<./Images/Screenshot 2024-05-11 173921.png>)

##### YOLOv7

![alt text](<./Images/Screenshot 2024-05-11 173858.png>)

##### YOLOv4

# CONCLUSION
We implemented two approaches, YOLOv4 and YOLOv7; YOLOv7 gives the better accuracy.
8 changes: 8 additions & 0 deletions Human Detection using CCTV Footages/requirements.txt
@@ -0,0 +1,8 @@
opencv-python==4.1.1.26
lxml
tqdm
tensorflow==2.3.0
absl-py
easydict
matplotlib
pillow