From d3c95ecb0c1996b7282c83147532e9714d7bcb59 Mon Sep 17 00:00:00 2001
From: Johann <xforeverjlx@gmail.com>
Date: Tue, 17 Oct 2023 00:53:58 +0000
Subject: [PATCH 1/6] code clean up, push up to end of SP23

---
 .gitignore                                    |   6 +-
 ml-model/model/image_to_latex.py              |  22 ++
 ml-model/model/latex_to_tree.py               |  93 +++++++
 ml-model/yolov5/detect_copy1.py               | 259 ++++++++++++++++++
 ml-model/yolov5/input_info/names.txt          |   3 -
 ml-model/yolov5/main.py                       |   9 +-
 .../yolov5/ranking/dataset/exp/labels/1.txt   |   1 +
 .../yolov5/ranking/dataset/exp/labels/2.txt   |   1 +
 .../yolov5/ranking/dataset/exp/labels/3.txt   |   1 +
 .../exp2 => dcopy/exp}/labels/ex1_0.txt       |   0
 .../yolov5/ranking/dcopy/exp/labels/ex1_1.txt |   1 +
 ml-model/yolov5/ranking/img_database.csv      |  20 +-
 ml-model/yolov5/ranking/img_database_copy.csv |  18 ++
 ml-model/yolov5/ranking/top5.csv              |   9 +-
 ml-model/yolov5/tests3.py                     |  30 ++
 15 files changed, 441 insertions(+), 32 deletions(-)
 create mode 100644 ml-model/model/image_to_latex.py
 create mode 100644 ml-model/model/latex_to_tree.py
 create mode 100644 ml-model/yolov5/detect_copy1.py
 delete mode 100644 ml-model/yolov5/input_info/names.txt
 create mode 100644 ml-model/yolov5/ranking/dataset/exp/labels/1.txt
 create mode 100644 ml-model/yolov5/ranking/dataset/exp/labels/2.txt
 create mode 100644 ml-model/yolov5/ranking/dataset/exp/labels/3.txt
 rename ml-model/yolov5/ranking/{dataset/exp2 => dcopy/exp}/labels/ex1_0.txt (100%)
 create mode 100644 ml-model/yolov5/ranking/dcopy/exp/labels/ex1_1.txt
 create mode 100644 ml-model/yolov5/ranking/img_database_copy.csv
 create mode 100644 ml-model/yolov5/tests3.py

diff --git a/.gitignore b/.gitignore
index 57fc93b2e..120476eb3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,8 @@
 ml-model/formula_images/
 ml-model/output/
 ml-model/crop_formula_images/
-
+ml-model/yolov5/config.py
+.vscode/
 
 output.zip
 *.zip
@@ -15,6 +16,7 @@ data/
 simese_data/
 venv/
 ml-model/model.pt
+ml-model/yolov5/preprocess_data/
 training_data/
 *.png
 im2latex/
@@ -25,4 +27,4 @@ venv/
 ml-model/paths_output.csv
 
 ml-model/web/__pycache__/
-
+datasets/
\ No newline at end of file
diff --git a/ml-model/model/image_to_latex.py b/ml-model/model/image_to_latex.py
new file mode 100644
index 000000000..0739031eb
--- /dev/null
+++ b/ml-model/model/image_to_latex.py
@@ -0,0 +1,22 @@
+from rapid_latex_ocr import LatexOCR
+
+image_resizer_path = 'models/image_resizer.onnx'
+encoder_path = 'models/encoder.onnx'
+decoder_path = 'models/decoder.onnx'
+tokenizer_json = 'models/tokenizer.json'
+model = LatexOCR(image_resizer_path=image_resizer_path,
+                encoder_path=encoder_path,
+                decoder_path=decoder_path,
+                tokenizer_json=tokenizer_json)
+
+img_path = "tests/test_files/6.png"
+with open(img_path, "rb") as f:
+    data = f. read()
+
+result, elapse = model(data)
+
+print(result)
+# {\frac{x^{2}}{a^{2}}}-{\frac{y^{2}}{b^{2}}}=1
+
+print(elapse)
+# 0.4131628000000003
\ No newline at end of file
diff --git a/ml-model/model/latex_to_tree.py b/ml-model/model/latex_to_tree.py
new file mode 100644
index 000000000..310fa9191
--- /dev/null
+++ b/ml-model/model/latex_to_tree.py
@@ -0,0 +1,93 @@
+import sympy as sp
+from sympy.parsing.latex import parse_latex
+from zss import Node, distance
+import networkx as nx
+import matplotlib.pyplot as plt
+
+
+def zss_to_nx(node, graph=None, parent=None):
+    if graph is None:
+        graph = nx.DiGraph()
+    graph.add_node(id(node), label=node.label)
+    if parent is not None:
+        graph.add_edge(id(parent), id(node))
+    for child in node.children:
+        zss_to_nx(child, graph, node)
+    return graph
+
+
+# Define some complex LaTeX expressions
+# expr1 represents our query
+latex_expr1 = r"\nabla J(\theta) = \frac{1}{m} \sum_{i=1}^m (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"
+# expr represents our OCR'd expression from the file
+latex_expr2 = r"\nabla J(\Theta) = \frac{1}{m} \sum_{i=1}^m (h_\theta(z^{(i)}) - y^{(i)}) z^{(i)}"
+
+# Convert LaTeX to SymPy
+sympy_expr1 = parse_latex(latex_expr1)
+sympy_expr2 = parse_latex(latex_expr2)
+
+
+def sympy_to_zss(expr):
+    if isinstance(expr, sp.Symbol) or isinstance(expr, sp.Number):
+        return Node(str(expr))
+    else:
+        node = Node(str(expr.func))
+        for arg in expr.args:
+            child_node = sympy_to_zss(arg)
+            node.addkid(child_node)
+        return node
+
+
+# Convert the SymPy expression to a ZSS tree
+zss_tree1 = sympy_to_zss(sympy_expr1)
+zss_tree2 = sympy_to_zss(sympy_expr2)
+# print(zss_tree1)
+# print(zss_tree2)
+
+# Assuming zss_tree1 and zss_tree2 are your ZSS trees
+nx_tree1 = zss_to_nx(zss_tree1)
+nx_tree2 = zss_to_nx(zss_tree2)
+
+
+def hierarchy_pos(G, root=None, width=1., vert_gap=0.2, vert_loc=0, xcenter=0.5):
+    pos = _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter)
+    return pos
+
+
+def _hierarchy_pos(G, root, width=1., vert_gap=0.2, vert_loc=0, xcenter=0.5, pos=None, parent=None, parsed=[]):
+    if pos is None:
+        pos = {root: (xcenter, vert_loc)}
+    else:
+        pos[root] = (xcenter, vert_loc)
+    children = list(G.neighbors(root))
+    if not isinstance(G, nx.DiGraph) and parent is not None:
+        children.remove(parent)
+    if len(children) != 0:
+        dx = width / len(children)
+        nextx = xcenter - width/2 - dx/2
+        for child in children:
+            nextx += dx
+            pos = _hierarchy_pos(G, child, width=dx, vert_gap=vert_gap,
+                                 vert_loc=vert_loc-vert_gap, xcenter=nextx,
+                                 pos=pos, parent=root, parsed=parsed)
+    return pos
+
+
+def draw_tree(tree):
+    pos = hierarchy_pos(tree, root=list(tree.nodes())
+                        [0])  # Specify the root node
+    labels = nx.get_node_attributes(tree, 'label')
+    nx.draw(tree, pos, labels=labels, with_labels=True,
+            node_size=3000, node_color='lightblue', font_size=10)
+    plt.show()
+
+
+# Draw the trees
+# draw_tree(nx_tree1)
+# draw_tree(nx_tree2)
+
+# Compare ZSS trees
+# make update non-zero to see difference in more updated tree vs. not
+distance = distance(zss_tree1, zss_tree2, get_children=Node.get_children,
+                    insert_cost=lambda node: 10, remove_cost=lambda node: 10, update_cost=lambda a, b: 1)
+print(distance)  # Output the tree edit distance
diff --git a/ml-model/yolov5/detect_copy1.py b/ml-model/yolov5/detect_copy1.py
new file mode 100644
index 000000000..5e83ac43f
--- /dev/null
+++ b/ml-model/yolov5/detect_copy1.py
@@ -0,0 +1,259 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
+
+Usage - sources:
+    $ python detect.py --weights yolov5s.pt --source 0                               # webcam
+                                                     img.jpg                         # image
+                                                     vid.mp4                         # video
+                                                     screen                          # screenshot
+                                                     path/                           # directory
+                                                     'path/*.jpg'                    # glob
+                                                     'https://youtu.be/Zgi9g1ksQHc'  # YouTube
+                                                     'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
+
+Usage - formats:
+    $ python detect.py --weights yolov5s.pt                 # PyTorch
+                                 yolov5s.torchscript        # TorchScript
+                                 yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                 yolov5s_openvino_model     # OpenVINO
+                                 yolov5s.engine             # TensorRT
+                                 yolov5s.mlmodel            # CoreML (macOS-only)
+                                 yolov5s_saved_model        # TensorFlow SavedModel
+                                 yolov5s.pb                 # TensorFlow GraphDef
+                                 yolov5s.tflite             # TensorFlow Lite
+                                 yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+                                 yolov5s_paddle_model       # PaddlePaddle
+"""
+
+import argparse
+import os
+import platform
+import sys
+from pathlib import Path
+
+import torch
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+from models.common import DetectMultiBackend
+from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
+from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
+                           increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
+from utils.plots import Annotator, colors, save_one_box
+from utils.torch_utils import select_device, smart_inference_mode
+
+
+@smart_inference_mode()
+def run(
+        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
+        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
+        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
+        imgsz=(640, 640),  # inference size (height, width)
+        conf_thres=0.25,  # confidence threshold
+        iou_thres=0.45,  # NMS IOU threshold
+        max_det=1000,  # maximum detections per image
+        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        view_img=False,  # show results
+        save_txt=False,  # save results to *.txt
+        save_conf=False,  # save confidences in --save-txt labels
+        save_crop=False,  # save cropped prediction boxes
+        nosave=False,  # do not save images/videos
+        classes=None,  # filter by class: --class 0, or --class 0 2 3
+        agnostic_nms=False,  # class-agnostic NMS
+        augment=False,  # augmented inference
+        visualize=False,  # visualize features
+        update=False,  # update all models
+        project=ROOT / 'runs/detect',  # save results to project/name
+        name='exp',  # save results to project/name
+        exist_ok=False,  # existing project/name ok, do not increment
+        line_thickness=3,  # bounding box thickness (pixels)
+        hide_labels=False,  # hide labels
+        hide_conf=False,  # hide confidences
+        half=False,  # use FP16 half-precision inference
+        dnn=False,  # use OpenCV DNN for ONNX inference
+        vid_stride=1,  # video frame-rate stride
+):
+    source = str(source)
+    save_img = not nosave and not source.endswith('.txt')  # save inference images
+    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
+    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
+    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
+    screenshot = source.lower().startswith('screen')
+    if is_url and is_file:
+        source = check_file(source)  # download
+
+    # Directories
+    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
+    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
+
+    # Load model
+    device = select_device(device)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+    stride, names, pt = model.stride, model.names, model.pt
+    imgsz = check_img_size(imgsz, s=stride)  # check image size
+
+    # Dataloader
+    bs = 1  # batch_size
+    if webcam:
+        view_img = check_imshow(warn=True)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
+        bs = len(dataset)
+    elif screenshot:
+        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
+    else:
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
+    vid_path, vid_writer = [None] * bs, [None] * bs
+
+    # Run inference
+    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
+    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
+    for path, im, im0s, vid_cap, s in dataset:
+        with dt[0]:
+            im = torch.from_numpy(im).to(model.device)
+            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
+            im /= 255  # 0 - 255 to 0.0 - 1.0
+            if len(im.shape) == 3:
+                im = im[None]  # expand for batch dim
+
+        # Inference
+        with dt[1]:
+            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+            pred = model(im, augment=augment, visualize=visualize)
+
+        # NMS
+        with dt[2]:
+            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
+
+        # Second-stage classifier (optional)
+        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
+
+        # Process predictions
+        for i, det in enumerate(pred):  # per image
+            seen += 1
+            if webcam:  # batch_size >= 1
+                p, im0, frame = path[i], im0s[i].copy(), dataset.count
+                s += f'{i}: '
+            else:
+                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
+
+            p = Path(p)  # to Path
+            save_path = str(save_dir / p.name)  # im.jpg
+            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
+            s += '%gx%g ' % im.shape[2:]  # print string
+            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
+            imc = im0.copy() if save_crop else im0  # for save_crop
+            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
+            if len(det):
+                # Rescale boxes from img_size to im0 size
+                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
+
+                # Print results
+                for c in det[:, 5].unique():
+                    n = (det[:, 5] == c).sum()  # detections per class
+                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
+
+                # Write results
+                for *xyxy, conf, cls in reversed(det):
+                    if save_txt:  # Write to file
+                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
+                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+                        with open(f'{txt_path}.txt', 'a') as f:
+                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
+
+                    if save_img or save_crop or view_img:  # Add bbox to image
+                        c = int(cls)  # integer class
+                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
+                        annotator.box_label(xyxy, label, color=colors(c, True))
+                    if save_crop:
+                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}__.jpg', BGR=True)
+
+            # Stream results
+            im0 = annotator.result()
+            if view_img:
+                if platform.system() == 'Linux' and p not in windows:
+                    windows.append(p)
+                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
+                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
+                cv2.imshow(str(p), im0)
+                cv2.waitKey(1)  # 1 millisecond
+
+            # Save results (image with detections)
+            if save_img:
+                if dataset.mode == 'image':
+                    cv2.imwrite(save_path, im0)
+                else:  # 'video' or 'stream'
+                    if vid_path[i] != save_path:  # new video
+                        vid_path[i] = save_path
+                        if isinstance(vid_writer[i], cv2.VideoWriter):
+                            vid_writer[i].release()  # release previous video writer
+                        if vid_cap:  # video
+                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
+                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                        else:  # stream
+                            fps, w, h = 30, im0.shape[1], im0.shape[0]
+                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
+                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
+                    vid_writer[i].write(im0)
+
+        # Print time (inference-only)
+        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
+
+    # Print results
+    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
+    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
+    if save_txt or save_img:
+        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
+        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
+    if update:
+        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)
+
+
+def parse_opt():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path or triton URL')
+    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
+    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
+    parser.add_argument('--conf-thres', type=float, default=0.50, help='confidence threshold')
+    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
+    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--view-img', action='store_true', help='show results')
+    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
+    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
+    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
+    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
+    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
+    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
+    parser.add_argument('--augment', action='store_true', help='augmented inference')
+    parser.add_argument('--visualize', action='store_true', help='visualize features')
+    parser.add_argument('--update', action='store_true', help='update all models')
+    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
+    parser.add_argument('--name', default='exp', help='save results to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
+    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
+    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
+    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
+    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
+    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
+    opt = parser.parse_args()
+    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
+    print_args(vars(opt))
+    return opt
+
+
+def main(opt):
+    check_requirements(exclude=('tensorboard', 'thop'))
+    run(**vars(opt))
+
+
+if __name__ == "__main__":
+    opt = parse_opt()
+    main(opt)
diff --git a/ml-model/yolov5/input_info/names.txt b/ml-model/yolov5/input_info/names.txt
deleted file mode 100644
index bc337535d..000000000
--- a/ml-model/yolov5/input_info/names.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-mathsearch-intermediary
-012330fd-7c87-4236-8f4c-b39f3ea72968
-0c923fac-14a1-4f5f-ad1f-88c4e168693b
\ No newline at end of file
diff --git a/ml-model/yolov5/main.py b/ml-model/yolov5/main.py
index 00387c43a..67be5a608 100644
--- a/ml-model/yolov5/main.py
+++ b/ml-model/yolov5/main.py
@@ -80,8 +80,9 @@ def send_result_to_frontend(pdf_name):
         reader = csv.reader(f, delimiter=',')
         for row in reader:
             # adding page number and coords for each re-rank
-            # # result_coords += str(int(row[0])+1) + " "
-            result_coords += row[0] + " "
+            result_coords += str(int(row[0])+1) + " "
+
+            # result_coords += row[0] + " "
             result_coords += row[3] + " "
             result_coords += row[4] + " "
             result_coords += row[5] + " "
@@ -141,7 +142,7 @@ def download_files(pdf_name, target_name):
     print(target_name)
 
     remove_files()
-    time.sleep(10)
+    time.sleep(5)
     download_files(pdf_name,target_name)
 
     # prefix example:
@@ -153,4 +154,4 @@ def download_files(pdf_name, target_name):
 
     main(pdf_image_prefix,local_target)
     print("finished running yolo! sending results to frontend...")
-    send_result_to_frontend(pdf_name)
\ No newline at end of file
+    send_result_to_frontend(pdf_name)
diff --git a/ml-model/yolov5/ranking/dataset/exp/labels/1.txt b/ml-model/yolov5/ranking/dataset/exp/labels/1.txt
new file mode 100644
index 000000000..dbe4a65e5
--- /dev/null
+++ b/ml-model/yolov5/ranking/dataset/exp/labels/1.txt
@@ -0,0 +1 @@
+0 0.492647 0.564091 0.33 0.0518182
diff --git a/ml-model/yolov5/ranking/dataset/exp/labels/2.txt b/ml-model/yolov5/ranking/dataset/exp/labels/2.txt
new file mode 100644
index 000000000..c93f1a676
--- /dev/null
+++ b/ml-model/yolov5/ranking/dataset/exp/labels/2.txt
@@ -0,0 +1 @@
+0 0.498235 0.655455 0.289412 0.0309091
diff --git a/ml-model/yolov5/ranking/dataset/exp/labels/3.txt b/ml-model/yolov5/ranking/dataset/exp/labels/3.txt
new file mode 100644
index 000000000..438d0132b
--- /dev/null
+++ b/ml-model/yolov5/ranking/dataset/exp/labels/3.txt
@@ -0,0 +1 @@
+0 0.504706 0.387273 0.295294 0.0545455
diff --git a/ml-model/yolov5/ranking/dataset/exp2/labels/ex1_0.txt b/ml-model/yolov5/ranking/dcopy/exp/labels/ex1_0.txt
similarity index 100%
rename from ml-model/yolov5/ranking/dataset/exp2/labels/ex1_0.txt
rename to ml-model/yolov5/ranking/dcopy/exp/labels/ex1_0.txt
diff --git a/ml-model/yolov5/ranking/dcopy/exp/labels/ex1_1.txt b/ml-model/yolov5/ranking/dcopy/exp/labels/ex1_1.txt
new file mode 100644
index 000000000..84dd4b75b
--- /dev/null
+++ b/ml-model/yolov5/ranking/dcopy/exp/labels/ex1_1.txt
@@ -0,0 +1 @@
+0 0.330834 0.436236 0.277694 0.108673
diff --git a/ml-model/yolov5/ranking/img_database.csv b/ml-model/yolov5/ranking/img_database.csv
index f0bc8409e..de518aa49 100644
--- a/ml-model/yolov5/ranking/img_database.csv
+++ b/ml-model/yolov5/ranking/img_database.csv
@@ -1,18 +1,4 @@
 ,image_name,image_source,coo_1,coo_2,coo_3,coo_4
-0,Page2__.jpg,Page2,0.494163,0.301358,0.291868,0.0619835
-1,DeSa__.jpg,DeSa,0.439157,0.741329,0.415663,0.0526012
-2,DeSa__4.jpg,DeSa,0.45241,0.543353,0.339759,0.0416185
-3,Page3__.jpg,Page3,0.321057,0.389454,0.129704,0.0571792
-4,Page4__5.jpg,Page4,0.507806,0.791269,0.345476,0.156889
-5,DeSa__2.jpg,DeSa,0.315663,0.647688,0.314458,0.050289
-6,Page4__3.jpg,Page4,0.738391,0.306958,0.106085,0.0900409
-7,DeSa2__.jpg,DeSa2,0.439157,0.741329,0.415663,0.0526012
-8,DeSa2__2.jpg,DeSa2,0.315663,0.647688,0.314458,0.050289
-9,Page4__2.jpg,Page4,0.377902,0.305935,0.0832666,0.085266
-10,Page5__.jpg,Page5,0.485121,0.951722,0.342208,0.0955115
-11,DeSa2__3.jpg,DeSa2,0.421084,0.451445,0.453012,0.0693642
-12,Page4__4.jpg,Page4,0.500801,0.305935,0.0776621,0.0511596
-13,DeSa__3.jpg,DeSa,0.421084,0.451445,0.453012,0.0693642
-14,DeSa2__4.jpg,DeSa2,0.45241,0.543353,0.339759,0.0416185
-15,Page4__.jpg,Page4,0.293435,0.187926,0.0760608,0.0648022
-16,Page1__.jpg,Page1,0.487605,0.903516,0.395378,0.169531
+0,1__.jpg,1,0.492647,0.564091,0.33,0.0518182
+1,2__.jpg,2,0.498235,0.655455,0.289412,0.0309091
+2,3__.jpg,3,0.504706,0.387273,0.295294,0.0545455
diff --git a/ml-model/yolov5/ranking/img_database_copy.csv b/ml-model/yolov5/ranking/img_database_copy.csv
new file mode 100644
index 000000000..f0bc8409e
--- /dev/null
+++ b/ml-model/yolov5/ranking/img_database_copy.csv
@@ -0,0 +1,18 @@
+,image_name,image_source,coo_1,coo_2,coo_3,coo_4
+0,Page2__.jpg,Page2,0.494163,0.301358,0.291868,0.0619835
+1,DeSa__.jpg,DeSa,0.439157,0.741329,0.415663,0.0526012
+2,DeSa__4.jpg,DeSa,0.45241,0.543353,0.339759,0.0416185
+3,Page3__.jpg,Page3,0.321057,0.389454,0.129704,0.0571792
+4,Page4__5.jpg,Page4,0.507806,0.791269,0.345476,0.156889
+5,DeSa__2.jpg,DeSa,0.315663,0.647688,0.314458,0.050289
+6,Page4__3.jpg,Page4,0.738391,0.306958,0.106085,0.0900409
+7,DeSa2__.jpg,DeSa2,0.439157,0.741329,0.415663,0.0526012
+8,DeSa2__2.jpg,DeSa2,0.315663,0.647688,0.314458,0.050289
+9,Page4__2.jpg,Page4,0.377902,0.305935,0.0832666,0.085266
+10,Page5__.jpg,Page5,0.485121,0.951722,0.342208,0.0955115
+11,DeSa2__3.jpg,DeSa2,0.421084,0.451445,0.453012,0.0693642
+12,Page4__4.jpg,Page4,0.500801,0.305935,0.0776621,0.0511596
+13,DeSa__3.jpg,DeSa,0.421084,0.451445,0.453012,0.0693642
+14,DeSa2__4.jpg,DeSa2,0.45241,0.543353,0.339759,0.0416185
+15,Page4__.jpg,Page4,0.293435,0.187926,0.0760608,0.0648022
+16,Page1__.jpg,Page1,0.487605,0.903516,0.395378,0.169531
diff --git a/ml-model/yolov5/ranking/top5.csv b/ml-model/yolov5/ranking/top5.csv
index 33b74c425..255a7ae6e 100644
--- a/ml-model/yolov5/ranking/top5.csv
+++ b/ml-model/yolov5/ranking/top5.csv
@@ -1,6 +1,3 @@
-Unnamed: 0,image_name,image_source,coo_1,coo_2,coo_3,coo_4
-16,Page1__.jpg,Page1,0.487605,0.903516,0.395378,0.169531
-4,Page4__5.jpg,Page4,0.507806,0.791269,0.345476,0.156889
-10,Page5__.jpg,Page5,0.485121,0.951722,0.342208,0.0955115
-11,DeSa2__3.jpg,DeSa2,0.421084,0.451445,0.453012,0.0693642
-0,Page2__.jpg,Page2,0.494163,0.301358,0.291868,0.0619835
\ No newline at end of file
+0,1__.jpg,1,0.492647,0.564091,0.33,0.0518182
+2,3__.jpg,3,0.504706,0.387273,0.295294,0.0545455
+1,2__.jpg,2,0.498235,0.655455,0.289412,0.0309091
diff --git a/ml-model/yolov5/tests3.py b/ml-model/yolov5/tests3.py
new file mode 100644
index 000000000..4fdcefce6
--- /dev/null
+++ b/ml-model/yolov5/tests3.py
@@ -0,0 +1,30 @@
+import boto3
+from botocore.config import Config
+import cv2
+import numpy as np
+
+s3 = boto3.client(service_name='s3', region_name='us-east-1', aws_access_key_id='AKIAUHGY3PCBKOGUKOJN', aws_secret_access_key='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')
+# s3 = boto3.resource('s3')
+mybucket = 'mathsearch-intermediary'
+prefix = 'ex01/'
+
+i = 0
+for obj in s3.list_objects(Bucket=mybucket, Prefix=prefix).get('Contents'):
+  k = obj.get('Key')
+  if k[-3:] != 'png':
+    continue
+  contents = s3.get_object(Bucket=mybucket,Key=obj.get('Key'))['Body'].read()
+  print(cv2.imdecode(np.asarray(bytearray(contents)), cv2.IMREAD_COLOR))
+  # print(contents)
+  # contents = obj.get()['Body'].read()
+#   if contents != None and contents != "b''":
+#     print("contents:",contents,'end')
+#     print(cv2.imdecode(np.asarray(bytearray(contents)), cv2.IMREAD_COLOR))
+  # i += 1
+  # if i > 2:
+  #   break
+# bucket = s3.Bucket('mathsearch-intermediary')
+# for obj_sum in bucket.objects.all():
+  # obj = s3.Object(obj_sum.bucket_name, obj_sum.key)
+  # cv.imshow(obj_sum.get()['Body'].read())
+  # cv2.imshow(obj)
\ No newline at end of file

From 4cf4d6c18d6f9d1b68ac7481a162856799fdb102 Mon Sep 17 00:00:00 2001
From: Emerald Liu <44758321+emerisly@users.noreply.github.com>
Date: Sat, 18 Nov 2023 15:38:42 -0500
Subject: [PATCH 2/6] Delete ml-model/yolov5/tests3.py

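This file hard-codes an AWS access key ID and secret key and should not have been pushed.
Deleting it here does not remove the credentials from the earlier commit, so the key must also be rotated.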
---
 ml-model/yolov5/tests3.py | 30 ------------------------------
 1 file changed, 30 deletions(-)
 delete mode 100644 ml-model/yolov5/tests3.py

diff --git a/ml-model/yolov5/tests3.py b/ml-model/yolov5/tests3.py
deleted file mode 100644
index 4fdcefce6..000000000
--- a/ml-model/yolov5/tests3.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import boto3
-from botocore.config import Config
-import cv2
-import numpy as np
-
-s3 = boto3.client(service_name='s3', region_name='us-east-1', aws_access_key_id='AKIAUHGY3PCBKOGUKOJN', aws_secret_access_key='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')
-# s3 = boto3.resource('s3')
-mybucket = 'mathsearch-intermediary'
-prefix = 'ex01/'
-
-i = 0
-for obj in s3.list_objects(Bucket=mybucket, Prefix=prefix).get('Contents'):
-  k = obj.get('Key')
-  if k[-3:] != 'png':
-    continue
-  contents = s3.get_object(Bucket=mybucket,Key=obj.get('Key'))['Body'].read()
-  print(cv2.imdecode(np.asarray(bytearray(contents)), cv2.IMREAD_COLOR))
-  # print(contents)
-  # contents = obj.get()['Body'].read()
-#   if contents != None and contents != "b''":
-#     print("contents:",contents,'end')
-#     print(cv2.imdecode(np.asarray(bytearray(contents)), cv2.IMREAD_COLOR))
-  # i += 1
-  # if i > 2:
-  #   break
-# bucket = s3.Bucket('mathsearch-intermediary')
-# for obj_sum in bucket.objects.all():
-  # obj = s3.Object(obj_sum.bucket_name, obj_sum.key)
-  # cv.imshow(obj_sum.get()['Body'].read())
-  # cv2.imshow(obj)
\ No newline at end of file

From 7f931b38f4e4a2f08a41600f857c3fa451a2f2f9 Mon Sep 17 00:00:00 2001
From: Emerald Liu <44758321+emerisly@users.noreply.github.com>
Date: Sat, 18 Nov 2023 15:40:00 -0500
Subject: [PATCH 3/6] Delete ml-model/model/image_to_latex.py

avoid merge conflict
---
 ml-model/model/image_to_latex.py | 22 ----------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 ml-model/model/image_to_latex.py

diff --git a/ml-model/model/image_to_latex.py b/ml-model/model/image_to_latex.py
deleted file mode 100644
index 0739031eb..000000000
--- a/ml-model/model/image_to_latex.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from rapid_latex_ocr import LatexOCR
-
-image_resizer_path = 'models/image_resizer.onnx'
-encoder_path = 'models/encoder.onnx'
-decoder_path = 'models/decoder.onnx'
-tokenizer_json = 'models/tokenizer.json'
-model = LatexOCR(image_resizer_path=image_resizer_path,
-                encoder_path=encoder_path,
-                decoder_path=decoder_path,
-                tokenizer_json=tokenizer_json)
-
-img_path = "tests/test_files/6.png"
-with open(img_path, "rb") as f:
-    data = f. read()
-
-result, elapse = model(data)
-
-print(result)
-# {\frac{x^{2}}{a^{2}}}-{\frac{y^{2}}{b^{2}}}=1
-
-print(elapse)
-# 0.4131628000000003
\ No newline at end of file

From f2a216c9fa6bef98cc690e98f930a128f7ae7335 Mon Sep 17 00:00:00 2001
From: Emerald Liu <44758321+emerisly@users.noreply.github.com>
Date: Sat, 18 Nov 2023 15:40:38 -0500
Subject: [PATCH 4/6] Delete ml-model/model/latex_to_tree.py

avoid merge conflict
---
 ml-model/model/latex_to_tree.py | 93 ---------------------------------
 1 file changed, 93 deletions(-)
 delete mode 100644 ml-model/model/latex_to_tree.py

diff --git a/ml-model/model/latex_to_tree.py b/ml-model/model/latex_to_tree.py
deleted file mode 100644
index 310fa9191..000000000
--- a/ml-model/model/latex_to_tree.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import sympy as sp
-from sympy.parsing.latex import parse_latex
-from zss import Node, distance
-import networkx as nx
-import matplotlib.pyplot as plt
-
-
-def zss_to_nx(node, graph=None, parent=None):
-    if graph is None:
-        graph = nx.DiGraph()
-    graph.add_node(id(node), label=node.label)
-    if parent is not None:
-        graph.add_edge(id(parent), id(node))
-    for child in node.children:
-        zss_to_nx(child, graph, node)
-    return graph
-
-
-# Define some complex LaTeX expressions
-# expr1 represents our query
-latex_expr1 = r"\nabla J(\theta) = \frac{1}{m} \sum_{i=1}^m (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"
-# expr represents our OCR'd expression from the file
-latex_expr2 = r"\nabla J(\Theta) = \frac{1}{m} \sum_{i=1}^m (h_\theta(z^{(i)}) - y^{(i)}) z^{(i)}"
-
-# Convert LaTeX to SymPy
-sympy_expr1 = parse_latex(latex_expr1)
-sympy_expr2 = parse_latex(latex_expr2)
-
-
-def sympy_to_zss(expr):
-    if isinstance(expr, sp.Symbol) or isinstance(expr, sp.Number):
-        return Node(str(expr))
-    else:
-        node = Node(str(expr.func))
-        for arg in expr.args:
-            child_node = sympy_to_zss(arg)
-            node.addkid(child_node)
-        return node
-
-
-# Convert the SymPy expression to a ZSS tree
-zss_tree1 = sympy_to_zss(sympy_expr1)
-zss_tree2 = sympy_to_zss(sympy_expr2)
-# print(zss_tree1)
-# print(zss_tree2)
-
-# Assuming zss_tree1 and zss_tree2 are your ZSS trees
-nx_tree1 = zss_to_nx(zss_tree1)
-nx_tree2 = zss_to_nx(zss_tree2)
-
-
-def hierarchy_pos(G, root=None, width=1., vert_gap=0.2, vert_loc=0, xcenter=0.5):
-    pos = _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter)
-    return pos
-
-
-def _hierarchy_pos(G, root, width=1., vert_gap=0.2, vert_loc=0, xcenter=0.5, pos=None, parent=None, parsed=[]):
-    if pos is None:
-        pos = {root: (xcenter, vert_loc)}
-    else:
-        pos[root] = (xcenter, vert_loc)
-    children = list(G.neighbors(root))
-    if not isinstance(G, nx.DiGraph) and parent is not None:
-        children.remove(parent)
-    if len(children) != 0:
-        dx = width / len(children)
-        nextx = xcenter - width/2 - dx/2
-        for child in children:
-            nextx += dx
-            pos = _hierarchy_pos(G, child, width=dx, vert_gap=vert_gap,
-                                 vert_loc=vert_loc-vert_gap, xcenter=nextx,
-                                 pos=pos, parent=root, parsed=parsed)
-    return pos
-
-
-def draw_tree(tree):
-    pos = hierarchy_pos(tree, root=list(tree.nodes())
-                        [0])  # Specify the root node
-    labels = nx.get_node_attributes(tree, 'label')
-    nx.draw(tree, pos, labels=labels, with_labels=True,
-            node_size=3000, node_color='lightblue', font_size=10)
-    plt.show()
-
-
-# Draw the trees
-# draw_tree(nx_tree1)
-# draw_tree(nx_tree2)
-
-# Compare ZSS trees
-# make update non-zero to see difference in more updated tree vs. not
-distance = distance(zss_tree1, zss_tree2, get_children=Node.get_children,
-                    insert_cost=lambda node: 10, remove_cost=lambda node: 10, update_cost=lambda a, b: 1)
-print(distance)  # Output the tree edit distance

From 4ec62503ec10717d608159b118ef14e5f1eb9f4d Mon Sep 17 00:00:00 2001
From: Emerald Liu <44758321+emerisly@users.noreply.github.com>
Date: Sat, 18 Nov 2023 15:44:26 -0500
Subject: [PATCH 5/6] Delete ml-model/yolov5/main.py

avoid merge conflict
---
 ml-model/yolov5/main.py | 157 ----------------------------------------
 1 file changed, 157 deletions(-)
 delete mode 100644 ml-model/yolov5/main.py

diff --git a/ml-model/yolov5/main.py b/ml-model/yolov5/main.py
deleted file mode 100644
index 67be5a608..000000000
--- a/ml-model/yolov5/main.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import subprocess
-from subprocess import call, run
-import json 
-import pandas as pd
-import os
-import shutil
-import boto3
-import numpy as np
-import cv2
-import time
-import sys
-import PyPDF2
-import pdf2image
-from PIL import Image
-import csv
-import requests
-
-PREPROCESS_FOLDER = "/home/ubuntu/MathSearch/ml-model/yolov5/preprocess_data/"
-DATA_FOLDER = "/home/ubuntu/MathSearch/ml-model/yolov5/input_data/"
-
-"""
-pdf files location hard coded to DATA_FOLDER
-Args: pdf_filename, target_filename
-
-pdf_filename is inputs/[pdf_filename] of user's pdf query in S3's 'mathsearch-intermediary' bucket
-target_filename is inputs/[target_file] of user's pdf query
-"""
-
-def main(pdf_image_prefix,local_target):
-    """
-    target_file_name: str of name of file we are looking for.
-    Assumption: input_images/ has been updated with the latest images. 
-    """
-    print("running yolov5/main.py...")
-
-    os.chdir("/home/ubuntu/MathSearch/ml-model/yolov5")
-    target_file_name = local_target
-
-    # Dataset contains output of YOLO model 
-    # Clear folder to reset working directory 
-    dataset_path = "ranking/dataset"
-    if(os.path.isdir(dataset_path)):
-      shutil.rmtree(dataset_path)
-
-    # Call YOLO model. 
-    # Uses best.torchscript weights 
-    # Input data: input_data/
-    # Writing output to ranking/dataset
-    run('conda run -n pytorch python detect.py --weights best.torchscript --source input_data/{} --save-txt --save-crop --project ranking/dataset/'.format(sys.argv[1]), shell=True)
-
-    # Get list of files written to YOLO output, except for target_file_name
-    dir_list = os.listdir(os.path.join(dataset_path,"exp" ,"crops", "equation"))
-    dir_list = [x for x in dir_list if x != target_file_name]
-
-    # Construct tbl of generated crops for similarity detection model 
-    img_database = pd.DataFrame(columns = ['image_name', 'image_source', 'coo_1', 'coo_2', 'coo_3', 'coo_4'])
-    for f in dir_list:
-      img_source, rem  = f.split("__")
-      df = pd.read_csv(os.path.join(dataset_path,"exp" ,"labels/") + img_source + ".txt", delim_whitespace=True, header=None)
-      new_row = {'image_name': f, 'image_source': img_source, 'coo_1':df.iloc[0, 1], 
-          'coo_2':df.iloc[0, 2], 'coo_3': df.iloc[0, 3], 
-          'coo_4': df.iloc[0, 4]} 
-      img_database = img_database.append(new_row, ignore_index = True) 
-    img_database.to_csv("ranking/img_database.csv") 
-
-    # Call similarity detection model 
-    # Writes final output to top5.csv
-    run('conda run -n pytorch python ./ranking/ImageMatching.py',shell=True )
-
-
-# Json example
-# {
-#  "file":"ex1.pdf",
-#  "coords":"0 0.3392857142857143 0.17142857142857146 0.30952380952380953 0.12698412698412698 1 0.32242063492063494 0.4380952380952381 0.26785714285714285 0.08888888888888889"
-# }
-def send_result_to_frontend(pdf_name):
-    result_coords = ""
-    result_csv = "/home/ubuntu/MathSearch/ml-model/yolov5/ranking/top5.csv"
-    with open(result_csv, 'r') as f:
-        reader = csv.reader(f, delimiter=',')
-        for row in reader:
-            # adding page number and coords for each re-rank
-            result_coords += str(int(row[0])+1) + " "
-
-            # result_coords += row[0] + " "
-            result_coords += row[3] + " "
-            result_coords += row[4] + " "
-            result_coords += row[5] + " "
-            result_coords += row[6] + " "
-    frontend_url = "http://3.94.25.91/api/result"
-    json = {
-        "file":pdf_name,
-        "coords":result_coords
-    }
-    print(pdf_name)
-    print(result_coords)
-    res = requests.get(frontend_url, json=json)
-    res = print(res) # OK = 200
-
-
-def remove_files():
-    global DATA_FOLDER
-    for f in os.listdir(DATA_FOLDER):
-        try:
-            os.remove(os.path.join(DATA_FOLDER, f))
-        except:
-            shutil.rmtree(os.path.join(DATA_FOLDER, f)) 
-
-def download_files(pdf_name, target_name):
-    global DATA_FOLDER
-    global PREPROCESS_FOLDER
-    s3 = boto3.client("s3")
-    MATHSEARCH_BUCKET='mathsearch-intermediary'
-    local_pdf = PREPROCESS_FOLDER + pdf_name
-    local_target = DATA_FOLDER + target_name[:-5] + "target.png"
-    print("local_pdf",local_pdf)
-    print("pdf_name",pdf_name)
-
-    # download and preprocess pdf to png
-    s3.download_file(
-        Bucket=MATHSEARCH_BUCKET, Key="inputs/"+pdf_name, Filename=local_pdf
-    )
-    images = pdf2image.convert_from_path(local_pdf)
-    print(local_pdf)
-    os.mkdir(DATA_FOLDER + pdf_name)
-    for i in range(len(images)):
-        pdf_image = DATA_FOLDER + pdf_name + "/"+ str(i) + ".png"
-        print(pdf_image)
-        images[i].save(pdf_image)
-    
-    # download target png
-    s3.download_file(
-        Bucket=MATHSEARCH_BUCKET, Key="inputs/"+target_name, Filename=local_target
-    )
-
-if __name__ == "__main__":
-
-    pdf_name = sys.argv[1]
-    target_name = sys.argv[2]
-
-    print(pdf_name)
-    print(target_name)
-
-    remove_files()
-    time.sleep(5)
-    download_files(pdf_name,target_name)
-
-    # prefix example:
-    # /home/ubuntu/MathSearch/ml-model/yolov5/input_data/012330fd-7c87-4236-8f4c-b39f3ea72968_pdf
-    # actual path:
-    # /home/ubuntu/MathSearch/ml-model/yolov5/input_data/012330fd-7c87-4236-8f4c-b39f3ea72968_pdf0.png
-    pdf_image_prefix = DATA_FOLDER + pdf_name
-    local_target = DATA_FOLDER + target_name[:-5] + "target.png"
-
-    main(pdf_image_prefix,local_target)
-    print("finished running yolo! sending results to frontend...")
-    send_result_to_frontend(pdf_name)

From 33c9553c4faaf74eadae1b665dbddff4aa2cc186 Mon Sep 17 00:00:00 2001
From: Emerald Liu <sl2322@cornell.edu>
Date: Sat, 18 Nov 2023 15:48:07 -0500
Subject: [PATCH 6/6] delete dataset folder

---
 ml-model/yolov5/ranking/dataset/exp/labels/1.txt | 1 -
 ml-model/yolov5/ranking/dataset/exp/labels/2.txt | 1 -
 ml-model/yolov5/ranking/dataset/exp/labels/3.txt | 1 -
 3 files changed, 3 deletions(-)
 delete mode 100644 ml-model/yolov5/ranking/dataset/exp/labels/1.txt
 delete mode 100644 ml-model/yolov5/ranking/dataset/exp/labels/2.txt
 delete mode 100644 ml-model/yolov5/ranking/dataset/exp/labels/3.txt

diff --git a/ml-model/yolov5/ranking/dataset/exp/labels/1.txt b/ml-model/yolov5/ranking/dataset/exp/labels/1.txt
deleted file mode 100644
index dbe4a65e5..000000000
--- a/ml-model/yolov5/ranking/dataset/exp/labels/1.txt
+++ /dev/null
@@ -1 +0,0 @@
-0 0.492647 0.564091 0.33 0.0518182
diff --git a/ml-model/yolov5/ranking/dataset/exp/labels/2.txt b/ml-model/yolov5/ranking/dataset/exp/labels/2.txt
deleted file mode 100644
index c93f1a676..000000000
--- a/ml-model/yolov5/ranking/dataset/exp/labels/2.txt
+++ /dev/null
@@ -1 +0,0 @@
-0 0.498235 0.655455 0.289412 0.0309091
diff --git a/ml-model/yolov5/ranking/dataset/exp/labels/3.txt b/ml-model/yolov5/ranking/dataset/exp/labels/3.txt
deleted file mode 100644
index 438d0132b..000000000
--- a/ml-model/yolov5/ranking/dataset/exp/labels/3.txt
+++ /dev/null
@@ -1 +0,0 @@
-0 0.504706 0.387273 0.295294 0.0545455