diff --git a/EAutodet.ipynb b/EAutodet.ipynb
new file mode 100644
index 0000000..09a9f0d
--- /dev/null
+++ b/EAutodet.ipynb
@@ -0,0 +1,165 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "private_outputs": true,
+      "provenance": [],
+      "machine_shape": "hm",
+      "gpuType": "A100",
+      "authorship_tag": "ABX9TyMvTTM6qJIF9fbb5iHAFlMx",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/aashikrasool/EAutoDet/blob/master/EAutodet.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "9eP4BfF8OdIX"
+      },
+      "outputs": [],
+      "source": [
+        "!git clone https://github.com/aashikrasool/EAutoDet.git"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "cd EAutoDet"
+      ],
+      "metadata": {
+        "id": "qbCniNOHOnDH"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "ls\n"
+      ],
+      "metadata": {
+        "id": "gm3JophGOxeE"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install thop"
+      ],
+      "metadata": {
+        "id": "NMBr0rxhY_sy"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install wandb"
+      ],
+      "metadata": {
+        "id": "Dio2_oXWboTt"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!bash data/scripts/get_coco.sh"
+      ],
+      "metadata": {
+        "id": "8Lg26SaJO99O"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!touch logs/EAutoDet-s_test1.log\n"
+      ],
+      "metadata": {
+        "id": "M6YrDoKPcI0z"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!bash scripts/search.sh 0"
+      ],
+      "metadata": {
+        "id": "8hnodOlxPFOI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!python -u train_search.py --data $data --cfg $cfg --weights '' --epochs 50 --batch-size $BATCHSIZE > $LOG_DIR/${cfg_file}_test1.log 2>&1 &\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "LruE8zGJPNuY"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!python -u train_search.py --data $data --cfg $cfg --weights '' --epochs 50 --batch-size $BATCHSIZE > $LOG_DIR/${cfg_file}_test1.log 2>&1\n"
+      ],
+      "metadata": {
+        "id": "Yq7qlZBtRrpq"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!bash scripts/full_train.sh 0"
+      ],
+      "metadata": {
+        "id": "2xK2W_B9SFw7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "RRnUZSD4SlNK"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
diff --git a/data/scripts/get_coco.sh b/data/scripts/get_coco.sh
index 96d0199..e643321 100755
--- a/data/scripts/get_coco.sh
+++ b/data/scripts/get_coco.sh
@@ -8,20 +8,20 @@
 #     /yolov5
 
 # Download/unzip labels
-d='../' # unzip directory
+d='./' # unzip directory
 url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
-f='coco2017labels.zip' # or 'coco2017labels-segments.zip', 68 MB
+f='coco2017labels-segments.zip' # or 'coco2017labels.zip', 68 MB
 echo 'Downloading' $url$f ' ...'
 curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
 
-## Download/unzip images
-#d='../coco/images' # unzip directory
-#url=http://images.cocodataset.org/zips/
-#f1='train2017.zip' # 19G, 118k images
-#f2='val2017.zip'   # 1G, 5k images
-#f3='test2017.zip'  # 7G, 41k images (optional)
-#for f in $f1 $f2; do
-#  echo 'Downloading' $url$f '...'
-#  curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
-#done
+# Download/unzip images: one background job per archive; the final 'wait' below joins them
+d='./coco/images' # unzip directory (created on demand by each job, see mkdir -p in the loop)
+url=http://images.cocodataset.org/zips/
+f1='train2017.zip' # 19G, 118k images
+f2='val2017.zip'   # 1G, 5k images
+f3='test2017.zip'  # 7G, 41k images (optional)
+for f in $f1 $f2 $f3; do
+  echo 'Downloading' "$url$f" '...'
+  mkdir -p "$d" && curl -L "$url$f" -o "$f" && unzip -q "$f" -d "$d" && rm "$f" & # ensure target dir, download, unzip, remove in background
+done
 wait # finish background tasks
diff --git a/train.py b/train.py
index 9667244..21e3525 100644
--- a/train.py
+++ b/train.py
@@ -680,4 +680,3 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         plot_evolution(yaml_file)
         print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
               f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
-
diff --git a/train_search.py b/train_search.py
index a50749f..7d22298 100644
--- a/train_search.py
+++ b/train_search.py
@@ -26,9 +26,10 @@
 from models.yolo_search import Model, parse_model
 from utils.autoanchor import check_anchors
 from utils.datasets import create_dataloader_search, create_dataloader
-from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
-    fitness, strip_optimizer_search, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \
-    check_requirements, print_mutation, set_logging, one_cycle, colorstr
+from utils.general import (labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds,
+                           fitness, strip_optimizer_search, get_latest_run, check_dataset, check_file,
+                           check_git_status, check_img_size, check_requirements, print_mutation, set_logging,
+                           one_cycle, colorstr)
 from utils.google_utils import attempt_download
 from utils.loss import ComputeLoss
 from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
@@ -39,18 +40,19 @@
 logger = logging.getLogger(__name__)
 
 
-def train(hyp, opt, device, tb_writer=None, wandb=None):
+def train(hyp, opt, device, tb_writer=None, wandb_run=None):
     logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
-    save_dir, epochs, batch_size, total_batch_size, weights, rank = \
+    save_dir, epochs, batch_size, total_batch_size, weights, rank = (
         Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
+    )
 
     # Directories
     wdir = save_dir / 'weights'
-    wdir.mkdir(parents=True, exist_ok=True)  # make dir
+    wdir.mkdir(parents=True, exist_ok=True)
     geno_dir = save_dir / 'genotypes'
-    geno_dir.mkdir(parents=True, exist_ok=True)  # make dir
+    geno_dir.mkdir(parents=True, exist_ok=True)
     alpha_dir = save_dir / 'alphas'
-    alpha_dir.mkdir(parents=True, exist_ok=True)  # make dir
+    alpha_dir.mkdir(parents=True, exist_ok=True)
     last = wdir / 'last.pt'
     best = wdir / 'best.pt'
     results_file = save_dir / 'results.txt'
@@ -62,232 +64,198 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         yaml.dump(vars(opt), f, sort_keys=False)
 
     # Configure
-    plots = not opt.evolve  # create plots
+    plots = not opt.evolve
     cuda = device.type != 'cpu'
     init_seeds(2 + rank)
     with open(opt.data) as f:
-        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+        data_dict = yaml.load(f, Loader=yaml.SafeLoader)
     with torch_distributed_zero_first(rank):
-        check_dataset(data_dict)  # check
+        check_dataset(data_dict)
     train_path = data_dict['train']
     test_path = data_dict['val']
-    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
-    names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
-    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check
+    nc = 1 if opt.single_cls else int(data_dict['nc'])
+    names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names']
+    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)
 
     # Model
     pretrained = weights.endswith('.pt')
     if pretrained:
         with torch_distributed_zero_first(rank):
-            attempt_download(weights)  # download if not found locally
-        ckpt = torch.load(weights, map_location=device)  # load checkpoint
-        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
-        exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else []  # exclude keys
-        state_dict = ckpt['model'].float().state_dict()  # to FP32
-        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
-        model.load_state_dict(state_dict, strict=False)  # load
-        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
+            attempt_download(weights)
+        ckpt = torch.load(weights, map_location=device)
+        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)
+        exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else []
+        state_dict = ckpt['model'].float().state_dict()
+        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)
+        model.load_state_dict(state_dict, strict=False)
+        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))
     else:
-        model = Model(opt.cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
-#    model.update_arch_parameters() # Since to(device) will not update model._arch_parameters
-#    for alpha in model.arch_parameters():
-#        alpha.requires_grad_(True)
+        model = Model(opt.cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)
 
-    # Freeze
-    freeze = []  # parameter names to freeze (full or partial)
+    # Freeze (if needed)
+    freeze = []  # List parameter names to freeze (if any)
     for k, v in model.named_parameters():
-        v.requires_grad = True  # train all layers
+        v.requires_grad = True
         if any(x in k for x in freeze):
             print('freezing %s' % k)
             v.requires_grad = False
 
     # Optimizer
-#    nbs = 64  # nominal batch size
-#    nbs = total_batch_size  # nominal batch size
     nbs = max(total_batch_size, 64)  # nominal batch size
-    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate loss before optimizing
-    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay
+    accumulate = max(round(nbs / total_batch_size), 1)
+    hyp['weight_decay'] *= total_batch_size * accumulate / nbs
     logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")
 
-    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
+    pg0, pg1, pg2 = [], [], []
     for k, v in model.named_modules():
         if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
-            pg2.append(v.bias)  # biases
+            pg2.append(v.bias)
         if isinstance(v, nn.BatchNorm2d) and hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
-            pg0.append(v.weight)  # no decay
+            pg0.append(v.weight)
         elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
-            pg1.append(v.weight)  # apply decay
+            pg1.append(v.weight)
         elif hasattr(v, 'depth_weight') and isinstance(v.depth_weight, nn.Parameter):
-            pg1.append(v.depth_weight)  # apply decay
-            if hasattr(v, 'point_weight') and isinstance(v.point_weight, nn.Parameter): pg1.append(v.point_weight)  # apply decay
+            pg1.append(v.depth_weight)
+            if hasattr(v, 'point_weight') and isinstance(v.point_weight, nn.Parameter):
+                pg1.append(v.point_weight)
         elif hasattr(v, 'depth_weight1') and isinstance(v.depth_weight1, nn.Parameter):
-            pg1.append(v.depth_weight1)  # apply decay
+            pg1.append(v.depth_weight1)
         elif hasattr(v, 'depth_weight2') and isinstance(v.depth_weight2, nn.Parameter):
-            pg1.append(v.depth_weight2)  # apply decay
+            pg1.append(v.depth_weight2)
 
-    tt = 0
-    for w in model.parameters(): tt+=1
-    assert (tt == len(pg0)+len(pg1)+len(pg2))
-#    # Test genotype code
-#    geno, model_yaml = model.genotype()
-#    parse_model(model_yaml, [3])
-#    print(geno)
-#    assert 0
+    tt = sum(1 for _ in model.parameters())
+    assert (tt == len(pg0) + len(pg1) + len(pg2))
 
     if opt.adam:
-        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
+        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))
     else:
         optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
 
-    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
-    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
+    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})
+    optimizer.add_param_group({'params': pg2})
     logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
     del pg0, pg1, pg2
 
-    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
-    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
     if opt.linear_lr:
-        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
+        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']
     else:
-        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
+        lf = one_cycle(1, hyp['lrf'], epochs)
     scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
-    # plot_lr_scheduler(optimizer, scheduler, epochs)
-
-    # Logging
-    if rank in [-1, 0] and wandb and wandb.run is None:
-        opt.hyp = hyp  # add hyperparameters
-        wandb_run = wandb.init(config=opt, resume="allow",
-                               project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
-                               name=save_dir.stem,
-                               entity=opt.entity,
-                               id=ckpt.get('wandb_id') if 'ckpt' in locals() else None)
-    loggers = {'wandb': wandb}  # loggers dict
+
+    # Initialize WandB logging if available
+    if rank in [-1, 0] and wandb_run is None and wandb is not None:
+        opt.hyp = hyp  # add hyperparameters to config
+        try:
+            wandb_run = wandb.init(
+                config=opt,
+                resume="allow",
+                project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
+                name=Path(opt.save_dir).stem,
+                entity=opt.entity,
+                id=ckpt.get('wandb_id') if 'ckpt' in locals() else None
+            )
+        except wandb.errors.UsageError as e:
+            print(f"Wandb init failed: {e}. Disabling wandb logging.")
+            wandb_run = None
+    loggers = {'wandb': wandb_run}
 
     # EMA
-#    ema = ModelEMA(model) if rank in [-1, 0] else None
     ema = EMA(model) if rank in [-1, 0] else None
 
-    # Resume
+    # Resume from checkpoint if applicable
     start_epoch, best_fitness = 0, 0.0
     if pretrained:
-        # Optimizer
         if ckpt['optimizer'] is not None:
             optimizer.load_state_dict(ckpt['optimizer'])
             best_fitness = ckpt['best_fitness']
-
-        # EMA
         if ema and ckpt.get('ema'):
-#            ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
             ema.shadow = ckpt['ema']
             ema.updates = ckpt['updates']
-
-        # Results
         if ckpt.get('training_results') is not None:
-            results_file.write_text(ckpt['training_results'])  # write results.txt
-
-        # Epochs
+            results_file.write_text(ckpt['training_results'])
         start_epoch = ckpt['epoch'] + 1
         if opt.resume:
             assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
         if epochs < start_epoch:
             logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                         (weights, ckpt['epoch'], epochs))
-            epochs += ckpt['epoch']  # finetune additional epochs
-
-        del ckpt, state_dict
+            epochs += ckpt['epoch']
+        del ckpt
 
-    # Image sizes
-    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
-    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples
+    gs = max(int(model.stride.max()), 32)
+    nl = model.model[-1].nl
+    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]
 
-    # DP mode
     if cuda and rank == -1 and torch.cuda.device_count() > 1:
         model = torch.nn.DataParallel(model)
-
-    # SyncBatchNorm
     if opt.sync_bn and cuda and rank != -1:
         model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
         logger.info('Using SyncBatchNorm()')
-
-    # DDP mode
     if cuda and rank != -1:
         model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)
 
-    # Trainloader
-    dataloader, dataloader_val, dataset, dataset_val = create_dataloader_search(train_path, imgsz, batch_size, gs, opt,
-                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
-                                            world_size=opt.world_size, workers=opt.workers,
-                                            image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '), train_portion=opt.train_portion)
-    mlc = np.concatenate(dataset.dataset.labels, 0)[:, 0].max()  # max label class. dataset is an instance of torch.utils.data.Subset
-    nb = len(dataloader)  # number of batches
+    dataloader, dataloader_val, dataset, dataset_val = create_dataloader_search(
+        train_path, imgsz, batch_size, gs, opt,
+        hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
+        world_size=opt.world_size, workers=opt.workers,
+        image_weights=opt.image_weights, quad=opt.quad,
+        prefix=colorstr('train: '), train_portion=opt.train_portion
+    )
+    mlc = np.concatenate(dataset.dataset.labels, 0)[:, 0].max()
+    nb = len(dataloader)
     assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
 
-    # Process 0
     if rank in [-1, 0]:
-        testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt,  # testloader
-                                       hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
-                                       world_size=opt.world_size, workers=opt.workers,
-                                       pad=0.5, prefix=colorstr('val: '))[0]
-
+        testloader = create_dataloader(
+            test_path, imgsz_test, batch_size * 2, gs, opt,
+            hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
+            world_size=opt.world_size, workers=opt.workers,
+            pad=0.5, prefix=colorstr('val: ')
+        )[0]
         if not opt.resume:
             labels = np.concatenate(dataset.dataset.labels, 0)
-            c = torch.tensor(labels[:, 0])  # classes
-            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
-            # model._initialize_biases(cf.to(device))
+            c = torch.tensor(labels[:, 0])
             if plots:
                 plot_labels(labels, save_dir, loggers)
                 if tb_writer:
                     tb_writer.add_histogram('classes', c, 0)
-
-            # Anchors
             if not opt.noautoanchor:
                 check_anchors(dataset.dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
-#            model.half().float()  # pre-reduce anchor precision 
-
-    # Model parameters
-    hyp['box'] *= 3. / nl  # scale to layers
-    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
-    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
-    model.nc = nc  # attach number of classes to model
-    model.hyp = hyp  # attach hyperparameters to model
-    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
-    model.class_weights = labels_to_class_weights(dataset.dataset.labels, nc).to(device) * nc  # attach class weights
+
+    hyp['box'] *= 3. / nl
+    hyp['cls'] *= nc / 80. * 3. / nl
+    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl
+    model.nc = nc
+    model.hyp = hyp
+    model.gr = 1.0
+    model.class_weights = labels_to_class_weights(dataset.dataset.labels, nc).to(device) * nc
     model.names = names
 
-    # Start training
     t0 = time.time()
-    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
-    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
-    maps = np.zeros(nc)  # mAP per class
-    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
-    scheduler.last_epoch = start_epoch - 1  # do not move
+    nw = max(round(hyp['warmup_epochs'] * nb), 1000)
+    maps = np.zeros(nc)
+    results = (0, 0, 0, 0, 0, 0, 0)
+    scheduler.last_epoch = start_epoch - 1
     scaler = amp.GradScaler(enabled=cuda)
-    compute_loss = ComputeLoss(model)  # init loss class
+    compute_loss = ComputeLoss(model)
     logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n'
                 f'Using {dataloader.num_workers} dataloader workers\n'
                 f'Logging results to {save_dir}\n'
                 f'Starting training for {epochs} epochs...')
 
     # Architect for NAS
-    architect = Architect(model, compute_loss, accumulate, device, opt, DDP=torch.cuda.device_count()>1)
+    architect = Architect(model, compute_loss, accumulate, device, opt, DDP=torch.cuda.device_count() > 1)
     ori_model = model.module if is_parallel(model) else model
     ori_model.display_alphas()
 
-#    torch.autograd.set_detect_anomaly(True)
-
-    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
+    for epoch in range(start_epoch, epochs):
         model.train()
-
-        # Update image weights (optional)
         if opt.image_weights:
-            # Generate indices
             if rank in [-1, 0]:
-                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
-                iw = labels_to_image_weights(dataset.dataset.labels, nc=nc, class_weights=cw)  # image weights
-                dataset.indices = random.choices(range(len(dataset.indices)), weights=iw[:len(dataset.indices)], k=len(dataset.indices))  # rand weighted idx
-                dataset_val.indices = random.choices(range(len(dataset_val.indices)), weights=iw[-len(dataset_val.indices):], k=len(dataset_val.indices))  # rand weighted idx
-            # Broadcast if DDP
+                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc
+                iw = labels_to_image_weights(dataset.dataset.labels, nc=nc, class_weights=cw)
+                dataset.indices = random.choices(range(len(dataset.indices)), weights=iw[:len(dataset.indices)], k=len(dataset.indices))
+                dataset_val.indices = random.choices(range(len(dataset_val.indices)), weights=iw[-len(dataset_val.indices):], k=len(dataset_val.indices))
             if rank != -1:
                 indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(len(dataset.indices))).int()
                 dist.broadcast(indices, 0)
@@ -298,271 +266,206 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                 if rank != 0:
                     dataset_val.indices = indices.cpu().numpy()
 
-        # Update mosaic border
-        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
-        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
-
-        mloss = torch.zeros(4, device=device)  # mean losses
+        mloss = torch.zeros(4, device=device)
         if rank != -1:
             dataloader.sampler.set_epoch(epoch)
             dataloader_val.sampler.set_epoch(epoch)
         pbar = enumerate(dataloader)
         logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size'))
         if rank in [-1, 0]:
-            pbar = tqdm(pbar, total=nb)  # progress bar
+            pbar = tqdm(pbar, total=nb)
         optimizer.zero_grad()
 
         def valid_generator():
             while True:
-              for x, t, path, shape in dataloader_val:
-                yield x, t, path, shape
+                for x, t, path, shape in dataloader_val:
+                    yield x, t, path, shape
         valid_gen = valid_generator()
-        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
-            ni = i + nb * epoch  # number integrated batches (since train start)
-            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
 
-            # Warmup
+        for i, (imgs, targets, paths, _) in pbar:
+            ni = i + nb * epoch
+            imgs = imgs.to(device, non_blocking=True).float() / 255.0
+
             if ni <= nw:
-                xi = [0, nw]  # x interp
-                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
+                xi = [0, nw]
                 accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                 for j, x in enumerate(optimizer.param_groups):
-                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                     x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                     if 'momentum' in x:
                         x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
-
-            # Multi-scale
             if opt.multi_scale:
-                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
-                sf = sz / max(imgs.shape[2:])  # scale factor
+                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs
+                sf = sz / max(imgs.shape[2:])
                 if sf != 1:
-                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
+                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]
                     imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
 
-            # architect
             if epoch >= opt.search_warmup:
-#              input_valid = imgs
-#              target_valid = targets
-              input_valid, target_valid, _, _ = next(valid_gen)
-              input_valid = input_valid.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
-              # Multi-scale
-              if opt.multi_scale:
-                  sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
-                  sf = sz / max(input_valid.shape[2:])  # scale factor
-                  if sf != 1:
-                      ns = [math.ceil(x * sf / gs) * gs for x in input_valid.shape[2:]]  # new shape (stretched to gs-multiple)
-                      input_valid = F.interpolate(input_valid, size=ns, mode='bilinear', align_corners=False)
-              architect.step(input_valid, target_valid)
-
-            # Forward
+                input_valid, target_valid, _, _ = next(valid_gen)
+                input_valid = input_valid.to(device, non_blocking=True).float() / 255.0
+                if opt.multi_scale:
+                    sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs
+                    sf = sz / max(input_valid.shape[2:])
+                    if sf != 1:
+                        ns = [math.ceil(x * sf / gs) * gs for x in input_valid.shape[2:]]
+                        input_valid = F.interpolate(input_valid, size=ns, mode='bilinear', align_corners=False)
+                architect.step(input_valid, target_valid)
+
             with amp.autocast(enabled=cuda):
-                pred = model(imgs)  # forward
-                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
+                pred = model(imgs)
+                loss, loss_items = compute_loss(pred, targets.to(device))
                 if rank != -1:
-                    loss *= opt.world_size  # gradient averaged between devices in DDP mode
+                    loss *= opt.world_size
                 if opt.quad:
                     loss *= 4.
-
-            # Backward
-#            scaler.scale(loss).backward()
-            grads =  torch.autograd.grad(scaler.scale(loss), model.parameters(), grad_outputs=torch.ones_like(loss), allow_unused=True)
-#            for idx, (name, p) in enumerate(model.named_parameters()):
-#              if grads[idx] is None: print(name)
-#              else: print(grads[idx])
-#            assert 0
+            grads = torch.autograd.grad(scaler.scale(loss), model.parameters(), grad_outputs=torch.ones_like(loss), allow_unused=True)
             for v, g in zip(model.parameters(), grads):
-              if v.grad is None:
-                if not (g is None):
-                  v.grad = torch.autograd.Variable(g.data)
-              else:
-                if not (g is None):
-                  v.grad.data.add_(g.data)
-
-            # Optimize
+                if v.grad is None:
+                    if g is not None:
+                        v.grad = torch.autograd.Variable(g.data)
+                else:
+                    if g is not None:
+                        v.grad.data.add_(g.data)
             if ni % accumulate == 0:
-                scaler.step(optimizer)  # optimizer.step
+                scaler.step(optimizer)
                 scaler.update()
                 optimizer.zero_grad()
                 if ema:
-#                    ema.update(model)
                     ema.update()
-
-            # Print
             if rank in [-1, 0]:
-                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
-                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
+                mloss = (mloss * i + loss_items) / (i + 1)
+                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)
                 s = ('%10s' * 2 + '%10.4g' * 6) % (
-                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
+                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]
+                )
                 pbar.set_description(s)
-
-                # Plot
                 if plots and ni < 3:
-                    f = save_dir / f'train_batch{ni}.jpg'  # filename
+                    f = save_dir / f'train_batch{ni}.jpg'
                     Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
-                    # if tb_writer:
-                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
-                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
-                elif plots and ni == 10 and wandb:
-                    wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')
-                                           if x.exists()]}, commit=False)
-
-#            # test whether the model in architect and ema is updated
-#            state_dict_m = model.state_dict()
-#            state_dict_a = architect.model.state_dict()
-#            state_dict_e = ema.model.state_dict()
-#            for key in state_dict_m.keys():
-#              print((state_dict_m[key]-state_dict_a[key]).sum())
-#              print((state_dict_m[key]-state_dict_e[key]).sum())
-#            model.display_alphas()
-
-            # end batch ------------------------------------------------------------------------------------------------
-        # DDP process 0 or single-GPU
+                elif plots and ni == 10 and wandb_run:
+                    import wandb as _wandb  # Image is a module-level wandb class; the Run object has no .Image attribute
+                    wandb_run.log({"Mosaics": [_wandb.Image(str(x), caption=x.name)
+                                               for x in save_dir.glob('train*.jpg') if x.exists()]}, commit=False)
         if rank in [-1, 0]:
-          # save alpha
-          alpha_file = os.path.join(alpha_dir, '%d.yaml'%epoch)
-          alphas_yaml = {}
-          state_dict = ori_model.state_dict()
-          for key in state_dict.keys():
-            if 'alpha' in key:
-              alphas_yaml[key] = state_dict[key].data.cpu().numpy().tolist()
-          with open(alpha_file, encoding='utf-8', mode='w') as f:
-            try:
-                yaml.dump(data=alphas_yaml, stream=f, allow_unicode=True)
-            except Exception as e:
-                print(e)
-          
-          geno, model_yaml = ori_model.genotype()
-          # save genotype
-          geno_file = os.path.join(geno_dir, '%d.yaml'%epoch)
-          with open(geno_file, encoding='utf-8', mode='w') as f:
-            try:
-                yaml.dump(data=model_yaml, stream=f, allow_unicode=True)
-            except Exception as e:
-                print(e)
-          print("==================")
-          print("normalized alphas:")
-          ori_model.display_alphas()
-          print('genotype:')
-          for g in geno:
-            print(g)
-          print("==================")
+            alpha_file = os.path.join(alpha_dir, f'{epoch}.yaml')
+            alphas_yaml = {}
+            state_dict = ori_model.state_dict()
+            for key in state_dict.keys():
+                if 'alpha' in key:
+                    alphas_yaml[key] = state_dict[key].data.cpu().numpy().tolist()
+            with open(alpha_file, encoding='utf-8', mode='w') as f:
+                try:
+                    yaml.dump(data=alphas_yaml, stream=f, allow_unicode=True)
+                except Exception as e:
+                    print(e)
+            geno, model_yaml = ori_model.genotype()
+            geno_file = os.path.join(geno_dir, f'{epoch}.yaml')
+            with open(geno_file, encoding='utf-8', mode='w') as f:
+                try:
+                    yaml.dump(data=model_yaml, stream=f, allow_unicode=True)
+                except Exception as e:
+                    print(e)
+            print("==================")
+            print("normalized alphas:")
+            ori_model.display_alphas()
+            print('genotype:')
+            for g in geno:
+                print(g)
+            print("==================")
         
-        # end epoch ----------------------------------------------------------------------------------------------------
-
-        # Scheduler
-        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
+        lr = [x['lr'] for x in optimizer.param_groups]
         scheduler.step()
-
-        # DDP process 0 or single-GPU
         if rank in [-1, 0]:
-            # mAP
-#            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
-            final_epoch = epoch + 1 == epochs
-            if not opt.notest or final_epoch:  # Calculate mAP
+            if not opt.notest or (epoch + 1) == epochs:
                 ema.apply_shadow()
-                results, maps, times = test.test(opt.data,
-                                                 batch_size=batch_size * 2,
-                                                 imgsz=imgsz_test,
-#                                                 model=ema.ema,
-                                                 model=model.module if is_parallel(model) else model,
-                                                 single_cls=opt.single_cls,
-                                                 dataloader=testloader,
-                                                 save_dir=save_dir,
-                                                 verbose=nc < 50 and final_epoch,
-                                                 plots=plots and final_epoch,
-                                                 log_imgs=opt.log_imgs if wandb else 0,
-                                                 compute_loss=compute_loss)
+                results, maps, times = test.test(
+                    opt.data,
+                    batch_size=batch_size * 2,
+                    imgsz=imgsz_test,
+                    model=model.module if is_parallel(model) else model,
+                    single_cls=opt.single_cls,
+                    dataloader=testloader,
+                    save_dir=save_dir,
+                    verbose=nc < 50 and (epoch + 1) == epochs,
+                    plots=plots and (epoch + 1) == epochs,
+                    log_imgs=opt.log_imgs if wandb_run else 0,
+                    compute_loss=compute_loss
+                )
                 ema.restore()
-
-            # Write
             with open(results_file, 'a') as f:
-                f.write(s + '%10.4g' * 7 % results + '\n')  # append metrics, val_loss
+                f.write(s + '%10.4g' * 7 % results + '\n')
             if len(opt.name) and opt.bucket:
-                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))
-
-            # Log
-            tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
+                os.system(f'gsutil cp {results_file} gs://{opt.bucket}/results/results{opt.name}.txt')
+            tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',
                     'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
-                    'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
-                    'x/lr0', 'x/lr1', 'x/lr2']  # params
+                    'val/box_loss', 'val/obj_loss', 'val/cls_loss',
+                    'x/lr0', 'x/lr1', 'x/lr2']
             for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                 if tb_writer:
-                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
-                if wandb:
-                    wandb.log({tag: x}, step=epoch, commit=tag == tags[-1])  # W&B
-
-            # Update best mAP
-            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
+                    tb_writer.add_scalar(tag, x, epoch)
+                if wandb_run:
+                    wandb_run.log({tag: x}, step=epoch, commit=tag == tags[-1])
+            fi = fitness(np.array(results).reshape(1, -1))
             if fi > best_fitness:
                 best_fitness = fi
-
-            # Save model
-            if (not opt.nosave) or (final_epoch and not opt.evolve):  # if save
-                ckpt = {'epoch': epoch,
-                        'best_fitness': best_fitness,
-                        'training_results': results_file.read_text(),
-#                        'model': (model.module if is_parallel(model) else model).half(),
-                        'model': model.module if is_parallel(model) else model,
-#                        'ema': deepcopy(ema.ema).half(),
-                        'ema': ema.shadow,
-                        'updates': ema.updates,
-                        'optimizer': optimizer.state_dict(),
-                        'wandb_id': wandb_run.id if wandb else None}
-                # Save last, best and delete
+            if (not opt.nosave) or ((epoch + 1) == epochs and not opt.evolve):
+                ckpt = {
+                    'epoch': epoch,
+                    'best_fitness': best_fitness,
+                    'training_results': results_file.read_text(),
+                    'model': model.module if is_parallel(model) else model,
+                    'ema': ema.shadow,
+                    'updates': ema.updates,
+                    'optimizer': optimizer.state_dict(),
+                    'wandb_id': wandb_run.id if wandb_run else None
+                }
                 torch.save(ckpt, last)
                 if best_fitness == fi:
                     torch.save(ckpt, best)
                 del ckpt
                 model.float()
-
-        # end epoch ----------------------------------------------------------------------------------------------------
-    # end training
+        # LR scheduler is stepped exactly once per epoch (right after `lr` is captured for logging); no second step here
 
     if rank in [-1, 0]:
-        # Strip optimizers
-        final = best if best.exists() else last  # final model
-        for f in last, best:
+        final = best if best.exists() else last
+        for f in [last, best]:
             if f.exists():
-                tmp = f.with_name('stripped_%s'%f.name)
+                tmp = f.with_name('stripped_%s' % f.name)
                 strip_optimizer_search(f, s=tmp)
         if opt.bucket:
-            os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload
-
-        # Plots
+            os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')
         if plots:
-            plot_results(save_dir=save_dir)  # save as results.png
-            if wandb:
+            plot_results(save_dir=save_dir)
+            if wandb_run:
                 files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
-                wandb.log({"Results": [wandb.Image(str(save_dir / f), caption=f) for f in files
-                                       if (save_dir / f).exists()]})
+                import wandb as _wandb  # Image is exported by the wandb module, not by the Run object
+                wandb_run.log({"Results": [_wandb.Image(str(save_dir / f), caption=f)
+                                           for f in files if (save_dir / f).exists()]})
                 if opt.log_artifacts:
-                    wandb.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem)
-
-        # Test best.pt
+                    wandb_run.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem)
         logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
-#        best = best.with_name('stripped_%s'%best.name)
-        if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
-            for m in (last, best) if best.exists() else (last):  # speed, mAP tests
-                stripped_m = m.with_name('stripped_%s'%m.name)
+        if opt.data.endswith('coco.yaml') and nc == 80:
+            for m in (last, best) if best.exists() else (last,):
+                stripped_m = m.with_name('stripped_%s' % m.name)
                 print(stripped_m)
-                results, _, _ = test.test(opt.data,
-                                          batch_size=batch_size * 2,
-                                          imgsz=imgsz_test,
-                                          conf_thres=0.001,
-                                          iou_thres=0.7,
-#                                          model=attempt_load(stripped_m, device).half(),
-                                          model=attempt_load(stripped_m, device),
-                                          single_cls=opt.single_cls,
-                                          dataloader=testloader,
-                                          save_dir=save_dir,
-                                          save_json=True,
-                                          plots=False)
-
+                results, _, _ = test.test(
+                    opt.data,
+                    batch_size=batch_size * 2,
+                    imgsz=imgsz_test,
+                    conf_thres=0.001,
+                    iou_thres=0.7,
+                    model=attempt_load(stripped_m, device),
+                    single_cls=opt.single_cls,
+                    dataloader=testloader,
+                    save_dir=save_dir,
+                    save_json=True,
+                    plots=False
+                )
     else:
         dist.destroy_process_group()
 
-    wandb.run.finish() if wandb and wandb.run else None
+    if wandb_run:
+        wandb_run.finish()
     torch.cuda.empty_cache()
     return results
 
@@ -600,59 +503,47 @@ def valid_generator():
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--quad', action='store_true', help='quad dataloader')
     parser.add_argument('--linear-lr', action='store_true', help='linear LR')
-
-    # For NAS
     parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
     parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
     parser.add_argument('--search_warmup', type=int, default=0, help='Epoch to Warmup the operation weights')
     parser.add_argument('--train_portion', type=float, default=0.5, help='portion to split the train set and search set')
-
     opt = parser.parse_args()
     opt.project = '{}-{}'.format(opt.project, time.strftime("%Y%m%d-%H%M%S"))
-    print("Experiments dir: %s"%opt.project)
-    print("cfg file: %s"%opt.cfg)
-
-    # Set DDP variables
+    print("Experiments dir: %s" % opt.project)
+    print("cfg file: %s" % opt.cfg)
     opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
     opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
     set_logging(opt.global_rank)
     if opt.global_rank in [-1, 0]:
         check_git_status()
         check_requirements()
-
-    # Resume
-    if opt.resume:  # resume an interrupted run
-        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
+    if opt.resume:
+        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()
         assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
         apriori = opt.global_rank, opt.local_rank
         with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
-            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader))  # replace
-        opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = '', ckpt, True, opt.total_batch_size, *apriori  # reinstate
+            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader))
+        opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = (
+            '', ckpt, True, opt.total_batch_size, *apriori
+        )
         logger.info('Resuming training from %s' % ckpt)
     else:
-        # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
-        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)  # check files
+        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)
         assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
-        opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
+        opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))
         opt.name = 'evolve' if opt.evolve else opt.name
-        opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve)  # increment run
-
-    # DDP mode
+        opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve)
     opt.total_batch_size = opt.batch_size
     device = select_device(opt.device, batch_size=opt.batch_size)
     if opt.local_rank != -1:
         assert torch.cuda.device_count() > opt.local_rank
         torch.cuda.set_device(opt.local_rank)
         device = torch.device('cuda', opt.local_rank)
-        dist.init_process_group(backend='nccl', init_method='env://')  # distributed backend
+        dist.init_process_group(backend='nccl', init_method='env://')
         assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
         opt.batch_size = opt.total_batch_size // opt.world_size
-
-    # Hyperparameters
     with open(opt.hyp) as f:
-        hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps
-
-    # Train
+        hyp = yaml.load(f, Loader=yaml.SafeLoader)
     logger.info(opt)
     try:
         import wandb
@@ -660,92 +551,88 @@ def valid_generator():
         wandb = None
         prefix = colorstr('wandb: ')
         logger.info(f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)")
+    wandb_run = None
+    if wandb is not None and not opt.evolve and opt.global_rank in [-1, 0]:
+        try:
+            wandb_run = wandb.init(
+                config=opt,
+                resume="allow",
+                project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
+                name=Path(opt.save_dir).stem,
+                entity=opt.entity,
+                id=ckpt.get('wandb_id') if isinstance(locals().get('ckpt'), dict) else None  # on --resume, ckpt is a path str (no .get)
+            )
+        except wandb.errors.UsageError as e:
+            print(f"Wandb init failed: {e}. Disabling wandb logging.")
+            wandb_run = None
     if not opt.evolve:
-        tb_writer = None  # init loggers
+        tb_writer = None
         if opt.global_rank in [-1, 0]:
             logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/')
-            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
-        train(hyp, opt, device, tb_writer, wandb)
-
-    # Evolve hyperparameters (optional)
+            tb_writer = SummaryWriter(opt.save_dir)
+        train(hyp, opt, device, tb_writer, wandb_run)
     else:
-        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
-        meta = {'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
-                'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
-                'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
-                'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
-                'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
-                'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
-                'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
-                'box': (1, 0.02, 0.2),  # box loss gain
-                'cls': (1, 0.2, 4.0),  # cls loss gain
-                'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
-                'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
-                'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
-                'iou_t': (0, 0.1, 0.7),  # IoU training threshold
-                'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
-                'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
-                'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
-                'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
-                'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
-                'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
-                'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
-                'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
-                'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
-                'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
-                'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
-                'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
-                'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
-                'mosaic': (1, 0.0, 1.0),  # image mixup (probability)
-                'mixup': (1, 0.0, 1.0)}  # image mixup (probability)
-
+        meta = {'lr0': (1, 1e-5, 1e-1),
+                'lrf': (1, 0.01, 1.0),
+                'momentum': (0.3, 0.6, 0.98),
+                'weight_decay': (1, 0.0, 0.001),
+                'warmup_epochs': (1, 0.0, 5.0),
+                'warmup_momentum': (1, 0.0, 0.95),
+                'warmup_bias_lr': (1, 0.0, 0.2),
+                'box': (1, 0.02, 0.2),
+                'cls': (1, 0.2, 4.0),
+                'cls_pw': (1, 0.5, 2.0),
+                'obj': (1, 0.2, 4.0),
+                'obj_pw': (1, 0.5, 2.0),
+                'iou_t': (0, 0.1, 0.7),
+                'anchor_t': (1, 2.0, 8.0),
+                'anchors': (2, 2.0, 10.0),
+                'fl_gamma': (0, 0.0, 2.0),
+                'hsv_h': (1, 0.0, 0.1),
+                'hsv_s': (1, 0.0, 0.9),
+                'hsv_v': (1, 0.0, 0.9),
+                'degrees': (1, 0.0, 45.0),
+                'translate': (1, 0.0, 0.9),
+                'scale': (1, 0.0, 0.9),
+                'shear': (1, 0.0, 10.0),
+                'perspective': (0, 0.0, 0.001),
+                'flipud': (1, 0.0, 1.0),
+                'fliplr': (0, 0.0, 1.0),
+                'mosaic': (1, 0.0, 1.0),
+                'mixup': (1, 0.0, 1.0)}
         assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
-        opt.notest, opt.nosave = True, True  # only test/save final epoch
-        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
-        yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml'  # save best result here
+        opt.notest, opt.nosave = True, True
+        yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml'
         if opt.bucket:
-            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists
-
-        for _ in range(300):  # generations to evolve
-            if Path('evolve.txt').exists():  # if evolve.txt exists: select best hyps and mutate
-                # Select parent(s)
-                parent = 'single'  # parent selection method: 'single' or 'weighted'
+            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)
+        for _ in range(300):
+            if Path('evolve.txt').exists():
+                parent = 'single'
                 x = np.loadtxt('evolve.txt', ndmin=2)
-                n = min(5, len(x))  # number of previous results to consider
-                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
-                w = fitness(x) - fitness(x).min()  # weights
+                n = min(5, len(x))
+                x = x[np.argsort(-fitness(x))][:n]
+                w = fitness(x) - fitness(x).min()
                 if parent == 'single' or len(x) == 1:
-                    # x = x[random.randint(0, n - 1)]  # random selection
-                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
+                    x = x[random.choices(range(n), weights=w)[0]]
                 elif parent == 'weighted':
-                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination
-
-                # Mutate
-                mp, s = 0.8, 0.2  # mutation probability, sigma
+                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()
+                mp, s = 0.8, 0.2
                 npr = np.random
                 npr.seed(int(time.time()))
-                g = np.array([x[0] for x in meta.values()])  # gains 0-1
+                g = np.array([x[0] for x in meta.values()])
                 ng = len(meta)
                 v = np.ones(ng)
-                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
+                while all(v == 1):
                     v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
-                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
-                    hyp[k] = float(x[i + 7] * v[i])  # mutate
-
-            # Constrain to limits
+                for i, k in enumerate(hyp.keys()):
+                    hyp[k] = float(x[i + 7] * v[i])
             for k, v in meta.items():
-                hyp[k] = max(hyp[k], v[1])  # lower limit
-                hyp[k] = min(hyp[k], v[2])  # upper limit
-                hyp[k] = round(hyp[k], 5)  # significant digits
-
-            # Train mutation
+                hyp[k] = max(hyp[k], v[1])
+                hyp[k] = min(hyp[k], v[2])
+                hyp[k] = round(hyp[k], 5)
             results = train(hyp.copy(), opt, device, wandb=wandb)
-
-            # Write mutation results
             print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
-
-        # Plot results
         plot_evolution(yaml_file)
         print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
               f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
-
+      
diff --git a/utils/datasets.py b/utils/datasets.py
index 1506682..524ef10 100644
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -453,7 +453,8 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
                 x[:, 0] = 0
 
         n = len(shapes)  # number of images
-        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
+        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
+        # (the np.int alias was removed in NumPy 1.24; the builtin int is the drop-in replacement)
         nb = bi[-1] + 1  # number of batches
         self.batch = bi  # batch index of image
         self.n = n
@@ -481,7 +482,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
                 elif mini > 1:
                     shapes[i] = [1, 1 / mini]
 
-            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
 
         # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
         self.imgs = [None] * n
diff --git a/utils/loss.py b/utils/loss.py
index 080b51c..9557309 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -208,7 +208,7 @@ def build_targets(self, p, targets):
 
             # Append
             a = t[:, 6].long()  # anchor indices
-            indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
+            indices.append((b, a, gj.clamp_(0, int(gain[3].item()) - 1), gi.clamp_(0, int(gain[2].item()) - 1)))  # image, anchor, grid indices (clamp bounds passed as Python ints)
             tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
             anch.append(anchors[a])  # anchors
             tcls.append(c)  # class