Line Following Robot with Deep Learning

Sepideh Berahimzadeh

First, create a ROS node in Python that moves the TurtleBot3 step by step, capturing an image at each stop and saving it to a USB drive.

#!/usr/bin/env python
import time

import cv2
import rospy
from geometry_msgs.msg import Twist

# Set up the node and the velocity publisher once, before the capture loop
rospy.init_node('topic_publisher')
pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
rate = rospy.Rate(2)

# Open the USB camera (device index 4 here) and give it time to warm up
cap = cv2.VideoCapture(4)
time.sleep(2)
if not cap.isOpened():
    raise IOError('Cannot open camera')

done = False
for i in range(100):
    try:
        # Show the live feed until 's' saves a frame or 'q'/Esc quits
        while True:
            ret, frame = cap.read()
            if not ret:
                done = True
                break
            cv2.imshow('Capturing', frame)
            key = cv2.waitKey(1)
            if key == ord('s'):
                # Save the raw frame to the USB stick
                path = '/media/pi/SP UFD U3/photos/test/Croped' + str(i) + '.jpg'
                cv2.imwrite(filename=path, img=frame)

                # Read it back, shrink it, and crop the region of interest
                img_ = cv2.imread(path, cv2.IMREAD_ANYCOLOR)
                img_ = cv2.resize(img_, (100, 100))
                img_ = img_[80:100, 40:70]  # [y from top, x from left]
                cv2.imwrite(filename=path, img=img_)
                cv2.imshow(path, img_)
                print('Image %s saved' % i)
                print('2s waiting')
                time.sleep(2)
                break
            elif key == ord('q') or key == 27:  # 'q' or Esc ends the session
                done = True
                break
    except KeyboardInterrupt:
        print('Turning off camera.')
        done = True
    if done:
        break

    # Nudge the robot slightly backwards, then stop, before the next capture
    move = Twist()
    move.linear.x = -0.05
    move.angular.z = 0.0
    pub.publish(move)
    rate.sleep()
    move.linear.x = 0.0
    pub.publish(move)
    print(i)

cap.release()
cv2.destroyAllWindows()
print('Program ended.')
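
One practical note: the training script below derives each image's label from the name of its parent folder, so the captured images need to be sorted into one subfolder per class before training. A minimal sketch to check that layout, assuming the same USB mount point and the three class names used later:

import os

# Hypothetical dataset root on the USB stick; adjust to your mount point.
DATASET = '/media/pi/SP UFD U3/photos/test'

# The training loop labels each image by its parent directory name,
# so images are expected under forward/, right/ and left/ subfolders.
for class_name in ('forward', 'right', 'left'):
    class_dir = os.path.join(DATASET, class_name)
    n = len(os.listdir(class_dir)) if os.path.isdir(class_dir) else 0
    print('%s: %d images' % (class_name, n))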

Next, train LeNet-5 on the robot's dataset.

# import the necessary packages
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense
from tensorflow.keras.utils import img_to_array, to_categorical  # since TF/Keras 2.9, img_to_array lives in keras.utils
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from imutils import paths
import numpy as np
import random
import cv2
import os
import matplotlib.pyplot as plt

class LeNet:
    @staticmethod
    def build(width, height, depth, classes):
        # initialize the model
        model = Sequential()
        inputShape = (height, width, depth)

        # first set of CONV => RELU => POOL layers
        model.add(Conv2D(20, (5, 5), padding="same", input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

        # second set of CONV => RELU => POOL layers
        model.add(Conv2D(50, (5, 5), padding="same"))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

        # first (and only) set of FC => RELU layers
        model.add(Flatten())
        model.add(Dense(500))
        model.add(Activation("relu"))

        # softmax classifier
        model.add(Dense(classes))
        model.add(Activation("softmax"))

        # return the constructed network architecture
        return model


dataset = 'H://photos//test'

# initialize the data and labels
print("[INFO] loading images...")
data = []
labels = []

# grab the image paths and randomly shuffle them
imagePaths = sorted(list(paths.list_images(dataset)))
random.seed(42)
random.shuffle(imagePaths)

# loop over the input images
for imagePath in imagePaths:
    # load the image, pre-process it, and store it in the data list
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (28, 28))
    image = img_to_array(image)
    data.append(image)

    # extract the class label from the image path and update the labels list
    label = imagePath.split(os.path.sep)[-2]
    print(label)
    if label == 'forward':
        label = 0
    elif label == 'right':
        label = 1
    else:  # 'left'
        label = 2
    labels.append(label)


# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
    test_size=0.25, random_state=42)

# convert the labels from integers to vectors
trainY = to_categorical(trainY, num_classes=3)
testY = to_categorical(testY, num_classes=3)


# initialize the number of epochs to train for, initial learning rate,
# and batch size
EPOCHS = 100
INIT_LR = 1e-3
BS = 32

# initialize the model
print("[INFO] compiling model...")
model = LeNet.build(width=28, height=28, depth=3, classes=3)
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / EPOCHS)
# categorical_crossentropy is the appropriate loss for a 3-class softmax output
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["acc"])

# train the network
# (more on model.fit(): https://keras.io/api/models/model_training_apis/)
print("[INFO] training network...")
history = model.fit(trainX, trainY, batch_size=BS,
    validation_data=(testX, testY), steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS, verbose=1)

# save the model to the USB drive
print("[INFO] serializing network...")
model.save("H://photos//model")

model.summary()

plt.xlabel('Epoch Number')
plt.ylabel("Loss / Accuracy Magnitude")
plt.plot(history.history['loss'], label="loss")
plt.plot(history.history['acc'], label="acc")
plt.plot(history.history['val_loss'], label="val_loss")
plt.plot(history.history['val_acc'], label="val_acc")
plt.legend()
plt.show()
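
Beyond the loss and accuracy curves, it can help to look at per-class errors. A small, hedged sketch that reuses the testX/testY split and the trained model from above to print a confusion matrix:

from sklearn.metrics import confusion_matrix, classification_report

# Predict class indices for the held-out 25% split
pred = np.argmax(model.predict(testX), axis=1)
true = np.argmax(testY, axis=1)

# Rows = true class, columns = predicted class (0=forward, 1=right, 2=left)
print(confusion_matrix(true, pred))
print(classification_report(true, pred,
    target_names=['forward', 'right', 'left']))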

Model summary

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 conv2d_2 (Conv2D)           (None, 28, 28, 20)        1520

 activation_4 (Activation)   (None, 28, 28, 20)        0

 max_pooling2d_2             (None, 14, 14, 20)        0
 (MaxPooling2D)

 conv2d_3 (Conv2D)           (None, 14, 14, 50)        25050

 activation_5 (Activation)   (None, 14, 14, 50)        0

 max_pooling2d_3             (None, 7, 7, 50)          0
 (MaxPooling2D)

 flatten_1 (Flatten)         (None, 2450)              0

 dense_2 (Dense)             (None, 500)               1225500

 activation_6 (Activation)   (None, 500)               0

 dense_3 (Dense)             (None, 3)                 1503

 activation_7 (Activation)   (None, 3)                 0

=================================================================
Total params: 1,253,573
Trainable params: 1,253,573
Non-trainable params: 0
_________________________________________________________________
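
These parameter counts follow directly from the layer shapes; a quick sanity check in plain Python (each filter or unit carries one bias term):

# conv layers: (kernel_h * kernel_w * in_channels + 1 bias) * filters
print((5 * 5 * 3 + 1) * 20)           # conv2d_2 -> 1520
print((5 * 5 * 20 + 1) * 50)          # conv2d_3 -> 25050
# dense layers: (inputs + 1 bias) * units
print((7 * 7 * 50 + 1) * 500)         # dense_2  -> 1225500 (flattened 7x7x50 = 2450)
print((500 + 1) * 3)                  # dense_3  -> 1503
print(1520 + 25050 + 1225500 + 1503)  # total    -> 1253573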

What are loss and val_loss?

In deep learning, the loss is the value that a neural network is trying to minimize: it’s the distance between the ground truth and the predictions. In order to minimize this distance, the neural network learns by adjusting weights and biases in a manner that reduces the loss.

For instance, in regression tasks you have a continuous target, e.g., height. What you want to minimize is the difference between your predictions and the actual heights, so you can use mean_absolute_error as the loss to tell the network this is what it needs to minimize.
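
A tiny worked example with NumPy (the numbers are made up for illustration):

import numpy as np

heights_true = np.array([170.0, 182.0, 165.0])
heights_pred = np.array([168.0, 185.0, 166.0])

# mean_absolute_error: mean(|y_true - y_pred|) = (2 + 3 + 1) / 3
print(np.mean(np.abs(heights_true - heights_pred)))  # 2.0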

In classification it's a little more complicated, but very similar. Predicted classes are based on probabilities, so the loss is also based on probability: the network minimizes the chance of assigning a low probability to the actual class. The loss is typically categorical_crossentropy.
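
Concretely, categorical_crossentropy is the negative log of the probability assigned to the true class, so a confident correct prediction costs little and an unsure one costs much more. A small illustration:

import numpy as np

# one-hot truth: the sample is class 1 ("right")
y_true = np.array([0.0, 1.0, 0.0])

confident = np.array([0.05, 0.90, 0.05])  # high probability on the true class
unsure    = np.array([0.40, 0.30, 0.30])  # low probability on the true class

# categorical cross-entropy: -sum(y_true * log(y_pred))
print(-np.sum(y_true * np.log(confident)))  # ~0.105
print(-np.sum(y_true * np.log(unsure)))     # ~1.204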

loss and val_loss differ because the former is computed on the training set and the latter on the validation set. As such, val_loss is a good indication of how the model performs on unseen data. You can get a validation set by passing validation_data=(x_test, y_test) or validation_split=0.2.
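
For instance, with the variables from the training script above, the two options look like this (a sketch; only one of the two calls is needed):

# Option 1: pass an explicit held-out set
history = model.fit(trainX, trainY,
                    validation_data=(testX, testY),
                    epochs=10, batch_size=32)

# Option 2: let Keras hold back the last 20% of the training data
history = model.fit(trainX, trainY,
                    validation_split=0.2,
                    epochs=10, batch_size=32)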

It's best to rely on val_loss to prevent overfitting. Overfitting is when the model fits the training data too closely: the loss keeps decreasing while val_loss stalls or even increases.
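
One common way to act on this signal is Keras's EarlyStopping callback, which monitors val_loss and halts training once it stops improving. A minimal sketch with the variables from the training script:

from tensorflow.keras.callbacks import EarlyStopping

# Stop when val_loss hasn't improved for 10 epochs and keep the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=10,
                           restore_best_weights=True)

history = model.fit(trainX, trainY,
                    validation_data=(testX, testY),
                    epochs=100, batch_size=32,
                    callbacks=[early_stop])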

What are acc and val_acc?

Accuracy is a metric only for classification. It makes no sense on a task with a continuous target. It gives the percentage of instances that are correctly classified.

Once again, acc is measured on the training data and val_acc on the validation data. It's best to rely on val_acc for a fair picture of model performance, because a large enough network will eventually fit the training data at 100% yet still perform poorly on unseen data.

as Nicolas Gervais explained here.

Test the trained model

import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import img_to_array


# control the direction of the robot based on the CNN's prediction
def control_robot(image):
    prediction = np.argmax(model.predict(image))
    if prediction == 0:
        print("forward")
        # mc.forward()
    elif prediction == 2:
        print("left")
        # mc.left()
    else:
        print("right")
        # mc.right()


model = load_model("H://photos//model")

if __name__ == "__main__":
    while True:
        # mc.stop()
        image = cv2.imread('C://Users//asus//Desktop//New folder//20.jpg')
        cv2.imshow('image', image)
        plt.imshow(image)

        # same preprocessing as at training time
        image = cv2.resize(image, (28, 28))
        image = img_to_array(image)
        image = np.array(image, dtype="float") / 255.0
        image = image.reshape(-1, 28, 28, 3)
        control_robot(image)

        key = cv2.waitKey(0)
        if key == ord('q'):
            cv2.destroyAllWindows()
            break
1/1 [==============================] - 0s 78ms/step
left
1/1 [==============================] - 0s 89ms/step
forward
1/1 [==============================] - 0s 85ms/step
right
1/1 [==============================] - 0s 85ms/step
right
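
To close the loop on the robot itself, the same prediction can drive /cmd_vel directly instead of printing. This is only a hedged sketch, reusing the camera index and label mapping from above; the model path and velocity values are illustrative placeholders, not tuned numbers:

import cv2
import numpy as np
import rospy
from geometry_msgs.msg import Twist
from tensorflow.keras.models import load_model

rospy.init_node('line_follower')
pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
model = load_model('/media/pi/SP UFD U3/photos/model')  # hypothetical on-robot path
cap = cv2.VideoCapture(4)

while not rospy.is_shutdown():
    ret, frame = cap.read()
    if not ret:
        break
    # same preprocessing as at training time
    image = cv2.resize(frame, (28, 28)).astype('float') / 255.0
    prediction = np.argmax(model.predict(image.reshape(-1, 28, 28, 3)))

    move = Twist()
    if prediction == 0:    # forward
        move.linear.x = 0.05
    elif prediction == 2:  # left
        move.angular.z = 0.3
    else:                  # right
        move.angular.z = -0.3
    pub.publish(move)

cap.release()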
