Simple notebook to test sending data to Redis¶
This notebook serves as a "simulation of a car", sending data to the Redis database to verify that the script in 04_model_training_pipeline.ipynb
is working.
Imports¶
In [1]:
Copied!
import os
import sys
# Make the current working directory importable, then move two levels up
# (presumably to the repository root — TODO confirm).
# NOTE(review): os.chdir is not idempotent — re-running this cell keeps
# climbing further up the directory tree.
sys.path.append(os.getcwd())
os.chdir("../..")
# Hardcoded Redis test credentials/host for this simulation run.
# SECURITY NOTE(review): fine for a throwaway test cluster, but never
# commit real credentials like this — read them from the environment instead.
os.environ["REDIS_PASSWORD"] = "redis"
os.environ["REDIS_HOST"] = "10.152.183.250"
os.environ["REDIS_PORT"] = "6379"
import os
import sys

# Expose the notebook's directory on the import path, then hop two levels up
# (non-idempotent: each re-run climbs further).
sys.path.append(os.getcwd())
os.chdir("../..")

# Hardcoded Redis test connection settings for this "car simulator" run.
os.environ.update(
    {
        "REDIS_PASSWORD": "redis",
        "REDIS_HOST": "10.152.183.250",
        "REDIS_PORT": "6379",
    }
)
In [2]:
Copied!
# Standard library
import ast
import json
import time

# Third-party
import pandas as pd
import redis
from sklearn.model_selection import train_test_split

# workaround - horizontal scrollbar is not working in jupyterlab
from IPython.display import display, HTML

display(HTML("<style>.jp-OutputArea-output {display:flex}</style>"))
# Imports for data preparation (pandas/sklearn) and Redis publishing.
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import ast
import time
import redis
# workaround - horizontal scrollbar is not working in jupyterlab
from IPython.display import display, HTML
# NOTE(review): the CSS payload was lost in this export copy — the first copy
# of this cell injects a style fixing the output-area scrollbar; this call
# renders nothing.
display(HTML(""))
Datasets¶
In [3]:
Copied!
def load_new_dataset(num: int) -> pd.DataFrame:
    """Load one of the recorded test-drive CSV logs and prepare it for testing.

    Parameters:
        num: Index into the ``data_sets`` list of file names below.

    Returns:
        DataFrame with float sensor columns (special characters in names
        replaced by ``_``), a synthetic binary ``class`` label (rows up to
        index label 100 marked 1), a fixed ``vehicle_id`` and a fabricated
        ``timestamp`` starting roughly one hour ago.
    """
    data_sets = [
        "DS1_stopped_with_ignition_on_22Feb24_115812.csv",
        "DS1_stopped_with_ignition_on_25Jan24_124019.csv",
        "DS1_stopped_with_ignition_on_25Jan24_151531.csv",
        "DS1_stopped_with_ignition_on_25Mar24_153740.CSV",
        "DS2_national_road_90km_h_max_25Jan24_153019.csv",
        "DS2_national_road_90km_h_max_25Mar24_133516.CSV",
        "DS3_highway_120km_h_max_22Feb24_121145.csv",
        "DS3_highway_120km_h_max_25Mar24_154857.csv"
    ]
    file = "tools/vehicle/datasets/ateca_R4_2.0l_TDI/" + data_sets[num]
    df = pd.read_csv(file)
    # (removed a dead no-op `df.head()` statement here — its result was discarded)
    # Drop CSV export artifact columns and the first data row.
    df.drop(columns=["Unnamed: 0", "Unnamed: 25"], inplace=True)
    df.drop(index=0, inplace=True)
    timestamp_columns = [col for col in df.columns if col.startswith("STAMP")]
    # keep only the first timestamp column
    df["timestamp"] = df["STAMP"]
    df = df.drop(columns=timestamp_columns)
    # Synthetic labels for testing: mark the first rows (index labels <= 100)
    # as anomalous and force a distinctive load value on them.
    df["class"] = 0
    df["vehicle_id"] = "123abc"
    df.loc[:100, ["class"]] = 1
    df.loc[:100, ["Normed load value"]] = 100
    # Everything except the label and id columns is numeric sensor data.
    df[df.drop(columns=["class", "vehicle_id"]).columns] = df[df.drop(columns=["class", "vehicle_id"]).columns].astype(float)
    # Remove special characters from column names
    df.columns = df.columns.str.replace('[^A-Za-z0-9]+', '_', regex=True)
    # add some "reasonable" timestamp for testing
    df["timestamp"] = pd.to_datetime(time.time() - 3600 + df["timestamp"], unit="s")
    return df
def load_old_dataset():
    """Load the older Tiguan log (a Python-literal dump) into a labelled DataFrame.

    Returns a DataFrame with one column per logged signal, a synthetic binary
    ``class`` label, a fixed ``vehicle_id`` and a fabricated ``timestamp``
    column starting roughly one hour ago.
    """
    # The log file is a Python literal (parsed with ast.literal_eval), not JSON.
    with open("data/log_tiguan_27_mar_dac.txt") as f:
        data = ast.literal_eval(f.read())
    df = pd.DataFrame()
    for data_value in data:
        # Each entry appears to map one signal name to a list of records with
        # "ts_millis:" and "value" fields — sort by time, keep the values.
        # NOTE(review): concat in a loop is quadratic; acceptable for a small log.
        temp_df = pd.DataFrame(data_value[list(data_value)[0]]).sort_values(
            by="ts_millis:", ascending=True
        )["value"]
        # Name the series after its signal so it becomes that column of df.
        temp_df.rename(list(data_value)[0], inplace=True)
        df = pd.concat([df, temp_df], axis=1)
    df.dropna(inplace=True)
    # Synthetic labels for testing: mark rows with index labels <= 100 as
    # anomalous and force a distinctive engine load on them.
    df["class"] = 0
    df["vehicle_id"] = "123abc"
    df.loc[:100, ["class"]] = 1
    df.loc[:100, ["engine_load"]] = 100
    # add some "reasonable" timestamp for testing
    df["timestamp"] = pd.to_datetime([time.time() - 3600 + ix for ix in list(df.index)], unit="s")
    return df
def load_new_dataset(num: int) -> pd.DataFrame:
    """Load one of the recorded test-drive CSV logs and prepare it for testing.

    Parameters:
        num: Index into the ``data_sets`` list of file names below.

    Returns:
        DataFrame with float sensor columns (special characters in names
        replaced by ``_``), a synthetic binary ``class`` label (rows up to
        index label 100 marked 1), a fixed ``vehicle_id`` and a fabricated
        ``timestamp`` starting roughly one hour ago.
    """
    data_sets = [
        "DS1_stopped_with_ignition_on_22Feb24_115812.csv",
        "DS1_stopped_with_ignition_on_25Jan24_124019.csv",
        "DS1_stopped_with_ignition_on_25Jan24_151531.csv",
        "DS1_stopped_with_ignition_on_25Mar24_153740.CSV",
        "DS2_national_road_90km_h_max_25Jan24_153019.csv",
        "DS2_national_road_90km_h_max_25Mar24_133516.CSV",
        "DS3_highway_120km_h_max_22Feb24_121145.csv",
        "DS3_highway_120km_h_max_25Mar24_154857.csv"
    ]
    file = "tools/vehicle/datasets/ateca_R4_2.0l_TDI/" + data_sets[num]
    df = pd.read_csv(file)
    # (removed a dead no-op `df.head()` statement here — its result was discarded)
    # Drop CSV export artifact columns and the first data row.
    df.drop(columns=["Unnamed: 0", "Unnamed: 25"], inplace=True)
    df.drop(index=0, inplace=True)
    timestamp_columns = [col for col in df.columns if col.startswith("STAMP")]
    # keep only the first timestamp column
    df["timestamp"] = df["STAMP"]
    df = df.drop(columns=timestamp_columns)
    # Synthetic labels for testing: mark the first rows (index labels <= 100)
    # as anomalous and force a distinctive load value on them.
    df["class"] = 0
    df["vehicle_id"] = "123abc"
    df.loc[:100, ["class"]] = 1
    df.loc[:100, ["Normed load value"]] = 100
    # Everything except the label and id columns is numeric sensor data.
    df[df.drop(columns=["class", "vehicle_id"]).columns] = df[df.drop(columns=["class", "vehicle_id"]).columns].astype(float)
    # Remove special characters from column names
    df.columns = df.columns.str.replace('[^A-Za-z0-9]+', '_', regex=True)
    # add some "reasonable" timestamp for testing
    df["timestamp"] = pd.to_datetime(time.time() - 3600 + df["timestamp"], unit="s")
    return df
def load_old_dataset():
    """Load the older Tiguan log (a Python-literal dump) into a labelled DataFrame.

    Returns a DataFrame with one column per logged signal, a synthetic binary
    ``class`` label, a fixed ``vehicle_id`` and a fabricated ``timestamp``
    column starting roughly one hour ago.
    """
    # The log file is a Python literal (parsed with ast.literal_eval), not JSON.
    with open("data/log_tiguan_27_mar_dac.txt") as f:
        data = ast.literal_eval(f.read())
    df = pd.DataFrame()
    for data_value in data:
        # Each entry appears to map one signal name to a list of records with
        # "ts_millis:" and "value" fields — sort by time, keep the values.
        # NOTE(review): concat in a loop is quadratic; acceptable for a small log.
        temp_df = pd.DataFrame(data_value[list(data_value)[0]]).sort_values(
            by="ts_millis:", ascending=True
        )["value"]
        # Name the series after its signal so it becomes that column of df.
        temp_df.rename(list(data_value)[0], inplace=True)
        df = pd.concat([df, temp_df], axis=1)
    df.dropna(inplace=True)
    # Synthetic labels for testing: mark rows with index labels <= 100 as
    # anomalous and force a distinctive engine load on them.
    df["class"] = 0
    df["vehicle_id"] = "123abc"
    df.loc[:100, ["class"]] = 1
    df.loc[:100, ["engine_load"]] = 100
    # add some "reasonable" timestamp for testing
    df["timestamp"] = pd.to_datetime([time.time() - 3600 + ix for ix in list(df.index)], unit="s")
    return df
In [4]:
Copied!
# 1. get/create some example data
# df_pd = load_old_dataset()
# num=1 -> "DS1_stopped_with_ignition_on_25Jan24_124019.csv"
df_pd = load_new_dataset(num=1)
# (duplicate of the lines above — artifact of the notebook export)
# 1. get/create some example data
# df_pd = load_old_dataset()
df_pd = load_new_dataset(num=1)
In [5]:
Copied!
# Column roles: "class" is the target label, the id columns identify a sample,
# and every remaining column is treated as a continuous feature.
target_col = "class"
id_cols = ["vehicle_id", "timestamp"]
cat_cols = []
cont_cols = df_pd.drop(
    columns=id_cols + cat_cols + [target_col]
).columns.values.tolist()
# cat_cols is empty here, so this cast is currently a no-op.
df_pd[cat_cols] = df_pd[cat_cols].astype(str)
# (duplicate of the lines above — artifact of the notebook export)
target_col = "class"
id_cols = ["vehicle_id", "timestamp"]
cat_cols = []
cont_cols = df_pd.drop(
    columns=id_cols + cat_cols + [target_col]
).columns.values.tolist()
df_pd[cat_cols] = df_pd[cat_cols].astype(str)
In [6]:
Copied!
# Stratified 80/10/10 split: first carve 20% off for validation, then split
# that 20% evenly into validation and test halves. Fixed random_state keeps
# the split reproducible across runs.
valid_size = 0.2
test_size = 0.5
random_state = 1
df_train, df_valid = train_test_split(
    df_pd, test_size=valid_size, stratify=df_pd[target_col], random_state=random_state
)
# Half of the 20% hold-out becomes the test set (10% of the total each).
df_valid, df_test = train_test_split(
    df_valid,
    test_size=test_size,
    stratify=df_valid[target_col],
    random_state=random_state,
)
# (duplicate of the lines above — artifact of the notebook export)
valid_size = 0.2
test_size = 0.5
random_state = 1
df_train, df_valid = train_test_split(
    df_pd, test_size=valid_size, stratify=df_pd[target_col], random_state=random_state
)
df_valid, df_test = train_test_split(
    df_valid,
    test_size=test_size,
    stratify=df_valid[target_col],
    random_state=random_state,
)
In [7]:
Copied!
# Peek at the hold-out set that will be published to Redis below.
df_test.head()
# (duplicate line — artifact of the notebook export)
df_test.head()
Out[7]:
Vehicle_speed | Time_since_engine_start | Normed_load_value | Accelerator_pedal_position | Engine_torque | Oil_fill_level | Engine_oil_temperature | Fuel_level | Fuel_consumption | Brake_pressure | Engaged_gear_raw_signal_Bits_0_7 | Efficiency_of_the_SCR_catalytic_converter | timestamp | class | vehicle_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
36 | 0.0 | 120.0 | 100.0 | 14.5 | 40.4 | 30.044 | 13.5 | 9.0 | 0.82 | -0.03 | 0.0 | 0.00430 | 2024-08-08 08:43:15.730782509 | 1 | 123abc |
126 | 0.0 | 255.0 | 27.8 | 14.5 | 36.5 | 29.924 | 19.8 | 9.0 | 0.73 | -0.03 | 0.0 | 0.04614 | 2024-08-08 08:45:30.280782461 | 0 | 123abc |
102 | 0.0 | 219.0 | 29.8 | 14.5 | 38.8 | 29.804 | 18.5 | 9.0 | 0.77 | -0.03 | 0.0 | 0.00906 | 2024-08-08 08:44:54.430782557 | 0 | 123abc |
81 | 0.0 | 187.0 | 100.0 | 14.5 | 39.6 | 29.864 | 17.1 | 9.0 | 0.80 | -0.03 | 0.0 | 0.05289 | 2024-08-08 08:44:23.020782471 | 1 | 123abc |
426 | 0.0 | 705.0 | 25.1 | 14.5 | 30.5 | 35.524 | 29.8 | 8.0 | 0.60 | -0.03 | 0.0 | 0.08121 | 2024-08-08 08:53:00.800782442 | 0 | 123abc |
In [10]:
Copied!
# Simulate the car: publish the (unlabelled) test rows to the Redis channel.
df_test_redis = df_test.copy()
# Shift timestamps back another hour.
# NOTE(review): load_* already backdated timestamps by an hour — confirm this
# extra shift is intentional.
df_test_redis["timestamp"] -= pd.to_timedelta(1, unit="h")
# Drop the ground-truth label before publishing and serialize in "split" layout.
df_test_redis_json = df_test_redis.drop(columns=["class"]).reset_index(drop=True).to_json(orient="split")
# redis-py expects port as an int; the environment variable is a string.
redisClient = redis.Redis(
    host=os.environ["REDIS_HOST"],
    password=os.environ["REDIS_PASSWORD"],
    port=int(os.environ["REDIS_PORT"]),
)
time.sleep(2)  # Wait for a few seconds before publishing
# publish() returns the number of subscribers that received the message.
redisClient.publish("idneo_v2x", df_test_redis_json)
# (duplicate of the cell above — artifact of the notebook export)
# Simulate the car: publish the (unlabelled) test rows to the Redis channel.
df_test_redis = df_test.copy()
# Shift timestamps back another hour.
# NOTE(review): load_* already backdated timestamps by an hour — confirm this
# extra shift is intentional.
df_test_redis["timestamp"] -= pd.to_timedelta(1, unit="h")
# Drop the ground-truth label before publishing and serialize in "split" layout.
df_test_redis_json = df_test_redis.drop(columns=["class"]).reset_index(drop=True).to_json(orient="split")
# redis-py expects port as an int; the environment variable is a string.
redisClient = redis.Redis(
    host=os.environ["REDIS_HOST"],
    password=os.environ["REDIS_PASSWORD"],
    port=int(os.environ["REDIS_PORT"]),
)
time.sleep(2)  # Wait for a few seconds before publishing
# publish() returns the number of subscribers that received the message.
redisClient.publish("idneo_v2x", df_test_redis_json)
Out[10]:
2