So I have an AI that tries to decide whether it should buy, hold, or sell stocks. Whenever I set the batch size to anything less than 64, it shows the progress bars but also starts filling up memory indefinitely, even with a batch size of 1! But when I set the batch size to anything bigger than 64, no matter how big, it will NEVER fill up the entire RAM. I even tried a batch size of 10,000,000,000 and it ran with no issues for about 250 episodes until I stopped the code, since each episode takes a while. I'm currently running this on a laptop with an NVIDIA 4060, in an env on WSL2.
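For reference, this is roughly how I watch the memory climb while it runs, just polling psutil once per episode (the log_memory helper below is something I threw together for this post, it is not part of the script itself):

import psutil

def log_memory(tag=""):
    # Print overall RAM usage so I can see it creep up episode by episode
    mem = psutil.virtual_memory()
    print(f"[{tag}] RAM used: {mem.percent}% ({mem.used / 1e9:.1f} GB)", flush=True)

# e.g. log_memory(f"episode {episode + 1}") at the end of each episode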
Here is my code. If anyone can help or can figure out a fix, I could maybe start running this on Lambda, which I would love to be able to do.
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
import gym
from gym import spaces
import os
import sys
import io
import random
import logging
import matplotlib.pyplot as plt
import gc
import psutil
mem = psutil.virtual_memory()
print(f"Memory usage: {mem.percent}%")
# Check for available GPUs
gpus = tf.config.list_physical_devices('GPU')
if gpus:
try:
# Enable memory growth on all GPUs
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
print(f"Using GPU: {len(gpus)} available")
except RuntimeError as e:
print(e)
else:
print("No GPU found, running on CPU")
# Enable mixed precision to speed up training
tf.keras.mixed_precision.set_global_policy('mixed_float16')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0' # Show all logs (including info)
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
logging.basicConfig(
filename='training.log',
filemode='a',
format='%(asctime)s - %(levelname)s - %(message)s',
level=logging.INFO
)
logging.info("Script started.")
# Simple AI trading environment
class TradingEnv(gym.Env):
def __init__(self, df, initial_balance=10000, window_size=120, risk_per_trade=1, stop_loss_percent=1):
super(TradingEnv, self).__init__()
self.df = df
self.current_step = 0
self.initial_balance = initial_balance
self.balance = self.initial_balance
self.num_shares = 0
self.window_size = window_size
# Action space: 0 = Hold, 1 = Buy, 2 = Sell
self.action_space = spaces.Discrete(3)
# Observation space: A window of previous prices and current balance
self.observation_space = spaces.Box(low=0, high=np.inf, shape=(window_size, 6), dtype=np.float32) # 6 features per step: OHLCV + balance
# Risk management parameters
self.risk_per_trade = risk_per_trade # Fraction of the balance risked per trade (1 = 100%)
self.stop_loss_percent = stop_loss_percent # Stop-loss as a fraction of the entry price (1 = 100%)
def reset(self):
self.current_step = self.window_size
self.balance = self.initial_balance
self.num_shares = 0
self.entry_price = 0 # Track entry price for stop-loss
return self._next_observation()
def _next_observation(self):
window = self.df.iloc[self.current_step - self.window_size:self.current_step][['Open', 'High', 'Low', 'Close', 'Volume']].values
balance_data = np.array([[self.balance]] * self.window_size)
return np.concatenate((window, balance_data), axis=1).astype(np.float32)
def step(self, action):
current_price = self.df.iloc[self.current_step]['Close']
reward = 0
if action == 1: # Buy
# Calculate position size based on risk management
risk_amount = self.balance * self.risk_per_trade # Amount willing to risk
shares_to_buy = int(risk_amount / current_price) # Calculate shares based on risk
if self.balance >= shares_to_buy * current_price:
self.num_shares += shares_to_buy
self.balance -= shares_to_buy * current_price
self.entry_price = current_price # Set entry price for stop-loss
elif action == 2: # Sell
if self.num_shares > 0:
reward = self.num_shares * (current_price - self.entry_price) # Profit calculation
self.balance += self.num_shares * current_price
self.num_shares = 0
# Check for stop-loss condition
if self.num_shares > 0 and current_price < self.entry_price * (1 - self.stop_loss_percent):
# Execute stop-loss
reward = self.num_shares * (current_price - self.entry_price) # Loss calculation
self.balance += self.num_shares * current_price
self.num_shares = 0 # Sell all shares at stop-loss
print(f"Stop-loss triggered at step {self.current_step}, selling shares.")
# Move to the next step
self.current_step += 1
done = self.current_step >= len(self.df) - 1
return self._next_observation(), reward, done, {}
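# Quick shape sanity check I sometimes use while debugging (purely illustrative,
# it is never called anywhere; it works on any OHLCV DataFrame loaded with load_data):
def _sanity_check_env(df):
    env = TradingEnv(df)
    obs = env.reset()
    print("observation shape:", obs.shape)  # expect (window_size, 6): OHLCV + balance
    obs, reward, done, _ = env.step(env.action_space.sample())
    print("after one step:", obs.shape, "reward:", reward, "done:", done)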
# Function to build the LSTM model with added dropout layers
def build_lstm_model(window_size, action_shape):
model = tf.keras.Sequential()
model.add(layers.LSTM(256, return_sequences=True, input_shape=(window_size, 6)))
model.add(layers.LSTM(128, return_sequences=False))
model.add(layers.BatchNormalization())
model.add(layers.Dense(96, activation='relu'))
model.add(layers.Dense(48, activation='relu'))
model.add(layers.Dense(24, activation='relu'))
model.add(layers.Dense(action_shape, activation='linear'))
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001))
return model
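# Side note: since mixed_float16 is enabled above, I think the TF mixed-precision guide
# recommends keeping the final layer's output in float32 (e.g. Dense(..., dtype='float32')),
# but I've left it as-is here.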
# Function to save training parameters like epsilon
def save_training_parameters(epsilon, filename='training_parameters.csv'):
params = pd.DataFrame({
'epsilon': [epsilon]
})
params.to_csv(filename, index=False)
# Function to load training parameters like epsilon
def load_training_parameters(filename='training_parameters.csv'):
if os.path.exists(filename):
params = pd.read_csv(filename)
if 'epsilon' in params.columns:
return params['epsilon'][0]
return 1.0 # Default value for epsilon if no saved file exists
# Save the model
def save_model(model, path='trading_model.keras'):
model.save(path) # Save in the new Keras format
# Load the model
def load_model(path='trading_model.keras'):
return tf.keras.models.load_model(path)
# Save status (balance, share position)
def save_status(balance, num_shares, step, filename='trading_status.csv'):
status = pd.DataFrame({
'balance': [balance],
'num_shares': [num_shares],
'step': [step]
})
status.to_csv(filename, index=False)
# Load status
def load_status(filename='trading_status.csv'):
if os.path.exists(filename):
status = pd.read_csv(filename)
return status['balance'][0], status['num_shares'][0], status['step'][0]
else:
return 10000, 0, 0 # Default starting values
# Load historical data
def load_data(csv_file):
df = pd.read_csv(csv_file)
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True) # Set 'timestamp' as index
df = df[['Open', 'High', 'Low', 'Close', 'Volume']] # Ignore 'Adj Close'
return df
# New function to pick a random file from a folder
def pick_random_file(folder):
files = os.listdir(folder) # Get all files in the folder
csv_files = [f for f in files if f.endswith('.csv')] # Filter for CSV files
if not csv_files:
raise Exception("No CSV files found in the folder.")
return os.path.join(folder, random.choice(csv_files)) # Choose a random file and return its path
# Constants
BATCH_SIZE = 1000000000 # Batch size for training
GAMMA = 0.95 # Discount factor for future rewards
# Define the training step function outside of the main loop
def train_step(model, states_array, q_values):
return model.train_on_batch(states_array, q_values)
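# train_on_batch performs a single gradient update on this one batch and returns the batch loss.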
def main():
print("Starting the training process...", flush=True)
checkpoint_filepath = 'best_model.weights.h5'
model_checkpoint_callback = ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=True,
monitor='loss',
mode='min',
save_best_only=True
)
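# (Note: this callback is only defined here; it never gets passed to a fit()/training call below.)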
episode_count = 0
csv_change_frequency = 100 # Change CSV every 100 episodes
folder_path = 'DATA'
# Load the first CSV file
current_csv = pick_random_file(folder_path)
df = load_data(current_csv)
env = TradingEnv(df)
# Log the first CSV file being used
logging.info(f"Using CSV: {current_csv} for the first 100 episodes.")
# Load previous training parameters if they exist
epsilon = load_training_parameters()
# Load or initialize model
model = None
if os.path.exists('trading_model.keras'):
model = load_model('trading_model.keras')
print("Loaded existing model.", flush=True)
else:
model = build_lstm_model(env.window_size, env.action_space.n)
print("Created new model.", flush=True)
# Initialize replay memory
replay_memory = []
print(f"Started Training", flush=True)
# Training loop
for episode in range(10000): # Number of episodes for training
episode_count += 1
# Switch CSV file every 100 episodes
if episode_count % csv_change_frequency == 0:
current_csv = pick_random_file(folder_path) # Pick a new CSV
df = load_data(current_csv)
env = TradingEnv(df)
logging.info(f"Switched to new CSV: {current_csv} at episode {episode + 1}")
print(f"Starting episode {episode + 1}...", flush=True)
state = env.reset()
done = False
total_reward = 0
step_count = 0 # Initialize step counter
while not done:
step_count += 1 # Increment step counter
# Epsilon-greedy action selection
if np.random.rand() < epsilon:
action = env.action_space.sample() # Explore
else:
q_values = model.predict(state.reshape(1, env.window_size, 6), verbose=0)
action = np.argmax(q_values[0]) # Exploit
next_state, reward, done, _ = env.step(action)
replay_memory.append((state, action, reward, next_state, done))
if len(replay_memory) > 128:
replay_memory.pop(0) # Maintain the replay memory size
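# Note: the replay memory is capped at 128 transitions, so the training block below
# only ever runs when BATCH_SIZE is 128 or smaller.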
# Update the state
state = next_state
total_reward += reward
# Training the model with samples from the replay memory
if len(replay_memory) >= BATCH_SIZE:
batch = random.sample(replay_memory, BATCH_SIZE)
states_array = np.array([item[0] for item in batch])
actions_array = np.array([item[1] for item in batch])
rewards_array = np.array([item[2] for item in batch])
next_states_array = np.array([item[3] for item in batch])
dones_array = np.array([item[4] for item in batch])
# Q-value calculation
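# Bellman target: reward + GAMMA * max_a' Q(next_state, a'), with the bootstrap
# term zeroed out for terminal transitions (done == True).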
q_values_next = model.predict(next_states_array)
q_values_target = rewards_array + GAMMA * np.max(q_values_next, axis=1) * (1 - dones_array)
# Update Q-values
q_values = model.predict(states_array)
q_values[np.arange(BATCH_SIZE), actions_array] = q_values_target
# Train the model
train_step(model, states_array, q_values) # Call the training step
# Update epsilon value after each episode
epsilon = max(0.01, epsilon * 0.9999)
# Save model and status
save_model(model)
save_training_parameters(epsilon)
save_status(env.balance, env.num_shares, episode)
# Print progress
logging.info(f"Episode {episode + 1} finished in {step_count} steps with total reward: {total_reward}, Epsilon: {epsilon}, Using CSV: {current_csv}")
gc.collect()
# Run the main function
if __name__ == "__main__":
main()
The CSV files are formatted like this:
timestamp,Open,High,Low,Close,Adj Close,Volume
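And this is a quick way I check that a file parses the way load_data expects (the filename DATA/AAPL.csv is just an example, not necessarily one of my actual files):

import pandas as pd

df = pd.read_csv('DATA/AAPL.csv', parse_dates=['timestamp'], index_col='timestamp')
print(df.columns.tolist())  # expect ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
print(df.head())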