diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py
index 01a1b133..2a8f32ff 100644
--- a/autogpt/agent/agent.py
+++ b/autogpt/agent/agent.py
@@ -70,7 +70,7 @@ class Agent:
     ):
         self.ai_name = ai_name
         self.memory = memory
-        self.history = MessageHistory(self)
+        self.history = MessageHistory.for_model(config.smart_llm, agent=self)
         self.next_action_count = next_action_count
         self.command_registry = command_registry
         self.config = config
diff --git a/autogpt/llm/base.py b/autogpt/llm/base.py
index 4ff80dc7..5cf4993f 100644
--- a/autogpt/llm/base.py
+++ b/autogpt/llm/base.py
@@ -1,13 +1,14 @@
 from __future__ import annotations

+from copy import deepcopy
 from dataclasses import dataclass, field
 from math import ceil, floor
-from typing import TYPE_CHECKING, List, Literal, Optional, TypedDict
+from typing import TYPE_CHECKING, Literal, Optional, Type, TypedDict, TypeVar, overload

 if TYPE_CHECKING:
     from autogpt.llm.providers.openai import OpenAIFunctionCall

-MessageRole = Literal["system", "user", "assistant"]
+MessageRole = Literal["system", "user", "assistant", "function"]
 MessageType = Literal["ai_response", "action_result"]

 TText = list[int]
@@ -68,15 +69,31 @@ class EmbeddingModelInfo(ModelInfo):
     embedding_dimensions: int


+# Can be replaced by Self in Python 3.11
+TChatSequence = TypeVar("TChatSequence", bound="ChatSequence")
+
+
 @dataclass
 class ChatSequence:
     """Utility container for a chat sequence"""

     model: ChatModelInfo
-    messages: list[Message] = field(default_factory=list)
+    messages: list[Message] = field(default_factory=list[Message])

-    def __getitem__(self, i: int):
-        return self.messages[i]
+    @overload
+    def __getitem__(self, key: int) -> Message:
+        ...
+
+    @overload
+    def __getitem__(self: TChatSequence, key: slice) -> TChatSequence:
+        ...
+
+    def __getitem__(self: TChatSequence, key: int | slice) -> Message | TChatSequence:
+        if isinstance(key, slice):
+            copy = deepcopy(self)
+            copy.messages = self.messages[key]
+            return copy
+        return self.messages[key]

     def __iter__(self):
         return iter(self.messages)
@@ -84,6 +101,14 @@ class ChatSequence:

     def __len__(self):
         return len(self.messages)

+    def add(
+        self,
+        message_role: MessageRole,
+        content: str,
+        type: MessageType | None = None,
+    ) -> None:
+        self.append(Message(message_role, content, type))
+
     def append(self, message: Message):
         return self.messages.append(message)
@@ -95,21 +120,23 @@ class ChatSequence:
         self.messages.insert(index, message)

     @classmethod
-    def for_model(cls, model_name: str, messages: list[Message] | ChatSequence = []):
+    def for_model(
+        cls: Type[TChatSequence],
+        model_name: str,
+        messages: list[Message] | ChatSequence = [],
+        **kwargs,
+    ) -> TChatSequence:
         from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS

         if not model_name in OPEN_AI_CHAT_MODELS:
             raise ValueError(f"Unknown chat model '{model_name}'")

-        return ChatSequence(
-            model=OPEN_AI_CHAT_MODELS[model_name], messages=list(messages)
+        return cls(
+            model=OPEN_AI_CHAT_MODELS[model_name], messages=list(messages), **kwargs
         )

-    def add(self, message_role: MessageRole, content: str):
-        self.messages.append(Message(message_role, content))
-
     @property
-    def token_length(self):
+    def token_length(self) -> int:
         from autogpt.llm.utils import count_message_tokens

         return count_message_tokens(self.messages, self.model.name)
@@ -128,7 +155,7 @@ class ChatSequence:
            [f"{separator(m.role)}\n{m.content}" for m in self.messages]
         )
         return f"""
-============== ChatSequence ==============
+============== {__class__.__name__} ==============
 Length: {self.token_length} tokens; {len(self.messages)} messages
 {formatted_messages}
 ==========================================
@@ -148,7 +175,7 @@ class LLMResponse:
 class EmbeddingModelResponse(LLMResponse):
     """Standard response struct for a response from an embedding model."""

-    embedding: List[float] = field(default_factory=list)
+    embedding: list[float] = field(default_factory=list)

     def __post_init__(self):
         if self.completion_tokens_used:
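The ChatSequence changes above add typed indexing (an int index yields a single Message, a slice yields a copy of the same class), an add() helper that takes an optional message type, and a generic for_model() constructor that also works for subclasses. A rough usage sketch, not taken from the patch; it assumes the autogpt package from this branch is importable:

# Usage sketch (not part of the patch).
from autogpt.llm.base import ChatSequence, Message

seq = ChatSequence.for_model("gpt-3.5-turbo")  # a key of OPEN_AI_CHAT_MODELS
seq.add("system", "You are a helpful assistant.")
seq.add("assistant", "I ran the google command.", "ai_response")
seq.add("system", "Command returned: OK", "action_result")

first = seq[0]   # int index -> a single Message
tail = seq[1:]   # slice -> a deep-copied sequence of the same class
assert isinstance(tail, ChatSequence) and len(tail) == 2
assert len(seq) == 3  # slicing copies; the original sequence is unchanged
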
diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py
index e97b7936..cc04eeb1 100644
--- a/autogpt/llm/chat.py
+++ b/autogpt/llm/chat.py
@@ -93,13 +93,13 @@ def chat_with_ai(

     # Account for user input (appended later)
     user_input_msg = Message("user", triggering_prompt)
-    current_tokens_used += count_message_tokens([user_input_msg], model)
+    current_tokens_used += count_message_tokens(user_input_msg, model)

-    current_tokens_used += 500  # Reserve space for new_summary_message
+    current_tokens_used += agent.history.max_summary_tlength  # Reserve space
     current_tokens_used += 500  # Reserve space for the openai functions TODO improve

     # Add Messages until the token limit is reached or there are no more messages to add.
-    for cycle in reversed(list(agent.history.per_cycle(agent.config))):
+    for cycle in reversed(list(agent.history.per_cycle())):
         messages_to_add = [msg for msg in cycle if msg is not None]
         tokens_to_add = count_message_tokens(messages_to_add, model)
         if current_tokens_used + tokens_to_add > send_token_limit:
@@ -115,9 +115,9 @@ def chat_with_ai(
         new_summary_message, trimmed_messages = agent.history.trim_messages(
             current_message_chain=list(message_sequence), config=agent.config
         )
-        tokens_to_add = count_message_tokens([new_summary_message], model)
+        tokens_to_add = count_message_tokens(new_summary_message, model)
         message_sequence.insert(insertion_index, new_summary_message)
-        current_tokens_used += tokens_to_add - 500
+        current_tokens_used += tokens_to_add - agent.history.max_summary_tlength

     # FIXME: uncomment when memory is back in use
     # memory_store = get_memory(config)
@@ -143,7 +143,7 @@ def chat_with_ai(
         )
         logger.debug(budget_message)
         message_sequence.add("system", budget_message)
-        current_tokens_used += count_message_tokens([message_sequence[-1]], model)
+        current_tokens_used += count_message_tokens(message_sequence[-1], model)

     # Append user input, the length of this is accounted for above
     message_sequence.append(user_input_msg)
@@ -157,9 +157,7 @@ def chat_with_ai(
         )
         if not plugin_response or plugin_response == "":
             continue
-        tokens_to_add = count_message_tokens(
-            [Message("system", plugin_response)], model
-        )
+        tokens_to_add = count_message_tokens(Message("system", plugin_response), model)
         if current_tokens_used + tokens_to_add > send_token_limit:
             logger.debug(f"Plugin response too long, skipping: {plugin_response}")
             logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
""" + if isinstance(messages, Message): + messages = [messages] + if model.startswith("gpt-3.5-turbo"): tokens_per_message = ( 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index c7e8b73a..2459e1f5 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -2,49 +2,46 @@ from __future__ import annotations import copy import json -from dataclasses import dataclass, field -from typing import TYPE_CHECKING +from dataclasses import dataclass +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from autogpt.agent import Agent from autogpt.config import Config from autogpt.json_utils.utilities import extract_json_from_response -from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType +from autogpt.llm.base import ChatSequence, Message from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS -from autogpt.llm.utils import count_string_tokens, create_chat_completion +from autogpt.llm.utils import ( + count_message_tokens, + count_string_tokens, + create_chat_completion, +) from autogpt.log_cycle.log_cycle import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME from autogpt.logs import logger @dataclass -class MessageHistory: - agent: Agent - - messages: list[Message] = field(default_factory=list) +class MessageHistory(ChatSequence): + max_summary_tlength: int = 500 + agent: Optional[Agent] = None summary: str = "I was created" - last_trimmed_index: int = 0 - def __getitem__(self, i: int): - return self.messages[i] + SUMMARIZATION_PROMPT = '''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember. - def __iter__(self): - return iter(self.messages) +You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. - def __len__(self): - return len(self.messages) +Summary So Far: +""" +{summary} +""" - def add( - self, - role: MessageRole, - content: str, - type: MessageType | None = None, - ): - return self.append(Message(role, content, type)) - - def append(self, message: Message): - return self.messages.append(message) +Latest Development: +""" +{new_events} +""" +''' def trim_messages( self, current_message_chain: list[Message], config: Config @@ -84,7 +81,7 @@ class MessageHistory: return new_summary_message, new_messages_not_in_chain - def per_cycle(self, config: Config, messages: list[Message] | None = None): + def per_cycle(self, messages: list[Message] | None = None): """ Yields: Message: a message containing user input @@ -119,26 +116,33 @@ class MessageHistory: ) def update_running_summary( - self, new_events: list[Message], config: Config + self, + new_events: list[Message], + config: Config, + max_summary_length: Optional[int] = None, ) -> Message: """ - This function takes a list of dictionaries representing new events and combines them with the current summary, - focusing on key and potentially important information to remember. The updated summary is returned in a message - formatted in the 1st person past tense. + This function takes a list of Message objects and updates the running summary + to include the events they describe. The updated summary is returned + in a Message formatted in the 1st person past tense. 
diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py
index c7e8b73a..2459e1f5 100644
--- a/autogpt/memory/message_history.py
+++ b/autogpt/memory/message_history.py
@@ -2,49 +2,46 @@ from __future__ import annotations

 import copy
 import json
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Optional

 if TYPE_CHECKING:
     from autogpt.agent import Agent

 from autogpt.config import Config
 from autogpt.json_utils.utilities import extract_json_from_response
-from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType
+from autogpt.llm.base import ChatSequence, Message
 from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
-from autogpt.llm.utils import count_string_tokens, create_chat_completion
+from autogpt.llm.utils import (
+    count_message_tokens,
+    count_string_tokens,
+    create_chat_completion,
+)
 from autogpt.log_cycle.log_cycle import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME
 from autogpt.logs import logger


 @dataclass
-class MessageHistory:
-    agent: Agent
-
-    messages: list[Message] = field(default_factory=list)
+class MessageHistory(ChatSequence):
+    max_summary_tlength: int = 500
+    agent: Optional[Agent] = None
     summary: str = "I was created"
-
     last_trimmed_index: int = 0

-    def __getitem__(self, i: int):
-        return self.messages[i]
+    SUMMARIZATION_PROMPT = '''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember.

-    def __iter__(self):
-        return iter(self.messages)
+You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise.

-    def __len__(self):
-        return len(self.messages)
+Summary So Far:
+"""
+{summary}
+"""

-    def add(
-        self,
-        role: MessageRole,
-        content: str,
-        type: MessageType | None = None,
-    ):
-        return self.append(Message(role, content, type))
-
-    def append(self, message: Message):
-        return self.messages.append(message)
+Latest Development:
+"""
+{new_events}
+"""
+'''

     def trim_messages(
         self, current_message_chain: list[Message], config: Config
@@ -84,7 +81,7 @@ class MessageHistory:
         return new_summary_message, new_messages_not_in_chain

-    def per_cycle(self, config: Config, messages: list[Message] | None = None):
+    def per_cycle(self, messages: list[Message] | None = None):
         """
         Yields:
             Message: a message containing user input
@@ -119,26 +116,33 @@ class MessageHistory:
         )

     def update_running_summary(
-        self, new_events: list[Message], config: Config
+        self,
+        new_events: list[Message],
+        config: Config,
+        max_summary_length: Optional[int] = None,
     ) -> Message:
         """
-        This function takes a list of dictionaries representing new events and combines them with the current summary,
-        focusing on key and potentially important information to remember. The updated summary is returned in a message
-        formatted in the 1st person past tense.
+        This function takes a list of Message objects and updates the running summary
+        to include the events they describe. The updated summary is returned
+        in a Message formatted in the 1st person past tense.

         Args:
-            new_events (List[Dict]): A list of dictionaries containing the latest events to be added to the summary.
+            new_events: A list of Messages containing the latest events to be added to the summary.

         Returns:
-            str: A message containing the updated summary of actions, formatted in the 1st person past tense.
+            Message: a Message containing the updated running summary.

         Example:
+            ```py
             new_events = [{"event": "entered the kitchen."}, {"event": "found a scrawled note with the number 7"}]
             update_running_summary(new_events)
             # Returns: "This reminds you of these events from your past: \nI entered the kitchen and found a scrawled note saying 7."
+            ```
         """
         if not new_events:
             return self.summary_message()
+        if not max_summary_length:
+            max_summary_length = self.max_summary_tlength

         # Create a copy of the new_events list to prevent modifying the original list
         new_events = copy.deepcopy(new_events)
@@ -166,29 +170,29 @@ class MessageHistory:
             elif event.role == "user":
                 new_events.remove(event)

-        # Summarize events and current summary in batch to a new running summary
+        summ_model = OPEN_AI_CHAT_MODELS[config.fast_llm]

-        # Assume an upper bound length for the summary prompt template, i.e. Your task is to create a concise running summary...., in summarize_batch func
-        # TODO make this default dynamic
-        prompt_template_length = 100
-        max_tokens = OPEN_AI_CHAT_MODELS.get(config.fast_llm).max_tokens
-        summary_tlength = count_string_tokens(str(self.summary), config.fast_llm)
+        # Determine token lengths for use in batching
+        prompt_template_length = len(
+            MessageHistory.SUMMARIZATION_PROMPT.format(summary="", new_events="")
+        )
+        max_input_tokens = summ_model.max_tokens - max_summary_length
+        summary_tlength = count_string_tokens(self.summary, summ_model.name)
         batch = []
         batch_tlength = 0

-        # TODO Can put a cap on length of total new events and drop some previous events to save API cost, but need to think thru more how to do it without losing the context
+        # TODO: Put a cap on length of total new events and drop some previous events to
+        # save API cost. Need to think thru more how to do it without losing the context.
         for event in new_events:
-            event_tlength = count_string_tokens(str(event), config.fast_llm)
+            event_tlength = count_message_tokens(event, summ_model.name)

             if (
                 batch_tlength + event_tlength
-                > max_tokens - prompt_template_length - summary_tlength
+                > max_input_tokens - prompt_template_length - summary_tlength
             ):
                 # The batch is full. Summarize it and start a new one.
-                self.summarize_batch(batch, config)
-                summary_tlength = count_string_tokens(
-                    str(self.summary), config.fast_llm
-                )
+                self.summarize_batch(batch, config, max_summary_length)
+                summary_tlength = count_string_tokens(self.summary, summ_model.name)
                 batch = [event]
                 batch_tlength = event_tlength
             else:
@@ -197,41 +201,36 @@ class MessageHistory:

         if batch:
             # There's an unprocessed batch. Summarize it.
-            self.summarize_batch(batch, config)
+            self.summarize_batch(batch, config, max_summary_length)

         return self.summary_message()

-    def summarize_batch(self, new_events_batch, config):
-        prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember.
-
-You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise.
-
-Summary So Far:
-"""
-{self.summary}
-"""
-
-Latest Development:
-"""
-{new_events_batch or "Nothing new happened."}
-"""
-'''
+    def summarize_batch(
+        self, new_events_batch: list[Message], config: Config, max_output_length: int
+    ):
+        prompt = MessageHistory.SUMMARIZATION_PROMPT.format(
+            summary=self.summary, new_events=new_events_batch
+        )
         prompt = ChatSequence.for_model(config.fast_llm, [Message("user", prompt)])

-        self.agent.log_cycle_handler.log_cycle(
-            self.agent.ai_name,
-            self.agent.created_at,
-            self.agent.cycle_count,
-            prompt.raw(),
-            PROMPT_SUMMARY_FILE_NAME,
-        )
+        if self.agent:
+            self.agent.log_cycle_handler.log_cycle(
+                self.agent.ai_config.ai_name,
+                self.agent.created_at,
+                self.agent.cycle_count,
+                prompt.raw(),
+                PROMPT_SUMMARY_FILE_NAME,
+            )

-        self.summary = create_chat_completion(prompt, config).content
+        self.summary = create_chat_completion(
+            prompt, config, max_tokens=max_output_length
+        ).content

-        self.agent.log_cycle_handler.log_cycle(
-            self.agent.ai_name,
-            self.agent.created_at,
-            self.agent.cycle_count,
-            self.summary,
-            SUMMARY_FILE_NAME,
-        )
+        if self.agent:
+            self.agent.log_cycle_handler.log_cycle(
+                self.agent.ai_config.ai_name,
+                self.agent.created_at,
+                self.agent.cycle_count,
+                self.summary,
+                SUMMARY_FILE_NAME,
+            )
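After this change MessageHistory is itself a ChatSequence: it is constructed through the inherited for_model() classmethod, the agent reference is optional (cycle logging is skipped when it is None), and the summary length budget lives on the instance as max_summary_tlength. A rough sketch of the new construction path, not taken from the patch and assuming this branch of autogpt is importable; summarization itself needs a Config and an OpenAI call, so it is not exercised here. The test below drives the same API with a real Agent fixture:

# Usage sketch (not part of the patch).
from autogpt.llm.base import Message
from autogpt.memory.message_history import MessageHistory

# Built via the inherited classmethod; agent may be omitted or None.
history = MessageHistory.for_model("gpt-3.5-turbo", agent=None)
history.append(Message("assistant", "I ran the google command.", "ai_response"))
history.append(Message("system", "Command returned: OK", "action_result"))

print(history.max_summary_tlength)        # 500: token budget reserved for the running summary
print(history.summary_message().content)  # "This reminds you of these events from your past: ..."
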
diff --git a/tests/unit/test_message_history.py b/tests/unit/test_message_history.py
index 8ceee63f..9b275252 100644
--- a/tests/unit/test_message_history.py
+++ b/tests/unit/test_message_history.py
@@ -38,8 +38,8 @@ def agent(config: Config):
     return agent


-def test_message_history_batch_summary(mocker, agent, config):
-    history = MessageHistory(agent)
+def test_message_history_batch_summary(mocker, agent: Agent, config: Config):
+    history = MessageHistory.for_model(agent.config.smart_llm, agent=agent)
     model = config.fast_llm
     message_tlength = 0
     message_count = 0
@@ -48,7 +48,7 @@ def test_message_history_batch_summary(mocker, agent: Agent, config: Config):
     mock_summary_response = ChatModelResponse(
         model_info=OPEN_AI_CHAT_MODELS[model],
         content="I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings.",
-        function_call={},
+        function_call=None,
     )
     mock_summary = mocker.patch(
         "autogpt.memory.message_history.create_chat_completion",
@@ -105,7 +105,7 @@ def test_message_history_batch_summary(mocker, agent: Agent, config: Config):
         result = (
             "Command browse_website returned: Answer gathered from website: The text in job"
             + str(i)
-            + " does not provide information on specific job requirements or a job URL.]",
+            + " does not provide information on specific job requirements or a job URL.]"
         )
         msg = Message("system", result, "action_result")
         history.append(msg)
@@ -117,7 +117,7 @@ def test_message_history_batch_summary(mocker, agent: Agent, config: Config):
     history.append(user_input_msg)

     # only take the last cycle of the message history, trim the rest of previous messages, and generate a summary for them
-    for cycle in reversed(list(history.per_cycle(config))):
+    for cycle in reversed(list(history.per_cycle())):
         messages_to_add = [msg for msg in cycle if msg is not None]
         message_sequence.insert(insertion_index, *messages_to_add)
         break
@@ -134,7 +134,7 @@ def test_message_history_batch_summary(mocker, agent: Agent, config: Config):
     )

     expected_call_count = math.ceil(
-        message_tlength / (OPEN_AI_CHAT_MODELS.get(config.fast_llm).max_tokens)
+        message_tlength / (OPEN_AI_CHAT_MODELS[config.fast_llm].max_tokens)
     )
     # Expecting 2 batches because of over max token
     assert mock_summary.call_count == expected_call_count  # 2 at the time of writing