diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..6609e2b52a8167d7633d53087d93804b72ffd227 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +*env/ +.env + +*.egg-* +*.pyc +*.txt +config.txt diff --git a/Dockerfile b/Dockerfile index 5910a680d795db4a6b5d7b7c7e7fc7de7ceccca3..6349f47c227493952580cee4888e07e93c5376f4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,23 +1,25 @@ -FROM jupyter/scipy-notebook:hub-1.5.0 +FROM jupyter/datascience-notebook:hub-3.1.1 -# Install from APT repository USER root -RUN apt-get update -y -RUN apt-get install -y git libpq-dev gcc -# Set working directory -WORKDIR /usr/src/app +COPY requirements.txt environment.yml /tmp/ +RUN conda env update -q -f /tmp/environment.yml && \ + /opt/conda/bin/pip install -r /tmp/requirements.txt && \ + conda clean -y --all && \ + conda env export -n "root" && \ + jupyter lab build -# Install basics -USER jovyan -RUN pip3 install --upgrade pip -COPY .env .env +COPY dash_proxy /tmp/dash_proxy/ +RUN pip install /tmp/dash_proxy/ -# Install python packages -COPY requirements.txt requirements.txt -RUN pip install --no-cache-dir -r requirements.txt -ENV IPYTHONDIR /usr/src/app/ipython/ -WORKDIR /usr/src/app/ipython/profile_default/startup/ -COPY python_startup/ ./ -WORKDIR /home/ +COPY llm_utils /llm_utils/ +RUN pip install /llm_utils/ +ENV CONFIG_PATH=/home/jovyan/config.txt +COPY app /dash/app/ +RUN chown -R jovyan /dash/app/ + +# install some NLTK and spaCy data +RUN python -m nltk.downloader stopwords +RUN python -m nltk.downloader wordnet +RUN python -m spacy download en_core_web_sm diff --git a/README.md b/README.md index 38011d6a37fc8d8cec424c34a4e464afbffda1cd..ef4faa61b32056f2b8678950612ad916b456bf0e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ -# bücker +# A Jupyterlab for LLM +In order to run Dash or use the client, AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, OPENAI_API_VERSION need to be stored in a config.txt file in the home directory. 
diff --git a/app/app.py b/app/app.py new file mode 100644 index 0000000000000000000000000000000000000000..b43ae3ac69e2d41ac5bf45446abf5d36aa5d7996 --- /dev/null +++ b/app/app.py @@ -0,0 +1,65 @@ +import sys +sys.path.append("/home/jovyan/") + +import argparse +import logging + +from urllib.parse import urlparse, urljoin + +from dash import Dash + +from jupyter_server.serverapp import list_running_servers + +from layout import layout +from callbacks import register_callbacks + +logging.basicConfig(level=logging.INFO) + +# weird trick to find base_url for the jupyterlab +def find_jupyterlab_base_url(): + servers = list_running_servers() + for server in servers: + if server["port"] == 8888: + return server['url'] + return None + + +# get the correct port from proxy +parser = argparse.ArgumentParser() +parser.add_argument("--port", type=int) +args = parser.parse_args() +port: int = args.port + +if not port: + raise ValueError(f"Port of proxy server for Dash not found in {args}.") +else: + logging.debug(f"Dash app running on port {port}.") + + +base_url = find_jupyterlab_base_url() +if base_url is None: + raise ValueError("Base URL of Jupyterlab could not be detected.") +logging.debug(f"Base URL: {base_url}") + +proxy_base_path = urlparse(urljoin(base_url + "/", f"proxy/{port}/")).path +logging.debug(f"Proxy base path: {proxy_base_path}") + +# define Dash app +app = Dash( + name=__name__, + requests_pathname_prefix=proxy_base_path +) + +# define layout +app.layout = layout + +# register all callback functions +register_callbacks(app=app) + +# Run Dash app in the notebook +app.run( + jupyter_mode="jupyterlab", + port=port, + host="0.0.0.0", + debug=True +) diff --git a/app/callbacks.py b/app/callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..9a61c5cb2d483203df434aae8482552bac8dfc0c --- /dev/null +++ b/app/callbacks.py @@ -0,0 +1,65 @@ +import os +from datetime import datetime + +from dash import ( + html, + Dash +) +from dash.dependencies 
import ( + Input, + Output, + State +) + +from llm_utils.client import ChatGPT, get_openai_client + + +def format_chat_messages(chat_history): + chat_messages = [] + for message in chat_history: + chat_messages.append(html.Div([ + html.P(f'{message["sender"]}: {message["message"]}'), + html.P(f'Sent at: {message["timestamp"]}') + ])) + return chat_messages + + +def register_callbacks(app: Dash): + model="gpt4" + client = get_openai_client( + model=model, + config_path=os.environ.get("CONFIG_PATH") + ) + chat_gpt = ChatGPT( + client=client, + model="gpt4" + ) + + @app.callback( + [Output('chat-container', 'children'), + Output('chat-history', 'data')], + [Input('send-button', 'n_clicks')], + [State('user-input', 'value'), + State('chat-history', 'data')] + ) + def update_chat(n_clicks, input_value, chat_history): + if chat_history is None: + chat_history = [] + + if n_clicks > 0 and input_value: + chat_history.append({ + 'sender': 'User', + 'message': input_value, + 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S") + }) + + response = chat_gpt.chat_with_gpt(input_value) + + # Add response to chat history + chat_history.append({ + 'sender': 'Language Model', + 'message': response, + 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S") + }) + + return format_chat_messages(chat_history), chat_history diff --git a/app/layout.py b/app/layout.py new file mode 100644 index 0000000000000000000000000000000000000000..4d82a24710224074e9123450133daf5eefd3aac2 --- /dev/null +++ b/app/layout.py @@ -0,0 +1,40 @@ +from dash import ( + html, + dcc +) + +layout = html.Div([ + dcc.Store( + id='chat-history', + data=[] + ), + html.H1( + "Simple Chat App", + style={'text-align': 'center'} + ), + html.Div( + id='chat-container', + style={'overflowY': 'scroll', 'height': '70vh', 'padding': '10px'} + ), + html.Div([ + dcc.Input( + id='user-input', + type='text', + placeholder='Type your message...', + debounce=True + ), + html.Button( + 'Send', + id='send-button', + n_clicks=0 
+ ) + ], style={ + 'display': 'flex', + 'alignItems': 'center', + 'justifyContent': 'center', + 'position': 'fixed', + 'bottom': 0, + 'width': '100%', + 'padding': '10px' + }) +], style={'position': 'relative'}) diff --git a/app/my_app.py b/app/my_app.py new file mode 100644 index 0000000000000000000000000000000000000000..db083be592f45c76bd6c05d3f2783614b84cfcb0 --- /dev/null +++ b/app/my_app.py @@ -0,0 +1,70 @@ +import sys +sys.path.append("/home/jovyan/") + +import argparse +import logging + +from urllib.parse import urlparse, urljoin + +from dash import Dash + +from jupyter_server.serverapp import list_running_servers + +try: + from my_layout import layout + from my_callbacks import register_callbacks +except ModuleNotFoundError: + # do not let Dash start + exit() + + +logging.basicConfig(level=logging.INFO) + +# weird trick to find base_url for the jupyterlab +def find_jupyterlab_base_url(): + servers = list_running_servers() + for server in servers: + if server["port"] == 8888: + return server['url'] + return None + + +# get the correct port from proxy +parser = argparse.ArgumentParser() +parser.add_argument("--port", type=int) +args = parser.parse_args() +port: int = args.port + +if not port: + raise ValueError(f"Port of proxy server for Dash not found in {args}.") +else: + logging.debug(f"Dash app running on port {port}.") + + +base_url = find_jupyterlab_base_url() +if base_url is None: + raise ValueError("Base URL of Jupyterlab could not be detected.") +logging.debug(f"Base URL: {base_url}") + +proxy_base_path = urlparse(urljoin(base_url + "/", f"proxy/{port}/")).path +logging.debug(f"Proxy base path: {proxy_base_path}") + +# define Dash app +app = Dash( + name=__name__, + requests_pathname_prefix=proxy_base_path +) + +# define layout +app.layout = layout + +# register all callback functions +register_callbacks(app=app) + +# Run Dash app in the notebook +app.run( + jupyter_mode="jupyterlab", + port=port, + host="0.0.0.0", + debug=True +) diff --git 
a/dash_proxy/dash_proxy.py b/dash_proxy/dash_proxy.py new file mode 100644 index 0000000000000000000000000000000000000000..309e977e30e4ae08b01c6f3c3734e673b94fb444 --- /dev/null +++ b/dash_proxy/dash_proxy.py @@ -0,0 +1,16 @@ +def setup_dash_proxy(): + command = [ + 'python', + '/dash/app/app.py', + '--port', + '{port}' + ] + + return { + "command": command, + "new_browser_tab": False, + "launcher_entry": { + "enabled": True, + 'title': 'Dash' + } + } diff --git a/dash_proxy/my_app_proxy.py b/dash_proxy/my_app_proxy.py new file mode 100644 index 0000000000000000000000000000000000000000..e91a8649001749f630ee85b4e6706f99049fb634 --- /dev/null +++ b/dash_proxy/my_app_proxy.py @@ -0,0 +1,16 @@ +def setup_my_app_proxy(): + command = [ + 'python', + '/dash/app/my_app.py', + '--port', + '{port}' + ] + + return { + "command": command, + "new_browser_tab": False, + "launcher_entry": { + "enabled": True, + 'title': 'MyApp' + } + } diff --git a/dash_proxy/setup.py b/dash_proxy/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..8e1ebf5761c3de7f783ce204b25474e10f2b0d8d --- /dev/null +++ b/dash_proxy/setup.py @@ -0,0 +1,17 @@ +import setuptools + +setuptools.setup( + author="Julian Rasch", + author_email="julian.rasch@fh-muenster.de", + description="A small module to run Dash inside a dockerized Jupyterlab.", + name="jupyter-dash-proxy", + py_modules=["dash_proxy", "my_app_proxy"], + entry_points={ + "jupyter_serverproxy_servers": [ + # name = packagename:function_name + "Dash = dash_proxy:setup_dash_proxy", + "MyApp = my_app_proxy:setup_my_app_proxy" + ] + }, + install_requires=["jupyter-server-proxy==4.0.0"], +) diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..eef32e175e1f18d058cee69d1e5baf91d7bdca45 --- /dev/null +++ b/environment.yml @@ -0,0 +1,7 @@ +name: "base" +channels: + - defaults +# dependencies: +# - add packages here +# - one per line +prefix: "/opt/conda" diff --git 
a/llm_utils/setup.py b/llm_utils/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..737729b7716520f1c36d7351fea156faee49258f --- /dev/null +++ b/llm_utils/setup.py @@ -0,0 +1,14 @@ +import setuptools + +setuptools.setup( + author="Julian Rasch", + author_email="julian.rasch@fh-muenster.de", + description="Helper modules to work with LLMs.", + name="llm_utils", + package_dir={"": "src"}, + packages=setuptools.find_packages(where="src"), + install_requires=[ + "openai", + "python-dotenv" + ] +) diff --git a/llm_utils/src/__init__.py b/llm_utils/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_utils/src/llm_utils/__init__.py b/llm_utils/src/llm_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_utils/src/llm_utils/client.py b/llm_utils/src/llm_utils/client.py new file mode 100644 index 0000000000000000000000000000000000000000..1bca863281a30f6cca7798da46b09adbb125d0a7 --- /dev/null +++ b/llm_utils/src/llm_utils/client.py @@ -0,0 +1,81 @@ +import os +from openai import AzureOpenAI +from dotenv import load_dotenv + +from enum import Enum + + +class OpenAIModels(Enum): + GPT_3 = "gpt3" + GPT_4 = "gpt4" + EMBED = "embed" + + @classmethod + def get_all_values(cls): + return [member.value for member in cls] + + +def get_openai_client( + model: str, + config_path: str + ) -> AzureOpenAI: + if model not in OpenAIModels.get_all_values(): + raise ValueError(f"<model> needs to be one of {OpenAIModels.get_all_values()}.") + + load_dotenv( + dotenv_path=config_path, + override=True + ) + + AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY") + AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT") + OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION") + + if any(p is None for p in (AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, OPENAI_API_VERSION)): + 
raise ValueError( + f"""None of the following parameters can be None: + AZURE_OPENAI_API_KEY: {AZURE_OPENAI_API_KEY}, + AZURE_OPENAI_ENDPOINT: {AZURE_OPENAI_ENDPOINT}, + OPENAI_API_VERSION: {OPENAI_API_VERSION} + """ + ) + + client = AzureOpenAI( + api_key=AZURE_OPENAI_API_KEY, + azure_endpoint=AZURE_OPENAI_ENDPOINT, + api_version=OPENAI_API_VERSION, + azure_deployment=model + ) + return client + + +class ChatGPT: + def __init__(self, client: AzureOpenAI, model: str): + self.model = model + self.client = client + self.messages = [] + + def chat_with_gpt(self, user_input: str): + self.messages.append({ + "role": "user", + "content": user_input + }) + response = self._generate_response(self.messages) + return response + + def _generate_response(self, messages): + response = self.client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.2, + max_tokens=150, + top_p=1.0 + ) + response_message = response.choices[0].message + self.messages.append({ + "role": response_message.role, + "content": response_message.content + }) + + return response_message.content + diff --git a/my_callbacks.py b/my_callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..6640c9dab08cbf2a5826a0511b78b6e03a6e215c --- /dev/null +++ b/my_callbacks.py @@ -0,0 +1,21 @@ +from dash.dependencies import ( + Input, + Output +) +from dash import html + + +def register_callbacks(app): + @app.callback( + Output('output-container-button', 'children'), + [Input('submit-btn', 'n_clicks')], + [Input('input-text', 'value')] + ) + def update_output(n_clicks, input_value): + if n_clicks > 0: + return html.Div([ + html.Label("You entered:"), + html.P(input_value) + ]) + else: + return '' diff --git a/my_layout.py b/my_layout.py new file mode 100644 index 0000000000000000000000000000000000000000..e0cf7c4a9a74950db48449451e5e594fa8df9140 --- /dev/null +++ b/my_layout.py @@ -0,0 +1,13 @@ +from dash import html +from dash import dcc + + +layout = html.Div([ + 
html.H1("Yeay, my app!"), + html.Div([ + html.Label("Enter your text:"), + dcc.Input(id='input-text', type='text', value=''), + html.Button('Submit', id='submit-btn', n_clicks=0), + ]), + html.Div(id='output-container-button') +]) diff --git a/requirements.txt b/requirements.txt index c1d6a954ef9cf503b455d0e7529ff383ca13fdd6..20b821cca3adfc3d9e28610094f92494c3330ea0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,18 @@ -jupyterlab-git==0.34.0 -jupyterlab-gitlab==3.0.0 -psycopg2-binary -psycopg2 -python-dotenv -sqlalchemy -urllib3 +jupyter-server-proxy==4.0.0 +jupyterlab-git==0.42.0 +jupyter_server>=2.0 + +flake8 + dash +dash-bootstrap-components +plotly + openai -pandas -numpy -tenacity +rapidfuzz +nltk +spacy scikit-learn -flask -dash -dash-bootstrap-components + +python-dotenv +