Patch notes (free text; patch tools skip everything before the first "diff --git" header)

Dockerfile
  * Merge conda env update, pip install, conda clean, conda env export and
    jupyter lab build into a single RUN layer.
  * Remove the separate numpy/scikit-learn override install and the
    /home/jovyan/.cache chown step.
  * Move the NLTK/spaCy data downloads to the end and add the punkt corpus.
  * chown /dash/app/ to jovyan without an explicit group.
  * Kept from the pre-image on purpose: --no-cache-dir on every pip install
    (no pip cache baked into layers) and the trailing USER jovyan (image must
    not be left with USER root active).

requirements.txt
  * Pin spacy==3.4.4 and scikit-learn==1.1.3; add pdfplumber.

NOTE(review): the Dockerfile "index" header is omitted because the post-image
was amended and the recorded blob id would no longer match.
NOTE(review): blank context lines and continuation-line indentation were
re-derived from the hunk line counts; the position of the blank context line
in the requirements.txt hunk is an assumption - confirm against the target tree.

diff --git a/Dockerfile b/Dockerfile
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,47 +1,29 @@
 FROM jupyter/datascience-notebook:hub-3.1.1
 
-# Use root to install packages and modify permissions
 USER root
 
-# Copy requirements and environment files to the container
 COPY requirements.txt environment.yml /tmp/
+RUN conda env update -q -f /tmp/environment.yml && \
+    /opt/conda/bin/pip install --no-cache-dir -r /tmp/requirements.txt && \
+    conda clean -y --all && \
+    conda env export -n "root" && \
+    jupyter lab build
 
-# Update conda environment
-RUN conda env update -q -f /tmp/environment.yml
-
-# Install pip packages and avoid caching
-RUN /opt/conda/bin/pip install --no-cache-dir -r /tmp/requirements.txt
-
-# Fix permissions for the pip cache directory to avoid permission warnings
-RUN chown -R jovyan:users /home/jovyan/.cache
-
-# Ensure numpy and scikit-learn are compatible
-RUN /opt/conda/bin/pip install --no-cache-dir numpy==1.22.0 scikit-learn==1.2.2
-
-# Clean up conda cache
-RUN conda clean -y --all
-
-# Build Jupyter Lab
-RUN jupyter lab build
-
-# Install NLTK and SpaCy data
-RUN python -m nltk.downloader stopwords
-RUN python -m nltk.downloader wordnet
-RUN python -m spacy download en_core_web_sm
-
-# Copy and install custom packages
 COPY dash_proxy /tmp/dash_proxy/
 RUN pip install --no-cache-dir /tmp/dash_proxy/
 
 COPY llm_utils /llm_utils/
 RUN pip install --no-cache-dir /llm_utils/
-
-# Set environment variable for config path
 ENV CONFIG_PATH=/home/jovyan/config.txt
 
-# Copy application files and adjust permissions for the jovyan user
 COPY app /dash/app/
-RUN chown -R jovyan:users /dash/app/
+RUN chown -R jovyan /dash/app/
 
+# install some NLTK and spaCy data
+RUN python -m nltk.downloader stopwords
+RUN python -m nltk.downloader wordnet
+RUN python -m nltk.downloader punkt
+RUN python -m spacy download en_core_web_sm
+
 # Switch back to jovyan user
 USER jovyan
diff --git a/requirements.txt b/requirements.txt
index 3592eeac1767c569096aa465b728ae22386a548e..2f01d76914486ad61fecb76519511ad9d28bb01c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,8 +11,10 @@ plotly
 openai
 rapidfuzz
 nltk
-spacy
+spacy==3.4.4
 numpy==1.21.6
-scikit-learn
+scikit-learn==1.1.3
+
+pdfplumber
 
 python-dotenv