diff --git a/docs/advanced/offline_deployment.md b/docs/advanced/offline_deployment.md deleted file mode 100644 index 906161e2..00000000 --- a/docs/advanced/offline_deployment.md +++ /dev/null @@ -1,29 +0,0 @@ -# Doccano Offline Deployment - -## Use Case -These offline deployment scripts are suited for deploying Doccano on an air gapped Ubuntu 18.04/20.04 virtual machine (VM 2) with no internet connectivity (such as in clinical environments). - -The preparation requires another machine (VM 1) with internet access and `docker`/`docker-compose` preinstalled (with $USER in `docker` group) and running the same Ubuntu distribution as VM 2. - -The focus is primarily on the `docker-compose`-based production deployment. -The files mentioned in this document are located in the `tools/offline_deployment/` directory. - -## Setup Steps - -Run the following steps on VM 1: -1. Clone this repository -2. Run the scripts `offline_01_*.sh` in ascending order - Skip OR modify and run the script `offline_01_1-optional_use_https` - Do NOT run these scripts as `sudo`! The scripts will ask for sudo-permissions when it is needed. - -Now, move over to VM 2 - -3. Copy the repository folder from VM 1 to VM 2 -4. Run the scripts `offline_02_*.sh` in ascending order - Do NOT run these scripts as `sudo`! The scripts will ask for sudo-permissions when it is needed. -5. Make minor changes on `docker-compose.prod.yml` to change the admin credentials -6. Run `docker-compose -f docker-compose.prod.yml up` in the repository root directory or use the script `offline_03_*.sh` - -## Remarks - -The setup was tested on Ubuntu 18.04 machines. \ No newline at end of file diff --git a/tools/offline_deployment/offline_01_1-optional_use_https.sh b/tools/offline_deployment/offline_01_1-optional_use_https.sh deleted file mode 100755 index 3a5a8115..00000000 --- a/tools/offline_deployment/offline_01_1-optional_use_https.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $DIR -cd ../.. -unset DIR - -# create certificate pair -sudo apt-get install -y openssl -openssl req -new -newkey rsa:4096 -sha256 -nodes -x509 -keyout ./nginx/cert.key -out ./nginx/cert.crt \ - -subj "/C=US/ST=StateCode/L=LocationName/O=OrganizationName/OU=OrganizationUnit/CN=doccano.herokuapp.com" - -# define cert paths inside container -ssl_cert="/certs/cert.crt" -ssl_cert_key="/certs/cert.key" - -# edit default.conf -sed -i "s|listen 80;|listen 443 ssl;\n ssl_certificate $ssl_cert;\n ssl_certificate_key $ssl_cert_key;|g" nginx/default.conf - -# edit nginx Dockerfile -echo "RUN mkdir -p /certs/" >> nginx/Dockerfile -echo "COPY nginx/cert.key /certs/cert.key" >> nginx/Dockerfile -echo "COPY nginx/cert.crt /certs/cert.crt" >> nginx/Dockerfile - -# edit published port -sed -i "s|- 80:80|- 443:443|g" docker-compose.prod.yml - -echo "Switched to HTTPS" diff --git a/tools/offline_deployment/offline_01_2-patch_and_extract_Docker_images.sh b/tools/offline_deployment/offline_01_2-patch_and_extract_Docker_images.sh deleted file mode 100755 index 1024af24..00000000 --- a/tools/offline_deployment/offline_01_2-patch_and_extract_Docker_images.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $DIR -unset DIR - -# WORKAROUND: Downgrade docker-compose version to match Ubuntu 18.04 default compose package -echo "Patching docker-compose to match Ubuntu 18.04 compose package" -sed -i 's|version: "3.7"|version: "3.3"|g' ../../docker-compose.prod.yml - -sed -i 's^dockerfile: backend/Dockerfile.prod^dockerfile: backend/Dockerfile.prod\n image: doccano-backend:custom^g' ../../docker-compose.prod.yml -sed -i 's^dockerfile: nginx/Dockerfile^dockerfile: nginx/Dockerfile\n image: doccano-nginx:custom^g' ../../docker-compose.prod.yml - -# Modify Dockerfile for nginx to add python3 and offline patch -sed -i 's|FROM nginx|COPY tools/offline_deployment/offline_patcher.py /patch.py\ -RUN apk add -U --no-cache py3-requests \\\ - \&\& mkdir -p /app/dist/offline \&\& python3 /patch.py /app/dist /app/dist/offline /offline\ -\ -FROM nginx|' ../../nginx/Dockerfile - -# Modify Dockerfile for backend to add python3 and offline patch -# TODO: Remark: Not needed due to SPA frontend -#sed -i 's|COPY ./Pipfile\* /backend/|COPY ./Pipfile\* /backend/\ -#COPY tools/offline_deployment/offline_patcher.py /patch.py\ -#RUN apt-get update \ -# \&\& apt-get install -y --no-install-recommends \ -# python3 python3-requests \ -# \&\& apt-get clean \\\ -# \&\& rm -rf /var/lib/apt/lists/\*\ -# \&\& mkdir -p /backend/server/static/offline \&\& python3 /patch.py /backend/server /server/static/offline\ -#\ -#|' ../../backend/Dockerfile.prod - -docker-compose -f ../../docker-compose.prod.yml pull -docker-compose -f ../../docker-compose.prod.yml build - -docker image save -o doccano-backend.tar doccano-backend:custom -docker image save -o doccano-nginx.tar doccano-nginx:custom -docker image save -o postgres.tar postgres:13.1-alpine -docker image save -o rabbitmq.tar rabbitmq:3.8 diff --git a/tools/offline_deployment/offline_01_3-download_APT_packages.sh b/tools/offline_deployment/offline_01_3-download_APT_packages.sh deleted file mode 100755 index 95342f59..00000000 --- a/tools/offline_deployment/offline_01_3-download_APT_packages.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $DIR -unset DIR - -# Prepare and download packages -pdir="/offline_packages" -mkdir -p "$(pwd)${pdir}" -cd "$(pwd)${pdir}" - -SELECTED_PACKAGES="wget unzip curl tar docker.io docker-compose" - -apt-get download $(apt-cache depends --recurse --no-recommends --no-suggests \ - --no-conflicts --no-breaks --no-replaces --no-enhances \ - --no-pre-depends ${SELECTED_PACKAGES} | grep "^\w") - -# Build package index -sudo apt-get install -y dpkg-dev -dpkg-scanpackages "." /dev/null | gzip -9c > Packages.gz - -echo "Packages extracted to: $(pwd)${pdir}" \ No newline at end of file diff --git a/tools/offline_deployment/offline_02_1-install_APT_packages.sh b/tools/offline_deployment/offline_02_1-install_APT_packages.sh deleted file mode 100755 index 6f23f226..00000000 --- a/tools/offline_deployment/offline_02_1-install_APT_packages.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $DIR -unset DIR - -# Import APT packages -pdir="/offline_packages" -abs_pdir="$(pwd)${pdir}" -sudo mv /etc/apt/sources.list /etc/apt/sources.list.bak -cat < sources.list -deb [trusted=yes] file://${abs_pdir} ./ -EOF -sudo mv sources.list /etc/apt/sources.list - -# Install APT packages -sudo apt-get update -SELECTED_PACKAGES="wget unzip curl tar docker.io docker-compose" -sudo apt-get install -y $SELECTED_PACKAGES - -# Cleanup -sudo apt-get clean -sudo mv /etc/apt/sources.list.bak /etc/apt/sources.list - -# Setup Docker -sudo usermod -aG docker $(whoami) -sudo systemctl enable docker.service - -echo "Packages were installed. We need to reboot!" - diff --git a/tools/offline_deployment/offline_02_2-import_Docker_images.sh b/tools/offline_deployment/offline_02_2-import_Docker_images.sh deleted file mode 100755 index e2509e7f..00000000 --- a/tools/offline_deployment/offline_02_2-import_Docker_images.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $DIR -unset DIR - -# Info: Docker image name is already set in previous scripts -## Set image tag in Compose to avoid image build -#sed -i 's^dockerfile: backend/Dockerfile.prod^dockerfile: backend/Dockerfile.prod\n image: doccano-backend:custom^g' ../../docker-compose.prod.yml -#sed -i 's^dockerfile: nginx/Dockerfile^dockerfile: nginx/Dockerfile\n image: doccano-nginx:custom^g' ../../docker-compose.prod.yml - -# Load docker images -docker image load -i doccano-backend.tar -docker image load -i doccano-nginx.tar -docker image load -i postgres.tar -docker image load -i rabbitmq.tar diff --git a/tools/offline_deployment/offline_03_1-runDoccano.sh b/tools/offline_deployment/offline_03_1-runDoccano.sh deleted file mode 100755 index 95ac7ec2..00000000 --- a/tools/offline_deployment/offline_03_1-runDoccano.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $DIR -unset DIR - -docker-compose -f ../../docker-compose.prod.yml up -d diff --git a/tools/offline_deployment/offline_patcher.py b/tools/offline_deployment/offline_patcher.py deleted file mode 100644 index 24ecd9b5..00000000 --- a/tools/offline_deployment/offline_patcher.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python3 -import sys, os, re -import uuid -from urllib.parse import urljoin -import requests - -"""Script Information - -This script scans all files of a given directory [1] for URL addresses and -hyperlink references. -All found URLs are requested for Content-Type. -For certain Content-Types (like js, css, or fonts), the file is downloaded and -stored locally into a given directory [2] and the existing URLs are altered -to a local URL location (with a given URL prefix [3]). - -Downloaded files are scanned for URLs recursively. -Relative references in CSS files are an edge case that is -handled separately by a specific regex pattern. - -Arguments: - 1. - 2. - 3. - -Example: - - Given: - - File ./webspace/index.html, containing URL: https://example.com/library.js - - Directory ./webspace/static, containing static files, - serving content on HTTP location: /staticfiles - - - Call: - $> python3 offline_patcher.py webspace/ webspace/static /staticfiles - - - Result: - - Library from https://example.com/library.js is stored as file: - webspace/static/offline_.js - - Link in file webspace/index.html is replaced to: - /staticfiles/offline_.js - - File webspace/static/offline_.js is scanned recursively for URLs - -Author: Johann Frei -""" - - -def main(): - # root folder to scan for URLs - root_folder = sys.argv[1] - # offline folder to store static offline files - offline_folder = sys.argv[2] - # offline link prefix - offline_prefix = sys.argv[3] - - offline_file = os.path.join(offline_folder, "offline_{}.{}") - offline_link = offline_prefix + "/offline_{}.{}" - - mime_ptn = re.compile(r"(?P(?P[\w^\/]+)\/(?P[\S\.^\;]+))(\;|$)", re.IGNORECASE) - - # regex to find matches like: "https://[:]/a/link/location.html" - link_ptn = re.compile(r"[\(\'\"\ ](?Phttps?:\/\/(?P(?P((?=[^\(\)\'\"\ \:\/])(?=[\S]).)+))(?P\:[0-9]+)?\/[^\(\)\'\"\ ]+)(?P[\(\)\'\"\ ])") - # regex to find matches like: url(../relative/parent_directory/links/without/quotes/are/hard) - link_ptn_url = re.compile(r"url\([\"\']?(?P((?=[^\)\"\'])(?=[\S]).)+)[\"\']?\)") - - # block special hosts - forbidden_hosts = [ - re.compile(r"^.*registry\.npmjs\.org$"), # No yarnpkg repository - re.compile(r"^.*yarnpkg\.com$"), # No yarnpkg repository - re.compile(r"^[0-9\.]+$"), # avoid IP addresses - re.compile(r"^[^\.]+$"), # needs a dot in host - ] - - # only support certain content types - supported_mime_types = [ - # (filter function -> bool, file extension -> str) - (lambda m: m["t2"] == "javascript", lambda m: "js"), - (lambda m: m["t2"] == "css", lambda m: "css"), - (lambda m: m["t1"] == "font", lambda m: m["t2"]), - ] - - - # load all initial files - files_to_check = [] - for cur_dir, n_dir, n_files in os.walk(root_folder): - files_to_check += [ os.path.join(cur_dir, f) for f in n_files ] - - cached_urls = {} - valid_urls = {} - file_origins = {} - - i = 0 - while i < len(files_to_check): - file_i = files_to_check[i] - try: - print("Inspect", file_i) - with open(file_i, "r", encoding="utf-8") as f: - t = f.read() - - link_findings_default = [ { - "abs": match.group("link"), - "found": match.group("link"), - "host": match.group("host") - } for match in link_ptn.finditer(t) ] - - # extract relative urls and convert them to absolute http urls - link_findings_url_prefix = [] - for match in link_ptn_url.finditer(t): - if os.path.abspath(file_i) in file_origins and not match.group("link").startswith("http"): - link_abs = urljoin(file_origins[os.path.abspath(file_i)], match.group("link")) - item = { - "abs": link_abs, - "found": match.group("link"), - "host": link_ptn.match( "\"" + link_abs + "\"").group("host") - } - link_findings_url_prefix.append(item) - - for spot in link_findings_default + link_findings_url_prefix: - absolute_link = spot["abs"] - found_link = spot["found"] - - if absolute_link not in valid_urls: - # check link - if True in [ True for fh in forbidden_hosts if fh.match(absolute_link) is not None ]: - # host is forbidden - valid_urls[absolute_link] = False - else: - # host is not forbidden - # check mime type - response = requests.head(absolute_link, allow_redirects=True) - mime = response.headers.get("Content-Type", None) - if mime is None: - valid_urls[absolute_link] = False - else: - mime_match = mime_ptn.match(mime) - if mime_match is None: - valid_urls[absolute_link] = False - else: - final_fext = None - # try supported content types - for smt, get_fext in supported_mime_types: - if smt(mime_match): - final_fext = get_fext(mime_match) - break - if final_fext is None: - # mime not supported - valid_urls[absolute_link] = False - else: - # mime is supported -> store and remember file - valid_urls[absolute_link] = True - file_unique = uuid.uuid4() - target_link = offline_link.format(file_unique, final_fext) - target_file = offline_file.format(file_unique, final_fext) - - # download file - try: - file_response = requests.get(absolute_link, allow_redirects=True) - file_response.raise_for_status() - with open(target_file, 'wb') as download_file: - for chunk in file_response.iter_content(100000): - download_file.write(chunk) - # also check downloaded file for links later - files_to_check.append(target_file) - - print("Downloaded file:", absolute_link) - except: - print("Link could not been downloaded:", absolute_link) - - # register downloaded file - cached_urls[absolute_link] = { - "input_link": absolute_link, - "target_link": target_link, - "file": target_file, - "fext": final_fext, - "found": [ {"file": file_i, "found_link": found_link} ] - } - # store reverse lookup for recursive url("../rel/link") patterns - file_origins[os.path.abspath(target_file)] = absolute_link - - if valid_urls[absolute_link]: - # add to cached urls entries - cached_urls[absolute_link]["found"].append({"file": file_i, "found_link": found_link}) - - print("Checked file:", file_i) - except UnicodeDecodeError: - print("Skip file (No unicode):", file_i) - except: - print("Unknown error... Skip file:", file_i) - - # look at next file - i+= 1 - - # replace files with offline link - for _, cached in cached_urls.items(): - for edit_file in cached["found"]: - with open(edit_file["file"], "r", encoding="utf-8") as f: - file_content = f.read() - with open(edit_file["file"], "w", encoding="utf-8") as f: - f.write(file_content.replace(edit_file["found_link"], cached["target_link"])) - print("Patched to", len(cached["found"]), "file with link:", cached["target_link"]) - - print("Done") - -if __name__ == "__main__": - main()