
Airflow Docker-compose

Kyle79 2020. 7. 1. 12:51

Airflow 1.10.10

The compose file below runs Airflow 1.10.10 with the CeleryExecutor: Postgres as the metadata database, Redis as the Celery broker, and separate init, webserver, flower, scheduler, and worker services that all share the same image, with DAGs mounted from ./dags.

version: "3.7"
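# Shared Airflow settings, merged into each service's environment via this anchor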
x-airflow-environment: &airflow-environment
  AIRFLOW__CORE__EXECUTOR: CeleryExecutor
  AIRFLOW__WEBSERVER__RBAC: "True"
  AIRFLOW__CORE__LOAD_EXAMPLES: "False"
  AIRFLOW__CELERY__BROKER_URL: "redis://:@redis:6379/0"
  AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres:5432/airflow

services:
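  # Postgres metadata database for Airflow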
  postgres:
    image: postgres:11.5
    environment:
      POSTGRES_USER: airflow
      POSTGRES_DB: airflow
      POSTGRES_PASSWORD: airflow
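  # Redis backs the Celery broker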
  redis:
    image: redis:5
    environment:
      REDIS_HOST: redis
      REDIS_PORT: 6379
    ports:
      - 6379:6379
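  # One-shot init: creates the metadata DB schema and an admin/admin user if no users exist yet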
  init:
    image: 885622592851.dkr.ecr.us-east-1.amazonaws.com/dev:airflow-1.10.10
    environment:
      <<: *airflow-environment
    depends_on:
      - redis
      - postgres
    volumes:
      - ./dags:/opt/airflow/dags
    entrypoint: /bin/bash
    command: >
      -c "airflow list_users || (airflow initdb
      && airflow create_user --role Admin --username admin --password admin -e airflow@airflow.com -f airflow -l airflow)"
    restart: on-failure
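  # Airflow web UI, exposed on port 8080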
  webserver:
    image: 885622592851.dkr.ecr.us-east-1.amazonaws.com/dev:airflow-1.10.10
    ports:
      - 8080:8080
    environment:
      <<: *airflow-environment
    depends_on:
      - init
    volumes:
      - ./dags:/opt/airflow/dags
    command: "webserver"
    restart: always
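  # Flower dashboard for monitoring the Celery workers, exposed on port 5555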
  flower:
    image: 885622592851.dkr.ecr.us-east-1.amazonaws.com/dev:airflow-1.10.10
    ports:
      - 5555:5555
    environment:
      <<: *airflow-environment
    depends_on:
      - redis
    command: flower
    restart: always
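  # Schedules DAG runs and queues tasks onto the Celery broker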
  scheduler:
    image: 885622592851.dkr.ecr.us-east-1.amazonaws.com/dev:airflow-1.10.10
    environment:
      <<: *airflow-environment
    depends_on:
      - webserver
    volumes:
      - ./dags:/opt/airflow/dags
    command: scheduler
    restart: always
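  # Celery worker that executes the queued tasks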
  worker:
    image: 885622592851.dkr.ecr.us-east-1.amazonaws.com/dev:airflow-1.10.10
    environment:
      <<: *airflow-environment
    depends_on:
      - scheduler
    volumes:
      - ./dags:/opt/airflow/dags
    command: worker
    restart: always

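Any DAG file dropped into ./dags is visible to the init, webserver, scheduler, and worker containers, since they all mount that directory. The snippet below is not from the original post, just a minimal sketch of such a DAG using the 1.10-era import paths; the file name and the task itself are arbitrary.

# ./dags/example_dag.py
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash_operator import BashOperator  # 1.10-style import path

default_args = {
    "owner": "airflow",
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

with DAG(
    dag_id="hello_celery",
    default_args=default_args,
    start_date=datetime(2020, 7, 1),
    schedule_interval="@daily",
    catchup=False,
) as dag:
    BashOperator(
        task_id="hello",
        bash_command="echo 'running on a Celery worker'",
    )

After docker-compose up, the web UI is available on localhost:8080, and you can log in with the admin/admin account created by the init service.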

Airflow 2.0.0

The setup below targets Airflow 2.0 (via the apache/airflow:master-python3.7 image) with the LocalExecutor, a Postgres metadata database, and the AWS SSM Parameter Store configured as the secrets backend; DAGs and great_expectations assets are mounted from ./1-orchestration and ./2-expectations.

- GE_JUPYTER_CMD is not needed for now, and the same goes for the 38888 port mapping.

version: '3.7'
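# Image and environment shared by every Airflow service via this anchor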
x-airflow-common: &airflow-common
  environment:
    - HOST_HOME=${HOME}
    - HOST_ENVIRONMENT_PATH=${PWD}
    - AIRFLOW_HOME=/opt/airflow/1-orchestration
    - AIRFLOW__CORE__EXECUTOR=LocalExecutor
    - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@airflow_db:5432/airflow
    - AIRFLOW__CORE__FERNET_KEY=FB0o_zt4e3Ziq3LdUUO7F2Z95cvFFx16hU8jTeR1ASM=
    - AIRFLOW__SECRETS__BACKEND=airflow.providers.amazon.aws.secrets.systems_manager.SystemsManagerParameterStoreBackend
    - GE_JUPYTER_CMD='jupyter notebook --ip 0.0.0.0'
  image: apache/airflow:master-python3.7
services:
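  # Postgres metadata database, exposed on host port 35432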
  airflow_db:
    image: library/postgres:10
    container_name: airflow_db
    environment:
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
    ports:
      - 35432:5432
    restart: always
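  # Builds the Airflow image from the GitHub source (with the listed extras and great_expectations) and runs a one-off db upgrade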
  airflow_db_init:
    <<: *airflow-common
    build:
      context: https://github.com/apache/airflow.git
      args:
        CONSTRAINT_REQUIREMENTS: https://raw.githubusercontent.com/feluelle/airflow/marshmallow-update/requirements/requirements-python3.7.txt
        AIRFLOW_EXTRAS: amazon,postgres,google,docker
        ADDITIONAL_PYTHON_DEPS: great_expectations
    container_name: airflow_db_init
    command: db upgrade
    depends_on:
      - airflow_db
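  # One-off job that creates the initial admin/admin user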
  airflow_users_init:
    <<: *airflow-common
    container_name: airflow_users_init
    command: users create --role Admin --username admin --email admin@airflow.community --firstname Admin --lastname Airflow --password admin
    depends_on:
      - airflow_webserver
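  # Web UI on host port 38080; 38888 is reserved for the great_expectations Jupyter notebooks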
  airflow_webserver:
    <<: *airflow-common
    container_name: airflow_webserver
    command: webserver
    ports:
      - 38080:8080  # airflow webserver
      - 38888:8888  # great_expectations jupyter notebooks
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ~/.aws:/home/airflow/.aws
      - ./1-orchestration:/opt/airflow/1-orchestration
      - ./2-expectations:/opt/airflow/2-expectations
    depends_on:
      - airflow_db_init
    restart: always
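  # Scheduler; with the LocalExecutor it also runs the tasks itself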
  airflow_scheduler:
    <<: *airflow-common
    container_name: airflow_scheduler
    command: scheduler
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ~/.aws:/home/airflow/.aws
      - ./1-orchestration:/opt/airflow/1-orchestration
      - ./2-expectations:/opt/airflow/2-expectations
    depends_on:
      - airflow_db_init
    restart: always
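As with the 1.10 setup, the quickest smoke test is to drop a DAG into the mounted AIRFLOW_HOME. The sketch below is not from the original post; it assumes the default dags_folder ($AIRFLOW_HOME/dags, i.e. ./1-orchestration/dags on the host) and an image recent enough to include the TaskFlow API introduced with Airflow 2.0.

# ./1-orchestration/dags/hello_taskflow.py
from datetime import datetime

from airflow.decorators import dag, task  # TaskFlow API, new in Airflow 2.0


@dag(schedule_interval="@daily", start_date=datetime(2020, 7, 1), catchup=False)
def hello_taskflow():
    @task
    def extract():
        # stand-in for pulling data from an upstream system
        return {"value": 42}

    @task
    def report(payload: dict):
        print(f"extracted value: {payload['value']}")

    report(extract())


hello_taskflow_dag = hello_taskflow()

Bring everything up with docker-compose up; the web UI is then reachable on localhost:38080 with the admin/admin user created by airflow_users_init.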