#!/bin/bash
# Turn on command tracing for the rest of the script when DEBUG is exactly "true".
case "${DEBUG}" in
    true) set -x ;;
esac

# Keep one previous copy of the Teleport config: any stale backup is removed
# first, then the current file is copied to the .old path.
current_config=/etc/teleport.yaml
if [[ -f "${current_config}" ]]; then
    rm -f "${current_config}.old"
    cp "${current_config}" "${current_config}.old"
fi

# Ask the EC2 instance metadata endpoint for this instance's private IPv4
# address and local hostname. Dots in the hostname are turned into dashes;
# the result is what the generated configs use as the Teleport nodename.
metadata_base=http://169.254.169.254/latest/meta-data
LOCAL_IP=$(curl -sS "${metadata_base}/local-ipv4")
LOCAL_HOSTNAME=$(curl -sS "${metadata_base}/local-hostname" | tr '.' '-')

# Pull in the deployment variables (described upstream as coming from user-data).
. /etc/teleport.d/conf

# Give the data directory to teleport:adm, then make sure a config file
# exists with mode 664 before any role-specific config is written.
chown -R teleport:adm /var/lib/teleport
generated_config=/etc/teleport.yaml
touch "${generated_config}"
chmod 664 "${generated_config}"

# Letsencrypt is the default; only an explicit USE_ACM=true opts out.
# In the default case, clear any all-in-one ACM role marker and write the
# use-letsencrypt marker file.
case "${USE_ACM}" in
    true) ;;
    *)
        rm -f /etc/teleport.d/role.all-acm
        echo "use-letsencrypt" > /etc/teleport.d/use-letsencrypt
        ;;
esac

# Work out whether this image is a FIPS AMI.
# The packer build puts 'fips' into the ExecStart command of teleport.service,
# so an ExecStart line in that unit file that mentions 'fips' marks a FIPS AMI.
# The result selects the auth service's authentication setting:
#   With FIPS: auth_service.authentication.local_auth must be 'false' or Teleport will not start
#   Without FIPS: auth_service.authentication.second_factor should be set to 'otp'
if grep "ExecStart" /etc/systemd/system/teleport.service | grep -q "fips"; then
    FIPS_AMI=true
    AUTHENTICATION_STANZA="local_auth: false"
else
    FIPS_AMI=false
    AUTHENTICATION_STANZA="second_factor: otp"
fi

if [[ "${TELEPORT_ROLE}" == "auth" ]]; then
    # Leave a marker file recording that this instance runs the auth role.
    echo "auth" > /etc/teleport.d/role.auth
    # Teleport Auth server is using DynamoDB as a backend
    # On AWS, see dynamodb.tf for details
    #
    # Write the auth-only config: ssh_service and proxy_service are disabled.
    # The heredoc delimiter is unquoted, so every ${...} below is expanded by
    # the shell when the file is written. AUTHENTICATION_STANZA holds the
    # FIPS-dependent authentication line chosen earlier in this script; the
    # EC2_REGION / TELEPORT_* values are presumably supplied by the sourced
    # /etc/teleport.d/conf (not visible here).
    cat >/etc/teleport.yaml <<EOF
teleport:
  nodename: ${LOCAL_HOSTNAME}
  advertise_ip: ${LOCAL_IP}
  log:
    output: syslog
    severity: INFO

  data_dir: /var/lib/teleport
  storage:
    type: dynamodb
    region: ${EC2_REGION}
    table_name: ${TELEPORT_DYNAMO_TABLE_NAME}
    audit_table_name: ${TELEPORT_DYNAMO_EVENTS_TABLE_NAME}
    audit_sessions_uri: s3://${TELEPORT_S3_BUCKET}/records

ssh_service:
  enabled: no

proxy_service:
  enabled: no

auth_service:
  enabled: yes
  listen_addr: 0.0.0.0:3025
  authentication:
    ${AUTHENTICATION_STANZA}
  cluster_name: ${TELEPORT_CLUSTER_NAME}
EOF

    # Install the license when one is configured.
    # TELEPORT_LICENSE_PATH only acts as an on/off switch here: the license
    # body is read from the SSM parameter /teleport/<cluster name>/license.
    if [[ -n "${TELEPORT_LICENSE_PATH}" ]]; then
        # The expansions are quoted so that an empty or unusual cluster name or
        # region cannot be word-split/globbed into different aws arguments
        # (an empty, unquoted region would make --query the value of --region).
        if ! aws ssm get-parameter --with-decryption \
            --name "/teleport/${TELEPORT_CLUSTER_NAME}/license" \
            --region "${EC2_REGION}" \
            --query 'Parameter.Value' \
            --output text > /var/lib/teleport/license.pem; then
            # Report the failure instead of silently leaving an empty file behind.
            echo "WARNING: could not read the Teleport license from SSM parameter /teleport/${TELEPORT_CLUSTER_NAME}/license" >&2
        fi
        chown teleport:adm /var/lib/teleport/license.pem
        # Appended after the heredoc, so this lands under auth_service.
        echo "  license_file: /var/lib/teleport/license.pem" >> /etc/teleport.yaml
    fi

    # Token publishing: enable the service/timer pair, then start the timer.
    systemctl enable teleport-ssm-publish-tokens.service teleport-ssm-publish-tokens.timer
    systemctl start teleport-ssm-publish-tokens.timer

    # Certificate fetch and renewal: enable both service/timer pairs first,
    # then queue the start of each timer without blocking on it.
    for cert_job in get-cert renew-cert; do
        systemctl enable "teleport-${cert_job}.service" "teleport-${cert_job}.timer"
    done
    for cert_job in get-cert renew-cert; do
        systemctl start --no-block "teleport-${cert_job}.timer"
    done

    # Swap the all-in-one unit for the dedicated auth unit.
    systemctl disable teleport.service
    auth_unit=teleport-auth.service
    systemctl enable "${auth_unit}"
    systemctl start --no-block "${auth_unit}"

elif [[ "${TELEPORT_ROLE}" == "proxy" ]]; then
    # Record which proxy flavour this instance runs and pick the matching
    # reverse tunnel port.
    # tunnel_listen_addr needs to be changed if we're using ACM as the listener cannot understand HTTP, only HTTPS
    case "${USE_ACM}" in
        true)
            echo "proxy-acm" > /etc/teleport.d/role.proxy-acm
            TUNNEL_LISTEN_PORT=3024
            ;;
        *)
            echo "proxy" > /etc/teleport.d/role.proxy
            TUNNEL_LISTEN_PORT=3080
            ;;
    esac
    # Teleport proxy proxies and optionally records
    # SSH sessions
    #
    # Write the proxy-only config: auth_service and ssh_service are disabled.
    # The heredoc delimiter is unquoted, so every ${...} below is expanded by
    # the shell when the file is written. TUNNEL_LISTEN_PORT was chosen just
    # above from USE_ACM. CA_PIN_HASH_PLACEHOLDER is written literally;
    # NOTE(review): presumably substituted later by a pre-start command - confirm.
    cat >/etc/teleport.yaml <<EOF
teleport:
  auth_token: /var/lib/teleport/token
  ca_pin: CA_PIN_HASH_PLACEHOLDER
  nodename: ${LOCAL_HOSTNAME}
  advertise_ip: ${LOCAL_IP}
  log:
    output: syslog
    severity: INFO

  data_dir: /var/lib/teleport
  storage:
    type: dir
    path: /var/lib/teleport/backend
  auth_servers:
    - ${TELEPORT_AUTH_SERVER_LB}

auth_service:
  enabled: no

ssh_service:
  enabled: no

proxy_service:
  enabled: yes
  listen_addr: 0.0.0.0:3023
  tunnel_listen_addr: 0.0.0.0:${TUNNEL_LISTEN_PORT}
  web_listen_addr: 0.0.0.0:3080
  public_addr: ${TELEPORT_DOMAIN_NAME}:443
EOF
    # The lines below are appended after the heredoc, so they extend proxy_service.
    # Add the SSH proxy load balancer address when one was supplied.
    if [[ -n "${TELEPORT_PROXY_SERVER_LB}" ]]; then
        echo "  ssh_public_addr: ${TELEPORT_PROXY_SERVER_LB}" >> /etc/teleport.yaml
    fi

    # With letsencrypt (i.e. not ACM), add the certificate and key file settings.
    if [[ "${USE_ACM}" != "true" ]]; then
        {
            echo "  https_cert_file: /var/lib/teleport/fullchain.pem"
            echo "  https_key_file: /var/lib/teleport/privkey.pem"
        } >> /etc/teleport.yaml
    fi

    # Certificate checking: enable the service/timer pair and queue the timer start.
    systemctl enable teleport-check-cert.service teleport-check-cert.timer
    systemctl start --no-block teleport-check-cert.timer

    # Replace the all-in-one unit with the proxy unit that matches the TLS setup.
    # The ACM unit skips TLS verification (as we can't get the cert for use locally, it's on the load balancer)
    systemctl disable teleport.service
    proxy_unit=teleport-proxy.service
    if [[ "${USE_ACM}" == "true" ]]; then
        proxy_unit=teleport-proxy-acm.service
    fi
    systemctl enable "${proxy_unit}"
    systemctl start --no-block "${proxy_unit}"

elif [[ "${TELEPORT_ROLE}" == "node" ]]; then
    # Leave a marker file recording that this instance runs the node role.
    echo "node" > /etc/teleport.d/role.node
    # Teleport node handles incoming connections
    #
    # Write the node-only config: only ssh_service is enabled.
    # The heredoc delimiter is unquoted, so every ${...} below is expanded by
    # the shell when the file is written. CA_PIN_HASH_PLACEHOLDER is written
    # literally; NOTE(review): presumably substituted later by a pre-start
    # command - confirm.
    cat >/etc/teleport.yaml <<EOF
teleport:
  auth_token: /var/lib/teleport/token
  ca_pin: CA_PIN_HASH_PLACEHOLDER
  nodename: ${LOCAL_HOSTNAME}
  advertise_ip: ${LOCAL_IP}
  log:
    output: syslog
    severity: INFO

  data_dir: /var/lib/teleport
  storage:
    type: dir
    path: /var/lib/teleport/backend
  auth_servers:
    - ${TELEPORT_AUTH_SERVER_LB}

auth_service:
  enabled: no

ssh_service:
  enabled: yes
  listen_addr: 0.0.0.0:3022

proxy_service:
  enabled: no
EOF

    # Run the dedicated node unit in place of the all-in-one unit.
    node_unit=teleport-node.service
    systemctl disable teleport.service
    systemctl enable "${node_unit}"
    systemctl start --no-block "${node_unit}"

elif [[ "${TELEPORT_ROLE}" == "monitor" ]]; then
    # Monitor role: leave a role marker, disable the teleport unit and delete
    # the Teleport config, which this role does not need.
    monitor_marker=/etc/teleport.d/role.monitor
    echo "monitor" > "${monitor_marker}"
    systemctl disable teleport.service
    rm -f /etc/teleport.yaml
    # Monitor setup runs as its own service; queue its start without blocking.
    systemctl start --no-block teleport-monitor-setup.service

else
    echo "No Teleport role provided via TELEPORT_ROLE; using all-in-one config"
    # Work out the address used as the default host in proxy_service.public_addr.
    # An instance without a public IPv4 address gets a 404 from the metadata
    # endpoint, which would break the generated config, so the local IP is used
    # instead. 'curl -f' makes an HTTP error a non-zero exit rather than a
    # response body, so a single request covers both cases. The response is no
    # longer searched for the text "404", which could also appear in unrelated
    # headers and did not catch other error responses. An empty answer is
    # treated as a failure too.
    if ! PUBLIC_IP=$(curl -sS -f http://169.254.169.254/latest/meta-data/public-ipv4) \
        || [[ -z "${PUBLIC_IP}" ]]; then
        PUBLIC_IP=${LOCAL_IP}
    fi

    # Keep the marker files under /etc/teleport.d in step with the TLS mode and
    # pick the reverse tunnel port.
    # tunnel_listen_addr needs to be changed if we're using ACM as the listener cannot understand HTTP, only HTTPS
    case "${USE_ACM}" in
        true)
            rm -f /etc/teleport.d/use-letsencrypt /etc/teleport.d/role.all
            echo "all-acm" > /etc/teleport.d/role.all-acm
            TUNNEL_LISTEN_PORT=3024
            ;;
        *)
            rm -f /etc/teleport.d/role.all-acm
            echo "all" > /etc/teleport.d/role.all
            TUNNEL_LISTEN_PORT=3080
            ;;
    esac

    # Write the all-in-one config: auth, ssh and proxy services are all
    # enabled on this single instance, with a local directory backend.
    # The heredoc delimiter is unquoted, so every ${...} below is expanded by
    # the shell when the file is written. public_addr falls back to PUBLIC_IP
    # and port 3080 when TELEPORT_EXTERNAL_HOSTNAME / TELEPORT_EXTERNAL_PORT
    # are unset or empty (the ':-' expansions).
    cat >/etc/teleport.yaml <<EOF
teleport:
  nodename: ${LOCAL_HOSTNAME}
  advertise_ip: ${LOCAL_IP}
  log:
    output: syslog
    severity: INFO

  data_dir: /var/lib/teleport
  storage:
    type: dir
    path: /var/lib/teleport/backend

auth_service:
  enabled: yes
  listen_addr: 0.0.0.0:3025
  authentication:
    ${AUTHENTICATION_STANZA}

ssh_service:
  enabled: yes
  listen_addr: 0.0.0.0:3022

proxy_service:
  enabled: yes
  listen_addr: 0.0.0.0:3023
  tunnel_listen_addr: 0.0.0.0:${TUNNEL_LISTEN_PORT}
  web_listen_addr: 0.0.0.0:3080
  public_addr: ${TELEPORT_EXTERNAL_HOSTNAME:-$PUBLIC_IP}:${TELEPORT_EXTERNAL_PORT:-3080}
EOF

    # Add the SSH proxy load balancer address when one was supplied; nothing
    # is appended when the variable is empty or unset.
    [[ -z "${TELEPORT_PROXY_SERVER_LB}" ]] \
        || echo "  ssh_public_addr: ${TELEPORT_PROXY_SERVER_LB}" >> /etc/teleport.yaml

    # TLS setup for the all-in-one instance: letsencrypt when USE_LETSENCRYPT
    # is "true", otherwise ACM when USE_ACM is "true", otherwise nothing extra.
    if [[ "${USE_LETSENCRYPT}" == "true" ]]; then
        echo "Letsencrypt support enabled"
        # these variables must all be set for letsencrypt to work
        # it also needs the running instance to have permissions to read from/write to the S3 bucket
        if [[ -n "${TELEPORT_DOMAIN_ADMIN_EMAIL}" && -n "${TELEPORT_DOMAIN_NAME}" && -n "${TELEPORT_S3_BUCKET}" ]]; then
            # Appended after the heredoc, so these extend proxy_service.
            echo "  https_cert_file: /var/lib/teleport/fullchain.pem" >> /etc/teleport.yaml
            echo "  https_key_file: /var/lib/teleport/privkey.pem" >> /etc/teleport.yaml

            # enable/start cert services and timers
            systemctl enable teleport-get-cert.service teleport-get-cert.timer
            systemctl start teleport-get-cert.timer

            # Start the renew timer itself, not the get-cert timer a second
            # time: 'enable' alone does not start a unit, so without this the
            # renewal timer would not be running in the current boot.
            systemctl enable teleport-renew-cert.service teleport-renew-cert.timer
            systemctl start --no-block teleport-renew-cert.timer

            systemctl enable teleport-check-cert.service teleport-check-cert.timer
            systemctl start --no-block teleport-check-cert.timer

            # Queue an immediate certificate fetch in addition to the timers.
            systemctl start --no-block teleport-get-cert.service
        else
            # Make the skipped setup visible instead of silently doing nothing.
            echo "WARNING: USE_LETSENCRYPT is true but TELEPORT_DOMAIN_ADMIN_EMAIL, TELEPORT_DOMAIN_NAME and TELEPORT_S3_BUCKET are not all set; skipping letsencrypt setup" >&2
        fi
    elif [[ "${USE_ACM}" == "true" ]]; then
        echo "ACM support enabled"
        # Swap the default teleport unit for the ACM variant.
        systemctl disable teleport.service
        systemctl stop --no-block teleport.service

        systemctl enable teleport-acm.service
        systemctl start --no-block teleport-acm.service
    fi
fi

# enable telegraf service if running in cluster mode
# (cluster mode here means TELEPORT_ROLE is auth, node or proxy; the monitor
# role and the all-in-one fallback skip this section)
if [[ "${TELEPORT_ROLE}" == "auth" || "${TELEPORT_ROLE}" == "node" || "${TELEPORT_ROLE}" == "proxy" ]]; then
    # Install teleport telegraf configuration
    # Telegraf will collect prometheus metrics and send to influxdb collector
    #
    # The heredoc delimiter is unquoted, so the shell expands
    # ${TELEPORT_INFLUXDB_ADDRESS} when the file is written; it is the only
    # expansion in the body, everything else is copied literally. Lines
    # starting with '#' inside the heredoc are part of the written telegraf
    # config, not comments of this script.
    cat >/etc/telegraf/telegraf.conf <<EOF
# Configuration for telegraf agent
[agent]
  ## Default data collection interval for all inputs
  interval = "10s"
  ## Rounds collection interval to 'interval'
  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
  round_interval = true

  ## Telegraf will send metrics to outputs in batches of at
  ## most metric_batch_size metrics.
  metric_batch_size = 1000
  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
  ## output, and will flush this buffer on a successful write. Oldest metrics
  ## are dropped first when this buffer fills.
  metric_buffer_limit = 10000

  ## Collection jitter is used to jitter the collection by a random amount.
  ## Each plugin will sleep for a random time within jitter before collecting.
  ## This can be used to avoid many plugins querying things like sysfs at the
  ## same time, which can have a measurable effect on the system.
  collection_jitter = "0s"

  ## Default flushing interval for all outputs. You shouldn't set this below
  ## interval. Maximum flush_interval will be flush_interval + flush_jitter
  flush_interval = "10s"
  ## Jitter the flush interval by a random amount. This is primarily to avoid
  ## large write spikes for users running a large number of telegraf instances.
  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
  flush_jitter = "0s"

  ## By default, precision will be set to the same timestamp order as the
  ## collection interval, with the maximum being 1s.
  ## Precision will NOT be used for service inputs, such as logparser and statsd.
  precision = ""
  ## Run telegraf in debug mode
  debug = false
  ## Run telegraf in quiet mode
  quiet = false
  ## Override default hostname, if empty use os.Hostname()
  hostname = ""
  ## If set to true, do no set the "host" tag in the telegraf agent.
  omit_hostname = false

###############################################################################
#                            INPUT PLUGINS                                    #
###############################################################################

[[inputs.procstat]]
  exe = "teleport"
  prefix = "teleport"

[[inputs.prometheus]]
  # An array of urls to scrape metrics from.
  urls = ["http://127.0.0.1:3434/metrics"]
  # Add a metric name prefix
  name_prefix = "teleport_"
  # Add tags to be able to make beautiful dashboards
  [inputs.prometheus.tags]
    teleservice = "teleport"

# Read metrics about cpu usage
[[inputs.cpu]]
  ## Whether to report per-cpu stats or not
  percpu = true
  ## Whether to report total system cpu stats or not
  totalcpu = true
  ## If true, collect raw CPU time metrics.
  collect_cpu_time = false
  ## If true, compute and report the sum of all non-idle CPU states.
  report_active = false

# Read metrics about disk usage by mount point
[[inputs.disk]]
  ## By default, telegraf gather stats for all mountpoints.
  ## Setting mountpoints will restrict the stats to the specified mountpoints.
  # mount_points = ["/"]

  ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
  ## present on /run, /var/run, /dev/shm or /dev).
  ignore_fs = ["tmpfs", "devtmpfs", "devfs"]

# Read metrics about disk IO by device
[[inputs.diskio]]

# Get kernel statistics from /proc/stat
[[inputs.kernel]]
  # no configuration

# Read metrics about memory usage
[[inputs.mem]]
  # no configuration

# Get the number of processes and group them by status
[[inputs.processes]]
  # no configuration

# Read metrics about swap memory usage
[[inputs.swap]]
  # no configuration

# Read metrics about system load & uptime
[[inputs.system]]
  # no configuration

###############################################################################
#                            OUTPUT PLUGINS                                   #
###############################################################################

# Configuration for influxdb server to send metrics to
[[outputs.influxdb]]
  ## The full HTTP or UDP endpoint URL for your InfluxDB instance.
  ## Multiple urls can be specified as part of the same cluster,
  ## this means that only ONE of the urls will be written to each interval.
  urls = ["${TELEPORT_INFLUXDB_ADDRESS}"] # required
  ## The target database for metrics (telegraf will create it if not exists).
  database = "telegraf" # required

  ## Retention policy to write to. Empty string writes to the default rp.
  retention_policy = ""
  ## Write consistency (clusters only), can be: "any", "one", "quorum", "all"
  write_consistency = "any"

  ## Write timeout (for the InfluxDB client), formatted as a string.
  ## If not provided, will default to 5s. 0s means no timeout (not recommended).
  timeout = "5s"
EOF
    # Enable telegraf for future boots, and restart it now so a running
    # instance picks up the config that was just written.
    systemctl enable telegraf.service
    systemctl restart telegraf.service
fi

# Pre-start commands that run later need to edit the config file, so hand it
# to teleport:adm. Nothing is done when no config file exists (the monitor
# role deletes it).
final_config=/etc/teleport.yaml
if [[ -f "${final_config}" ]]; then
    chown teleport:adm "${final_config}"
fi
