
Reference 'values.yaml'

The following file references all available options of the values.yaml Helm configuration.

global:
  # Spider system version to use
  # - use 'latest' to be always up-to-date, frequent upgrades
  # - use a date version. Ex: 2020.04.04
  version: 2023.09.21
  endpoint:
    host: spider.mycompany.io # host where ingress will be deployed
    publicPath: http://spider.mycompany.io # endpoint on which the UIs and services will be exposed. Ex: http(s)://path.to.server
    ip: "" # ip where server runs.
    # used when running dev / demo whisperers to inject in host file
    # used when running behind a firewall that forces you to use a hostAlias for local-controller to talk with the server
  isDemo: false # true to activate demo features (injected whisperers, team and customers, self capture whisperer)
  isDev: false # true to activate dev features (change license server)
  license:
    key: idOfTheLicense
    privateKey: 'PEM' # use simple quotes to protect '\n's
  jwt: # Key pair to secure communications (jwt)
    privateKey: 'PEM'
    publicKey: 'PEM'
  extraCaCerts: '-----BEGIN CERTIFICATE-----\n...\n-----END CERTIFICATE-----\n' # if using self signed S3 or SMTP
  smtp:
    # email used to send mails
    email: spider.analyzer@gmail.com
    # smtp server to use
    server: smtp.gmail.com
    # port of smtp server
    port: 465
    # is smtp server secured
    secure: true
    ignoreTLS: false
    type: LOGIN # or OAUTH2
    loginAuth:
      # login and password to use if smtp server is secured
      login: spider.analyzer@gmail.com
      password: ""
    # oAuth options, if smtp server requires oAuth (gmail does)
    oAuth:
      user:
      clientId:
      clientSecret:
      accessToken:
      refreshToken:
      expires: 3600
  elasticsearch:
    password: password # password for elastic user
    stores:
      # used for customers, whisperers
      config:
        shards: 1
        replicas: 2
      # used for links, sessions, jobs
      utils:
        shards: 1
        replicas: 1
        # duration before purge - Links
        ttl: 6d
        rollover: 5d
      # used for infra monitoring, beats and raw status
      monitoring:
        shards: 3
        replicas: 0
        # duration before purge
        ttl: 6d
        rollover: 1d
      # used for all coms, except UI uploaded data
      streamedData:
        - name: default
          # how many shards to use, should be <= to the number of servers in your Elasticsearch cluster
          shards: 3
          # how many replicas to use, should be <= to the number of servers-1.
          replicas: 0
          ttl: 5d
          rollover: 12h
        - name: shortLife
          shards: 3
          replicas: 0
          ttl: 1d
          rollover: 1d
        - name: longLife
          shards: 1
          replicas: 0
          ttl: 15d
          rollover: 1d
      # used for uploaded data
      uploadedData:
        - name: default
          shards: 1
          replicas: 0
          ttl: 5d
registries:
  serverRegistryAuth:
    login: test # login to connect to Spider docker registry for servers
    password: token # password/token to connect to Spider docker registry
  whispererRegistryAuth:
    login: test # login to connect to Spider docker registry for whisperers
    password: token # password/token to connect to Spider docker registry
ingress:
  useTls: true
  tlsSecretName: mySecret # secret for TLS ingress, to be provided outside Spider
  className: nginx # name of ingressController to use
  annotations: {}
customers:
  # array of mails used to send notification mails:
  # - account creation notification
  supportEmails:
    - thibaut.raballand@gmail.com
  # if LDAP auth is used
  ldapAuth:
    active: false
    uri: ldap://spider.io:20389
    # base path of the users
    base: ou=users,dc=streetsmart,dc=global
    # other options are available if needed
  # if OIDC auth is used
  oidcAuth:
    active: true
    providers:
      - name: Google
        client_id: ""
        client_secret: ""
        authorization_endpoint: "https://accounts.google.com/o/oauth2/v2/auth"
        token_endpoint: "https://oauth2.googleapis.com/token"
        jwks_uri: "https://www.googleapis.com/oauth2/v3/certs"
  accounts:
    # should account creation form be displayed on login page
    creationForm: true
    # should reset password form be displayed on login page
    # set to false if you only use LDAP auth
    resetPasswordForm: true
    # should accounts require admin activation
    # is not taken into account for LDAP accounts
    adminActivation: false
    # should send mail to supportEmails on account creation
    mailOnCreation: true
  teams:
    # should send mail to supportEmails on team creation
    mailOnCreation: true
  admins: # create an admin account at installation (only then)
    createAccountsAtInstallation: false
    accounts:
      - givenName: ""
        familyName: ""
        email: ""
        # when ldapAuth, account is created as admin, and user will authenticate with LDAP
        # when Spider auth, account is created with random password,
        # and user can reset password with standard feature
        ldapAuth: false
controller:
  privateKey: 'PEM' # private pem for controller security, same format as jwt key and license key above
  publicKey: 'PEM' # public pem for controller security
  namespaces: # see controller documentation
    blackList:
      - spider-system
      - kubernetes-dashboard
      - cattle.*
backup:
  active: true # backs up configuration data every day to S3
  restoreAtInstallation:
    active: false # reloads previously backed up data at installation
    date: "2023-01-31" # date of the backup to restore
alerting:
  tooManyLogs:
    # Alerts when last minutes are showing too many errors
    active: true
    maxErrorLogsPerMin: 10
  noNewStatus:
    # Alerts when Whisperers are not sending anything
    active: true
    minStatusPerMin: 5
  infraChange:
    # Alerts when servers are added / removed
    active: true
  lowESFreeSpace:
    # Alerts when ES is getting low free space
    active: true
    minESFreeSpaceInGB: 10
  healthcheckES:
    # Alerts when ES health is yellow or red
    active: true
  healthcheckRedis:
    # Alerts when the free memory in Redis gets too low
    active: true
    minRedisFreeMemoryInMB: 100
  parsingDelay:
    # Alerts when the delay for parsing gets too long
    active: true
    threshold: PT30S
  licenseInvalid:
    # Alerts when license is getting invalid in the month
    active: true
  oldLicenseStats:
    # Alerts when license stats are too old
    active: true
    threshold: PT13H
  pollingQueued:
    # Alerts when too many items are in the queues
    active: true
    threshold: 20000
gui:
  networkview:
    # Tell if usage statistics for Users sessions sent to server default to anonymous or with user's id+email
    namedStatistics: false
    pluginsStore:
      # Tell if plugins store should be global (in the cloud) or local to this instance
      global: true
api:
  authenticatedHealthApi: false
tuning:
  shutdownDelay: 3 # duration in s before shutting down services (allow scaledown without issue)
  # Tell if ES indices are sharded / routed by the whisperers id.
  # Activate if you have many different whisperers over many shards to route indexing and searching on current indices
  # Deactivate if you have few whisperers and you want to scale indexing over all shards
  useWhispererRoutingWithES: false
  # Tell if Spider should compress Payloads before saving in Redis
  # See documentation for details and effects
  compressRedisPayloads:
    packets: false
    tcpSessions: false
    httpComs: true
    httpComContents: true
    httpPers: true
  # Tell if most used APIs calls should be compressed
  # See documentation/blog for details and effects
  compressApis:
    packRead:
      getPacketsOfTcpSession: false
    tcpUpdate:
      postParsingJob: true
    webWrite:
      tcpSessions: true
    webWriteWarnings:
      tcpSessions: true

############# Spider infrastructure

# Elastic, Kibana, Filebeat, Metricbeat
elasticsearch:
  nodes: 1 # count of requested nodes in the cluster
  resources:
    requests:
      cpu: 1 # cpu requests and limit
      memory: 1Gi # memory size request (Gi)
    limits:
      cpu: 1 # cpu requests and limit
      memory: 1Gi # memory size request (Gi)
  volume:
    size: 1Gi # size (Gi) of volume to create for each node
    storageClassName: # storageClass to use

# Available redis in-memory stores
redis:
  redisPack:
    maxmemory: 1Gb
  redisTcp:
    maxmemory: 1Gb
  redisShared:
    maxmemory: 1Gb

# S3 backup
s3Backup:
  accessKeyId:
  secretAccessKey:
  region:
  endpoint:
  bucket:

# Traefik gateway
gateway:
  replicas:
    min: 1
    max: 20
    cpuThreshold: 500m

# Spider microservices
# Extra Kubernetes options are available for taints, debug and demo
# Below are classical options that you might want to change
services:
  captureStatusPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  config:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 500m
  customer:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  guiLogs:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  guiSettings:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  hosts:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  hostsPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  jobs:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  links:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  mailSender:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  monitorRead:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  packPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  packRead:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  packUpdate:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  packWrite:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  parsingStatusHttpPersPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  parsingStatusTcpSessionPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  plugins:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  sessions:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  tcpPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  tcpRead:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  tcpUpdate:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  tcpWrite:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  teams:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  webHttpComContentPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  webHttpComPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  webHttpPersPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  webRead:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  webUpload:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  webWrite:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  webWriteWarnings:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  whispStatusPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    threadsCount: 1
  whisp:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
  whispsStatus:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m

Version

global:
  # Spider system version to use
  # - use 'latest' to be always up-to-date, frequent upgrades
  # - use a date version. Ex: 2020.04.04
  version: 2023.04.11

The version specifies which version of Spider to deploy.
Spider uses system versioning: all components are tagged with the same version, ensuring consistency of the release.

Obtaining the versions list

The latest stable version is listed on the Spider homepage.
Available system versions are listed in the CHANGELOG.

caution

For now, only the latest version and the last dated stable version are guaranteed to be available.

Previous versions may be removed from the store without warning.

Endpoint

global:
  endpoint:
    host: spider.mycompany.io # host where ingress will be deployed
    publicPath: http://spider.mycompany.io # endpoint on which the UIs and services will be exposed. Ex: http(s)://path.to.server
    ip: "" # ip where server runs. used when running dev / demo whisperers to inject in host file
  • publicPath and host are often related values, but they may be different.
  • When ip is set, a HostAlias is created for the local-controller to talk to the server
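
For instance, a setup exposed over HTTPS with the server IP injected for local whisperers might look like this (the hostnames and IP below are purely illustrative):

global:
  endpoint:
    host: spider.mycompany.io               # host configured on the ingress
    publicPath: https://spider.mycompany.io # public URL, here served over HTTPS
    ip: "10.0.12.34"                        # creates a hostAlias so the local-controller can reach the server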

License

global:
  license:
    key: idOfTheLicense
    privateKey: 'PEM' # use simple quotes to protect '\n's

A license is obtained by purchasing Spider from Floocus.

The key identifies your license, and the privateKey is used to secure authentication with the Floocus central system.
Without a license, Spider neither captures nor parses communications.

note

As with the JWT keys, the PEM is expected to be provided as a one-liner with \n escapes.

Example:

global:
  jwt:
    publicKey: '-----BEGIN RSA PUBLIC KEY-----\nMIGJAoGBAL8sa4fGv/dWCwe8Ssff+bIxWWEPVIfGByhPd0MbEMVyZk0p2Ex4Wokn\naDxqax2+am+SYRoQcgBR8MUMlopGYzoioWvqR8kaBR4SPU2v34MRlGxLE/YMs4tJ\ntg7r/wdVa2IXjUoZcFoC0IqdSmwQuYKWQlNOEvcCxe0MfERS2JzfAgMBAAE=\n-----END RSA PUBLIC KEY-----'

JWT keys

global:
  jwt: # Key pair to secure communications (jwt)
    privateKey: 'PEM'
    publicKey: 'PEM'

This key pair is used to sign the JWT tokens issued by the customer, team and whisperer services as their related identity tokens.

Generating JWT key pair with OpenSSL

To generate the key pair, you may use OpenSSL:

Private key
$ openssl genrsa -out privateKey.pem 2048
Generating RSA private key, 2048 bit long modulus
..........+++
..........................................................................+++
e is 65537 (0x10001)
Public key
$ openssl rsa -in privateKey.pem -outform PEM -pubout -out publicKey.pem
writing RSA key

You then need to transform the PEM files into one-liners to provide them as values:

$ cat publicKey.pem | sed ':a;N;$!ba;s/\n/\\n/g'
$ cat privateKey.pem | sed ':a;N;$!ba;s/\n/\\n/g'
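
The resulting one-liners can then be pasted into the values file, for example (keys truncated for illustration):

global:
  jwt:
    privateKey: '-----BEGIN RSA PRIVATE KEY-----\n...\n-----END RSA PRIVATE KEY-----'
    publicKey: '-----BEGIN PUBLIC KEY-----\n...\n-----END PUBLIC KEY-----'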

Demo mode

global:
  isDemo: false # true to activate demo features (injected whisperers, team and customers, self capture whisperer)

When set to true:

  • A Demo team is created
  • 2 users are created
    • Owner user, who owns the whisperers
    • Basic user, who only has shared access
  • 3 whisperers are created
    • Self capture, belonging to the Demo team
    • Own whisp, belonging to the Owner user
    • Upload, belonging to the Owner user

The Self capture whisperer captures a few services of Spider itself:

  • Web-read
  • Hosts
  • Whisp

These resources let you test Spider configuration and capture without needing any system of your own to capture.

Dev mode

global:
  isDev: false # true to activate dev features (change license server)

When set, Spider connects to a Floocus development license server, and some debug features are made available.

Extra CA

global:
  extraCaCerts: '-----BEGIN CERTIFICATE-----\n...\n-----END CERTIFICATE-----\n' # if self signed S3 or SMTP

If you have secured your S3 or SMTP server with a custom Certificate Authority, you may provide its certificate in extraCaCerts so that Spider accepts connecting to them.

SMTP

global:
  smtp:
    # email used to send mails
    email: spider.analyzer@gmail.com
    # smtp server to use
    server: smtp.gmail.com
    # port of smtp server
    port: 465
    # is smtp server secured
    secure: true
    ignoreTLS: false
    type: LOGIN # or OAUTH2
    loginAuth:
      # login and password to use if smtp server is secured
      login: spider.analyzer@gmail.com
      password: ""
    # oAuth options, if smtp server requires oAuth (gmail does)
    oAuth:
      user:
      clientId:
      clientSecret:
      accessToken:
      refreshToken:
      expires: 3600

Spider uses SMTP to send mail notifications for:

  • Account changes
  • Team changes
  • Alerting
  • ...

The main configuration options are self-explanatory:

  • email - address used as the sender when sending emails
  • server & port - the SMTP server to use
  • secure - whether to use the secure SMTP protocol (with TLS) and credentials
  • ignoreTLS - accept self-signed certificates

Then, type tells whether authentication on a secured server is done with LOGIN auth or OAUTH2.

  • With LOGIN, specify the loginAuth.login and loginAuth.password parameters.
  • With OAUTH2, specify the oAuth.* parameters.
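
As an illustration, an OAUTH2 configuration for Gmail might look like the following; all credential values are placeholders to be obtained from your provider:

global:
  smtp:
    email: spider.analyzer@gmail.com
    server: smtp.gmail.com
    port: 465
    secure: true
    type: OAUTH2
    oAuth:
      user: spider.analyzer@gmail.com
      clientId: "xxxx.apps.googleusercontent.com"
      clientSecret: "xxxx"
      accessToken: "xxxx"
      refreshToken: "xxxx"
      expires: 3600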

Elasticsearch indices setup

global:
  elasticsearch:
    password: password # password for elastic user
    stores:
      # used for customers, whisperers
      config:
        shards: 1
        replicas: 2
      # used for links, sessions, jobs
      utils:
        shards: 1
        replicas: 1
        # duration before purge - Links
        ttl: 6d
        rollover: 5d
      # used for infra monitoring, beats and raw status
      monitoring:
        shards: 3
        replicas: 1
        # duration before purge
        ttl: 6d
        rollover: 1d
      # used for all coms, except UI uploaded data
      streamedData:
        - name: default
          # how many shards to use, should be <= to the number of servers in your Elasticsearch cluster
          shards: 3
          # how many replicas to use, should be <= to the number of servers-1.
          replicas: 0
          ttl: 5d
          rollover: 12h
        - name: shortLife
          shards: 3
          replicas: 0
          ttl: 1d
          rollover: 1d
        - name: longLife
          shards: 1
          replicas: 0
          ttl: 15d
          rollover: 1d
      # used for uploaded data
      uploadedData:
        - name: default
          shards: 1
          replicas: 0
          ttl: 5d

This part configures the indices used by Spider to store data.

Password

The password of the elastic user that Spider uses to connect to Elasticsearch.

  • You might also connect to Kibana with it
  • Use a strong one!

We recommend creating other users in Kibana on your own, should you want to perform direct queries on the captured data.

Shards & replicas

Indices, in Elasticsearch, may have:

  • shards - to spread data across nodes
    • Should not exceed the number of nodes in the cluster
    • Should be > 1 only for big data indices
  • replicas - for high availability and read copies of the data, giving more search throughput
caution

Having replicas of streamed data leads to a LOT of extra volume requirements!
All data is copied to every replica.

Index lifecycle management

Spider indices are already optimised with their own Index Lifecycle Management (ILM) rules.
ILM rules define how the time-based indices move from write optimisation to read optimisation during their life, and when they are finally deleted.
They allow continuous storage of Spider data while limiting how long data is kept, which keeps the required volume under control.

ILM rules have a few parameters to tune them:

  • ttl - duration of data availability
  • rollover - duration of the 'write' index (and of each index in the system) before it becomes read-only
    • When the load is high, you should reduce this duration to get smaller indices.

Index categories

The indices are split into 5 categories:

  • config
    • Indices that store the configuration data (customers, teams, whisperers...)
    • They don't have ILM as the data is kept forever (except deleted data).
  • utils
    • Indices used to store data for UI (links, jobs...)
  • monitoring
    • Indices used to store all monitoring data
  • streamedData
    • Indices that store the captured communication data (HTTP communication, TCP sessions, Packets, Hosts, Parsing status...)
    • They have an ILM defined
  • uploadedData
    • Indices that store the communication data uploaded from the UI (HTTP communication, TCP sessions, Packets, Hosts, Parsing status...)
    • Upload indices have a specific ILM with - usually - a longer TTL

Data Storage Policies

The streamedData and uploadedData sections are arrays.

Indeed, you may define several storage policies here, each with a distinct name and a different TTL.
This allows parts of your captured data to be stored with different retention times, depending on the Whisperer that captured them.

Rules:

  • You have to set up DSPs for both streamedData and uploadedData.
  • You have to define a default DSP, or the Spider pollers will not start.
  • The name you give is used
    • to name the Elasticsearch ILM rules, templates and indices
    • for display in the Whisperer configuration tabs
  • Names may contain uppercase letters, but no spaces. They must be unique.

warning

No control is made on the uniqueness of names or on the absence of spaces. The result will be bad 😅

Limits:

  • When removing a policy,
    • the existing data indices are not removed immediately; they are removed at the end of their existing TTL
    • the existing Elasticsearch objects (templates, ILM...) are not removed automatically
  • When updating a policy,
    • the existing data stays associated with the policy in effect at the time of indexing
    • only new data is associated with the updated policy
warning

Take care not to define too many different policies:

  • Each policy creates its own set of indices to store data, and the number of indices an Elasticsearch cluster can manage is limited
  • When searching data, the search runs over the indices of ALL data policies. The more policies, the slower the search.
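
As a sketch, adding a longer-retention policy next to the mandatory default one could look like this (the longRetention name and durations are illustrative):

global:
  elasticsearch:
    stores:
      streamedData:
        - name: default # mandatory, or the Spider pollers will not start
          shards: 3
          replicas: 0
          ttl: 5d
          rollover: 12h
        - name: longRetention
          shards: 1
          replicas: 0
          ttl: 30d
          rollover: 1d
      uploadedData:
        - name: default
          shards: 1
          replicas: 0
          ttl: 5d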

Registries

registries:
  serverRegistryAuth:
    login: test # login to connect to Spider docker registry for servers
    password: token # password/token to connect to Spider docker registry

Sets the credentials to connect to the Spider Docker registries.

  • serverRegistryAuth is mandatory, as it is required to download the Spider service images.

The tokens are issued with the Spider license.

Ingress

ingress:
  useTls: true
  tlsSecretName: mySecret # secret for TLS ingress, to be provided outside Spider
  className: nginx # name of ingressController to use
  annotations: {}

Spider needs to expose its own gateway for Whisperers to upload data, and for the UIs to be accessible.

The ingress may or may not use TLS (HTTPS) to secure communications.
When it does, tlsSecretName is required to associate the TLS certificate with the ingress.
As the secret is often created (and even reflected) by cert-manager or another tool, only its name is expected here.

Spider is not coupled to any ingress className.
Even though Spider uses Traefik internally as its API gateway, the latter may be exposed through any other ingress controller, such as nginx in the example above.
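
For instance, with an nginx ingress controller and a certificate managed by cert-manager, the configuration might look like this (the cluster-issuer annotation only applies if you run cert-manager; names are illustrative):

ingress:
  useTls: true
  tlsSecretName: spider-tls # secret created by cert-manager from the annotation below
  className: nginx
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod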

Customers

The customers block allows configuring:

  • Login form options
  • Connection means
  • Creation of administrators

Options

customers:
  # array of mails used to send notification mails:
  # - account creation notification
  supportEmails:
    - thibaut.raballand@gmail.com
  accounts:
    # should account creation form be displayed on login page
    creationForm: true
    # should reset password form be displayed on login page
    # set to false if you only use LDAP auth
    resetPasswordForm: true
    # should accounts require admin activation
    # is not taken into account for LDAP accounts
    adminActivation: false
    # should send mail to supportEmails on account creation
    mailOnCreation: true
  teams:
    # should send mail to supportEmails on team creation
    mailOnCreation: true

Accounts creation

  • accounts.creationForm displays a link on the login page allowing users to create their own Spider account.

When not active, account creation may only be done through:

  • Manual account creation by an administrator on the UI.
  • Automatic account creation by LDAP authentication.

With the accounts.mailOnCreation option, all supportEmails receive a mail on every account creation.

Once an account is created, you may wish to validate it manually.

  • This is done by setting the accounts.adminActivation option
  • The addresses listed in supportEmails will be notified that an account has to be activated
  • Activation is done by editing the user profile on the UI and switching its state from DRAFT to ACTIVE.

Teams creation

You may also be notified of team creation using the teams.mailOnCreation option.

Forgot your password

accounts.resetPasswordForm activates a link on the login page to reset a user's password when it has been forgotten.

Process:

  1. The user enters his/her email address
  2. A mail is sent to the user with a link to reset their password
    • The link includes a time-limited token
  3. The link routes the user to a form to set a new password (with confirmation)
  4. The new password is sent to the server with the token id. The token is checked before the new password is accepted.
caution

This feature is not possible for LDAP authenticated users.

Authentication means

LDAP

customers:
  # if LDAP auth is used
  ldapAuth:
    active: false
    uri: ldap://spider.io:20389
    # base path of the users
    base: ou=users,dc=streetsmart,dc=global

Users may authenticate natively to Spider, but they may also connect with an LDAP account.

The options above configure the LDAP connection.
The base option sets where the user accounts are located in the LDAP.

Process:

  1. At login, Spider searches for the user in the LDAP using the email provided.
  2. If a user is found in the LDAP, Spider tries to bind the user with the provided password.
  3. If successful, Spider checks whether the user exists in its own database to:
    • Either connect them,
    • Or create a new account, in the case of a new user.

OpenId Connect

customers:
  oidcAuth:
    active: true
    providers:
      - name: Google
        client_id: ""
        client_secret: ""
        authorization_endpoint: "https://accounts.google.com/o/oauth2/v2/auth"
        token_endpoint: "https://oauth2.googleapis.com/token"
        jwks_uri: "https://www.googleapis.com/oauth2/v3/certs"
      - name: Gitlab
        client_id: ""
        client_secret: ""
        authorization_endpoint: "https://gitlab.com/oauth/authorize"
        token_endpoint: "https://gitlab.com/oauth/token"
        jwks_uri: "https://gitlab.com/oauth/discovery/keys"
      - name: GitHub
        client_id: ""
        client_secret: ""
        authorization_endpoint: "https://github.com/login/oauth/authorize"
        token_endpoint: "https://github.com/login/oauth/access_token"
        userinfo_endpoint: "https://api.github.com/user"
        jwks_uri: "https://token.actions.githubusercontent.com/.well-known/jwks"
        scope: "openid email profile user"

Users may also authenticate against an external Identity Provider (IP) using the OpenID Connect protocol.

Configuration

The options above demonstrate several OIDC configurations at once.

  • You may define as many IPs as you need
  • The option names match the standard OIDC parameter labels
  • client_id and client_secret are specific to Spider (see below), and generated by the IP
  • The URLs are usually listed in the provider's .well-known/openid-configuration endpoint
  • The scope is the list of OAuth2 scopes requested by Spider when connecting
    • They must allow fetching the email, family_name and given_name of the connected user
    • Usually:
      • openid allows getting an id_token from the original code received, when calling the token_endpoint
      • email is required for Spider to authenticate the user
      • profile or user are often required to get the user info to create the account
        • If not provided, Spider will create the account with a CHANGE ME value
  • scope and userinfo_endpoint are optional

To get the values to fill in, you usually have to:

  1. Register an application in your identity provider's application / client list
  2. Give it a name, such as Spider Analyzer
  3. Provide the application URL that the IP will redirect to
    • For Spider: https://{global.endpoint.publicPath}/login/ and https://{global.endpoint.publicPath}/login if you are using https
    • Replace global.endpoint.publicPath by the value you've set earlier 😉
  4. Get in return the client_id and client_secret to provide here
  5. Find the .well-known/openid-configuration of your IP
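
As an illustration, the endpoints found in a provider's discovery document can be copied into a provider entry as follows (all URLs and identifiers below are hypothetical):

customers:
  oidcAuth:
    active: true
    providers:
      - name: MyCompanySSO
        client_id: "spider-analyzer"
        client_secret: "xxxx"
        authorization_endpoint: "https://sso.mycompany.io/oauth2/authorize"
        token_endpoint: "https://sso.mycompany.io/oauth2/token"
        jwks_uri: "https://sso.mycompany.io/oauth2/jwks"
        userinfo_endpoint: "https://sso.mycompany.io/oauth2/userinfo"
        scope: "openid email profile"
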
Authentication process

[Diagram: OIDC architecture]

Spider implements the authorization code grant:

  1. The Network View and Monitoring UIs redirect to the Login UI
    • With the user email and identity provider (IP) if the user was previously connected with OIDC
  2. The Login UI redirects to the IP to get a code, using authorization_endpoint, client_id and scope
    • Automatically, with an email hint, when the email was provided by the UIs
    • When the user clicks on the IP name on the login page
  3. The IP validates the user session or displays a login page, then validates the credentials
  4. The IP redirects to the Login UI with a code
  5. The Login UI calls the Customers service with the code and provider name to get the user token
  6. The Customers service exchanges the code for an access_token and an id_token, using the client_id and client_secret
  7. The Customers service validates the id_token with the public keys provided at jwks_uri
  8. The Customers service gets the email from the id_token, or from the userinfo_endpoint using the access_token when the former does not provide it
  9. The Customers service sends a signed Spider token when everything goes well
  10. The token is saved in local storage by the UI, then used between Spider components as usual. The Login UI redirects back to the original UI.

This flow keeps the secret in the backend, never revealing it on the client side.

When a user has a token generated through an OIDC flow, every page load or refresh redirects to the Login UI with predefined parameters to check whether the IP session is still valid.
This is done with a grace period of 1 minute after login, to avoid infinite redirects 😅

note

Spider does not support Single Log Out, nor Front or Back-channel logouts.

Logging out from Spider will only log the user out from Spider.
By choice.

Admin creation

customers:
  admins: # create an admin account at installation (only then)
    createAccountsAtInstallation: false
    accounts:
      - givenName: ""
        familyName: ""
        email: ""
        # when ldapAuth, account is created as admin, and user will authenticate with LDAP
        # when Spider auth, account is created with random password,
        # and user can reset password with standard feature
        ldapAuth: false

When installing Spider, no 'standard' or 'generic' administration account exists. This ensures better security.

You may set a list of accounts to 'pre-create' at installation.
These accounts will be created as administrators, with a random, unknown password.

Then,

  • either the administrator sets their own password using the Reset password form,
  • or they connect with LDAP (when configured).

This process ensures an administrator password never has to be shared or communicated.
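
A minimal example, with illustrative names, pre-creating one administrator who will then set their password through the Reset password form:

customers:
  admins:
    createAccountsAtInstallation: true
    accounts:
      - givenName: "Jane"
        familyName: "Doe"
        email: "jane.doe@mycompany.io"
        ldapAuth: false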

Controller

controller:
  privateKey: 'PEM'
  publicKey: 'PEM'

When installing Spider, a Controller is automatically set up on the cluster Spider is installed on.

The Controller is used to attach Whisperers to any workload of the cluster through the UI.
The Controller needs two keys to access the server:

  • The privateKey goes into its secret configuration
  • The publicKey is saved in the Controller resource on the server

See Generating JWT key pair with OpenSSL.

Limit namespaces

controller:
  namespaces:
    blackList:
      - spider-system
      - kubernetes-dashboard
      - cattle.*
    whiteList:
      - myNamespace

You may limit the namespaces in which Whisperers may be spawned using blackList and whiteList.
Both accept regular expressions:

  • blackList prevents using the listed namespaces
  • whiteList limits access to only the listed namespaces

Resource generation

controller:
  createLocalController: true

The local controller resource record is generated by Helm and injected into Elasticsearch at the first installation only.
Its template includes the controller creation date, set to the generation date.

warning

This 'dynamic' date causes issues with Argo CD, as it considers the deployment out of sync due to the ever-changing date.

To avoid this out-of-sync status, you may set createLocalController to false after the initial setup.
This stops Helm from generating the record.

Backup

backup:
  active: true # backs up configuration data every day to S3

backup.active activates regular backup of configuration resources.

  • Backup is done every day to the S3 store configured under the s3Backup key in the infrastructure part of the values file.
  • The configuration resources saved are:
    • Customers
    • Teams
    • Whisperers
    • UiSettings
  • They are saved as plain bulk creation requests for Elasticsearch
  • A new file is saved every day
caution

No automatic removal of old files is done.
We advise you to set a bucket lifecycle policy to automatically remove the oldest files.

Restore at installation

backup:
  restoreAtInstallation:
    active: false # reloads previously backed up data at installation
    date: "2023-01-31" # date of the backup to restore

You may wish to restore a previous backup when installing Spider.
The backup files from the requested date are loaded from the S3 store configured under the s3Backup key.

This is done by a post-install Kubernetes Job that runs only at Helm installation and waits for Elasticsearch (and its indices) to be available.

caution

Do not combine automatic administrator account creation and backup restoration if administrator accounts exist in the backup.

Otherwise two accounts would exist with the same email, which Spider does not handle.

Alerting

alerting:
  tooManyLogs:
    # Alerts when last minutes are showing too many errors
    active: true
    maxErrorLogsPerMin: 10
  noNewStatus:
    # Alerts when Whisperers are not sending anything
    active: true
    minStatusPerMin: 5
  infraChange:
    # Alerts when servers are added / removed
    active: true
  lowESFreeSpace:
    # Alerts when ES is getting low free space
    active: true
    minESFreeSpaceInGB: 10
  healthcheckES:
    # Alerts when ES health is yellow or red
    active: true
  healthcheckRedis:
    # Alerts when the free memory in Redis gets too low
    active: true
    minRedisFreeMemoryInMB: 100
  parsingDelay:
    # Alerts when the delay for parsing gets too long
    active: true
    threshold: PT30S
  licenseInvalid:
    # Alerts when license is getting invalid in the month
    active: true
  oldLicenseStats:
    # Alerts when license stats are too old
    active: true
    threshold: PT13H
  pollingQueued:
    # Alerts when too many items are in the queues
    active: true
    threshold: 20000

Alerting lets you configure which alerts to activate and their thresholds.
See Alerting for complete documentation.

GUI options

gui:
  networkview:
    # Tell if usage statistics for Users sessions sent to server default to anonymous or with user's id+email
    namedStatistics: false
    pluginsStore:
      # Tell if plugins store should be global (in the cloud) or local to this instance
      global: true

gui.networkview.namedStatistics tells whether the usage statistics captured in the UI (actions clicked, options active, whisperers selected) are associated with the user's id and email, or with an anonymous random id.
This only sets the default; users may always change their own setting.

gui.networkview.pluginsStore.global tells whether the store used to search and publish plugins is the global Floocus one (in the cloud) or a local one (from the current setup).
See Plugins Store for more information.

API options

api:
  authenticatedHealthApi: false

authenticatedHealthApi tells whether the /health API is exposed with authentication or not.
If set to true, a Spider token will be expected, but no authorization check will be performed.

Tuning

tuning:
  shutdownDelay: 3 # duration in s before shutting down services (allow scaledown without issue)
  # Tell if ES indices are sharded / routed by the whisperers id.
  # Activate if you have many different whisperers over many shards to route indexing and searching on current indices
  # Deactivate if you have few whisperers and you want to scale indexing over all shards
  useWhispererRoutingWithES: false

Shutdown Delay

shutdownDelay defines how long services wait before shutting down.
You may need to increase it to avoid errors when autoscaling scales down the nodes.

info

This depends on your Kubernetes distribution:

  • On AWS EKS, 1s seems enough.
  • On Nutanix Karbon, setting it to 10s was required.

See this blog post for more details.
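
For example, on a distribution behaving like Nutanix Karbon in the note above, you would raise the delay as follows:

tuning:
  shutdownDelay: 10 # seconds to wait before shutting down services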

Sharding routing

useWhispererRoutingWithES tells whether Spider should use the Whisperers' ids to route data across ES shards.

It should be used only for high-cardinality setups.
See this blog item on the topic.

Compress Redis payloads

tuning:
  compressRedisPayloads:
    packets: false
    tcpSessions: false
    httpComs: true
    httpComContents: true
    httpPers: true

Activates or deactivates Snappy compression of the payloads stored in Redis.
See the study in this blog post.

Compress APIs

tuning:
  compressApis:
    packRead:
      getPacketsOfTcpSession: false
    tcpUpdate:
      postParsingJob: true
    webWrite:
      tcpSessions: true
    webWriteWarnings:
      tcpSessions: true

Activates or deactivates compression of the most used internal API calls.
See the study in this blog post.

Infrastructure setup

This part explains the infrastructure options of the Spider setup.

Elasticsearch

# Elastic, Kibana, Filebeat, Metricbeat
elasticsearch:
  nodes: 1 # count of requested nodes in the cluster
  resources:
    requests:
      cpu: 1 # cpu requests and limit
      memory: 1Gi # memory size request (Gi)
    limits:
      cpu: 1 # cpu requests and limit
      memory: 1Gi # memory size request (Gi)
  volume:
    size: 1Gi # size (Gi) of volume to create for each node
    storageClassName: # storageClass to use

These settings define:

  • The number of nodes in the Elasticsearch cluster.
    • They will all be master, data and ingest nodes with the current settings.
    • The more nodes, the more ingestion capability you get.
  • The Kubernetes resources for each node.
  • The persistent volume claim for each node, with its size and storageClassName.
    • Depending on the retention settings of the indices, you might need hundreds of gigabytes.
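
As an illustration, a small production cluster with three nodes and larger volumes might be sized as follows (the figures are examples to adapt to your load and retention settings):

elasticsearch:
  nodes: 3
  resources:
    requests:
      cpu: 2
      memory: 8Gi
    limits:
      cpu: 4
      memory: 8Gi
  volume:
    size: 200Gi
    storageClassName: gp2 # example storage class, adapt to your cluster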

Redis

redis:
  redisPack:
    maxmemory: 1Gb
  redisTcp:
    maxmemory: 1Gb
  redisShared:
    maxmemory: 1Gb

In the current configuration, Spider uses 3 Redis nodes, without high availability.

  • redisPack - stores unitary Packets data
  • redisTcp - stores Tcp sessions and parsing status
  • redisShared - stores all other data

You may change the maximum memory used by each Redis node.
When Redis reaches this limit, it starts dropping data.

S3

# S3 backup
s3Backup:
  accessKeyId:
  secretAccessKey:
  region:
  endpoint: # include the "https://" prefix if you are connecting to a secure endpoint
  bucket:

These settings define the connection to the S3-compatible store used to back up and restore configuration data.

  • region may not be required if you have a local S3-compatible setup
  • endpoint requires the https:// prefix to connect to a secured endpoint; otherwise, you might end up with ERR_INVALID_URL errors.
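
For instance, pointing the backup to an AWS bucket could look like this (all identifiers are placeholders):

s3Backup:
  accessKeyId: AKIAXXXXXXXXXXXXXXXX
  secretAccessKey: "xxxx"
  region: eu-west-1
  endpoint: https://s3.eu-west-1.amazonaws.com # https:// prefix required for a secured endpoint
  bucket: spider-config-backups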

Gateway

# Traefik gateway
gateway:
  replicas:
    min: 1
    max: 20
    cpuThreshold: 500m

Spider uses Traefik to expose:

  • APIs for the Whisperers
  • APIs for the UI
  • Kibana

You may define its autoscaling settings.
You may also customise the services to expose through the servicesToExpose key (advanced configuration).

gateway:
  servicesToExpose:
    - name: customer
    - name: gui-logs
    - name: gui-settings
    - name: hosts
    - name: link
    - name: login
    - name: job
    - name: monitor-read
    - name: networkview
      prefix: network
    - name: pack-read
    - name: pack-write
    - name: plugins
    - name: self-monitoring
    - name: session
    - name: tcp-read
    - name: tcp-write
    - name: teams
    - name: web-read
    - name: web-upload
    - name: whisp
    - name: whisps-status
    - name: kibana-kb-http
      prefix: kibana
      port: 5601

Services

# Spider microservices
# Extra Kubernetes options are available for taints, debug and demo
# Below are classical options that you might want to change
services:
  captureStatusPoller:
    autoscaling:
      min: 1
      max: 20
      cpuThreshold: 300m
    resources:
      requests:
        cpu: 2m
        memory: 110M
    threadsCount: 1
    extraConfiguration: {}
  ...

autoscaling lets you customise the scaling of the Spider microservices.

  • Each microservice autoscales to keep CPU usage at most at cpuThreshold for each replica of the service.
  • You may set the minimum and maximum replica counts.

resources lets you change the Kubernetes resource requests for each service.

threadsCount lets you define how many 'threads' poll in parallel within the same poller.
This setting allows you to scale polling without adding more PODs and using much more RAM.
If threadsCount is too high, the poller's CPU usage may rise above cpuThreshold... and it will scale out.

extraConfiguration lets you override any default configuration of any service.
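
For example, tuning one poller with more threads, a wider scaling range and bigger requests, using only the keys shown above (the values are illustrative):

services:
  packPoller:
    autoscaling:
      min: 2
      max: 10
      cpuThreshold: 400m
    threadsCount: 4
    resources:
      requests:
        cpu: 50m
        memory: 200M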

Services list

The complete services list is available in Services configuration.

In the Helm chart:

  • The default values.yaml of the main chart lists the services and settings that you may change.
  • The values.yaml of the sub-chart spider-microservices lists all services, but you should avoid playing with those settings.

Dev & demo

Other options are available for development and demo, but they are out of the scope of this page.