Quine Configuration

Configuration is handled by Typesafe Config, which supports multiple ways to pass in options. Most commonly, configuration is provided either as Java system properties (passed as command-line options) or in a HOCON config file. HOCON is a flexible, human-readable, JSON-like format. The reference config below is in HOCON format.

# Example of setting configuration via configuration file
java \
  -Dconfig.file=quine.conf \
  -jar quine-1.7.3.jar

# Example of overriding configuration via system properties
java \
  -Dquine.webserver.port=9000 \
  -Dquine.id.type=uuid-3 \
  -jar quine-1.7.3.jar

# Example of overriding configuration via environment variables
CONFIG_FORCE_quine_webserver_port=9000 \
CONFIG_FORCE_quine_id_type=uuid-5 \
java \
  -Dconfig.override_with_env_vars=true \
  -jar quine-1.7.3.jar

Uncommented values are the defaults, unless otherwise noted. Unexpected configuration keys or values in the quine block will cause an error to be reported at startup.

A single underscore (_) indicates a required property with no default value. There are none of these in the default configuration.
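
For example, a minimal quine.conf (as passed via -Dconfig.file above) might override only a handful of the settings documented in the reference. The values shown here are illustrative only, not recommendations; any setting that is omitted keeps its default. The complete reference configuration follows.

# quine.conf -- a minimal sketch; omitted settings keep their defaults
quine {
  webserver {
    # serve the web interface on a non-default port
    port = 9000
  }

  id {
    # use deterministic UUIDv5 IDs, with an illustrative (nil) namespace
    type = uuid-5
    namespace = "00000000-0000-0000-0000-000000000000"
  }

  # log the effective configuration at startup
  dump-config = yes
}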

quine {

  # webserver binding configuration
  webserver {
    # whether the webserver should be enabled
    enabled = true

    # Hostname or address of the interface to which the HTTP server should
    # be bound - 0.0.0.0 means "all interfaces"
    # There are two special values which are interpreted dynamically:
    #   1.) "<getHostAddress>" uses the host IP found at runtime
    #   2.) "<getHostName>" uses the host DNS name found at runtime
    address = "0.0.0.0"

    # port to which the HTTP server should be bound
    # setting to `0` will choose an available port at random.
    port = 8080
  }
  # (optional) Configuration to use when advertising this server
  # (e.g., its canonical address), if it differs from the bind
  # configuration (e.g., when deployed behind a reverse proxy).
  # webserver-advertise {
    # Hostname or address with which the application should generate
    # user-facing hyperlinks to itself. This should be uniquely
    # resolvable from the end users' clients.
    # address = "localhost"

    # port (on `address`) via which the HTTP server can be reached
    # port = 8080
  # }

  # configuration for the id-assignment scheme the application should use.
  id {
    # one of [uuid-3, uuid-4, uuid-5, long, byte-array, uuid]
    # - uuid-3:     generate UUIDv3s according to the RFC specification
    # - uuid-4:     generate UUIDs labelled as v4, with id() and strId()
    #               returning random UUIDs, and idFrom returning
    #               deterministic UUIDs with version 4 identifying bytes
    # - uuid-5:     generate UUIDv5s according to the RFC specification
    # - long:       generate random integer IDs in the range
    #               [-(2^53-1), 2^53-1] -- these may be safely used as
    #               IEEE double-precision floating-point values without
    #               loss of precision. This id scheme is not appropriate
    #               for large-scale datasets because of the high
    #               likelihood of a collision
    # - byte-array: generate unstructured byte arrays as IDs
    # - uuid:       generate UUIDs with a mix of versions and namespaces
    type = uuid

    # whether the id scheme should be extended with a host-aware
    # partitioning scheme. When "true", ids will be prefixed with a
    # "partition" key, and two IDs with the same partition key will
    # always be managed by the same shard
    partitioned = false

    # for uuid-5 and uuid-3 configuration, a UUID namespace may also be
    # set.
    # namespace = "00000000-0000-0000-0000-000000000000"
  }

  # Selects the order in which edges between nodes are returned by queries
  # one of [reverse-insertion, unordered]
  # reverse-insertion means the edges are returned in the reverse
  # of the order they were added (that is, from newest to oldest).
  edge-iteration = reverse-insertion

  # (optional) The number of nodes allowed in a shard's cache before that
  # shard begins to expire nodes from its cache.
  in-memory-soft-node-limit = 10000

  # (optional) A limit to the total number of nodes in a shard's cache.
  # Attempts to create a node that would exceed this limit will return
  # an error. This value must always be higher than
  # `in-memory-soft-node-limit`
  in-memory-hard-node-limit = 75000

  # configuration for which data to save about nodes and when to do so
  persistence {
    # whether to save node journals. "true" uses more disk space and
    # enables more functionality, such as historical queries
    journal-enabled = true

    # one of [on-node-sleep, on-node-update, never]. When to save a
    # snapshot of a node's current state, including any DistinctId Standing
    # Queries registered on the node
    snapshot-schedule = on-node-sleep

    # whether only a single snapshot should be retained per-node. If
    # false, one snapshot will be saved at each timestamp against which
    # a historical query is made
    snapshot-singleton = false

    # when to save Standing Query partial results (only applies to the
    # `MultipleValues` mode -- `DistinctId` Standing Queries always save
    # when a node saves a snapshot, regardless of this setting)
    standing-query-schedule = on-node-sleep

    # whether in-memory effects occur before or after updates are
    # confirmed as persisted to disk.
    # Possible values: memory-first, persistor-first
    effect-order = persistor-first
  }

  # storage backend / "persistor" configuration. There are several
  # possible "type"s; non-default options are listed (commented out) below
  store {
    # store data in a local filesystem using RocksDB. This is not
    # supported in a multi-host cluster
    type = rocks-db

    # base folder in which RocksDB data will be stored
    filepath = "quine.db"

    # whether to create any directories in "filepath" that do not yet
    # exist
    create-parent-dir = no

    # whether to use a write-ahead log.
    write-ahead-log = on

    # whether to force all writes to be fully confirmed to disk. This
    # is substantially slower, but maintains data integrity even under
    # power loss (the write-ahead log is enough to maintain integrity
    # in the event of process crashes).
    sync-all-writes = off

    # if set, the number of nodes for which to optimize node creation
    # latency
    # bloom-filter-size =
  }
  # store {
  #   # store data in an Apache Cassandra instance
  #   type = cassandra
  #
  #   # "host:port" strings at which Cassandra nodes can be accessed from
  #   # the application
  #   endpoints = [
  #     "localhost:9042"
  #   ]
  #
  #   # the keyspace to use
  #   keyspace = quine
  #
  #   # whether the application should create the keyspace if it does not
  #   # yet exist
  #   should-create-keyspace = true
  #
  #   # whether the application should create tables in the keyspace if
  #   # they do not yet exist
  #   should-create-tables = true
  #
  #   # how many copies of each datum the Cassandra cluster should retain
  #   replication-factor = 1
  #
  #   # how many hosts must agree on a datum for Quine to consider that
  #   # datum written/read
  #   write-consistency = LOCAL_QUORUM
  #   read-consistency = LOCAL_QUORUM
  #
  #   # passed through to Cassandra
  #   local-datacenter = "datacenter1"
  #
  #   # how long to wait before considering a write operation failed
  #   write-timeout = "10s"
  #
  #   # how long to wait before considering a read operation failed
  #   read-timeout = "10s"
  #
  #   # if set, the number of nodes for which to optimize node creation
  #   # latency
  #   # bloom-filter-size =
  # }
  # store {
  #   # store data in a memory-mapped local file using MapDB. This is not
  #   # supported in a multi-host cluster
  #   type = map-db
  #
  #   # base filename from which MapDB filenames will be created. For
  #   # example, "quine.db", "quine.db.part.3", etc
  #   filepath = "quine.db"
  #
  #   # whether to create any directories in "filepath" that don't yet exist
  #   create-parent-dir = no
  #
  #   # how many files to use. MapDB performance slows dramatically above
  #   # around 2GB per file
  #   number-partitions = 1
  #
  #   # whether to use a write-ahead log. Does not support Windows hosts.
  #   write-ahead-log = off
  #
  #   # if write-ahead-log = true, how often to commit the write-ahead log
  #   commit-interval = "10s"
  #
  #   # if set, the number of nodes for which to optimize node creation
  #   # latency
  #   # bloom-filter-size =
  # }
  # store {
  #   # do not store any data, only use the temporary node cache
  #   # all writes to the persistor will be no-ops.
  #   type = empty
  # }
  # store {
  #   # Use in-memory maps to simulate a local persistor.
  #   type = in-memory
  # }

  # where metrics collected by the application should be reported
  metrics-reporters = [
    {
      # one of [jmx, csv, influxdb, slf4j]
      # jmx will report metrics as namespaced MBeans. Other alternatives
      # are listed (commented out) below
      type = jmx
    }
    # {
    #   # create a csv file for each reported metric
    #   type = csv
    #
    #   # required by csv - the interval at which new rows will be
    #   # written to the CSV file (for example, 200ms)
    #   period = _
    #
    #   # required by csv - the directory in which the csv files should
    #   # be created and written
    #   log-directory = _
    # }
    # {
    #   # report metrics to an influxdb (version 1) database
    #   type = influxdb
    #
    #   # required by influxdb - the interval at which new records will
    #   # be written to the database
    #   period = _
    #
    #   # connection information for the influxdb database
    #   database = metrics
    #   scheme = http
    #   host = localhost
    #   port = 8086
    #
    #   # authentication information for the influxdb database. Both
    #   # fields may be omitted
    #   # user =
    #   # password =
    # }
    # {
    #   # log metrics via an slf4j logger
    #   type = slf4j
    #
    #   # required by slf4j - the interval at which new records will be
    #   # logged
    #   period = _
    #
    #   # logger to which metrics will be logged
    #   logger-name = metrics
    # }
  ]

  # Startup and shutdown timeout for the Quine application.
  # The system will throw an error and exit if any component required
  # to start or shut down Quine takes longer than this time
  timeout = 2 m

  # the property on a node reserved to store that node's labels.
  labels-property = "__LABEL"

  # the minimum amount of time a node must stay in the cache after
  # being updated
  decline-sleep-when-write-within = 100 ms

  # the minimum amount of time a node must stay in the cache after
  # being accessed
  decline-sleep-when-access-within = 0 ms

  # nodes will wait up to this amount of time before processing messages
  # when the requested at-time is in the future (this occurs when there
  # is a difference in the system clocks across nodes in the cluster)
  max-catch-up-sleep = 2000 ms

  # whether the application should log its current config at startup
  dump-config = no

  # whether, on restart, Quine will resume ingest streams that were
  # running and were interrupted by the previous shutdown
  should-resume-ingest = no

  # which metrics are enabled and their configurations
  metrics {
    # whether to enable debug metrics (i.e., metrics whose collection may slow down
    # the operation of Quine)
    enable-debug-metrics = no
  }

  # configuration for the log sanitizer
  log-config {
    # whether to show potentially sensitive information in logs (e.g., values
    # derived from ingested records, literals in queries)
    show-unsafe = no

    # whether to show exceptions in logs. These may contain sensitive information
    # and may include verbose stacktraces
    show-exceptions = no

    # the redaction method to use when hiding sensitive information
    redactor = {
      # must be "redact-hide", which replaces potentially sensitive information
      # with a placeholder string "**REDACTED**"
      type = redact-hide
    }
  }

  # send anonymous usage information
  help-make-quine-better = true
}
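
As an illustration of the underscore convention described above, enabling the commented-out csv metrics reporter requires supplying both of its required values. The period and directory below are hypothetical examples, not defaults. Because HOCON lists are replaced rather than merged when overridden, an explicit jmx entry is included to keep the default JMX reporting.

# Sketch: overriding metrics-reporters in quine.conf
quine {
  metrics-reporters = [
    {
      # keep the default JMX reporting
      type = jmx
    }
    {
      # also write one CSV file per metric
      type = csv

      # required: interval at which new rows are written
      period = 30s

      # required: directory in which the CSV files are created
      # (path is illustrative)
      log-directory = "/var/log/quine-metrics"
    }
  ]
}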