# # $Id# # # Copyright 2008-2014,2016 Quantcast Corporation. All rights reserved. # # Author: Mike Ovsiannikov # # This file is part of Quantcast File System (QFS). # # Licensed under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. # # The metaserver configuration. # Additional information can be found in # https://github.com/quantcast/qfs/wiki/Configuration-Reference. # Client listener port. metaServer.clientPort = 20000 # Client connection listener ip address to bind to. # Use :: to bind to ipv6 address any. # Default is empty, treated as 0.0.0.0 ipv4 address any, unless the following # parameter metaServer.clientIpV6Only set to 1 # metaServer.clientIp = # Accept ipv4 client connections. # Default is 0, enable acception ipv4 connection. Only has effect if # metaServer.clientIp left empty, or set to :: # metaServer.clientIpV6Only = 0 # Chunk server connection listener ip address to bind to. # Use :: to bind to ipv6 address any. # Default is empty, treated as 0.0.0.0 ipv4 address any, unless the following # parameter metaServer.chunkServerIpV6Only set to 1 # metaServer.chunkServerIp = # Accept ipv4 chunk servers connections. # Default is 0, enable acception ipv4 connection. Only has effect if # metaServer.chunkServerIp left empty, or set to :: # metaServer.chunkServerIpV6Only = 0 # Chunk server listener port. metaServer.chunkServerPort = 30000 # Meta serve transactions log directory. metaServer.logDir = meta/transaction_logs # Meta server checkpoint directory. 
metaServer.cpDir = meta/checkpoint # Allow to automatically create an empty file system if checkpoint file does # not exist. # The default is 0, as under the normal circumstances where the file system # content is of value, completely losing checkpoint, transaction log, and # automatically creating an empty fs will have the same effect as conventional # "mkfs". All chunks (blocks) will get deleted, and restoring the checkpoint # and logs later won't be sufficient to recover the data. # Use "-c" command line option to create new empty file system. For example: # metaserver -c MetaServer.prp # metaServer.createEmptyFs = 0 # Root directory permissions -- used only when the new file system created. # metaServer.rootDirUser = 0 # metaServer.rootDirGroup = 0 # metaServer.rootDirMode = 0755 # Defaults for checkpoint and transaction log without permissions conversion on # startup. # metaServer.defaultLoadUser = 0 # metaServer.defaultLoadGroup = 0 # metaServer.defaultLoadFileMode = 0644 # metaServer.defaultLoadDirMode = 0755 # The size of the "client" thread pool. # When set to greater than 0, dedicated threads to do client network io, request # parsing, and response assembly are created. The thread pool size should # usually be (at least one) less than the number of CPUs. "Client" threads help # with processing large amount of ["short"] requests where more cpu used for # context switch, network io, request parsing, and response assembly, than # the cpu for the request processing itself. For example i-node attribute # lookup, or write append chunk allocations that can be satisfied from the write # append allocation cache. # Default is 0 -- no dedicated "client" threads. # metaServer.clientThreadCount = 0 # Meta server threads affinity. # Presently only supported on linux. # The first cpu index to set thread affinity to. # The main thread will be assigned to the cpu at the specified index, then the # next "client" thread will be assigned to the cpu index plus one and so on. 
# For example with 2 client threads and start cpu index 0 the threads affinity # would be 0 1 2 respectively. # Useful on machines with more than one multi-core processor with shared dram # cache. Assigning the threads to the same processor might help minimize dram # cache misses. # Default is off (start index less than 0) no thread affinity set. # metaServer.clientThreadStartCpuAffinity = -1 # Meta server process max. locked memory. # If set to a value greater than 0 then locked memory limit will be set to the # specified value, and mlock(MCL_CURRENT|MCL_FUTURE) invoked. # On linux running under non root user setting locked memory "hard" limit # greater or equal to the specified value required. ulimit -l can be used for # example. # Default is 0 -- no memory locking. # metaServer.maxLockedMemory = 0 # Size of [network] io buffer pool. # The default buffer size is 4K, therefore the amount of memory is # 4K * metaServer.bufferPool.partitionBuffers. # All io buffers are allocated at startup. # If memory locking enabled io buffers are locked in memory at startup. # Default is 256K or 1GB on 64 bit system, and 32K or 128MB on 32 bit system. # metaServer.bufferPool.partitionBuffers = 262144 # ============================================================================== # The parameters below this line can be changed at runtime by editing the # configuration file and sending meta server process HUP signal. # Note that to restore parameter to default at run time the default value must # be explicitly specified in the configuration file. In other words commenting # out the parameter will not have any effect until restart. # WORM mode. # ***************************************************************************** # * This parameter is deprecated, and has no effect, please use qfsadmin or # * qfstoggleworm to set WORM mode. # * WORM mode is now stored in checkpoint and transaction logs. 
# ***************************************************************************** # Write once, read many mode. # In this mode only modification of files with ".tmp" (without quotes) suffix is # allowed. # Typically the application would create and write the file with ".tmp" suffix, # and then rename it so the destination file name will not have ".tmp" suffix. # To delete a file without ".tmp" suffix the mode can be temporarily turned off # by the administrator. "qfstoggleworm" utility, or temporary configuration # modification can be used to do that. # Default is 0. # metaServer.wormMode = 0 # Minimum number of connected / functional chunk servers before the file system # can be used. # Default is 1. # metaServer.minChunkservers = 1 # Wait 30 sec for chunk servers to connect back after restarting, before file # system considered fully functional. metaServer.recoveryInterval = 30 # Ignore master/slave chunk server assignment for write append. # Master/slave assignment can help with append replication 2, to avoid # theoretically possible IO buffers resource deadlock when chunk server A is # "slave" in one "AB" synchronous append replication chain, and chunk server B # is "master" in another chunk "BA" synchronous replication. # In practice such deadlocks should be rare enough not to matter, and, if they occur, # are resolved by replication timeout mechanism. # The downside of using master/slave assignment is that presently it only works # with replication 2, and only half of the chunk server population will be # accepting client's append requests. # Default is "on" -- ignore. # metaServer.appendPlacementIgnoreMasterSlave = 1 # For write append use the low order bit of the IP address for the chunk servers # master/slave assignment. This scheme works well if least significant bit of # ip address uniformly distributes masters and slaves within the rack, # especially with "in rack" placement for append. # Default is 0. 
Assign master / slave to keep number of masters and slaves # equal. The obvious downside of this is that the assignment depends on the # chunk servers connection order. # metaServer.assignMasterByIp = 0 # Chunk server executables md5 sums white list. # The chunk server sends its executable md5sum when it connects to the meta # server. If the following space separated list is not empty and does not # contain the chunk server executable md5 sum then the chunk server is # instructed to exit or restart itself. # This might be useful for upgrade or versions control. # While the chunk server is connected to the meta server no md5 sum verification # performed. # Default is empty list. # metaServer.chunkServerMd5sums = # Unique file system id -- some name that uniquely identifies distributed file # system instance. # This is used to protect data loss / and or corruption in the case where chunk # server(s) connect to the "wrong" meta server. # The meta server will not accept connections from the chunk servers with a # different "cluster key". # Default is empty string. metaServer.clusterKey = my-fs-unique-identifier # Assign rack id by ip prefix -- ip address treated as strings. # Valid rack id range is from 0 to 65535. Entries with rack ids outside of this # range have no effect. # The prefix can be positioned with trailing ?? # For example: 10.6.34.2? # The rack id assigned on chunk server connect, and will not change until the # chunk server re-connect. Therefore the configuration file changes will not # have any effect until the chunk servers re-connect. # Default is empty -- use rack id assigned in the chunk server config. # metaServer.rackPrefixes = # Example: # 10.6.1.* -- rack 1, 10.6.2.* -- rack 2, 10.6.4.1? -- rack 4 etc. # metaServer.rackPrefixes = 10.6.1. 1 10.6.2. 2 10.6.4.1? 4 10.6.4.1 5 # "Static" placement weights of the racks. The more weight and more chunk # servers are in the rack the more likely the rack will be chosen for chunk # allocation. 
# Default is empty -- all weight are default to 1. # metaServer.rackWeights = # Example: Racks 1 and 2 have weight 1, rack 3 -- 0.9, rack 4 weight 1.2, # rack 5 weight 1.5. All other rack weights are 1. # metaServer.rackWeights = 1 1 2 1 3 0.9 4 1.2 5 1.5 # Various timeout settings. # Extend write lease expiration time by 30 sec. in the case of the write master # disconnect, to give it a chance to re-connect. # Default is 30 sec. Production value is 60 sec. # metaServer.leaseOwnerDownExpireDelay = 30 # Re-replication or recovery delay in seconds on chunk server down, to give # chunk server a chance to re-connect. # Default is 120 sec. # metaServer.serverDownReplicationDelay = 120 # Chunk server heartbeat interval. # Default is 30 sec. # metaServer.chunkServer.heartbeatInterval = 30 # Chunk server operations timeouts. # Heartbeat timeout results in declaring chunk server non operational, and # closing connection. # All other operations timeout are interpreted as the operation failure. # The values are in seconds. # The defaults: # metaServer.chunkServer.heartbeatTimeout = 60 # metaServer.chunkServer.chunkReallocTimeout = 75 # metaServer.chunkServer.chunkAllocTimeout = 40 # metaServer.chunkServer.chunkReallocTimeout = 75 # metaServer.chunkServer.makeStableTimeout = 330 # metaServer.chunkServer.replicationTimeout = 330 # The current production values. # metaServer.chunkServer.heartbeatInterval = 18 # metaServer.chunkServer.heartbeatTimeout = 30 # metaServer.chunkServer.chunkReallocTimeout = 18 # metaServer.chunkServer.chunkAllocTimeout = 18 # metaServer.chunkServer.makeStableTimeout = 60 # Other chunk server operations timeout. # metaServer.chunkServer.requestTimeout = 600 # Chunk server space utilization placement threshold. # Chunk servers with space utilization over this threshold are not considered # as candidates for the chunk placement. # Default is 0.95 or 95%. 
# metaServer.maxSpaceUtilizationThreshold = 0.95 # Unix style permissions # Space separated list of ip addresses of hosts where root user is allowed. # Empty list means that root user is allowed on any host. # Default is empty. # metaServer.rootHosts = # File modification time update resolution. Increasing the value will reduce # number of corresponding transaction log writes with large files. # Default is 1 sec. # metaServer.MTimeUpdateResolution = 1 # Files access time update resolution in seconds. Increasing the value will # reduce number of corresponding transaction log writes. # File access time update considered when chunk or object store block read # lease acquire or read lease renew completes successfully. # Setting to a negative value turns off access time updates. # Default is -1 -- no file access time update. # metaServer.ATimeUpdateResolution = -1 # Directories access time update resolution in seconds. Increasing the value # will reduce number of corresponding transaction log writes. # Directory access time update considered every time when directory "read" / # "listed", i.e. "read dir" or "read dir plus" RPC issued. # Setting to a negative value turns off access time updates. # Default is -1 -- no directories access time update. # metaServer.dirATimeUpdateResolution = -1 # --------------- File create limits. ------------------------------------------ # # Disallow specific file types. The list is a space separated list of file type ids. # Default is empty list. All valid file types are allowed. # metaServer.createFileTypeExclude = # Limit number of data stripes for all file types. If create attempt exceeds # the limit the meta server returns "permission denied". # Default is the max supported by the compile time constants. # metaServer.maxDataStripeCount = 511 # Limit number of recovery stripes for all file types. If create attempt exceeds # the limit the meta server returns "permission denied". # Default is 32. 
# Max supported by the compile time constants in common/kfstypes.h is 127. # metaServer.maxRecoveryStripeCount = 32 # Limit number of data stripes for files with recovery. # Default is 64. # Max supported by the compile time constants in common/kfstypes.h is 511. # metaServer.maxRSDataStripeCount = 64 # Max number of replicas for "regular / replicated" file with no recovery. # If create, or change replication requests exceeds this limit then the meta # server replaces the value with the value specified. # metaServer.maxReplicasPerFile = 64 # Max number of replicas for RS (file with recovery). # If create, or change replication requests exceeds this limit then the meta # server replaces the value with the value specified. # metaServer.maxReplicasPerRSFile = 64 # Force effective user to root. This effectively turns off all permissions # control. # Default is off. # metaServer.forceEUserToRoot = 0 # Client backward compatibility. # Defaults are no user and no group -- no backward compatibility. # metaServer.defaultUser = 0xFFFFFFFF # metaServer.defaultGroup = 0xFFFFFFFF # metaServer.defaultFileMode = 0644 # metaServer.defaultDirMode = 0755 # The chunk server disconnects history size. Useful for monitoring. # Default is 4096 slots / disconnect events. # metaServer.maxDownServersHistorySize = 4096 # Space and placement re-balancing. # Space re-balancing is controlled by the next two parameters (thresholds) below. # Re-balancing constantly scans all chunks in the system and checks chunk # placement within the replication or RS groups, and moves chunks from chunk # servers that are above metaServer.maxRebalanceSpaceUtilThreshold to the chunk # servers that are below metaServer.minRebalanceSpaceUtilThreshold. # Default is 1 -- on. # metaServer.rebalancingEnabled = 1 # Space re-balancing thresholds. 
# Move chunk from the servers that exceed the # metaServer.maxRebalanceSpaceUtilThreshold # Default is 0.82 # metaServer.maxRebalanceSpaceUtilThreshold = 0.82 # Move chunks to server below metaServer.minRebalanceSpaceUtilThreshold. # Default is 0.72. # metaServer.minRebalanceSpaceUtilThreshold = 0.72 # Time interval in seconds between replication queues scans. # The more often the scan is scheduled the more cpu it can potentially use. # Default is 5 sec. # metaServer.replicationCheckInterval = 5 # Re-balance scan depth. # Max number of chunks to scan in one partial scan. The more chunks are scanned # the more cpu re-balance will use, and the "faster" it will scan the chunks. # metaServer.maxRebalanceScan = 1024 # Single re-balance partial scan time limit. # Default is 0.03 sec. # metaServer.maxRebalanceRunTime = 0.03 # Minimum time between two consecutive re-balance partial scans. # Default is 0.512 sec. # metaServer.rebalanceRunInterval = 0.512 # Max. number of a single client connection requests in flight. # The higher value might reduce cpu and alleviate "head of the line blocking" # when single client connection shared between multiple concurrent file readers # and writers, potentially at the cost of reducing "fairness" between the client # connections. Increasing the value could also reduce number of context # switches, and os scheduling overhead with the "client" threads enabled. # Default is 16 if the "client" threads are enabled, and 1 otherwise. # metaServer.clientSM.maxPendingOps = 16 # ------------------ Chunk placement parameters -------------------------------- # The metaServer.sortCandidatesByLoadAvg and # metaServer.sortCandidatesBySpaceUtilization are mutually exclusive. # metaServer.sortCandidatesBySpaceUtilization takes precedence over # metaServer.sortCandidatesByLoadAvg if both set to 1 # When allocating (placing) a chunk prefer chunk servers with lower "load" # metric over the chunk servers with the higher "load" metric. 
# For the write intensive file systems turning this mode on is # recommended. # Default is 0. Do not take chunk server "load" metric into the account. # metaServer.sortCandidatesByLoadAvg = 0 # When allocating (placing) a chunk prefer chunk servers with lower disk space # utilization. # Default is 0. Do not take space utilization into the account. # metaServer.sortCandidatesBySpaceUtilization = 0 # When allocating (placing) a chunk do not consider chunk server with the "load" # exceeding average load multiplied by metaServer.maxGoodCandidateLoadRatio. # Default is 4. # metaServer.maxGoodCandidateLoadRatio = 4 # When allocating (placing) a chunk do not consider chunk server with the "load" # exceeding average "master" chunk server load multiplied by # metaServer.maxGoodMasterLoadRatio if the chunk server is used as master (head # or synchronous replication chain). # Default is 4. # metaServer.maxGoodMasterLoadRatio = 4 # When allocating (placing) a chunk do not consider chunk server with the "load" # exceeding average "slave" load multiplied by metaServer.maxGoodSlaveLoadRatio # if the chunk server is used as slave. # Default is 4. # metaServer.maxGoodSlaveLoadRatio = 4 # When allocating (placing) a chunk do not consider chunk server with the # average number of chunks opened for write per drive (disk) exceeding average # number of chunks opened for write across all disks / chunks servers multiplied # by metaServer.maxWritesPerDriveRatio. # Default is 1.5. # metaServer.maxWritesPerDriveRatio = 1.5 # When allocating (placing) a chunk do not consider chunk server running on the # same host as writer if the average number of chunks opened for write per drive # (disk) exceeding average number of chunks opened for write across all disks / # chunks servers multiplied by metaServer.maxLocalPlacementWeight. # Default is 1.0. # metaServer.maxLocalPlacementWeight = 1.0 # "In rack" placement for append and non append chunk allocations. 
# Place chunk replicas on the same rack to save cross rack bandwidth at the cost # of reduced reliability. Useful for temporary / scratch file systems. # Default is 0. # metaServer.inRackPlacementForAppend = 0 # "In rack" placement for non append files. # Default is 0 - place replicas and chunks from the same RS blocks on different # racks. # metaServer.inRackPlacement = 0 # Limit number of re-replications (this does not include RS chunk recovery), # that the given chunk server can be used as replication "source". # Default is 10. # metaServer.maxConcurrentReadReplicationsPerNode = 10 # Limit max concurrent chunk re-replications and RS recoveries per chunk server. # Default is 5. # metaServer.maxConcurrentWriteReplicationsPerNode = 5 #------------------------------------------------------------------------------- # Order chunk replicas locations by the chunk "load average" metric in "get # alloc" responses. The read client logic attempts to use replicas in this # order. # Default is 0. The replicas locations are shuffled randomly. # metaServer.getAllocOrderServersByLoad = 0 # Delay recovery for the chunks that are past the logical end of file in files # with Reed-Solomon redundant encoding. # The delay is required to avoid starting recovery while the file is being # written into, and the chunk sizes aren't known / final. The writer can stop # writing into a file, and the corresponding chunks write leases might timed # out, and will be automatically revoked. The existing writer logic sets logical # EOF when it closes the file, before that the logical file size remains 0 # during write. (Unless it is re-write which is currently for all practical # purposes not supported with RS files). The timeout below should be set to # at least the max. practical file "write" time. # Setting the timeout to a very large value will prevent processing the chunks # sitting in the replication delayed queue from the "abandoned" files, i.e. 
# files that the writer wrote something and then exited without closing the # file. # The parameter and the corresponding "delay" logic will likely be removed in # future releases, and replaced with the write lease renew logic. # Default is 6 hours or 21600 seconds. # metaServer.pastEofRecoveryDelay = 21600 # -------------------------- Periodic checkpointing ---------------------------- # If set to -1 checkpoint is disabled. In such case "logcompactor" can be used # to periodically create new checkpoint from the transaction logs. # Default is 3600 sec. # metaServer.checkpoint.interval = 3600 # Checkpoint lock file name. Can be used to serialize checkpoint write and load # with external programs, for example logcompactor. # Default is empty -- no lock file used. # metaServer.checkpoint.lockFileName = # Max consecutive checkpoint write failures. # Meta server will exit if checkpoint write fails # metaServer.checkpoint.maxFailedCount times in a row for any reason (not # enough disk space for example). # Default is 2. # metaServer.checkpoint.maxFailedCount = 2 # Checkpoint write timeout. Max time the checkpoint write can take before # declaring write failure. # Default is 3600 sec. # metaServer.checkpoint.writeTimeoutSec = 3600 # Use synchronous mode to write checkpoint, i.e. tell host os to flush all data # to disk prior to write system call return. # The main purpose is to reduce the number of "dirty" / unwritten pages in the # host os vm subsystem / file system buffer cache, therefore reducing memory # contention and lowering the chances of paging out meta server and other # processes with no memory locking. # Default is on. # metaServer.checkpoint.writeSync = 1 # Checkpoint write buffer size. # The buffer size should be adequate with synchronous write mode enabled, # especially if journal and data of host's file system are on the same spinning # media device, in order to minimize the number of seeks. # Default is 16MB. 
# metaServer.checkpoint.writeBufferSize = 16777216 # --------------------------------- Audit log ---------------------------------- # All request headers and response status are logged. # The audit log records are null ('\0') separated. # The log could be useful for debugging and audit purposes. # The logging require some cpu, but the main resource consumption is disk io. # Default is off. # metaServer.clientSM.auditLogging = 0 # Colon (:) separated file name prefixes to store log segments. # Default is empty list. # metaServer.auditLogWriter.logFilePrefixes = # Maximum log segment size. # Default is -1 -- unlimited. # metaServer.auditLogWriter.maxLogFileSize = -1 # Maximum number of log segments. # Default is -1 -- unlimited. # metaServer.auditLogWriter.maxLogFiles = -1 # Max. time to wait for the log buffer to become available. # When wait is enabled the request processing thread will wait for the log # buffer disk io to complete. If the disk subsystem cannot keep up with the # logging it will slow down the meta server request processing. # Default is -1. Do not wait, drop log record instead. # metaServer.auditLogWriter.waitMicroSec = -1 #------------------------------------------------------------------------------- # ---------------------------------- Message log. ------------------------------ # Message log level FATAL, ALERT, CRIT, ERROR, WARN, NOTICE, INFO, DEBUG # Default is DEBUG, except for non debug builds with NDEBUG defined INFO is # default. metaServer.msgLogWriter.logLevel = INFO # Colon (:) separated file name prefixes to store log segments. # Default is empty list. The default is to use file name from the command line # or if none specified write into file descriptor 2 -- stderror. # metaServer.msgLogWriter.logFilePrefixes = # Maximum log segment size. # Default is -1 -- unlimited. # metaServer.msgLogWriter.maxLogFileSize = -1 # Maximum number of log segments. # Default is -1 -- unlimited. # metaServer.msgLogWriter.maxLogFiles = -1 # Max. 
time to wait for the log buffer to become available. # When wait is enabled the request processing thread will wait for the log # buffer disk io to complete. If the disk subsystem cannot keep up with the # logging it will slow down the meta server request processing. # Default is -1. Do not wait, drop log record instead. # metaServer.msgLogWriter.waitMicroSec = -1 #------------------------------------------------------------------------------- # -------------------- Chunk servers authentication. --------------------------- # # Authentication is off by default. Both X509 (ssl) and Kerberos authentication # methods can be enabled at the same time. Chunk server can negotiate # authentication method. If both Kerberos and X509 are configured on the chunk # server and meta server then Kerberos authentication is used. # Chunk and meta servers perform mutual authentication with authentication # enabled. # # Use of X509 authentication is recommended in order to avoid KDC dependency. # Chunk servers have to periodically request Kerberos tickets from KDC. The meta # server enforces Kerberos ticket expiration time, by asking chunk server to # re-authenticate when its ticket expires. Therefore KDC unavailability for any # reason, including network communication outage, might result in chunk servers # disconnects. Long enough KDC unavailability might result in unrecoverable data # loss, due to the file system inability to perform replication and recovery # in response to disk and node failures. # # Please see OpenSSL documentation for detailed description about X509 # authentication configuration. # src/test-scripts/qfsmkcerts.sh might be used as a simple example how to create # and use certificate authority, and X509 certificates. # Maximum authenticated session lifetime. This limits authenticated session time # for all authentication methods. 
In other words, the session [connection] must # be re-authenticated if the authentication token (Kerberos ticket, or x509 # certificate) "end time" is reached or authenticated session exists longer than # the value of this parameter. # Default is 24 hours. # metaServer.clientAuthentication.maxAuthenticationValidTimeSec = 86400 # Check chunk server authenticated name against the user and group database. # If enabled then the authenticated name must be present in the user database in # order for chunk server to be accepted. # Default is 0 (off), use only black and white lists, if configured, see below. # metaServer.CSAuthentication.useUserAndGroupDb = 0 # ================= X509 authentication ======================================== # Meta server's X509 certificate file in PEM format. # metaServer.CSAuthentication.X509.X509PemFile = # Password if X509 PEM file is encrypted. # metaServer.CSAuthentication.X509.X509Password = # Meta server's private key file. # metaServer.CSAuthentication.X509.PKeyPemFile = # Password if private key PEM file is encrypted. # metaServer.CSAuthentication.X509.PKeyPassword = # Certificate authorities file. Used for both chunk server certificate # validation and to create certificate chain with meta server's X509 # certificate. # metaServer.CSAuthentication.X509.CAFile = # Certificate authorities directory can be used in addition to CAFile. # For more detailed information please see SSL_CTX_load_verify_locations manual # page. CAFile/CADir corresponds to CAfile/CApath in the man page. # metaServer.CSAuthentication.X509.CADir = # If set (the default) verify peer certificate, and declare error if peer, i.e. # chunk server, does not preset "trusted" valid X509 certificate. # Default is on. # metaServer.CSAuthentication.X509.verifyPeer = 1 # OpenSSL cipher configuration. # metaServer.CSAuthentication.X509.cipher = !ADH:!AECDH:!MD5:HIGH:@STRENGTH # SSL/TLS session cache timeout. 
Session cache is only used with X509 # authentication method, with non default client or server side openssl options # that turns off use of tls session tickets. # Default is 4 hours. # metaServer.CSAuthentication.X509.session.timeout = 14400 # The long integer value passed to SSL_CTX_set_options() call. # See open ssl documentation for details. # Default is the integer value that corresponds to SSL_OP_NO_COMPRESSION # metaServer.clientAuthentication.X509.options = # ================= Kerberos authentication ===================================== # Kerberos principal: service/host@realm # Meta server's Kerberos principal [service/host@realm] service name part. # metaServer.CSAuthentication.krb5.service = # Meta server's Kerberos principal [service/host@realm] host name part. # metaServer.CSAuthentication.krb5.host = # Kerberos keytab file with the key(s) that corresponds to the meta server's # principal. # metaServer.CSAuthentication.krb5.keytab = # Copy keytab into memory keytab, if supported by the kerberos versions, to # improve performance, and avoid disk access. # Default is on. # metaServer.CSAuthentication.krb5.copyToMemKeytab = 1 # Client's (chunk server) principal "unparse" mode. # Can be set to space separated combination of the following modes: # short noRealm display # The result of the principal conversion to string is used as client's # (chunk server's) "authenticated name". # The default is fully qualified principal name. For chunk servers it # would typically be in the form of service/host@realm. # The "unparsed" chunk server name is checked against "black" and "white" chunk # server list names as described below. # metaServer.CSAuthentication.krb5.princUnparseMode = # OpenSSL cipher configuration for TLS-PSK authentication method. This method # is used with TLS-PSK and with Kerberos authentication. # metaServer.CSAuthentication.psk.cipherpsk = !ADH:!AECDH:!MD5:!3DES:PSK:@STRENGTH # The long integer value passed to SSL_CTX_set_options() call. 
# See open ssl documentation for details. # Default is the integer value that corresponds to the logical OR of # SSL_OP_NO_COMPRESSION and SSL_OP_NO_TICKET # metaServer.CSAuthentication.psk.options = # ================= PSK authentication ========================================= # PSK chunk server authentication is intended only for testing and possibly for # small [test] clusters with very few chunk servers, where the same # authentication credentials [PSK "key"] are used for all chunk servers. # Chunk server PSK key id. This string sent to the chunk as TLS PSK "hint", and # also used as chunk server "authenticated name". # This effectively overrides chunk server key id. # If chunk server key id set to non empty string, then it can be left empty. # In such case chunk server key id is used as authenticated name. The chunk # server key id sent as "clear text" as part of ssl handshake, and is not # "tied" in any way known to the meta server logic to the key id, therefore any # "name" can be used. In other words the key is the only real security # "credential" with this authentication scheme. # The resulting chunk server name must not be empty, and pass "black" and # "white" list check, see below. # metaServer.CSAuthentication.psk.keyId = # Chunk server PSK key (the "pre-shared-key"). The same key must be used on the # chunk server side in order for psk authentication to work. # The default is empty key -- PSK authentication is not enabled. # The key must be base 64 encoded, i.e. it must be valid base 64 sequence. # metaServer.CSAuthentication.psk.key = # ================= Chunk servers' "black" and "white" lists ================== # Chunk server's X509 common names and/or kerberos names, "black" ("revocation") # list. If chunk server's authenticated name matches one of the names in this # list the authentication will fail. The names in the list must be # separated by spaces. Names with white space symbols are not supported. 
# metaServer.CSAuthentication.blackList = # Chunk server's X509 common names and/or kerberos names, "white list". Unless # the list is empty the chunk server's authenticated name must match one of the # names in the list. # metaServer.CSAuthentication.whiteList = #------------------------------------------------------------------------------- # -------------------- User / "client" authentication. ------------------------- # Client X509 and kerberos authentication parameters only differ from chunk # server's authentication parameters by metaServer.clientAuthentication prefix. # The defaults are identical to chunk server authentication. # Maximum authenticated session lifetime. This limits authenticated session time # for all authentication methods. In other words, the session [connection] must # be re-authenticated if the authentication token (delegation token, Kerberos # ticket, or x509 certificate) "end time" is reached or authenticated session # exists longer than the value of this parameter. # Default is 24 hours. # metaServer.clientAuthentication.maxAuthenticationValidTimeSec = 86400 # ================= X509 authentication ======================================== # Meta server's X509 certificate file in PEM format. # metaServer.clientAuthentication.X509.X509PemFile = # Password if X509 PEM file is encrypted. # metaServer.clientAuthentication.X509.X509Password = # Meta server's private key file. # metaServer.clientAuthentication.X509.PKeyPemFile = # Password if private key PEM file is encrypted. # metaServer.clientAuthentication.X509.PKeyPassword = # Certificate authorities file. Used for both chunk server certificate # validation and to create certificate chain with meta server's X509 # certificate. # metaServer.clientAuthentication.X509.CAFile = # Certificate authorities directory can be used in addition to CAFile. # For more detailed information please see SSL_CTX_load_verify_locations manual # page. 
CAFile/CADir correspond to CAfile/CApath in the manual page. # metaServer.clientAuthentication.X509.CADir = # If set (the default) verify peer certificate, and declare error if peer, i.e. # QFS client, does not present a certificate. # Default is on. # metaServer.clientAuthentication.X509.verifyPeer = 1 # OpenSSL cipher configuration for X509 authentication method. # metaServer.clientAuthentication.X509.cipher = !ADH:!AECDH:!MD5:HIGH:@STRENGTH # SSL/TLS session cache timeout. Session cache is only used with X509 # authentication method, with non default client or server side openssl options # that turn off use of tls session tickets. # Default is 4 hours. # metaServer.clientAuthentication.X509.session.timeout = 14400 # The long integer value passed to SSL_CTX_set_options() call. # See open ssl documentation for details. # Default is the integer value that corresponds to SSL_OP_NO_COMPRESSION # metaServer.clientAuthentication.X509.options = # ================= Kerberos authentication ===================================== # Kerberos principal: service/host@realm # Meta server's Kerberos principal [service/host@realm] service name part. # metaServer.clientAuthentication.krb5.service = # Meta server's Kerberos principal [service/host@realm] host name part. # metaServer.clientAuthentication.krb5.host = # Kerberos keytab file with the key(s) that correspond to the meta server's # principal. # metaServer.clientAuthentication.krb5.keytab = # Copy keytab into memory keytab, if supported by the kerberos versions, to # improve performance, and avoid disk access. # Default is on. # metaServer.clientAuthentication.krb5.copyToMemKeytab = 1 # Client's principal "unparse" mode. # Can be set to space separated combination of the following modes: # short noRealm display # The result of the principal conversion to string is used as client's # "authenticated name". # The default is fully qualified principal name.
For users it would # typically be in the form user@realm. # The resulting authentication name should match the password database the meta # server host uses. The recommended value is "short", discard realm if it # matches the Kerberos configuration's default realm. # metaServer.clientAuthentication.krb5.princUnparseMode = # OpenSSL cipher configuration for TLS-PSK authentication method. This method # is used with delegation and with Kerberos authentication. # metaServer.clientAuthentication.psk.cipherpsk = !ADH:!AECDH:!MD5:!3DES:PSK:@STRENGTH # The long integer value passed to SSL_CTX_set_options() call. # See open ssl documentation for details. # Default is the integer value that corresponds to the logical OR of # SSL_OP_NO_COMPRESSION and SSL_OP_NO_TICKET # metaServer.clientAuthentication.psk.options = # The following two parameters and respective defaults are intended to allow # non authenticated access for the meta server web UI from the local host. # Space separated list of host ips from which the RPCs listed in the next parameter are # permitted with no authentication. # Default is 127.0.0.1 # metaServer.clientAuthentication.noAuthOpsHostIps = 127.0.0.1 # Space separated list of RPC names that are allowed with no authentication, if the # client's host ip obtained with getpeername() call matches one of the ips in # the preceding list. # Default is RPCs used by the meta server web UI. # metaServer.clientAuthentication.noAuthOps = PING GET_CHUNK_SERVERS_COUNTERS GET_CHUNK_SERVER_DIRS_COUNTERS GET_REQUEST_COUNTERS DISCONNECT # ================= Client's "black" and "white" lists ========================= # Client's (user) X509 common names and/or kerberos names, "black" # ("revocation") list. If client's authenticated name matches one of the names in # this list the authentication will fail. The names in the list must be # separated by spaces. Names with white space symbols are not supported.
# metaServer.clientAuthentication.blackList = # Client's X509 common names and/or kerberos names, "white list". Unless # the list is empty the client's authenticated name must match one of the # names in the list. # metaServer.clientAuthentication.whiteList = # ================== Delegation ================================================ # # Delegation token expiration time limit. # Default is 24 hours. # metaServer.clientAuthentication.maxDelegationValidForTimeSec = 86400 # Do not limit delegation token end time to the meta server session credentials # (Kerberos ticket or X509 certificate) end time. # Default is 0. # metaServer.clientAuthentication.delegationIgnoreCredEndTime = 0 #================================================================================ # Allow to use "clear text" communication mode by performing SSL/TLS shutdown # immediately after successful authentication completion. If enabled, the QFS # client's corresponding setting defines the communication mode between the # client and the write master. The "clear text" communication mode between chunk # servers (synchronous replication, re-replication, and chunk recovery) will be # used if this parameter is set to "on". # Using this mode might make sense in order to reduce chunk server CPU utilization # and/or possibly increase IO throughput, in the cases where chunk server # communication channel is considered to have adequate security for the purpose # at hand. # Default is "off" / "no" # metaServer.clientCSAllowClearText = 0 # Chunk server access token maximum lifetime. # Chunk server access token time defines chunk access time limit. # Chunk access tokens have 10 min time limit -- twice chunk lease time. The # chunk server access token effectively defines maximum client and chunk server # to chunk server connections lifetimes.
The client and chunk servers attempt # to obtain and use a new chunk server access token before the current token # expires, and re-open connection with the newly obtained token. # Default is 2 hours. # metaServer.CSAccessValidForTimeSec = 7200 # The meta server limits the write lease end time to the max of the current time # plus the value of the following parameter, and the authentication end time. # The parameter is intended primarily for testing, to avoid spurious write # retries with authentication maximum life time set to very small value -- 5sec. # (The short authentication lifetime is used in order to test the # re-authentication logic.) # metaServer.minWriteLeaseTimeSec = 600 #------------------------------------------------------------------------------- # -------------------- User and group configuration. --------------------------- # User and group database parameters. # The meta server host's user and group configuration is used for QFS file system # when QFS authentication is enabled. The user and group database is used to map # "authenticated names" obtained with Kerberos and X509 authentication methods # to user and group ids, and to establish group membership. Authenticated names # that have no corresponding user id, or user ids that have no corresponding # "user name" are considered invalid, and as the result the authentication # fails. # User and group id with value 4294967295 have special treatment. Access is always # denied for users with such id. # Root user entry with name "root" and id 0 is added if not present in the user # database, unless explicitly excluded with metaServer.userAndGroup.excludeUser # parameter. # With authentication enabled QFS client library does not use host's local user # and group database, the meta server's host database is effectively used by all # QFS clients. # Minimal user id to include in user name to id mapping. # Default is 0.
# metaServer.userAndGroup.minUserId = 0 # Maximum user id to include in user name to id mapping. # Default is 4294967295. # metaServer.userAndGroup.maxUserId = 4294967295 # Minimal group id to include in group name to group id mapping. # Default is 0. # metaServer.userAndGroup.minGroupId = 0 # Maximum group id to include in group name to group id mapping. # Default is 4294967295. # metaServer.userAndGroup.maxGroupId = 4294967295 # Omit entries with user names if it has one of the specified prefixes. # metaServer.userAndGroup.omitUserPrefix = # Omit entries with group names if it has one of the specified prefixes. # Default is empty list. # metaServer.userAndGroup.omitGroupPrefix = # Update / re-read user and group to id mappings with every N seconds. # By default periodic updates are effectively disabled. The parameter reload # with HUP signal can be used to trigger user and group information update. # Default is 315360000. # metaServer.userAndGroup.updatePeriodSec = 315360000 # Disable user and group initial loading and/or reloading. # Default is enabled. # metaServer.userAndGroup.disable = 0 # Space separated list of the user names to exclude when loading or updating # user database. # Default is empty list. # metaServer.userAndGroup.excludeUser = # Space separated list of the group names to exclude when loading or updating # group database. # Default is empty list. # metaServer.userAndGroup.excludeGroup = # Space separated list of the group names, where members of these groups # have effective user id 0 -- root. # Default is empty list. # metaServer.userAndGroup.rootGroups = # Space separated list of the user names, where such users have effective user # id 0 -- root. # User with name root and id 0 always added, even if it isn't present or # excluded from the user database. # Default is empty list. # metaServer.userAndGroup.rootUsers = # Space separated list of the user names. 
Specified users are allowed to # perform meta server administrative requests: fsck, chunk server retire, # toggle worm, recompute directory sizes, dump to chunk to servers map, # dump replication candidates, check chunk leases, list open files. # Default is root user. # metaServer.userAndGroup.metaServerAdminUsers = root # Space separated list of group names. Members of these groups are allowed to # perform meta server administration described in the previous parameter's # section. # Default is empty list. # metaServer.userAndGroup.metaServerAdminGroups = # Space separated list of the user names. Specified users are allowed to # perform meta server status inquiry requests: ping, up servers, meta stats, get # chunk servers counters, get chunk directory counters, get meta server request # counters. # Default is root user. # metaServer.userAndGroup.metaServerStatsUsers = root # Space separated list of group names. Members of these groups are allowed to # perform meta server status requests described in the previous parameter's # section. # Default is empty list. # metaServer.userAndGroup.metaServerStatsGroups = # Space separated list of the user names. Specified users are allowed to # renew and cancel delegation tokens that belong to other users. No delegation # can be used, to perform delegation renew or cancel, i.e. Kerberos or X509 # authentication methods must be used. # Default is empty list. # metaServer.userAndGroup.delegationRenewAndCancelUsers = # Space separated list of group names. Members of these groups are allowed to # renew and cancel delegation tokens that belong to other users, as described # in the previous parameter's section. # metaServer.userAndGroup.delegationRenewAndCancelGruops = # -------------------- Meta server cryptographic keys. ------------------------- # Key lifetime. The value defines maximum time before the delegation tokens # issued by the meta server expire, and have to be renewed. # Default is 4 hours. 
# metaServer.cryptoKeys.keyValidTimeSec = 14400 # Keys change period. # Key lifetime minus key change period is the minimum time before delegation token # must be renewed. # metaServer.cryptoKeys.keyChangePeriodSec = 7200 # Meta server crypto keys file name. # This parameter is *deprecated*, the keys are now stored in checkpoint and # transaction log. In non replicated meta server configuration the file will be # read and the keys will be loaded if no keys exist in the checkpoint and # transaction log. # File name to save meta server keys. # Specify file name to save keys, in order to ensure that delegation tokens are # persistent across meta server restarts. # Default is none, keys are not persistent across meta server restarts. # metaServer.cryptoKeys.keysFileName = # ------------------- Meta server authentication override ---------------------- # # Setting the following 4 parameters to the values specified below, will # effectively disable client authentication. The chunk server authentication can # still be enabled. # Like the other parameters, removing or commenting out these parameters will not # turn back QFS client authentication on, until chunk and meta servers restart. # To turn back the QFS client authentication the parameters should be explicitly # set back to the original / default values. # # Overriding the default behavior might be useful for initial authentication # setup and/or debugging. Only two sets of values, the default and the set of # inverted values work, any other combinations will not, though they could be useful # for testing, i.e. 1 0 0 0 and 0 1 1 1 # # Default is 1 if QFS client authentication *not* configured, 0 otherwise. # metaServer.clientAuthentication.authNone = 1 # # Default is 0 if QFS client authentication *not* configured, 1 otherwise. # metaServer.clientCSAuthRequired = 0 # # Default is 0 if chunk and meta server authentication is *not* # configured, 1 otherwise.
# chunkServer.client.auth.enabled = 0 # # Default is 0 if chunk and meta server authentication is *not* # configured, 1 otherwise. # chunkServer.remoteSync.auth.enabled = 0 # -------------------- File system ID ------------------------------------------ # # File system id is 64 bit file system identifier generated by the meta server # at the time the file system is created. The file system id is used to protect # against accidental use of chunk files that belong to a different file system. # For example file system id should prevent use of stale chunk inventory that # belongs to "old"/different file system, in the case where a new file system # was created, and the same "cluster key" is used. # Require file system id in the chunk server hello. # Default is 0, to maintain backward compatibility with file systems created with # no file system id. # metaServer.fileSystemIdRequired = 0 # The following parameter might be used for temporary file systems, that might # be intentionally re-created from scratch on meta server restart, or in the # case of loss of transaction log and/or checkpoint. Use with extra caution. # Default is 0, do not delete chunks on file system id mismatch. Do not use # chunk directories if chunk directory file system does not match. # metaServer.deleteChunkOnFsIdMismatch = 0 #------------------------------------------------------------------------------- # ===================== Chunk servers configuration parameters. ================ # Configuration parameters in the meta server configuration file take precedence # over the chunk server configuration files. # ---------------------------------- Message log. ------------------------------ # Chunk server log level. chunkServer.msgLogWriter.logLevel = NOTICE # Colon (:) separated file name prefixes to store log segments. # Default is empty list. The default is to use file name from the command line # or if none specified write into file descriptor 2 -- stderr.
# chunkServer.msgLogWriter.logFilePrefixes = # Maximum log segment size. # Default is -1 -- unlimited. # chunkServer.msgLogWriter.maxLogFileSize = -1 # Maximum number of log segments. # Default is -1 -- unlimited. # chunkServer.msgLogWriter.maxLogFiles = -1 # Max. time to wait for the log buffer to become available. # When wait is enabled the request processing thread will wait for the log # buffer disk io to complete. If the disk subsystem cannot keep up with the # logging it will slow down the request processing. # For chunk servers keeping the default is strongly recommended to minimize # dependency on the host's disk subsystem reliability and performance. # Default is -1. Do not wait, drop log record instead. # chunkServer.msgLogWriter.waitMicroSec = -1 #------------------------------------------------------------------------------- # Disk io request timeout. # Default is 270 sec. Production value is 40 sec. # chunkServer.diskIo.maxIoTimeSec = 270 # Synchronous replication timeouts. # Record append synchronous replication timeout. # Default is 180 sec. Production value is 20 sec. # chunkServer.recAppender.replicationTimeoutSec = 180 # Write replication timeout. # Default is 300 sec. Production value is 20 sec. # chunkServer.remoteSync.responseTimeoutSec = 300 # Controls buffered io -- use os file system cache, instead of direct io on the # os / file systems that support direct io (most file systems on linux). # Default is off. # It is conceivable that enabling buffered io might help with short reads for # the "broadcast" / "web server" type of loads. For the "typical" large io (1MB) # requests sequential type loads enabling caching will likely lower cluster # performance due to higher system (os) cpu overhead, and memory contention. # Default is off. # chunkServer.bufferedIo = 0 # If sparse files, and in particular chunks aren't used (sequential write only # for example) the following parameter can be set to 0. # Default is 1 -- enabled. 
# chunkServer.allowSparseChunks = 1 # The minimal amount of space in bytes that must be available in order for the # chunk directory to be used for chunk placement (considered as "writable"). # Default is chunk size -- 64MB plus chunk header size 16KB. # chunkServer.minFsAvailableSpace = 67125248 # The minimal amount of space that must be available in order for the chunk # directory to be used for chunk placement (considered as "writable"), expressed # as part of total host file system space. # Default is 0.05 or 5%, or in other words stop using chunk directory when the # host file system where the chunk directory resides reaches 95% space # utilization. # chunkServer.maxSpaceUtilizationThreshold = 0.05 # The "weight" of pending disk io in chunk placement. # If set to 0 or less the pending io (number of io bytes in the disk queue) has # no effect on the placement (choosing chunk directory where to create chunk). # If weight set to greater than 0, then the average pending io per chunk # directory (host file system / disk) is calculated, as # (total_pending_read_bytes * total_pending_read_weight + # total_pending_write_bytes * total_pending_write_weight) / chunk_directory_count # Chunk directories with pending_read + pending_write that exceed the value # above are taken out of the consideration for placement. # Default is 0. Typical production value is 1.3 # chunkServer.chunkPlacementPendingReadWeight = 0 # chunkServer.chunkPlacementPendingWriteWeight = 0 # Averaging interval for calculating the average time the incoming "client's" # requests spend in io buffer wait queue. The "average wait time" value is used by # the meta server for chunk placement. The average exponentially decays (IIR # filter). # Default is 20 sec. Typical production value is 8. # chunkServer.bufferManager.waitingAvgInterval = 20 # "Not available" directories rescan interval in seconds. Default is 180 sec. # (see comment in chunk server configuration file).
# chunkServer.dirRecheckInterval = 60 # The following parameter has effect only if client threads enabled, i.e. if # chunkServer.clientThreadCount parameter set to a value greater than 0 in the # chunk server configuration. # If the value is less than chunkServer.clientThreadCount, then # threads in range # [chunkServer.client.firstClientThreadIndex, chunkServer.clientThreadCount) # will be used to service all requests, except RS chunk recovery, otherwise the # "main" thread will be used. # chunkServer.client.firstClientThreadIndex = 0 # The following parameter has effect only if client threads enabled, i.e. if # chunkServer.clientThreadCount parameter set to a value greater than 0 in the # chunk server configuration. # Limit number of client threads used for RS recovery. Each thread uses # single dedicated connection to the meta server to perform RS recovery. # If set to 0 or less, then the "main" thread is used to perform RS recovery. # The client threads in the range # [0, min(chunkServer.rsReader.maxRecoveryThreads, chunkServer.clientThreadCount)) # are used to perform RS recovery. # Default is 5. The same value as # metaServer.maxConcurrentWriteReplicationsPerNode default. # chunkServer.rsReader.maxRecoveryThreads = 5 # Assign chunk directories to storage tiers by specifying directory prefixes and # tier. For example assign all chunk directories that start with /mnt/flash to # tier 14, and /mnt/ram to tier 13, and all others to 15. # chunkServer.storageTierPrefixes = /mnt/flash 14 /mnt/ram 13 # Default is to assign all chunk directories to tier 15. # chunkServer.storageTierPrefixes = # Use host os DNS resolver if set to 1. # If set to 0 then load /etc/hosts and /etc/resolv.conf and use QFS built-in non # blocking DNS resolver. # chunkServer.useOsResolver = 0 # DNS resolver maximum cache size. # DNS cache is off if set to 0 or less. # Default is 8192 entries. # chunkServer.resolverMaxCacheSize = 8192 # DNS resolver cache expiration time in seconds.
# If set to 0 or less the DNS cache is off. # Default is -1. # chunkServer.resolverCacheExpiration = -1 # ---------------- Chunk server watchdog. -------------------------------------- # Watchdog thread polls chunk server threads and aborts chunk server process, # when configured to do so, in the case if one or more threads appear not to be # making progress due to likely chunk server and / or OS malfunction. # Poll interval. # Default is 16 seconds when chunkServer.watchdog.maxTimeoutCount is equal or # greater than 0, and 1.15 otherwise. # Minimum interval is # max(1.25, 4.0 / max(1, chunkServer.watchdog.maxTimeoutCount)) seconds # when chunkServer.watchdog.maxTimeoutCount is equal or greater than 0, and 1.05 # seconds otherwise. # chunkServer.watchdog.pollIntervalSec = 1.15 # Abort process if no progress detected after the specified consecutive poll # intervals. # Default is -1, do not abort the process. # chunkServer.watchdog.maxTimeoutCount = -1 # ==================== AWS S3 object store ===================================== # # Global toggle to enable object store. # Default is 0, must be set to 1 to enable object store. # metaServer.objectStoreEnabled = 0 # By default "object store access proxy" (AP) (chunk server functions as access # proxy) is required to be on the same host as QFS client. # Changing the following two parameters allows to use non host local AP. # # If use of non local AP is enabled, then the meta server first attempts to find # AP running on the host where QFS client runs, then if no such AP found, attempts # to find AP on the same rack, and finally chooses one of the chunk servers. # The meta server attempts to distribute "load" evenly between all APs when # non host local AP has to be used. # # # Allow to use AP running on different than client host to serve read requests. # Enabling this option might result in additional network traffic. # Default is 0.
# metaServer.objectStoreReadCanUseProxyOnDifferentHost = 0 # The following has effect only when # metaServer.objectStoreReadCanUseProxyOnDifferentHost is not 0 # Possible values: # 0 -- if rack set in client request, then consider AP with matching rack first, # and if no suitable AP found, or rack is not set in the client request, then # consider all available APs. # 1 -- only consider APs with the rack that matches QFS client, if no AP with # matching rack found or if no rack set in the client request, then declare # failure. # 2 -- if rack set in client request, then only consider AP with matching rack, # otherwise if no rack set in client request then consider all available APs. # Default is 0. # metaServer.readUseProxyOnDifferentHostMode = 0 # Allow to use AP running on different than client host to serve write requests. # Enabling this option might result in additional network traffic, and reduced # write reliability in the cases where AP and client run on different hosts, as # object write protocol provides no redundancy. Therefore permanently losing # communication between client and AP, or AP host going down would result in # unrecoverable write failure. # Default is 0. # metaServer.objectStoreWriteCanUseProxyOnDifferentHost = 0 # Please see the description for metaServer.readUseProxyOnDifferentHostMode # above. # Default is 0. # metaServer.writeUseProxyOnDifferentHostMode = 0 # If more than one object store directory specified, the following parameter # must be set to assign each object store directory to a single tier. # Note that changing tier assignment for object store directories that already # exist and are not empty is not supported, as the object store block names include # storage tier id. The syntax is the same as for chunkServer.storageTierPrefixes # parameter. # For example assign s3://aws14. to tier 14, and the other directory, if any, # to tier 15 # chunkServer.objecStorageTierPrefixes = s3://aws14.
14 # Default is to assign object store directory, if any, to tier 15. # chunkServer.objecStorageTierPrefixes = # Object "directory" parameters. # For description please see # chunkServer.objectDir parameter description in the annotated chunk server # configuration file. # # S3 bucket name used to store object blocks. # chunkServer.diskQueue.bucketName = # S3 access key ID # chunkServer.diskQueue.accessKeyId = # S3 secret access key. # chunkServer.diskQueue.secretAccessKey = # S3 region name. # If not specified, then AWS authorization version 2 is used, otherwise # version 4 is used. # Note S3 allows server side encryption only with version 4 # Default empty. # chunkServer.diskQueue.region = # S3 use server side encryption. If set to non 0 enables aws:kms type encryption. # If enabled, then AWS region must be specified. # Presently only aws:kms supported. # Default is 0. # chunkServer.diskQueue.useServerSideEncryption = 0 # S3 object storage class. # If non empty, then x-amz-storage-class header with the specified value # is added to all put requests for this bucket. # Valid values: STANDARD | STANDARD_IA | REDUCED_REDUNDANCY # Default is empty, no x-amz-storage-class header sent. # chunkServer.diskQueue.storageClass = # The following parameter controls querying S3 for upload IDs prior to deleting # object store block. By default object store is queried for upload IDs that # match the object key being deleted, and uploads are deleted. This is needed to # clean up any stale multi part uploads. # Setting this parameter to 1 turns off upload IDs querying. Doing so might # eliminate query cost, in the case if "external" to QFS procedure is used to # clean stale multi part uploads. # For example: periodically (say once every 24 hours) perform bulk uploads IDs # query, and delete uploads with keys that correspond to i-node numbers that were # created earlier than the maximum time the file write can take (say 24 hours).
# A new file can be created and its i-nodes can be used to determine i-node # numbers that were created after this file was created, as i-node numbers are # monotonically increasing integers. # chunkServer.diskQueue.deleteNoUploadList = 0 # If no parameters with the following prefix: # chunkServer.diskQueue..ssl. # are set, then http protocol is used instead of https. # S3 server's x509 certificate verification. # Default is 1. # chunkServer.diskQueue.ssl.verifyPeer = 1 # Certificate authorities bundle used for S3 server verification. # Bundle must be in format that openssl supports. # Such bundle might be already installed on the system or obtained from: # https://raw.githubusercontent.com/bagder/ca-bundle/master/ca-bundle.crt # chunkServer.diskQueue.ssl.CAFile = # Certificate authorities directory can be used in addition to CAFile. # For more detailed information please see SSL_CTX_load_verify_locations manual # page. CAFile/CADir correspond to CAfile/CApath in the man page. # chunkServer.diskQueue.ssl.CADir = # OpenSSL cipher configuration. # Default is !ADH:!AECDH:!MD5:HIGH:@STRENGTH # chunkServer.diskQueue.ssl.cipher = !ADH:!AECDH:!MD5:HIGH:@STRENGTH # Use host os DNS resolver if set to 1. # If set to 0 then load /etc/hosts and /etc/resolv.conf and use QFS built-in non # blocking DNS resolver. # chunkServer.diskQueue.useOsResolver = 0 # DNS resolver maximum cache size. # DNS cache is off if set to 0 or less. # Default is 8192 entries. # chunkServer.diskQueue.resolverMaxCacheSize = 8192 # DNS resolver cache expiration time in seconds. # If set to 0 or less the DNS cache is off. # Default is 1 second. # chunkServer.diskQueue.resolverCacheExpiration = 1 # ================= Transaction log writer. ==================================== # Start new transaction log segment every 600 (default) seconds. # Default is 600 sec. # metaServer.log.rotateIntervalSec = 600 # Log segment size limit. # Default 8MB.
# metaServer.log.logFileMaxSize = 8388608 # Issue fsync() after every log block write. # Default is off, to minimize log / RPC latency. # metaServer.log.sync = 0 # ================= Meta data (checkpoint and transaction log) store. ========== # Number of past checkpoints, and the corresponding transaction log segments to # keep. # Meta server deletes prior checkpoints and log segments, every time a new # checkpoint is successfully created. # The default is 16. # metaServer.dataStore.maxCheckpointsToKeepCount = 16 # Do not clean up older checkpoint and log files, unless these have not been # accessed in the last 60 (default) seconds. # metaServer.dataStore.maxInactiveTime = 60 # Data store disk IO thread count -- number of threads to fetch data from disk # when serving other nodes' meta data read requests. # Default is 1. # metaServer.dataStore.threadCount = 1 # Read RPC request size limit. # metaServer.dataStore.maxReadSize = 2097152 # ======================= FSCK ================================================= # Maximum number of files to include in FSCK report. # Default is 128K # metaServer.maxFsckFiles = 131072 # Maximum FSCK run time. # FSCK is stopped if / when run time exceeds the maximum and partial report is # produced. # Default is 1140 seconds (19 minutes) # metaServer.maxFsckTime = 1140 # Abandoned file timeout. # Files with 0 logical size and 1 or more blocks are reported by FSCK if / when # modification time plus timeout is less than the FSCK start time. # Default is 1000 seconds. # metaServer.fsckAbandonedFileTimeout = 1000 # ==================== Meta Server Viewstamped Replication (VR) ================ # Configuring replicated meta server group / cluster consists of the following # steps. # # 0. Decide on the number of meta server nodes N. The minimum N is 3. The maximum # number of tolerated nodes failures is N - (N/2 + 1).
QFS clients and chunk # servers automatically re-connect to newly elected primary node in the case when # prior primary node becomes unavailable due to node or network connectivity # failure. # # 1. Assign node IDs. The node ID must be non negative 63 bit integer. Initial # set of nodes must have node with ID 0. The node with lowest ID is # elected as a primary, the remaining active nodes are assigned backup # status. Node's "primary order" (32 bit signed integer) can be set to change # primary election, the node with the smallest primary order becomes primary. # Node's primary order takes precedence over node ID, node ID breaks tie in case # when primary orders are equal. In initial configuration all nodes primary # order must be set to 0. Node primary order is VR configuration parameter, it # can be changed with qfsadmin vr_reconfiguration command. # ****************************************************************************** # * Node IDs must be unique, and should never be re-used. Non unique IDs within # * the same meta server group / file system can result in loss of file system. # ****************************************************************************** # # 2. Configure meta data sync, and log receiver listeners, please see the # following section for parameters description below. # # 3. Copy or create new file system checkpoint and transaction log segments # on/to the node with ID 0, and ensure that other nodes have empty checkpoint # and transaction log directories. Specify file system ID in the meta server # configuration file -- see metaServer.metaDataSync.fileSystemId parameter # below. File system ID can be obtained from the beginning of the checkpoint # file: the first number on the line with the "filesysteminfo/fsid/" prefix. # # 4. Create DNS record with the list of meta server nodes IP addresses, or add # meta server nodes IP addresses to the client configuration file by using # client.metaServerNodes parameter. 
# Chunk servers, similarly to QFS client, need to be configured with the meta # server DNS name, or list of meta server nodes network locations. Please see # chunkServer.meta.nodes parameter description in chunk server configuration # file. # # 5. Start meta server on all nodes. The nodes with non 0 IDs should fetch # checkpoint and transaction log from node with ID 0. # # 6. Use qfsadmin vr_reconfiguration command to configure replication. VR # configuration is stored in the checkpoint and transaction log, and replicated # onto all meta server nodes. # The first step is to add all nodes with their respective transaction log # listeners network addresses [locations], the second is to activate nodes. Meta # server should respond to vr_reconfiguration without arguments with the # command description. QFS admin configuration file has the same format and # parameters as QFS client configuration file. If a node has more than one # network link / IP address, multiple redundant network connections can # be configured to increase connectivity reliability by specifying list of log # listeners' network locations (IP address and port). # # Please note that DNS name that lists all meta server nodes is to be used with # qfsadmin commands in the example below. 
# For example: # ## Add node 0 to VR configuration: # qfsadmin -f qfsadmin.cfg # -s \ # -p \ # -F op-type=add-node \ # -F arg-count=1 \ # -F node-id=0 \ # -F args='node0-ip-address node0-log-listener-port-number' \ # vr_reconfiguration ## Add node 1 to VR configuration: # qfsadmin -f qfsadmin.cfg # -s \ # -p \ # -F op-type=add-node \ # -F arg-count=1 \ # -F node-id=1 \ # -F args='node1-ip-address node1-log-listener-port-number' \ # vr_reconfiguration ## Add node 2 to VR configuration: # qfsadmin -f qfsadmin.cfg # -s \ # -p \ # -F op-type=add-node \ # -F arg-count=1 \ # -F node-id=2 \ # -F args='node2-ip-address node2-log-listener-port-number' \ # vr_reconfiguration ## Activate nodes: # qfsadmin -f qfsadmin.cfg # -s \ # -p \ # -F op-type=activate-nodes \ # -F arg-count=3 \ # -F args='0 1 2' \ # vr_reconfiguration # Meta server VR (viewstamped replication) node ID. # The node IDs must be unique, and should never be reused, in order to ensure # all VR node IDs are unique. Duplicate node IDs might result in loss of the # file system. # Valid node ID must be equal or greater than 0. # Default is -1, no valid node ID assigned. # metaServer.vr.id = -1 # Host name to VR node ID map. # Use only if host names returned by gethostname() system call are guaranteed # to be unique. # The list must be space separated pairs. For example: # host_name_1 0 host_name_2 1 host_name_3 2 # Default is none. # metaServer.vr.hostnameToId = # List of valid meta server executable md5 # If the list is not empty, meta executable md5 must be included in this list in # order to be able to communicate with other VR nodes. # Default is empty list. # metaServer.metaMds = # Meta server VR node state file. # The file contains node specific state. A valid file is required to form VR # quorum, i.e. meta server nodes group serving client requests, and maintaining # (re-replicating or recovering lost) chunks. # metaServer.vr.stateFileName = vrstate # Sync (fsync) VR state file to disk (persistent store). 
# Default is on. # metaServer.vr.syncVrStateFile = 1 # An active meta server node must have valid VR state file in order to be able # to form VR quorum. The state includes node ID, and reflects node's replicated # state. Valid state must match the state of the transaction log. The state file # is used on meta server startup. Meta server validates that the node ID and # transaction log state correspond to the values in VR state file on startup. # In case of mismatch the file is deleted. The meta server writes the VR state # file during its operations. The file is small, typically a couple hundred bytes. # The only case where this might need to be changed is in the case of attempting # to recover from the meta data loss, where VR state files were lost on more # than quorum nodes. # Default is off. # metaServer.vr.ignoreInvalidVrState = 0 # This node client listener, if it differs from the IP returned by # getpeername() on the other VR nodes, and/or metaServer.clientIp # The port should be the same as this node metaServer.clientPort. # In other words, this is IP and port that other VR nodes must use to connect to # this node client listener. # Default is empty. # metaServer.vr.metaDataStoreLocation = # ------------ Meta data initial fetch / synchronization. ----------------- # File system ID, required for initial (when no checkpoint exists) meta # data fetch to work. # Default is no valid file system id. # metaServer.metaDataSync.fileSystemId = # Space separated list of replicated meta server nodes locations. # Each location consists of IP address (or DNS name) and port number. # The port numbers are the respective client listener port numbers, i.e. values of # each node metaServer.clientPort parameter. # This parameter is used for initial meta data fetch, if required, in the cases # when no checkpoint and logs exist, or checkpoint and logs are "too old", i.e. # more recent checkpoint and logs must be fetched from another meta server node. 
# For example: # metaServer.metaDataSync.servers = 10.10.10.10 20000 test.example.com 20000 # metaServer.metaDataSync.servers = # Meta data read parameters. Read size multiplied by read ops count defines # amount of data in flight. The default is 1MB which would normally be greater # than bandwidth delay product, including spinning media (disk) latency in a # cluster environment. # metaServer.metaDataSync.maxReadSize = 65536 # metaServer.metaDataSync.maxReadOpsCount = 16 # Maximum transaction log block size. # The intention is to avoid possible out of buffers due to bogus / corrupted # data. # Default is 64MB # metaServer.metaDataSync.maxLogBlockSize = 67108864 # File name where fetch state is stored in case meta server process restart # is required due to need to fetch checkpoint. # metaServer.metaDataSync.fetchOnRestartFileName = metadatafetch # Temporary files suffix. # Default .tmp # metaServer.metaDataSync.tmpSuffix = .tmp # Max data fetch retry attempts, and timeout, and other retry parameters. # metaServer.metaDataSync.maxRetryCount = 10 # metaServer.metaDataSync.retryTimeout = 3 # metaServer.metaDataSync.maxReadOpRetryCount = 8 # metaServer.metaDataSync.readOpTimeoutSec = 10 # metaServer.metaDataSync.maxOpRetryCount = 10 # metaServer.metaDataSync.maxOpLogWriteRetryCount = 0 # metaServer.metaDataSync.timeBetweenRetries = 4 # Meta data write disk sync. With sufficiently large requests, sync mode helps to # reduce host OS memory contention by minimizing write behind, and number of VM # dirty pages. # Default is off. # metaServer.metaDataSync.writeSync = 0 # Amount of data to buffer before issuing disk write request. # Default is 4MB # metaServer.metaDataSync.minWriteSize = 4194304 # Issue reads, while loading checkpoint, and replaying logs, in order to # ensure that subsequent log segments are not going to be deleted / reclaimed # by the node that the data is fetched from. 
# metaServer.metaDataSync.keepLogSegmentsInterval = 10 # Meta server node acts as QFS client when fetches data from other meta # server node, and when it transmits transaction log. For parameters description # please see the parameters descriptions with client.auth. prefix in # QfsClient.prp file in X509, Kerberos, and PSK authentication sections. # Meta data fetch authentication. # By default authentication is not used. # # metaServer.metaDataSync.auth.X509.X509PemFile = # metaServer.metaDataSync.auth.X509.X509Password = # metaServer.metaDataSync.auth.X509.PKeyPemFile = # metaServer.metaDataSync.auth.X509.PKeyPassword = # metaServer.metaDataSync.auth.X509.CAFile = # metaServer.metaDataSync.auth.X509.CADir = # metaServer.metaDataSync.auth.X509.verifyPeer = 1 # metaServer.metaDataSync.auth.X509.cipher = !ADH:!AECDH:!MD5:HIGH:@STRENGTH # metaServer.metaDataSync.auth.X509.options = # # metaServer.metaDataSync.auth.krb5.service = # metaServer.metaDataSync.auth.krb5.host = # metaServer.metaDataSync.auth.krb5.keytab = # metaServer.metaDataSync.auth.krb5.clientName = # metaServer.metaDataSync.auth.krb5.initClientCache = 0 # # metaServer.metaDataSync.auth.psk.options = # metaServer.metaDataSync.auth.psk.keyId = # metaServer.metaDataSync.auth.psk.key = # --------- Transaction log transmitter (synchronous replication). ------------- # Time between connect attempts. # Default 2 sec. # metaServer.log.transmitter.retryInterval = 2 # Authentication. # By default log transmitter authentication is off. # Authentication types to use # Default any available, if configured. 
# metaServer.log.transmitter.authType = Krb5 X509 PSK # # metaServer.log.transmitter.auth.X509.X509PemFile = # metaServer.log.transmitter.auth.X509.X509Password = # metaServer.log.transmitter.auth.X509.PKeyPemFile = # metaServer.log.transmitter.auth.X509.PKeyPassword = # metaServer.log.transmitter.auth.X509.CAFile = # metaServer.log.transmitter.auth.X509.CADir = # metaServer.log.transmitter.auth.X509.verifyPeer = 1 # metaServer.log.transmitter.auth.X509.cipher = !ADH:!AECDH:!MD5:HIGH:@STRENGTH # metaServer.log.transmitter.auth.X509.options = # # metaServer.log.transmitter.auth.krb5.service = # metaServer.log.transmitter.auth.krb5.host = # metaServer.log.transmitter.auth.krb5.keytab = # metaServer.log.transmitter.auth.krb5.clientName = # metaServer.log.transmitter.auth.krb5.initClientCache = 0 # # metaServer.log.transmitter.auth.psk.options = # metaServer.log.transmitter.auth.psk.keyId = # metaServer.log.transmitter.auth.psk.key = # ---------- Transaction log receiver listener configuration. ------------------ # For example to configure ipv4 listener on all hosted IP addresses, and port # 1234: # metaServer.log.receiver.listenOn = 0.0.0.0 1234 # By default transaction log listener is disabled. # metaServer.log.receiver.listenOn = # Listen only on IPv6 address, if IPv6 listen on address is used. # Default is off. # metaServer.log.receiver.ipV6Only = 0 # Connections limit from log transmitters. # Default is 8192. # metaServer.log.maxConnectionCount = 8192 # Meta server log receiver authentication. # By default log receiver authentication is off. # For parameters description please see chunk server authentication section in # this file. 
# # metaServer.log.receiver.auth.X509.X509PemFile = # metaServer.log.receiver.auth.X509.X509Password = # metaServer.log.receiver.auth.X509.PKeyPemFile = # metaServer.log.receiver.auth.X509.PKeyPassword = # metaServer.log.receiver.auth.X509.CAFile = # metaServer.log.receiver.auth.X509.CADir = # metaServer.log.receiver.auth.X509.verifyPeer = 1 # metaServer.log.receiver.auth.X509.cipher = !ADH:!AECDH:!MD5:HIGH:@STRENGTH # metaServer.log.receiver.auth.X509.session.timeout = 14400 # metaServer.log.receiver.auth.X509.options = # # metaServer.log.receiver.auth.krb5.service = # metaServer.log.receiver.auth.krb5.host = # metaServer.log.receiver.auth.krb5.keytab = # metaServer.log.receiver.auth.krb5.copyToMemKeytab = 1 # metaServer.log.receiver.auth.krb5.princUnparseMode = # # metaServer.log.receiver.auth.psk.options = # metaServer.log.receiver.auth.psk.keyId = # metaServer.log.receiver.auth.psk.key = # # metaServer.log.receiver.auth.blackList = # metaServer.log.receiver.auth.whiteList = # Meta server VR node process restart might be required in order to load a new # checkpoint when transaction log on all other available nodes have "rolled # over", and have no transactions that immediately follow the last transaction of # this node. By default meta server "re-executes" itself when restart is # required. In the case when the meta server process is automatically restarted # by another process the following parameter can be set to 1 to exit to allow # another process to automatically restart meta server. # metaServer.exitOnRestart = 0 # Meta server can use DNS resolver for meta data fetch on startup, when DNS # name(s) is(are) present in metaServer.metaDataSync.servers list. # # Use host os DNS resolver if set to 1. # If set to 0 then load /etc/hosts and /etc/resolv.conf and use QFS built-in non # blocking DNS resolver. # metaServer.useOsResolver = 0 # DNS resolver maximum cache size. # DNS cache is off if set to 0 or less. # Default is 8192 entries. 
# metaServer.resolverMaxCacheSize = 8192 # DNS resolver cache expiration time in seconds. # If set to 0 or less the DNS cache is off. # Default is -1. # metaServer.resolverCacheExpiration = -1 # ---------------- Meta server watchdog. -------------------------------------- # Watchdog thread polls meta server threads and aborts meta server process, # when configured to do so, in the case if one or more threads appear not to be # making progress due to likely meta server and / or OS malfunction. # Poll interval. # Default is 16 seconds when metaServer.watchdog.maxTimeoutCount is equal or # greater than 0, and 1.15 seconds otherwise. # Minimum interval is # max(1.25, 4.0 / max(1, metaServer.watchdog.pollIntervalSec)) seconds # when metaServer.watchdog.maxTimeoutCount equal or greater than 0, and 1.05 # seconds otherwise. # metaServer.watchdog.pollIntervalSec = 1.15 # Abort process if no progress detected after the specified consecutive poll # intervals. # Default is -1, do not abort the process. # metaServer.watchdog.maxTimeoutCount = -1