From f3d26ba60af7980cf13a9052f57a9bc9f6ad534d Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Tue, 23 Sep 2025 11:35:53 -0700 Subject: [PATCH 01/11] DRIVERS-2884: CSOT avoid connection churn when operations timeout --- .../tests/pending-response.json | 661 +++++++++ .../tests/pending-response.yml | 349 +++++ .../connection-monitoring-and-pooling.md | 294 +++- .../tests/README.md | 68 + .../logging/connection-logging-csot.json | 578 ++++++++ .../tests/logging/connection-logging-csot.yml | 254 ++++ .../tests/logging/connection-logging.yml | 1 + source/unified-test-format/schema-1.28.json | 1264 +++++++++++++++++ source/unified-test-format/schema-latest.json | 22 + .../unified-test-format.md | 31 +- 10 files changed, 3513 insertions(+), 9 deletions(-) create mode 100644 source/client-side-operations-timeout/tests/pending-response.json create mode 100644 source/client-side-operations-timeout/tests/pending-response.yml create mode 100644 source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json create mode 100644 source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.yml create mode 100644 source/unified-test-format/schema-1.28.json diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json new file mode 100644 index 0000000000..278368da57 --- /dev/null +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -0,0 +1,661 @@ +{ + "description": "Connection churn is prevented by reading pending responses during connection checkout", + "schemaVersion": "1.27", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "single", + "replicaset" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "failPointClient", + "useMultipleMongoses": false + } + }, + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1, + "minPoolSize": 1 + }, + "awaitMinPoolSizeMS": 10000, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "commandSucceededEvent", + "connectionCheckedOutEvent", + "connectionCheckedInEvent", + "connectionClosedEvent", + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed" + ] + } + }, + { + "database": { + "id": "test", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "coll", + "database": "test", + "collectionName": "coll" + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "test", + "documents": [] + } + ], + "tests": [ + { + "description": "non-timeout op with response and no error", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 100 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 75, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseSucceeded": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "non-timeout op with no response", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 3100 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseFailed": { + "reason": "timeout" + } + }, + { + "connectionClosedEvent": { + "reason": "error" + } + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "timeout op with response and no error", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 250 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 75, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "timeoutMS": 200, + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseSucceeded": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "multiple calls to drain buffer", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 500 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "filter": { + "_id": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseFailed": { + "reason": "timeout" + } + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseSucceeded": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "read op retries when connection closes server-side while draining response", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 500, + "closeConnection": true + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "wait", + "object": "testRunner", + "arguments": { + "ms": 500 + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseFailed": { + "reason": "error" + } + }, + { + "connectionClosedEvent": { + "reason": "error" + } + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "write op retries when connection closes server-side while draining response", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 500, + "closeConnection": true + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "wait", + "object": "testRunner", + "arguments": { + "ms": 500 + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseFailed": { + "reason": "error" + } + }, + { + "connectionClosedEvent": { + "reason": "error" + } + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + } + ] +} diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml new file mode 100644 index 0000000000..4cc2f613fb --- /dev/null +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -0,0 +1,349 @@ +description: "Connection churn is prevented by reading pending responses during connection checkout" +schemaVersion: "1.27" +runOnRequirements: + - minServerVersion: "4.4" + # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired + # after maxTimeMS, whereas mongod returns it after + # max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests + # will not pass on sharded clusters. + topologies: ["single", "replicaset"] +createEntities: + - client: + id: &failPointClient failPointClient + useMultipleMongoses: false + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + minPoolSize: 1 + awaitMinPoolSizeMS: 10000 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - commandSucceededEvent + - connectionCheckedOutEvent + - connectionCheckedInEvent + - connectionClosedEvent + - connectionPendingResponseSucceeded + - connectionPendingResponseStarted + - connectionPendingResponseFailed + - database: + id: &database test + client: *client + databaseName: *database + - collection: + id: &collection coll + database: *database + collectionName: *collection +initialData: + - collectionName: *collection + databaseName: *database + documents: [] +tests: + # Attempting a pending response read on a non-timeout operation that can + # immediately read from the TCP buffer should complete the pending read and + # the connection should be checked out. + - description: "non-timeout op with response and no error" + operations: + # Create a failpoint to block the first operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 100 + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 75 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Execute a subsequent operation to complete the read. + - name: find + object: *collection + arguments: + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} + - connectionPendingResponseSucceeded: {} # Find operation drains connection. + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Find succeeds. + # Attempting a pending response read on a non-timeout operation that gets no + # response from the server after 3s should close the connection. + - description: "non-timeout op with no response" + operations: + # Create a failpoint to block the first operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 3100 + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Execute a subsequent operation to complete the read. We expect failure + # in the pending read, resulting in a closed connection. However, the + # find should retry and succeed. + - name: find + object: *collection + arguments: + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} + - connectionPendingResponseFailed: + reason: timeout + - connectionClosedEvent: + reason: error + - connectionCheckedOutEvent: {} # Find's retry starts and succeeds + - connectionCheckedInEvent: {} + # Attempting a pending response read on a realistic timeout operation that can + # immediately read from the TCP buffer should complete the pending read and + # the connection should be checked out. + - description: "timeout op with response and no error" + operations: + # Create a failpoint to block the first operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 250 + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 75 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Execute a subsequent operation to complete the read. + - name: find + object: *collection + arguments: + timeoutMS: 200 + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} + - connectionPendingResponseSucceeded: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Find succeeds. + # It may take multiple calls to the pending response handler to drain the + # inbound buffer. + - description: "multiple calls to drain buffer" + operations: + # Create a failpoint to block the first and second operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 500 + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Execute a subsequent operation with a timeout less than the block time. + - name: find + object: *collection + arguments: + timeoutMS: 50 + filter: {_id: 1} + expectError: + isTimeoutError: true + # Execute a final operation to drain the buffer. + - name: find + object: *collection + arguments: + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} # First find fails + - connectionPendingResponseFailed: + reason: timeout + - connectionPendingResponseStarted: {} # Second find drains the buffer. + - connectionPendingResponseSucceeded: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Second find succeeds. + # If the connection is closed server-side while draining the response, the + # driver must retry with a different connection. + - description: "read op retries when connection closes server-side while draining response" + operations: + # Create a failpoint to block the first and second operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 500 + closeConnection: true + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + - name: wait + object: testRunner + arguments: + ms: 500 + # Execute a subsequent operation with a timeout less than the block time. + - name: find + object: *collection + arguments: + timeoutMS: 50 + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} # Pending read fails on first find + - connectionPendingResponseFailed: + reason: error + - connectionClosedEvent: + reason: error + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Find finishes. + # If the connection is closed server-side while draining the response, the + # driver must retry with a different connection. + - description: "write op retries when connection closes server-side while draining response" + operations: + # Create a failpoint to block the first and second operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 500 + closeConnection: true + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + - name: wait + object: testRunner + arguments: + ms: 500 + # Execute a subsequent operation with a timeout less than the block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: insert + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} # Pending read fails on first find + - connectionPendingResponseFailed: + reason: error + - connectionClosedEvent: + reason: error + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Find finishes. diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index b6e5ebc81b..c631275494 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -217,17 +217,19 @@ interface Connection { * The current state of the Connection. * * Possible values are the following: - * - "pending": The Connection has been created but has not yet been established. Contributes to - * totalConnectionCount and pendingConnectionCount. + * - "pending": The Connection has been created but has not yet been established. Contributes to + * totalConnectionCount and pendingConnectionCount. * - * - "available": The Connection has been established and is waiting in the pool to be checked - * out. Contributes to both totalConnectionCount and availableConnectionCount. + * - "pending response": The Connection is expecting a response for an operation that was timed out, but server's response was not fully read from the connection. * - * - "in use": The Connection has been established, checked out from the pool, and has yet - * to be checked back in. Contributes to totalConnectionCount. + * - "available": The Connection has been established and is waiting in the pool to be checked + * out. Contributes to both totalConnectionCount and availableConnectionCount. * - * - "closed": The Connection has had its socket closed and cannot be used for any future - * operations. Does not contribute to any connection counts. + * - "in use": The Connection has been established, checked out from the pool, and has yet + * to be checked back in. Contributes to totalConnectionCount. + * + * - "closed": The Connection has had its socket closed and cannot be used for any future + * operations. Does not contribute to any connection counts. * * Note: this field is mainly used for the purposes of describing state * in this specification. It is not required that drivers @@ -576,6 +578,87 @@ other threads from checking out [Connections](#connection) while establishing a Before a given [Connection](#connection) is returned from checkOut, it must be marked as "in use", and the pool's availableConnectionCount MUST be decremented. +##### Draining of Pending Response (drivers that support CSOT) + +If an operation times out while awaiting a server response: + +- **Non-exhaust commands**: If the command was not sent with the `exhaustAllowed` OP_MSG bit flag, the driver MUST: + 1. Put the connection into a "pending response" state. + 2. Record the current time in a way that can be updated while draining of pending response. +- **Exhaust commands**: If the command was sent with the `exhaustAllowed` bit, the driver MUST NOT transition the + connection to a "pending response" state and MUST instead close the connection. + +The next time connection in the "pending response" state is checked out, the driver MUST ensure that any remaining +response data is drained and the connection is ready to send the next command. + +1. **Persist and update timestamp**: The connection must record the current time immediately after the original timeout. + This timestamp MUST be updated to the current time whenever any bytes are successfully read, received, or consumed + while draining of the pending response. +2. **Aliveness check**: If driver uses synchronous read from the socket and the timestamp is older than 3 seconds, the + driver MUST perform aliveness check either performing a non-blocking read or using the minimal possible timeout to + check if at least one byte can be read/received. If at least one byte can be read the timestamp must be updated and + connection SHOULD be drained. If no bytes can be read, the connection MUST be closed. +3. **User-provided timeout**: If a user-provided timeout is specified for the operation, the driver MUST use the minimum + of (a) the remaining time before the 3 second "pending response" window elapses and (b) the remaining of + user-provided timeout as the effective timeout for the draining. +4. **Default timeout**: If no user-provided timeout is specified, the driver MUST use the minimum of (a) the remaining 3 + second "pending response" window and (b) the `waitQueueTimeoutMS` (if not supported by the driver remaining of + `serverSelectionTimeoutMS` should be used instead) as the effective timeout for the draining. +5. **On Timeout**: if timeout occurred while draining the pending response and the 3 seconds pending-response window was + not elapsed, Driver SHOULD return the connection to the pool, so the next operation can continue draining of the + pending response. +6. **Error or over-age**: If any error other than timeout occurs while draining the pending response, or if the draining + exceeds the 3 seconds pending-response window, the driver MUST close the connection. Driver SHOULD try to make + another checkout attempt accordingly to the Checking Out a Connection behavior. +7. **Clear pending state on success**: If the pending response is fully drained, the pending response state MUST be + cleared and connection can be used to execute the next command. + +```mermaid +sequenceDiagram + participant Driver + participant Pool + participant Conn as Connection (*) + participant Server + + Driver->>Pool: Checkout Connection (*) + Pool->>Driver: Return connection (*) + Driver->>Conn: Send operation (1) (CSOT enabled, maxTimeMS > 0, exhaustAllowed = false) + Conn->>Server: Send command + Server-->>Conn: (No response, socket times out) + + Conn->>Conn: Transition connection to "pending response" state, record current time + Conn->>Pool: Check connection back into pool + Pool->>Driver: Error + + Driver->>Pool: Checkout Connection (*) + Pool->>Conn: Draining of pending response from operation (1) + Conn->>Conn: Update pending read timestamp if bytes read + alt No error + Conn->>Conn: Clear pending response state + Conn->>Pool: Draining of pending response succeeded + Pool->>Driver: Return connection + else Error + alt Non-Timeout Error + Conn->>Conn: Close connection + Conn->>Pool: Draining of pending response failed + Pool->>Pool: Checkout next connection + else Timeout Error + alt Timeout window exceeded + Conn->>Conn: Close connection + Conn->>Pool: Draining of pending response failed + Pool->>Driver: Error + else Timeout window not exceeded + Conn->>Pool: Check connection back into pool + Pool->>Driver: Error + end + end + end +``` + +##### Pseudocode + +This subsection outlines the pseudocode steps for acquiring a connection from the pool. + ```text connection = Null tConnectionCheckOutStarted = current instant (use a monotonic clock if possible) @@ -592,6 +675,18 @@ try: if connection is perished: close connection connection = Null + if connection is in "pending response" state: + drain the pending response + if error: + if non-timeout error: + close connection + connection = Null + else if timeout error and last read timestamp on connection > 3 seconds old + close connection + connection = Null + else + check in the connection + connection = Null else if totalConnectionCount < maxPoolSize: if pendingConnectionCount < maxConnecting: connection = create connection @@ -633,6 +728,7 @@ if connection state is "pending": decrement pendingConnectionCount else: decrement availableConnectionCount + set connection state to "in use" # If there is no background thread, the pool MUST ensure that @@ -997,6 +1093,96 @@ interface ConnectionCheckedInEvent { */ connectionId: int64; } + +/** + * Emitted when the connection being checked out is attempting to read and + * discard a pending server response. + */ +interface PendingResponseStarted { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestId: int64; +} + +/** + * Emitted when the connection successfully read the pending read and is ready + * to be checked out. + */ +interface PendingResponseSucceeded { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The time it took to complete the pending read. Measured from when the + * `PendingResponseStarted` event is emitted as part of the connection + * checkout. + * + * Drivers SHOULD choose an idiomatic duration type. If the type doesn't + * include units (e.g. `int64`), include them in the name (for example, + * `durationMS`). + */ + duration: Duration; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestId: int64; +} + +/** + * Emitted when the connection being checked out failed to complete the pending + * read. + */ +interface PendingResponseFailed { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestId: int64; + + /** + * Duration until the pending response drain failed. Measured from when the + * `PendingResponseStarted` event is emitted as part of the same connection + * checkout. + * + * Drivers SHOULD choose an idiomatic duration type. If the type doesn't + * include units (e.g. `int64`), include them in the name (for example, + * `durationMS`). + */ + duration: Duration; + + /** + * The reason for why the pending read failed. + */ + reason: string; +} ``` ### Connection Pool Logging @@ -1194,6 +1380,57 @@ placeholders as appropriate: > Connection checked in: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}} +#### Connection Pending Response Started + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +| ------------------ | -------------- | ------------------------------------------------------------------- | +| message | string | "Pending response started" | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID associated with the network timeout | + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request +> ID={{requestId}} + +#### Connection Pending Response Succeeded + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +| ------------------ | ------------------ | -------------------------------------------------------------------- | +| message | string | "Pending response succeeded" | +| driverConnectionId | int64 | The driver-generated ID for the connection. | +| requestId | int64 | The driver-generated request ID associated with the network timeout. | +| durationMS | Int32/Int64/Double | `PendingResponseSucceeded.duration` converted to milliseconds. | + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request +> ID={{requestId}}, duration={{durationMS}} ms + +#### Connection Pending Response Failed + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +| ------------------ | ------------------ | ------------------------------------------------------------------- | +| message | string | "Pending response failed" | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID associated with the network timeout | +| reason | string | The reason for why the pending response read failed | +| durationMS | Int32/Int64/Double | `PendingResponseFailed.duration` converted to milliseconds. | + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request +> ID={{requestId}}, reason={{reason}}, duration={{durationMS}} ms + ### Connection Pool Errors A connection pool throws errors in specific circumstances. These Errors MUST be emitted by the pool. Errors SHOULD be @@ -1339,6 +1576,47 @@ some equivalent configuration, but this configuration will also require target f 5.0. The advantage of using Background Thread to manage perished connections is that it will work regardless of environment setup. +### Why is the pending response timeout not calculated dynamically? Why is it specifically 3 seconds? + +Using a dynamic timeout introduces additional complexity. In particular, RTT is an unreliable metric for predicting +future operation latency as both server and network conditions are unpredictable. Benchmarks demonstrate that even in +slow, high-latency scenarios (e.g. draining a 16MiB document over cross-country), reads reliably complete in under 1 +second. The 3-second timeout ensures that every realistic pending response draining will not result in premature +connection closure, but will still close in pathological conditions (e.g., a dead server or true network outage). + +### Why is the pending response read timeout not configurable? + +Because of the +["no knobs" mantra](https://github.com/mongodb/specifications/blob/master/source/driver-mantras.md#no-knobs). We can +always reconsider this in the future. + +### Why does the pending response timeout include the time the connection is idle in the pool? + +The pending response timeout includes idle time in the pool so that stale or unusable connections (e.g., if the socket +is dead) are detected and closed promptly instead of incurring an additional 3-second wait upon checkout. By tracking +the timeout during idle periods, we ensure that the driver can quickly determine if the connection should be closed with +a fast, non-blocking check as soon as it's checked out, avoiding introducing unnecessary latency while still protecting +connection availability. This approach maintains a balance between minimizing connection churn and ensuring users don't +encounter avoidable delays. + +### Why do we refresh the pending response timeout each time we successfully drain bytes from the TCP stream? + +By refreshing the timeout after each successful draining, we acknowledge that progress is being made, and we provide a +new time window for the next segment of data to arrive. Refreshing is less costly than re-establishing a connection +since there is no reason to believe that a new connection would reduce latency. + +### Why are exhaust cursors prohibited from transitioning a connection to the "pending response" state? + +Exhaust cursors are incompatible with the "pending response" connection state due to the non-deterministic nature of the +connection's completion, which occurs only when `moreToCome=0` is received. Consequently, discarding one of these +responses does not restore the connection to a reusable state. + +### Async IO Considerations for Awaiting a Pending Response + +Several drivers (e.g., event-loop or background-read designs) perform socket I/O asynchronously. After a socket times +out, the server's reply may be drained while the connection is idle in the pool. Therefore, the aliveness check only +applies to an undrained connection that has exceeded the pending-response window without progress. + ## Backwards Compatibility As mentioned in [Deprecated Options](#deprecated-options), some drivers currently implement the options `waitQueueSize` diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index 3d8aee40aa..fd8f0db706 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -21,6 +21,74 @@ The following tests have not yet been automated, but MUST still be tested: 5. When a check out attempt fails because connection set up throws an error, assert that a ConnectionCheckOutFailedEvent with reason="connectionError" is emitted. +### Pending Response + +Due to the complexity of managing a proxy layer, the following qualifying tests should only be run for non-ssl, +non-auth, and non-compression connections. + +#### Recover Partially Read Response + +This test verifies that if only part of a response was read before the timeout, the driver can drain the rest of the +response and reuse the connection for the next operation. + +1. Connect to the proxy server with `maxPoolSize=1` and `direct=true`, subscribing to the following CMAP events: + - `PendingResponseStarted` + - `PendingResponseSucceeded` + - `PendingResponseFailed` + - `ConnectionClosed` +2. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: + - `sendBytes`: We have 3 possible states here: + 1. Message size was partially read: random value between 1 and 3 inclusive + 2. Message size was read, body was not read at all: use 4 + 3. Message size was read, body read partially: random value between 5 and 100 inclusive + - `delayMS`: 400 ( to exceed the 200 ms timeout) + - `sendAll`: `true` +3. Assert that the operation failed with timeout error. +4. Issue another operation (e.g. another insert) and assert that it does not return an error. +5. Verify that the following sequence of events was observed: + - `ConnectionPendingResponseStarted` + - `ConnectionPendingResponseSucceeded` +6. Verify that NONE the following events was observed: + - `ConnectionPendingResponseFailed` + - `ConnectionClosed` + +#### Non-destructive Aliveness Check + +This test verifies that if a connection idles past the pending response window (3 seconds) after a partial header, the +aliveness check does not attempt to discard bytes from the TCP stream. + +1. Connect to the proxy server with `maxPoolSize=1` and `direct=true`, subscribing to the following CMAP events: + - `PendingResponseStarted` + - `PendingResponseSucceeded` + - `PendingResponseFailed` + - `ConnectionClosed` +2. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: + - `sendBytes`: any value between 1 and 3 + - `delayMS`: 400 ( to exceed the 200 ms timeout) + - `sendAll`: `true` +3. Assert that the operation failed with timeout error. +4. Sleep for 3 seconds +5. Issue another operation (e.g. another insert) and assert that it does not return an error. +6. Verify that the following sequence of events was observed: + - `ConnectionPendingResponseStarted` + - `ConnectionPendingResponseFailed` + - `ConnectionPendingResponseStarted` + - `ConnectionPendingResponseSucceeded` +7. Verify that NONE the following events was observed: + - `ConnectionClosed` + +#### Exhaust Cursors + +Drivers that support the `exhaustAllowed` `OP_MSG` bit flag must ensure that responses which contain `moreToCome` will +not result in a connection being put into a "pending response" state. Drivers that don't support this behavior can skip +this prose test. + +1. Configure a failpoint to block `getMore` for 500ms. +2. Insert > 2 records into the collection. +3. Create an exhaust cursor using `find` and iterate one `getMore` using `batchSize=1`. +4. Call a subsequent `getMore` on the exhaust cursor with a client-side timeout of 100ms. +5. Ensure that the `ConnectionClosed` event is emitted due to timeout. + ## Logging Tests Tests for connection pool logging can be found in the `/logging` subdirectory and are written in the diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json new file mode 100644 index 0000000000..7992911e85 --- /dev/null +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json @@ -0,0 +1,578 @@ +{ + "description": "connection-logging", + "schemaVersion": "1.24", + "runOnRequirements": [ + { + "topologies": [ + "single" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "failPointClient" + } + } + ], + "tests": [ + { + "description": "force a pending response read, fail first try, succeed second try", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "retryReads": false, + "appname": "clientAppName", + "maxPoolSize": 1, + "heartbeatFrequencyMS": 10000 + }, + "observeLogMessages": { + "connection": "debug" + } + } + }, + { + "database": { + "id": "test", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "coll", + "database": "test", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "runCommand", + "object": "test", + "arguments": { + "command": { + "ping": 1 + }, + "commandName": "ping" + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 200 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + }, + "timeoutMS": 50 + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "wait", + "object": "testRunner", + "arguments": { + "ms": 150 + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectLogMessages": [ + { + "client": "client", + "ignoreExtraMessages": true, + "messages": [ + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection pool created", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection pool ready", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection created", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection ready", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response failed", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + }, + "reason": "timeout" + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout failed", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response succeeded", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + } + ] + } + ] + } + ] +} diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.yml b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.yml new file mode 100644 index 0000000000..9153d35519 --- /dev/null +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.yml @@ -0,0 +1,254 @@ +description: "connection-logging" + +schemaVersion: "1.24" + +runOnRequirements: + - topologies: + - single # The number of log messages is different for each topology since there is a connection pool per host. + +createEntities: + - client: + id: &failPointClient failPointClient + +tests: + - description: "force a pending response read, fail first try, succeed second try" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + retryReads: false + appname: &clientAppName clientAppName + maxPoolSize: 1 + # use a high heartbeatFrequencyMS to avoid a successful monitor check marking the pool as + # ready (and emitting another event) during the course of test execution. + heartbeatFrequencyMS: 10000 + observeLogMessages: + connection: debug + - database: + id: &database test + client: *client + databaseName: *database + - collection: + id: &collection coll + database: *database + collectionName: *collection + + # Run a ping command to pre-load the pool with a connection. + - name: runCommand + object: *database + arguments: + command: + ping: 1 + commandName: ping + + # Create a failpoint to block the first operation + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 1 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 300 + + # Execute operation with timeout less than block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # Execute a subsequent operation which should time out during the + # pending response read attempt. + - name: find + object: *collection + arguments: + filter: { _id: 1 } + timeoutMS: 75 + expectError: + isTimeoutError: true + + # Wait for the blocked connection to free up. + - name: wait + # operation. + object: testRunner + arguments: + ms: 200 + + # Execute a subsequent operation which should completed the pending read. + - name: find + object: *collection + arguments: + filter: { _id: 1 } + + expectLogMessages: + - client: *client + ignoreExtraMessages: true + messages: + - level: debug + component: connection + data: + message: "Connection pool created" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection pool ready" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # Ping + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection created" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection ready" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # Insert + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # First Find + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response failed" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + reason: "timeout" + + - level: debug + component: connection + data: + message: "Connection checkout failed" + serverHost: { $$type: string } + serverPort: { $$type: [ int, long ] } + + # Second Find + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response succeeded" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml index 49868a062e..5a697edcdf 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml @@ -222,3 +222,4 @@ tests: reason: "An error occurred while trying to establish a new connection" error: { $$exists: true } durationMS: { $$type: [double, int, long] } + diff --git a/source/unified-test-format/schema-1.28.json b/source/unified-test-format/schema-1.28.json new file mode 100644 index 0000000000..c678104db8 --- /dev/null +++ b/source/unified-test-format/schema-1.28.json @@ -0,0 +1,1264 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema#", + "title": "Unified Test Format", + "type": "object", + "additionalProperties": false, + "required": [ + "description", + "schemaVersion", + "tests" + ], + "properties": { + "description": { + "type": "string" + }, + "schemaVersion": { + "$ref": "#/definitions/version" + }, + "runOnRequirements": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/runOnRequirement" + } + }, + "createEntities": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/entity" + } + }, + "initialData": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/collectionData" + } + }, + "tests": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/test" + } + }, + "_yamlAnchors": { + "type": "object", + "additionalProperties": true + } + }, + "definitions": { + "version": { + "type": "string", + "pattern": "^[0-9]+(\\.[0-9]+){1,2}$" + }, + "runOnRequirement": { + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "properties": { + "maxServerVersion": { + "$ref": "#/definitions/version" + }, + "minServerVersion": { + "$ref": "#/definitions/version" + }, + "topologies": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "single", + "replicaset", + "sharded", + "sharded-replicaset", + "load-balanced" + ] + } + }, + "serverless": { + "type": "string", + "enum": [ + "require", + "forbid", + "allow" + ] + }, + "serverParameters": { + "type": "object", + "minProperties": 1 + }, + "auth": { + "type": "boolean" + }, + "authMechanism": { + "type": "string" + }, + "csfle": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "properties": { + "minLibmongocryptVersion": { + "$ref": "#/definitions/version" + } + } + } + ] + } + } + }, + "entity": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "client": { + "type": "object", + "additionalProperties": false, + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + }, + "uriOptions": { + "type": "object" + }, + "useMultipleMongoses": { + "type": "boolean" + }, + "observeEvents": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent", + "poolCreatedEvent", + "poolReadyEvent", + "poolClearedEvent", + "poolClosedEvent", + "connectionCreatedEvent", + "connectionReadyEvent", + "connectionClosedEvent", + "connectionCheckOutStartedEvent", + "connectionCheckOutFailedEvent", + "connectionCheckedOutEvent", + "connectionCheckedInEvent", + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed", + "serverDescriptionChangedEvent", + "topologyDescriptionChangedEvent", + "topologyOpeningEvent", + "topologyClosedEvent" + ] + } + }, + "ignoreCommandMonitoringEvents": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "storeEventsAsEntities": { + "deprecated": true, + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/storeEventsAsEntity" + } + }, + "observeLogMessages": { + "type": "object", + "minProperties": 1, + "additionalProperties": false, + "properties": { + "command": { + "$ref": "#/definitions/logSeverityLevel" + }, + "topology": { + "$ref": "#/definitions/logSeverityLevel" + }, + "serverSelection": { + "$ref": "#/definitions/logSeverityLevel" + }, + "connection": { + "$ref": "#/definitions/logSeverityLevel" + } + } + }, + "observeTracingMessages": { + "type": "object", + "additionalProperties": false, + "properties": { + "enableCommandPayload": { + "type": "boolean" + } + } + }, + "serverApi": { + "$ref": "#/definitions/serverApi" + }, + "observeSensitiveCommands": { + "type": "boolean" + }, + "autoEncryptOpts": { + "type": "object", + "additionalProperties": false, + "required": [ + "keyVaultNamespace", + "kmsProviders" + ], + "properties": { + "keyVaultNamespace": { + "type": "string" + }, + "bypassAutoEncryption": { + "type": "boolean" + }, + "kmsProviders": { + "$ref": "#/definitions/kmsProviders" + }, + "schemaMap": { + "type": "object", + "additionalProperties": { + "type": "object" + } + }, + "extraOptions": { + "type": "object" + }, + "encryptedFieldsMap": { + "type": "object", + "additionalProperties": { + "type": "object" + } + }, + "bypassQueryAnalysis": { + "type": "boolean" + }, + "keyExpirationMS": { + "type": "integer" + } + } + }, + "awaitMinPoolSizeMS": { + "type": "integer" + } + } + }, + "clientEncryption": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "clientEncryptionOpts" + ], + "properties": { + "id": { + "type": "string" + }, + "clientEncryptionOpts": { + "$ref": "#/definitions/clientEncryptionOpts" + } + } + }, + "database": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "client", + "databaseName" + ], + "properties": { + "id": { + "type": "string" + }, + "client": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "databaseOptions": { + "$ref": "#/definitions/collectionOrDatabaseOptions" + } + } + }, + "collection": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "database", + "collectionName" + ], + "properties": { + "id": { + "type": "string" + }, + "database": { + "type": "string" + }, + "collectionName": { + "type": "string" + }, + "collectionOptions": { + "$ref": "#/definitions/collectionOrDatabaseOptions" + } + } + }, + "session": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "client" + ], + "properties": { + "id": { + "type": "string" + }, + "client": { + "type": "string" + }, + "sessionOptions": { + "type": "object" + } + } + }, + "bucket": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "database" + ], + "properties": { + "id": { + "type": "string" + }, + "database": { + "type": "string" + }, + "bucketOptions": { + "type": "object" + } + } + }, + "thread": { + "type": "object", + "additionalProperties": false, + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + } + } + } + } + }, + "logComponent": { + "type": "string", + "enum": [ + "command", + "topology", + "serverSelection", + "connection" + ] + }, + "spanComponent": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "attributes" + ], + "properties": { + "name": { + "type": "string" + }, + "attributes": { + "type": "object" + }, + "nested": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/spanComponent" + } + } + } + }, + "logSeverityLevel": { + "type": "string", + "enum": [ + "emergency", + "alert", + "critical", + "error", + "warning", + "notice", + "info", + "debug", + "trace" + ] + }, + "clientEncryptionOpts": { + "type": "object", + "additionalProperties": false, + "required": [ + "keyVaultClient", + "keyVaultNamespace", + "kmsProviders" + ], + "properties": { + "keyVaultClient": { + "type": "string" + }, + "keyVaultNamespace": { + "type": "string" + }, + "kmsProviders": { + "$ref": "#/definitions/kmsProviders" + }, + "keyExpirationMS": { + "type": "integer" + } + } + }, + "kmsProviders": { + "$defs": { + "stringOrPlaceholder": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": false, + "required": [ + "$$placeholder" + ], + "properties": { + "$$placeholder": {} + } + } + ] + } + }, + "type": "object", + "additionalProperties": false, + "patternProperties": { + "^aws(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "accessKeyId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "secretAccessKey": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "sessionToken": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^azure(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "tenantId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "clientId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "clientSecret": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "identityPlatformEndpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^gcp(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "email": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "privateKey": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "endpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^kmip(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "endpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^local(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "key": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + } + } + }, + "storeEventsAsEntity": { + "deprecated": true, + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "events" + ], + "properties": { + "id": { + "type": "string" + }, + "events": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "PoolCreatedEvent", + "PoolReadyEvent", + "PoolClearedEvent", + "PoolClosedEvent", + "ConnectionCreatedEvent", + "ConnectionReadyEvent", + "ConnectionClosedEvent", + "ConnectionCheckOutStartedEvent", + "ConnectionCheckOutFailedEvent", + "ConnectionCheckedOutEvent", + "ConnectionCheckedInEvent", + "CommandStartedEvent", + "CommandSucceededEvent", + "CommandFailedEvent", + "ServerDescriptionChangedEvent", + "TopologyDescriptionChangedEvent" + ] + } + } + } + }, + "collectionData": { + "type": "object", + "additionalProperties": false, + "required": [ + "collectionName", + "databaseName", + "documents" + ], + "properties": { + "collectionName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "createOptions": { + "type": "object", + "properties": { + "writeConcern": false + } + }, + "documents": { + "type": "array", + "items": { + "type": "object" + } + } + } + }, + "expectedEventsForClient": { + "type": "object", + "additionalProperties": false, + "required": [ + "client", + "events" + ], + "properties": { + "client": { + "type": "string" + }, + "eventType": { + "type": "string", + "enum": [ + "command", + "cmap", + "sdam" + ] + }, + "events": { + "type": "array" + }, + "ignoreExtraEvents": { + "type": "boolean" + } + }, + "oneOf": [ + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "command" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCommandEvent" + } + } + } + }, + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "cmap" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCmapEvent" + } + } + } + }, + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "sdam" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedSdamEvent" + } + } + } + }, + { + "additionalProperties": false, + "properties": { + "client": { + "type": "string" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCommandEvent" + } + }, + "ignoreExtraEvents": { + "type": "boolean" + } + } + } + ] + }, + "expectedCommandEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "commandStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "command": { + "type": "object" + }, + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + }, + "commandSucceededEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reply": { + "type": "object" + }, + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + }, + "commandFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + } + } + }, + "expectedCmapEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "poolCreatedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "poolReadyEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "poolClearedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "hasServiceId": { + "type": "boolean" + }, + "interruptInUseConnections": { + "type": "boolean" + } + } + }, + "poolClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCreatedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionReadyEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } + }, + "connectionCheckOutStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCheckOutFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } + }, + "connectionCheckedOutEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCheckedInEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseStarted": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseSucceeded": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseFailed": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } + } + } + }, + "expectedSdamEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "serverDescriptionChangedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "previousDescription": { + "$ref": "#/definitions/serverDescription" + }, + "newDescription": { + "$ref": "#/definitions/serverDescription" + } + } + }, + "topologyDescriptionChangedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "previousDescription": { + "$ref": "#/definitions/topologyDescription" + }, + "newDescription": { + "$ref": "#/definitions/topologyDescription" + } + } + }, + "serverHeartbeatStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "serverHeartbeatSucceededEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "serverHeartbeatFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "topologyOpeningEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "topologyClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + } + } + }, + "serverDescription": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "enum": [ + "Standalone", + "Mongos", + "PossiblePrimary", + "RSPrimary", + "RSSecondary", + "RSOther", + "RSArbiter", + "RSGhost", + "LoadBalancer", + "Unknown" + ] + } + } + }, + "topologyDescription": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "enum": [ + "Single", + "Unknown", + "ReplicaSetNoPrimary", + "ReplicaSetWithPrimary", + "Sharded", + "LoadBalanced" + ] + } + } + }, + "expectedLogMessagesForClient": { + "type": "object", + "additionalProperties": false, + "required": [ + "client", + "messages" + ], + "properties": { + "client": { + "type": "string" + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedLogMessage" + } + }, + "ignoreExtraMessages": { + "type": "boolean" + }, + "ignoreMessages": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedLogMessage" + } + } + } + }, + "expectTracingMessagesForClient": { + "additionalProperties": false, + "type": "object", + "required": [ + "client", + "spans" + ], + "properties": { + "client": { + "type": "string" + }, + "ignoreExtraSpans": { + "type": "boolean" + }, + "spans": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/spanComponent" + } + } + } + }, + "expectedLogMessage": { + "type": "object", + "additionalProperties": false, + "required": [ + "level", + "component", + "data" + ], + "properties": { + "level": { + "$ref": "#/definitions/logSeverityLevel" + }, + "component": { + "$ref": "#/definitions/logComponent" + }, + "data": { + "type": "object" + }, + "failureIsRedacted": { + "type": "boolean" + } + } + }, + "collectionOrDatabaseOptions": { + "type": "object", + "additionalProperties": false, + "properties": { + "readConcern": { + "type": "object" + }, + "readPreference": { + "type": "object" + }, + "writeConcern": { + "type": "object" + }, + "timeoutMS": { + "type": "integer" + } + } + }, + "serverApi": { + "type": "object", + "additionalProperties": false, + "required": [ + "version" + ], + "properties": { + "version": { + "type": "string" + }, + "strict": { + "type": "boolean" + }, + "deprecationErrors": { + "type": "boolean" + } + } + }, + "operation": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "object" + ], + "properties": { + "name": { + "type": "string" + }, + "object": { + "type": "string" + }, + "arguments": { + "type": "object" + }, + "ignoreResultAndError": { + "type": "boolean" + }, + "expectError": { + "$ref": "#/definitions/expectedError" + }, + "expectResult": {}, + "saveResultAsEntity": { + "type": "string" + } + }, + "allOf": [ + { + "not": { + "required": [ + "expectError", + "expectResult" + ] + } + }, + { + "not": { + "required": [ + "expectError", + "saveResultAsEntity" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "expectResult" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "expectError" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "saveResultAsEntity" + ] + } + } + ] + }, + "expectedError": { + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "properties": { + "isError": { + "type": "boolean", + "const": true + }, + "isClientError": { + "type": "boolean" + }, + "isTimeoutError": { + "type": "boolean" + }, + "errorContains": { + "type": "string" + }, + "errorCode": { + "type": "integer" + }, + "errorCodeName": { + "type": "string" + }, + "errorLabelsContain": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "errorLabelsOmit": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "writeErrors": { + "type": "object" + }, + "writeConcernErrors": { + "type": "array", + "items": { + "type": "object" + } + }, + "errorResponse": { + "type": "object" + }, + "expectResult": {} + } + }, + "test": { + "type": "object", + "additionalProperties": false, + "required": [ + "description", + "operations" + ], + "properties": { + "description": { + "type": "string" + }, + "runOnRequirements": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/runOnRequirement" + } + }, + "skipReason": { + "type": "string" + }, + "operations": { + "type": "array", + "items": { + "$ref": "#/definitions/operation" + } + }, + "expectEvents": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/expectedEventsForClient" + } + }, + "expectLogMessages": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/expectedLogMessagesForClient" + } + }, + "expectTracingMessages": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/expectTracingMessagesForClient" + } + }, + "outcome": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/collectionData" + } + } + } + } + } +} diff --git a/source/unified-test-format/schema-latest.json b/source/unified-test-format/schema-latest.json index d195ba5af0..c678104db8 100644 --- a/source/unified-test-format/schema-latest.json +++ b/source/unified-test-format/schema-latest.json @@ -157,6 +157,9 @@ "connectionCheckOutFailedEvent", "connectionCheckedOutEvent", "connectionCheckedInEvent", + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed", "serverDescriptionChangedEvent", "topologyDescriptionChangedEvent", "topologyOpeningEvent", @@ -837,6 +840,25 @@ "type": "object", "additionalProperties": false, "properties": {} + }, + "connectionPendingResponseStarted": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseSucceeded": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseFailed": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } } } }, diff --git a/source/unified-test-format/unified-test-format.md b/source/unified-test-format/unified-test-format.md index 59700ff510..0032aa9df3 100644 --- a/source/unified-test-format/unified-test-format.md +++ b/source/unified-test-format/unified-test-format.md @@ -2,7 +2,7 @@ - Status: Accepted - Minimum Server Version: N/A -- Current Schema Version: 1.27.0 +- Current Schema Version: 1.28.0 ______________________________________________________________________ @@ -486,6 +486,9 @@ The structure of this object is as follows: - [connectionCheckOutFailedEvent](#expectedEvent_connectionCheckOutFailedEvent) - [connectionCheckedOutEvent](#expectedEvent_connectionCheckedOutEvent) - [connectionCheckedInEvent](#expectedEvent_connectionCheckedInEvent) + - [connectionPendingResponseStarted](#expectedEvent_connectionPendingResponseStarted) + - [connectionPendingResponseSucceeded](#expectedEvent_connectionPendingResponseSucceeded) + - [connectionPendingResponseFailed](#expectedEvent_connectionPendingResponseFailed) - [serverDescriptionChangedEvent](#expectedEvent_serverDescriptionChangedEvent) - [serverHeartbeatStartedEvent](#expectedEvent_serverHeartbeatStartedEvent) - [serverHeartbeatSucceededEvent](#expectedEvent_serverHeartbeatSucceededEvent) @@ -1133,6 +1136,27 @@ The structure of this object is as follows: - `connectionCheckedInEvent`: Optional object. If present, this object MUST be an empty document as all fields in this event are non-deterministic. + + +- `connectionPendingResponseStarted`: Optional object. If present, this object MUST be an empty document as all fields + in this event are non-deterministic. + + + +- `connectionPendingResponseSucceeded`: Optional object. If present, this object MUST be an empty document as all fields + in this event are non-deterministic. + + + +- `connectionPendingResponseFailed`: Optional object. Assertions for one or more + [connectionPendingResponseFailed](../connection-monitoring-and-pooling/connection-monitoring-and-pooling.md#events) + fields. + + The structure of this object is as follows: + + - `reason`: Optional string. Test runners MUST assert that the reason in the published event matches this value. Valid + values for this field are defined in the CMAP spec. + ##### expectedSdamEvent The structure of this object is as follows: @@ -3459,6 +3483,11 @@ other specs *and* collating spec changes developed in parallel or during the sam ## Changelog +- 2025-09-25: **Schema version 1.28**. + + Add draining of pending response events to `expectEvents` and `observeEvents` for client entities. These include + `PendingResponseStartedEvent`, `PendingResponseSucceededEvent`, and `PendingResponseFailedEvent`. + - 2025-09-17: **Schema version 1.27.** Add `observeTracingMessages` configuration for clients and `expectTracingMessages` for test expectations. This allows From 3e1f8e8f827de1d0340d3af27b2cc610edd62d0a Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Thu, 25 Sep 2025 17:39:07 -0700 Subject: [PATCH 02/11] pr --- .../connection-monitoring-and-pooling.md | 57 +++++++++---------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index c631275494..04dd3ea95a 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1095,8 +1095,7 @@ interface ConnectionCheckedInEvent { } /** - * Emitted when the connection being checked out is attempting to read and - * discard a pending server response. + * Emitted when the connection is attempting to drain of the pending response. */ interface PendingResponseStarted { /** @@ -1110,14 +1109,13 @@ interface PendingResponseStarted { connectionId: int64; /** - * The driver-generated request ID associated with the network timeout. + * The driver-generated request ID of the operation that caused the pending response state. */ requestId: int64; } /** - * Emitted when the connection successfully read the pending read and is ready - * to be checked out. + * Emitted when the connection successfully drained of the pending response. */ interface PendingResponseSucceeded { /** @@ -1131,7 +1129,7 @@ interface PendingResponseSucceeded { connectionId: int64; /** - * The time it took to complete the pending read. Measured from when the + * The time it took to complete the pending response drainig. Measured from when the * `PendingResponseStarted` event is emitted as part of the connection * checkout. * @@ -1142,14 +1140,13 @@ interface PendingResponseSucceeded { duration: Duration; /** - * The driver-generated request ID associated with the network timeout. + * The driver-generated request ID of the operation that caused the pending response state. */ requestId: int64; } /** - * Emitted when the connection being checked out failed to complete the pending - * read. + * Emitted when the connection failed to drain of the pending response. */ interface PendingResponseFailed { /** @@ -1163,13 +1160,13 @@ interface PendingResponseFailed { connectionId: int64; /** - * The driver-generated request ID associated with the network timeout. + * The driver-generated request ID of the operation that caused the pending response state. */ requestId: int64; /** - * Duration until the pending response drain failed. Measured from when the - * `PendingResponseStarted` event is emitted as part of the same connection + * Duration until the pending response draining failed. Measured from when the + * `PendingResponseStarted` event is emitted as part of the connection * checkout. * * Drivers SHOULD choose an idiomatic duration type. If the type doesn't @@ -1384,11 +1381,11 @@ placeholders as appropriate: In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -| ------------------ | -------------- | ------------------------------------------------------------------- | -| message | string | "Pending response started" | -| driverConnectionId | int64 | The driver-generated ID for the connection | -| requestId | int64 | The driver-generated request ID associated with the network timeout | +| Key | Suggested Type | Value | +| ------------------ | -------------- | ---------------------------------------------------------------------------------------- | +| message | string | "Pending response started" | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID of the operation that caused the pending response state. | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: @@ -1400,12 +1397,12 @@ placeholders as appropriate: In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -| ------------------ | ------------------ | -------------------------------------------------------------------- | -| message | string | "Pending response succeeded" | -| driverConnectionId | int64 | The driver-generated ID for the connection. | -| requestId | int64 | The driver-generated request ID associated with the network timeout. | -| durationMS | Int32/Int64/Double | `PendingResponseSucceeded.duration` converted to milliseconds. | +| Key | Suggested Type | Value | +| ------------------ | ------------------ | ---------------------------------------------------------------------------------------- | +| message | string | "Pending response succeeded" | +| driverConnectionId | int64 | The driver-generated ID for the connection. | +| requestId | int64 | The driver-generated request ID of the operation that caused the pending response state. | +| durationMS | Int32/Int64/Double | `PendingResponseSucceeded.duration` converted to milliseconds. | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: @@ -1417,13 +1414,13 @@ placeholders as appropriate: In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -| ------------------ | ------------------ | ------------------------------------------------------------------- | -| message | string | "Pending response failed" | -| driverConnectionId | int64 | The driver-generated ID for the connection | -| requestId | int64 | The driver-generated request ID associated with the network timeout | -| reason | string | The reason for why the pending response read failed | -| durationMS | Int32/Int64/Double | `PendingResponseFailed.duration` converted to milliseconds. | +| Key | Suggested Type | Value | +| ------------------ | ------------------ | ---------------------------------------------------------------------------------------- | +| message | string | "Pending response failed" | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID of the operation that caused the pending response state. | +| reason | string | The reason for why the pending response read failed | +| durationMS | Int32/Int64/Double | `PendingResponseFailed.duration` converted to milliseconds. | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: From 250c1231993e415084954189cfc41e3c78d83bd4 Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Thu, 25 Sep 2025 20:09:39 -0700 Subject: [PATCH 03/11] Fix unified spec tests --- .../tests/pending-response.json | 2 +- .../tests/pending-response.yml | 2 +- .../tests/logging/connection-logging-csot.json | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 278368da57..8a015344f3 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -1,6 +1,6 @@ { "description": "Connection churn is prevented by reading pending responses during connection checkout", - "schemaVersion": "1.27", + "schemaVersion": "1.28", "runOnRequirements": [ { "minServerVersion": "4.4", diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index 4cc2f613fb..b8e8679fa9 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -1,5 +1,5 @@ description: "Connection churn is prevented by reading pending responses during connection checkout" -schemaVersion: "1.27" +schemaVersion: "1.28" runOnRequirements: - minServerVersion: "4.4" # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json index 7992911e85..f39aaf9b76 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json @@ -80,7 +80,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 200 + "blockTimeMS": 300 } } } @@ -106,7 +106,7 @@ "filter": { "_id": 1 }, - "timeoutMS": 50 + "timeoutMS": 75 }, "expectError": { "isTimeoutError": true @@ -116,7 +116,7 @@ "name": "wait", "object": "testRunner", "arguments": { - "ms": 150 + "ms": 200 } }, { From 1fcc0997b1296979c81aaae4536d276209360e6f Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov <31327136+sanych-sun@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:29:48 -0700 Subject: [PATCH 04/11] Update source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md Co-authored-by: Bailey Pearson --- .../connection-monitoring-and-pooling.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 04dd3ea95a..82b87447e6 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -602,8 +602,7 @@ response data is drained and the connection is ready to send the next command. of (a) the remaining time before the 3 second "pending response" window elapses and (b) the remaining of user-provided timeout as the effective timeout for the draining. 4. **Default timeout**: If no user-provided timeout is specified, the driver MUST use the minimum of (a) the remaining 3 - second "pending response" window and (b) the `waitQueueTimeoutMS` (if not supported by the driver remaining of - `serverSelectionTimeoutMS` should be used instead) as the effective timeout for the draining. + second "pending response" window and (b) remaining timeout for connection checkout. 5. **On Timeout**: if timeout occurred while draining the pending response and the 3 seconds pending-response window was not elapsed, Driver SHOULD return the connection to the pool, so the next operation can continue draining of the pending response. From d97a995944d4147c083e991f316f9ba635c9d3ba Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov <31327136+sanych-sun@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:33:01 -0700 Subject: [PATCH 05/11] Update source/client-side-operations-timeout/tests/pending-response.yml Co-authored-by: Bailey Pearson --- .../client-side-operations-timeout/tests/pending-response.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index b8e8679fa9..a649e2db19 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -294,7 +294,7 @@ tests: - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Find finishes. # If the connection is closed server-side while draining the response, the - # driver must retry with a different connection. + # driver must retry with a different connection (write operation) - description: "write op retries when connection closes server-side while draining response" operations: # Create a failpoint to block the first and second operation. From f442ccec4b6a439f4a5892e959a248408d5f0472 Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Mon, 6 Oct 2025 14:26:56 -0700 Subject: [PATCH 06/11] PR --- .../tests/pending-response.json | 104 +++++++++++++++--- .../tests/pending-response.yml | 70 ++++++++---- .../connection-monitoring-and-pooling.md | 19 +++- source/unified-test-format/schema-1.28.json | 12 +- source/unified-test-format/schema-latest.json | 12 +- .../unified-test-format.md | 24 ++-- 6 files changed, 174 insertions(+), 67 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 8a015344f3..7c6c969ffb 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -29,12 +29,16 @@ "observeEvents": [ "commandFailedEvent", "commandSucceededEvent", + "connectionCheckOutStartedEvent", "connectionCheckedOutEvent", + "ConnectionCheckOutFailedEvent", "connectionCheckedInEvent", + "connectionCreatedEvent", + "connectionReadyEvent", "connectionClosedEvent", - "connectionPendingResponseSucceeded", - "connectionPendingResponseStarted", - "connectionPendingResponseFailed" + "connectionPendingResponseSucceededEvent", + "connectionPendingResponseStartedEvent", + "connectionPendingResponseFailedEvent" ] } }, @@ -128,6 +132,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -135,10 +142,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} + }, + { + "connectionPendingResponseStartedEvent": {} }, { - "connectionPendingResponseSucceeded": {} + "connectionPendingResponseSucceededEvent": {} }, { "connectionCheckedOutEvent": {} @@ -217,6 +227,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -224,10 +237,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} }, { - "connectionPendingResponseFailed": { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { "reason": "timeout" } }, @@ -236,6 +252,18 @@ "reason": "error" } }, + { + "connectionCheckOutFailedEvent": {} + }, + { + "connectionCheckOutStartedEvent": {} + }, + { + "connectionCreatedEvent": {} + }, + { + "connectionReadyEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -314,6 +342,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -321,10 +352,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} + }, + { + "connectionPendingResponseStartedEvent": {} }, { - "connectionPendingResponseSucceeded": {} + "connectionPendingResponseSucceededEvent": {} }, { "connectionCheckedOutEvent": {} @@ -416,6 +450,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -423,18 +460,27 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} }, { - "connectionPendingResponseFailed": { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { "reason": "timeout" } }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutFailedEvent": {} + }, + { + "connectionCheckOutStartedEvent": {} + }, + { + "connectionPendingResponseStartedEvent": {} }, { - "connectionPendingResponseSucceeded": {} + "connectionPendingResponseSucceededEvent": {} }, { "connectionCheckedOutEvent": {} @@ -522,6 +568,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -529,10 +578,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} }, { - "connectionPendingResponseFailed": { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { "reason": "error" } }, @@ -541,6 +593,12 @@ "reason": "error" } }, + { + "connectionCreatedEvent": {} + }, + { + "connectionReadyEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -628,6 +686,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -635,10 +696,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} }, { - "connectionPendingResponseFailed": { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { "reason": "error" } }, @@ -647,6 +711,12 @@ "reason": "error" } }, + { + "connectionCreatedEvent": {} + }, + { + "connectionReadyEvent": {} + }, { "connectionCheckedOutEvent": {} }, diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index a649e2db19..cfd3a70eb3 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -21,12 +21,16 @@ createEntities: observeEvents: - commandFailedEvent - commandSucceededEvent + - connectionCheckOutStartedEvent - connectionCheckedOutEvent + - ConnectionCheckOutFailedEvent - connectionCheckedInEvent + - connectionCreatedEvent + - connectionReadyEvent - connectionClosedEvent - - connectionPendingResponseSucceeded - - connectionPendingResponseStarted - - connectionPendingResponseFailed + - connectionPendingResponseSucceededEvent + - connectionPendingResponseStartedEvent + - connectionPendingResponseFailedEvent - database: id: &database test client: *client @@ -80,10 +84,12 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} - - connectionPendingResponseSucceeded: {} # Find operation drains connection. + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} + - connectionPendingResponseSucceededEvent: {} # Find operation drains connection. - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Find succeeds. # Attempting a pending response read on a non-timeout operation that gets no @@ -127,14 +133,20 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} - - connectionPendingResponseFailed: + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} + - connectionPendingResponseFailedEvent: reason: timeout - connectionClosedEvent: reason: error - - connectionCheckedOutEvent: {} # Find's retry starts and succeeds + - connectionCheckOutFailedEvent: {} + - connectionCheckOutStartedEvent: {} # Find's retry starts and succeeds + - connectionCreatedEvent: {} + - connectionReadyEvent: {} + - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Attempting a pending response read on a realistic timeout operation that can # immediately read from the TCP buffer should complete the pending read and @@ -177,10 +189,12 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} - - connectionPendingResponseSucceeded: {} + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} + - connectionPendingResponseSucceededEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Find succeeds. # It may take multiple calls to the pending response handler to drain the @@ -230,13 +244,17 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} # First find fails - - connectionPendingResponseFailed: + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} # First find fails + - connectionPendingResponseFailedEvent: reason: timeout - - connectionPendingResponseStarted: {} # Second find drains the buffer. - - connectionPendingResponseSucceeded: {} + - connectionCheckOutFailedEvent: {} + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} # Second find drains the buffer. + - connectionPendingResponseSucceededEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Second find succeeds. # If the connection is closed server-side while draining the response, the @@ -284,13 +302,17 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} # Pending read fails on first find - - connectionPendingResponseFailed: + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} # Pending read fails on first find + - connectionPendingResponseFailedEvent: reason: error - connectionClosedEvent: reason: error + - connectionCreatedEvent: { } + - connectionReadyEvent: { } - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Find finishes. # If the connection is closed server-side while draining the response, the @@ -338,12 +360,16 @@ tests: - client: *client eventType: cmap events: - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} # Pending read fails on first find - - connectionPendingResponseFailed: + - connectionCheckOutStartedEvent: { } + - connectionCheckedOutEvent: { } + - connectionCheckedInEvent: { } # Insert fails. + - connectionCheckOutStartedEvent: { } + - connectionPendingResponseStartedEvent: { } # Pending read fails on first find + - connectionPendingResponseFailedEvent: reason: error - connectionClosedEvent: reason: error - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # Find finishes. + - connectionCreatedEvent: { } + - connectionReadyEvent: { } + - connectionCheckedOutEvent: { } + - connectionCheckedInEvent: { } # Find finishes. diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 82b87447e6..3e08de65ac 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -235,7 +235,7 @@ interface Connection { * in this specification. It is not required that drivers * actually include this field in their implementations of Connection. */ - state: "pending" | "available" | "in use" | "closed"; + state: "pending" | "pending response" | "available" | "in use" | "closed"; } ``` @@ -675,17 +675,26 @@ try: close connection connection = Null if connection is in "pending response" state: + tConnectionDrainingStarted = current instant (use a monotonic clock if possible) + emit PendingResponseStartedEvent and equivalent log message drain the pending response if error: + tConnectionDrainingFailed = current instant (use a monotonic clock if possible) if non-timeout error: + emit PendingResponseFailedEvent(reason="error", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message close connection connection = Null else if timeout error and last read timestamp on connection > 3 seconds old + emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message close connection connection = Null else + emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message check in the connection connection = Null + else + tConnectionDrainingSucceeded = current instant (use a monotonic clock if possible) + emit PendingResponseSucceededEvent(duration = tConnectionDrainingSucceeded - tConnectionDrainingStarted) and equivalent log message else if totalConnectionCount < maxPoolSize: if pendingConnectionCount < maxConnecting: connection = create connection @@ -1096,7 +1105,7 @@ interface ConnectionCheckedInEvent { /** * Emitted when the connection is attempting to drain of the pending response. */ -interface PendingResponseStarted { +interface PendingResponseStartedEvent { /** * The ServerAddress of the Endpoint the pool is attempting to connect to. */ @@ -1116,7 +1125,7 @@ interface PendingResponseStarted { /** * Emitted when the connection successfully drained of the pending response. */ -interface PendingResponseSucceeded { +interface PendingResponseSucceededEvent { /** * The ServerAddress of the Endpoint the pool is attempting to connect to. */ @@ -1147,7 +1156,7 @@ interface PendingResponseSucceeded { /** * Emitted when the connection failed to drain of the pending response. */ -interface PendingResponseFailed { +interface PendingResponseFailedEvent { /** * The ServerAddress of the Endpoint the pool is attempting to connect to. */ @@ -1649,6 +1658,8 @@ to close and remove from its pool a [Connection](#connection) which has unread e ## Changelog +- 2025-09-25: Added pending response draining behavior. + - 2025-01-22: Clarify durationMS in logs may be Int32/Int64/Double. - 2024-11-27: Relaxed the WaitQueue fairness requirement. diff --git a/source/unified-test-format/schema-1.28.json b/source/unified-test-format/schema-1.28.json index c678104db8..f3525c1160 100644 --- a/source/unified-test-format/schema-1.28.json +++ b/source/unified-test-format/schema-1.28.json @@ -157,9 +157,9 @@ "connectionCheckOutFailedEvent", "connectionCheckedOutEvent", "connectionCheckedInEvent", - "connectionPendingResponseSucceeded", - "connectionPendingResponseStarted", - "connectionPendingResponseFailed", + "connectionPendingResponseSucceededEvent", + "connectionPendingResponseStartedEvent", + "connectionPendingResponseFailedEvent", "serverDescriptionChangedEvent", "topologyDescriptionChangedEvent", "topologyOpeningEvent", @@ -841,17 +841,17 @@ "additionalProperties": false, "properties": {} }, - "connectionPendingResponseStarted": { + "connectionPendingResponseStartedEvent": { "type": "object", "additionalProperties": false, "properties": {} }, - "connectionPendingResponseSucceeded": { + "connectionPendingResponseSucceededEvent": { "type": "object", "additionalProperties": false, "properties": {} }, - "connectionPendingResponseFailed": { + "connectionPendingResponseFailedEvent": { "type": "object", "additionalProperties": false, "properties": { diff --git a/source/unified-test-format/schema-latest.json b/source/unified-test-format/schema-latest.json index c678104db8..f3525c1160 100644 --- a/source/unified-test-format/schema-latest.json +++ b/source/unified-test-format/schema-latest.json @@ -157,9 +157,9 @@ "connectionCheckOutFailedEvent", "connectionCheckedOutEvent", "connectionCheckedInEvent", - "connectionPendingResponseSucceeded", - "connectionPendingResponseStarted", - "connectionPendingResponseFailed", + "connectionPendingResponseSucceededEvent", + "connectionPendingResponseStartedEvent", + "connectionPendingResponseFailedEvent", "serverDescriptionChangedEvent", "topologyDescriptionChangedEvent", "topologyOpeningEvent", @@ -841,17 +841,17 @@ "additionalProperties": false, "properties": {} }, - "connectionPendingResponseStarted": { + "connectionPendingResponseStartedEvent": { "type": "object", "additionalProperties": false, "properties": {} }, - "connectionPendingResponseSucceeded": { + "connectionPendingResponseSucceededEvent": { "type": "object", "additionalProperties": false, "properties": {} }, - "connectionPendingResponseFailed": { + "connectionPendingResponseFailedEvent": { "type": "object", "additionalProperties": false, "properties": { diff --git a/source/unified-test-format/unified-test-format.md b/source/unified-test-format/unified-test-format.md index 0032aa9df3..6893a4fe47 100644 --- a/source/unified-test-format/unified-test-format.md +++ b/source/unified-test-format/unified-test-format.md @@ -486,9 +486,9 @@ The structure of this object is as follows: - [connectionCheckOutFailedEvent](#expectedEvent_connectionCheckOutFailedEvent) - [connectionCheckedOutEvent](#expectedEvent_connectionCheckedOutEvent) - [connectionCheckedInEvent](#expectedEvent_connectionCheckedInEvent) - - [connectionPendingResponseStarted](#expectedEvent_connectionPendingResponseStarted) - - [connectionPendingResponseSucceeded](#expectedEvent_connectionPendingResponseSucceeded) - - [connectionPendingResponseFailed](#expectedEvent_connectionPendingResponseFailed) + - [connectionPendingResponseStartedEvent](#expectedEvent_connectionPendingResponseStartedEvent) + - [connectionPendingResponseSucceededEvent](#expectedEvent_connectionPendingResponseSucceededEvent) + - [connectionPendingResponseFailedEvent](#expectedEvent_connectionPendingResponseFailedEvent) - [serverDescriptionChangedEvent](#expectedEvent_serverDescriptionChangedEvent) - [serverHeartbeatStartedEvent](#expectedEvent_serverHeartbeatStartedEvent) - [serverHeartbeatSucceededEvent](#expectedEvent_serverHeartbeatSucceededEvent) @@ -1136,20 +1136,20 @@ The structure of this object is as follows: - `connectionCheckedInEvent`: Optional object. If present, this object MUST be an empty document as all fields in this event are non-deterministic. - + -- `connectionPendingResponseStarted`: Optional object. If present, this object MUST be an empty document as all fields - in this event are non-deterministic. +- `connectionPendingResponseStartedEvent`: Optional object. If present, this object MUST be an empty document as all + fields in this event are non-deterministic. - + -- `connectionPendingResponseSucceeded`: Optional object. If present, this object MUST be an empty document as all fields - in this event are non-deterministic. +- `connectionPendingResponseSucceededEvent`: Optional object. If present, this object MUST be an empty document as all + fields in this event are non-deterministic. - + -- `connectionPendingResponseFailed`: Optional object. Assertions for one or more - [connectionPendingResponseFailed](../connection-monitoring-and-pooling/connection-monitoring-and-pooling.md#events) +- `connectionPendingResponseFailedEvent`: Optional object. Assertions for one or more + [connectionPendingResponseFailedEvent](../connection-monitoring-and-pooling/connection-monitoring-and-pooling.md#events) fields. The structure of this object is as follows: From e44e04c2946ca109e4b95c73bf72eead7be9aaec Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Mon, 6 Oct 2025 15:32:19 -0700 Subject: [PATCH 07/11] Fix test --- .../client-side-operations-timeout/tests/pending-response.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index cfd3a70eb3..1d5f0a1ff4 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -23,7 +23,7 @@ createEntities: - commandSucceededEvent - connectionCheckOutStartedEvent - connectionCheckedOutEvent - - ConnectionCheckOutFailedEvent + - connectionCheckOutFailedEvent - connectionCheckedInEvent - connectionCreatedEvent - connectionReadyEvent From fca4dc0f75cc10115d7449fc139715cd13db8fec Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Mon, 6 Oct 2025 15:33:15 -0700 Subject: [PATCH 08/11] Fix test --- .../client-side-operations-timeout/tests/pending-response.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 7c6c969ffb..c9677dd7b3 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -31,7 +31,7 @@ "commandSucceededEvent", "connectionCheckOutStartedEvent", "connectionCheckedOutEvent", - "ConnectionCheckOutFailedEvent", + "connectionCheckOutFailedEvent", "connectionCheckedInEvent", "connectionCreatedEvent", "connectionReadyEvent", From 473503fae251f0082408c764c74393d517a460ae Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Tue, 7 Oct 2025 10:16:56 -0700 Subject: [PATCH 09/11] pr --- .../tests/pending-response.json | 35 +++++-------------- .../tests/pending-response.yml | 28 +++++---------- .../connection-monitoring-and-pooling.md | 24 ++++++------- 3 files changed, 29 insertions(+), 58 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index c9677dd7b3..98b46c411b 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -252,12 +252,6 @@ "reason": "error" } }, - { - "connectionCheckOutFailedEvent": {} - }, - { - "connectionCheckOutStartedEvent": {} - }, { "connectionCreatedEvent": {} }, @@ -292,7 +286,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 250 + "blockTimeMS": 100 } } } @@ -315,7 +309,7 @@ "name": "find", "object": "coll", "arguments": { - "timeoutMS": 200, + "timeoutMS": 100, "filter": { "_id": 1 } @@ -388,7 +382,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 500 + "blockTimeMS": 200 } } } @@ -424,6 +418,7 @@ "name": "find", "object": "coll", "arguments": { + "timeoutMS": 200, "filter": { "_id": 1 } @@ -510,7 +505,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 500, + "blockTimeMS": 200, "closeConnection": true } } @@ -530,18 +525,11 @@ "isTimeoutError": true } }, - { - "name": "wait", - "object": "testRunner", - "arguments": { - "ms": 500 - } - }, { "name": "find", "object": "coll", "arguments": { - "timeoutMS": 50, + "timeoutMS": 1000, "filter": { "_id": 1 } @@ -627,7 +615,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 500, + "blockTimeMS": 200, "closeConnection": true } } @@ -647,18 +635,11 @@ "isTimeoutError": true } }, - { - "name": "wait", - "object": "testRunner", - "arguments": { - "ms": 500 - } - }, { "name": "insertOne", "object": "coll", "arguments": { - "timeoutMS": 50, + "timeoutMS": 1000, "document": { "_id": 3, "x": 1 diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index 1d5f0a1ff4..93ccbda963 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -142,8 +142,6 @@ tests: reason: timeout - connectionClosedEvent: reason: error - - connectionCheckOutFailedEvent: {} - - connectionCheckOutStartedEvent: {} # Find's retry starts and succeeds - connectionCreatedEvent: {} - connectionReadyEvent: {} - connectionCheckedOutEvent: {} @@ -164,7 +162,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 250 + blockTimeMS: 100 # Execute operation with timeout less than block time. - name: insertOne object: *collection @@ -177,7 +175,7 @@ tests: - name: find object: *collection arguments: - timeoutMS: 200 + timeoutMS: 100 filter: {_id: 1} expectEvents: - client: *client @@ -212,7 +210,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 500 + blockTimeMS: 200 # Execute operation with timeout less than block time. - name: insertOne object: *collection @@ -233,7 +231,9 @@ tests: - name: find object: *collection arguments: + timeoutMS: 200 filter: {_id: 1} + expectEvents: - client: *client events: @@ -272,7 +272,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 500 + blockTimeMS: 200 closeConnection: true # Execute operation with timeout less than block time. - name: insertOne @@ -282,15 +282,10 @@ tests: document: {_id: 3, x: 1} expectError: isTimeoutError: true - - name: wait - object: testRunner - arguments: - ms: 500 - # Execute a subsequent operation with a timeout less than the block time. - name: find object: *collection arguments: - timeoutMS: 50 + timeoutMS: 1000 filter: {_id: 1} expectEvents: - client: *client @@ -330,7 +325,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 500 + blockTimeMS: 200 closeConnection: true # Execute operation with timeout less than block time. - name: insertOne @@ -340,15 +335,10 @@ tests: document: {_id: 3, x: 1} expectError: isTimeoutError: true - - name: wait - object: testRunner - arguments: - ms: 500 - # Execute a subsequent operation with a timeout less than the block time. - name: insertOne object: *collection arguments: - timeoutMS: 50 + timeoutMS: 1000 document: {_id: 3, x: 1} expectEvents: - client: *client diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 3e08de65ac..98a7cccac9 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -677,22 +677,22 @@ try: if connection is in "pending response" state: tConnectionDrainingStarted = current instant (use a monotonic clock if possible) emit PendingResponseStartedEvent and equivalent log message - drain the pending response - if error: + try: + drain the pending response + except timeout: tConnectionDrainingFailed = current instant (use a monotonic clock if possible) - if non-timeout error: - emit PendingResponseFailedEvent(reason="error", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message + emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and quivalent log message + if last read timestamp on connection > 3 seconds old close connection - connection = Null - else if timeout error and last read timestamp on connection > 3 seconds old - emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message - close connection - connection = Null else - emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message check in the connection - connection = Null - else + connection = Null + except: + tConnectionDrainingFailed = current instant (use a monotonic clock if possible) + emit PendingResponseFailedEvent(reason="error", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message + close connection + connection = Null + else: tConnectionDrainingSucceeded = current instant (use a monotonic clock if possible) emit PendingResponseSucceededEvent(duration = tConnectionDrainingSucceeded - tConnectionDrainingStarted) and equivalent log message else if totalConnectionCount < maxPoolSize: From 8677cbcb48bf62f55bead4513d42132a902f095e Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Tue, 7 Oct 2025 18:10:48 -0700 Subject: [PATCH 10/11] pr --- .../tests/README.md | 119 ++++++++++++------ 1 file changed, 78 insertions(+), 41 deletions(-) diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index fd8f0db706..61654a8568 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -23,59 +23,96 @@ The following tests have not yet been automated, but MUST still be tested: ### Pending Response -Due to the complexity of managing a proxy layer, the following qualifying tests should only be run for non-ssl, -non-auth, and non-compression connections. +We use mongoproxy to emulate the network delays. See +[mongoproxy/README.md](https://github.com/mongodb-labs/drivers-evergreen-tools/blob/master/.evergreen/mongoproxy/README.md) +for more details. Due to the complexity of managing a proxy layer, the following qualifying tests should only be run for +non-ssl, non-auth, and non-compression connections. #### Recover Partially Read Response This test verifies that if only part of a response was read before the timeout, the driver can drain the rest of the -response and reuse the connection for the next operation. - -1. Connect to the proxy server with `maxPoolSize=1` and `direct=true`, subscribing to the following CMAP events: - - `PendingResponseStarted` - - `PendingResponseSucceeded` - - `PendingResponseFailed` - - `ConnectionClosed` -2. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: - - `sendBytes`: We have 3 possible states here: +response and reuse the connection for the next operation. This test has 3 variation, see step 4, `sendBytes` parameter. + +1. Setup mongoproxy. +2. Connect to the proxy server with `maxPoolSize=1`, `direct=true`, disable read and write retryability, subscribing to + the following CMAP events: + - `PendingResponseStartedEvent` + - `PendingResponseSucceededEvent` + - `PendingResponseFailedEvent` + - `ConnectionCreatedEvent` + - `ConnectionClosedEvent` +3. Execute `ping` command to populate the connection pool. +4. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: + - `sendBytes`: There are 3 test cases here: 1. Message size was partially read: random value between 1 and 3 inclusive 2. Message size was read, body was not read at all: use 4 3. Message size was read, body read partially: random value between 5 and 100 inclusive - - `delayMS`: 400 ( to exceed the 200 ms timeout) - - `sendAll`: `true` -3. Assert that the operation failed with timeout error. -4. Issue another operation (e.g. another insert) and assert that it does not return an error. -5. Verify that the following sequence of events was observed: - - `ConnectionPendingResponseStarted` - - `ConnectionPendingResponseSucceeded` -6. Verify that NONE the following events was observed: - - `ConnectionPendingResponseFailed` - - `ConnectionClosed` + - `delayMs`: 400 ( to exceed the 200 ms timeout) + - `sendAll`: `true` Example of run command payload: + ``` + { + insert: "coll", + documents: [{ _id : 42 }], + proxyTest: { + actions: [ + { sendBytes : 2 }, + { delayMs : 400 }, + { sendAll : true }, + ] + } + } + ``` +5. Assert that the operation failed with timeout error. +6. Issue another operation (e.g. another insert) and assert that it does not return an error. +7. Verify that the following sequence of events was observed: + - `ConnectionCreatedEvent` + - `ConnectionPendingResponseStartedEvent` + - `ConnectionPendingResponseSucceededEvent` +8. Verify that NONE the following events was observed: + - `ConnectionPendingResponseFailedEvent` + - `ConnectionClosedEvent` #### Non-destructive Aliveness Check This test verifies that if a connection idles past the pending response window (3 seconds) after a partial header, the aliveness check does not attempt to discard bytes from the TCP stream. -1. Connect to the proxy server with `maxPoolSize=1` and `direct=true`, subscribing to the following CMAP events: - - `PendingResponseStarted` - - `PendingResponseSucceeded` - - `PendingResponseFailed` - - `ConnectionClosed` -2. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: - - `sendBytes`: any value between 1 and 3 - - `delayMS`: 400 ( to exceed the 200 ms timeout) - - `sendAll`: `true` -3. Assert that the operation failed with timeout error. -4. Sleep for 3 seconds -5. Issue another operation (e.g. another insert) and assert that it does not return an error. -6. Verify that the following sequence of events was observed: - - `ConnectionPendingResponseStarted` - - `ConnectionPendingResponseFailed` - - `ConnectionPendingResponseStarted` - - `ConnectionPendingResponseSucceeded` -7. Verify that NONE the following events was observed: - - `ConnectionClosed` +1. Setup mongoproxy. +2. Connect to the proxy server with `maxPoolSize=1`, `direct=true`, disable read and write retryability, subscribing to + the following CMAP events: + - `PendingResponseStartedEvent` + - `PendingResponseSucceededEvent` + - `PendingResponseFailedEvent` + - `ConnectionCreatedEvent` + - `ConnectionClosedEvent` +3. Execute `ping` command to populate the connection pool. +4. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: + - `sendBytes`: random value between 1 and 3 inclusive + - `delayMs`: 400 ( to exceed the 200 ms timeout) + - `sendAll`: `true` Example of run command payload: + ``` + { + insert: "coll", + documents: [{ _id : 42 }], + proxyTest: { + actions: [ + { sendBytes : 2 }, + { delayMs : 400 }, + { sendAll : true }, + ] + } + } + ``` +5. Assert that the operation failed with timeout error. +6. Sleep for 4 seconds (to pass the 3-seconds expiration window) +7. Issue another operation (e.g. another insert) and assert that it does not return an error. +8. Verify that the following sequence of events was observed: + - `ConnectionCreatedEvent` + - `ConnectionPendingResponseStartedEvent` + - `ConnectionPendingResponseSucceededEvent` +9. Verify that NONE the following events was observed: + - `ConnectionClosedEvent` + - `ConnectionPendingResponseFailedEvent` #### Exhaust Cursors @@ -87,7 +124,7 @@ this prose test. 2. Insert > 2 records into the collection. 3. Create an exhaust cursor using `find` and iterate one `getMore` using `batchSize=1`. 4. Call a subsequent `getMore` on the exhaust cursor with a client-side timeout of 100ms. -5. Ensure that the `ConnectionClosed` event is emitted due to timeout. +5. Ensure that the `ConnectionClosedEvent` event is emitted due to timeout. ## Logging Tests From d33c5c21e48d485627b6cec90b39e801ce0c008d Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Wed, 8 Oct 2025 15:24:22 -0700 Subject: [PATCH 11/11] Move closed connection tests into separate file. --- .../pending-response-close-connection.json | 180 ++++++++++++++ .../pending-response-close-connection.yml | 98 ++++++++ .../tests/pending-response.json | 221 ------------------ .../tests/pending-response.yml | 107 --------- 4 files changed, 278 insertions(+), 328 deletions(-) create mode 100644 source/client-side-operations-timeout/tests/pending-response-close-connection.json create mode 100644 source/client-side-operations-timeout/tests/pending-response-close-connection.yml diff --git a/source/client-side-operations-timeout/tests/pending-response-close-connection.json b/source/client-side-operations-timeout/tests/pending-response-close-connection.json new file mode 100644 index 0000000000..84de8306c5 --- /dev/null +++ b/source/client-side-operations-timeout/tests/pending-response-close-connection.json @@ -0,0 +1,180 @@ +{ + "description": "Connection churn is prevented by reading pending responses during connection checkout", + "schemaVersion": "1.28", + "runOnRequirements": [ + { + "minServerVersion": "5.0", + "topologies": [ + "single", + "replicaset" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "failPointClient", + "useMultipleMongoses": false + } + }, + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1, + "minPoolSize": 1 + }, + "awaitMinPoolSizeMS": 10000, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "commandSucceededEvent", + "connectionCheckOutStartedEvent", + "connectionCheckedOutEvent", + "connectionCheckOutFailedEvent", + "connectionCheckedInEvent", + "connectionCreatedEvent", + "connectionReadyEvent", + "connectionClosedEvent", + "connectionPendingResponseSucceededEvent", + "connectionPendingResponseStartedEvent", + "connectionPendingResponseFailedEvent" + ] + } + }, + { + "database": { + "id": "test", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "coll", + "database": "test", + "collectionName": "coll" + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "test", + "documents": [] + } + ], + "tests": [ + { + "description": "write op retries when connection closes server-side while draining response", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 200, + "closeConnection": true + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 1000, + "document": { + "_id": 3, + "x": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckOutStartedEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckOutStartedEvent": {} + }, + { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { + "reason": "error" + } + }, + { + "connectionClosedEvent": { + "reason": "error" + } + }, + { + "connectionCreatedEvent": {} + }, + { + "connectionReadyEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + } + ] +} diff --git a/source/client-side-operations-timeout/tests/pending-response-close-connection.yml b/source/client-side-operations-timeout/tests/pending-response-close-connection.yml new file mode 100644 index 0000000000..20f5375c19 --- /dev/null +++ b/source/client-side-operations-timeout/tests/pending-response-close-connection.yml @@ -0,0 +1,98 @@ +description: "Connection churn is prevented by reading pending responses during connection checkout" +schemaVersion: "1.28" +runOnRequirements: + - minServerVersion: "5.0" + # FailPoint on Server before 5.0 do not wait for blockTimeMS before closing the connection + topologies: ["single", "replicaset"] +createEntities: + - client: + id: &failPointClient failPointClient + useMultipleMongoses: false + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + minPoolSize: 1 + awaitMinPoolSizeMS: 10000 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - commandSucceededEvent + - connectionCheckOutStartedEvent + - connectionCheckedOutEvent + - connectionCheckOutFailedEvent + - connectionCheckedInEvent + - connectionCreatedEvent + - connectionReadyEvent + - connectionClosedEvent + - connectionPendingResponseSucceededEvent + - connectionPendingResponseStartedEvent + - connectionPendingResponseFailedEvent + - database: + id: &database test + client: *client + databaseName: *database + - collection: + id: &collection coll + database: *database + collectionName: *collection +initialData: + - collectionName: *collection + databaseName: *database + documents: [] +tests: + # If the connection is closed server-side while draining the response, the + # driver must retry with a different connection. + - description: "write op retries when connection closes server-side while draining response" + operations: + # Create a failpoint to block the first and second operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 200 + closeConnection: true + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Draining pending response should failure because of closed connection, + # but another connection should be established within the checkout and operation should succeed. + - name: insertOne + object: *collection + arguments: + timeoutMS: 1000 + document: {_id: 3, x: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: insert + - client: *client + eventType: cmap + events: + - connectionCheckOutStartedEvent: { } + - connectionCheckedOutEvent: { } + - connectionCheckedInEvent: { } # Insert fails. + - connectionCheckOutStartedEvent: { } + - connectionPendingResponseStartedEvent: { } + - connectionPendingResponseFailedEvent: # Pending response failed. + reason: error + - connectionClosedEvent: + reason: error + - connectionCreatedEvent: { } # New connection should be created and checked out. + - connectionReadyEvent: { } + - connectionCheckedOutEvent: { } + - connectionCheckedInEvent: { } # Find finishes. diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 98b46c411b..4e69ebe44c 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -486,227 +486,6 @@ ] } ] - }, - { - "description": "read op retries when connection closes server-side while draining response", - "operations": [ - { - "name": "failPoint", - "object": "testRunner", - "arguments": { - "client": "failPointClient", - "failPoint": { - "configureFailPoint": "failCommand", - "mode": { - "times": 1 - }, - "data": { - "failCommands": [ - "insert" - ], - "blockConnection": true, - "blockTimeMS": 200, - "closeConnection": true - } - } - } - }, - { - "name": "insertOne", - "object": "coll", - "arguments": { - "timeoutMS": 50, - "document": { - "_id": 3, - "x": 1 - } - }, - "expectError": { - "isTimeoutError": true - } - }, - { - "name": "find", - "object": "coll", - "arguments": { - "timeoutMS": 1000, - "filter": { - "_id": 1 - } - } - } - ], - "expectEvents": [ - { - "client": "client", - "events": [ - { - "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandSucceededEvent": { - "commandName": "find" - } - } - ] - }, - { - "client": "client", - "eventType": "cmap", - "events": [ - { - "connectionCheckOutStartedEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - }, - { - "connectionCheckOutStartedEvent": {} - }, - { - "connectionPendingResponseStartedEvent": {} - }, - { - "connectionPendingResponseFailedEvent": { - "reason": "error" - } - }, - { - "connectionClosedEvent": { - "reason": "error" - } - }, - { - "connectionCreatedEvent": {} - }, - { - "connectionReadyEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - } - ] - } - ] - }, - { - "description": "write op retries when connection closes server-side while draining response", - "operations": [ - { - "name": "failPoint", - "object": "testRunner", - "arguments": { - "client": "failPointClient", - "failPoint": { - "configureFailPoint": "failCommand", - "mode": { - "times": 1 - }, - "data": { - "failCommands": [ - "insert" - ], - "blockConnection": true, - "blockTimeMS": 200, - "closeConnection": true - } - } - } - }, - { - "name": "insertOne", - "object": "coll", - "arguments": { - "timeoutMS": 50, - "document": { - "_id": 3, - "x": 1 - } - }, - "expectError": { - "isTimeoutError": true - } - }, - { - "name": "insertOne", - "object": "coll", - "arguments": { - "timeoutMS": 1000, - "document": { - "_id": 3, - "x": 1 - } - } - } - ], - "expectEvents": [ - { - "client": "client", - "events": [ - { - "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandSucceededEvent": { - "commandName": "insert" - } - } - ] - }, - { - "client": "client", - "eventType": "cmap", - "events": [ - { - "connectionCheckOutStartedEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - }, - { - "connectionCheckOutStartedEvent": {} - }, - { - "connectionPendingResponseStartedEvent": {} - }, - { - "connectionPendingResponseFailedEvent": { - "reason": "error" - } - }, - { - "connectionClosedEvent": { - "reason": "error" - } - }, - { - "connectionCreatedEvent": {} - }, - { - "connectionReadyEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - } - ] - } - ] } ] } diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index 93ccbda963..4324e97476 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -233,7 +233,6 @@ tests: arguments: timeoutMS: 200 filter: {_id: 1} - expectEvents: - client: *client events: @@ -257,109 +256,3 @@ tests: - connectionPendingResponseSucceededEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Second find succeeds. - # If the connection is closed server-side while draining the response, the - # driver must retry with a different connection. - - description: "read op retries when connection closes server-side while draining response" - operations: - # Create a failpoint to block the first and second operation. - - name: failPoint - object: testRunner - arguments: - client: *failPointClient - failPoint: - configureFailPoint: failCommand - mode: {times: 1} - data: - failCommands: ["insert"] - blockConnection: true - blockTimeMS: 200 - closeConnection: true - # Execute operation with timeout less than block time. - - name: insertOne - object: *collection - arguments: - timeoutMS: 50 - document: {_id: 3, x: 1} - expectError: - isTimeoutError: true - - name: find - object: *collection - arguments: - timeoutMS: 1000 - filter: {_id: 1} - expectEvents: - - client: *client - events: - - commandFailedEvent: - commandName: insert - - commandSucceededEvent: - commandName: find - - client: *client - eventType: cmap - events: - - connectionCheckOutStartedEvent: {} - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # Insert fails. - - connectionCheckOutStartedEvent: {} - - connectionPendingResponseStartedEvent: {} # Pending read fails on first find - - connectionPendingResponseFailedEvent: - reason: error - - connectionClosedEvent: - reason: error - - connectionCreatedEvent: { } - - connectionReadyEvent: { } - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # Find finishes. - # If the connection is closed server-side while draining the response, the - # driver must retry with a different connection (write operation) - - description: "write op retries when connection closes server-side while draining response" - operations: - # Create a failpoint to block the first and second operation. - - name: failPoint - object: testRunner - arguments: - client: *failPointClient - failPoint: - configureFailPoint: failCommand - mode: {times: 1} - data: - failCommands: ["insert"] - blockConnection: true - blockTimeMS: 200 - closeConnection: true - # Execute operation with timeout less than block time. - - name: insertOne - object: *collection - arguments: - timeoutMS: 50 - document: {_id: 3, x: 1} - expectError: - isTimeoutError: true - - name: insertOne - object: *collection - arguments: - timeoutMS: 1000 - document: {_id: 3, x: 1} - expectEvents: - - client: *client - events: - - commandFailedEvent: - commandName: insert - - commandSucceededEvent: - commandName: insert - - client: *client - eventType: cmap - events: - - connectionCheckOutStartedEvent: { } - - connectionCheckedOutEvent: { } - - connectionCheckedInEvent: { } # Insert fails. - - connectionCheckOutStartedEvent: { } - - connectionPendingResponseStartedEvent: { } # Pending read fails on first find - - connectionPendingResponseFailedEvent: - reason: error - - connectionClosedEvent: - reason: error - - connectionCreatedEvent: { } - - connectionReadyEvent: { } - - connectionCheckedOutEvent: { } - - connectionCheckedInEvent: { } # Find finishes.