diff --git a/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp b/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp index 9684e8463029..8560af1418dd 100644 --- a/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp +++ b/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp @@ -686,7 +686,6 @@ void TRowDispatcher::UpdateMetrics() { for (const auto& key : toDelete) { AggrStats.LastQueryStats.erase(key); } - PrintStateToLog(); } TString TRowDispatcher::GetInternalState() { diff --git a/ydb/core/kqp/common/events/script_executions.h b/ydb/core/kqp/common/events/script_executions.h index d44e5c05e4db..972915d83996 100644 --- a/ydb/core/kqp/common/events/script_executions.h +++ b/ydb/core/kqp/common/events/script_executions.h @@ -72,6 +72,7 @@ struct TEvGetScriptExecutionOperation : public TEventWithDatabaseId { @@ -81,15 +82,17 @@ struct TEvGetScriptExecutionOperationQueryResponse : public TEventLocal FinalizationStatus; TActorId RunScriptActorId; TString ExecutionId; Ydb::StatusIds::StatusCode Status; NYql::TIssues Issues; Ydb::Query::ExecuteScriptMetadata Metadata; - bool RetryRequired = false; + bool WaitRetry = false; i64 LeaseGeneration = 0; bool StateSaved = false; + NKikimrKqp::TScriptExecutionRetryState RetryState; }; struct TEvGetScriptExecutionOperationResponse : public TEventLocal { @@ -97,6 +100,9 @@ struct TEvGetScriptExecutionOperationResponse : public TEventLocal Metadata; bool Ready = false; bool StateSaved = false; + ui64 RetryCount = 0; + TInstant LastFailAt; + TInstant SuspendedUntil; }; TEvGetScriptExecutionOperationResponse(Ydb::StatusIds::StatusCode status, TInfo&& info, NYql::TIssues issues) @@ -105,6 +111,9 @@ struct TEvGetScriptExecutionOperationResponse : public TEventLocal Metadata; bool StateSaved = false; + ui64 RetryCount = 0; + TInstant LastFailAt; + TInstant SuspendedUntil; }; struct TEvListScriptExecutionOperations : public TEventWithDatabaseId { diff --git a/ydb/core/kqp/common/kqp_script_executions.cpp 
b/ydb/core/kqp/common/kqp_script_executions.cpp index 530ff2f27d5c..2e84ec511d19 100644 --- a/ydb/core/kqp/common/kqp_script_executions.cpp +++ b/ydb/core/kqp/common/kqp_script_executions.cpp @@ -1,9 +1,14 @@ #include "kqp_script_executions.h" -#include - +#include #include +#include + +#include + +#include + namespace NKikimr::NKqp { TString ScriptExecutionOperationFromExecutionId(const std::string& executionId) { @@ -13,6 +18,10 @@ TString ScriptExecutionOperationFromExecutionId(const std::string& executionId) return NOperationId::ProtoToString(operationId); } +NOperationId::TOperationId OperationIdFromExecutionId(const TString& executionId) { + return NOperationId::TOperationId(ScriptExecutionOperationFromExecutionId(executionId)); +} + TMaybe ScriptExecutionIdFromOperation(const TString& operationId, TString& error) try { NOperationId::TOperationId operation(operationId); return ScriptExecutionIdFromOperation(operation, error); @@ -38,4 +47,47 @@ TMaybe ScriptExecutionIdFromOperation(const NOperationId::TOperationId& return Nothing(); } +NYql::TIssues AddRootIssue(const TString& message, const NYql::TIssues& issues, bool addEmptyRoot) { + if (!issues && !addEmptyRoot) { + return {}; + } + + NYql::TIssue rootIssue(message); + for (const auto& issue : issues) { + rootIssue.AddSubIssue(MakeIntrusive(issue)); + } + + return {rootIssue}; +} + +TString SerializeIssues(const NYql::TIssues& issues) { + NYql::TIssue root; + for (const auto& issue : issues) { + root.AddSubIssue(MakeIntrusive(issue)); + } + + Ydb::Issue::IssueMessage rootMessage; + if (issues) { + NYql::IssueToMessage(root, &rootMessage); + } + + return NProtobufJson::Proto2Json(rootMessage, NProtobufJson::TProto2JsonConfig()); +} + +TString SequenceToJsonString(ui64 size, std::function valueFiller) { + NJson::TJsonValue value; + value.SetType(NJson::EJsonValueType::JSON_ARRAY); + + NJson::TJsonValue::TArray& jsonArray = value.GetArraySafe(); + jsonArray.resize(size); + for (ui64 i = 0; i < size; ++i) { + 
valueFiller(i, jsonArray[i]); + } + + NJsonWriter::TBuf serializedJson; + serializedJson.WriteJsonValue(&value, false, PREC_NDIGITS, 17); + + return serializedJson.Str(); +} + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/kqp_script_executions.h b/ydb/core/kqp/common/kqp_script_executions.h index 17c5ea8e26fa..a3c38869d26c 100644 --- a/ydb/core/kqp/common/kqp_script_executions.h +++ b/ydb/core/kqp/common/kqp_script_executions.h @@ -1,6 +1,11 @@ #pragma once + #include +#include + +#include + #include #include @@ -8,7 +13,20 @@ namespace NKikimr::NKqp { TString ScriptExecutionOperationFromExecutionId(const std::string& executionId); +NOperationId::TOperationId OperationIdFromExecutionId(const TString& executionId); TMaybe ScriptExecutionIdFromOperation(const TString& operationId, TString& error); TMaybe ScriptExecutionIdFromOperation(const NOperationId::TOperationId& operationId, TString& error); +NYql::TIssues AddRootIssue(const TString& message, const NYql::TIssues& issues, bool addEmptyRoot = true); +TString SerializeIssues(const NYql::TIssues& issues); + +TString SequenceToJsonString(ui64 size, std::function valueFiller); + +template +TString SequenceToJsonString(const TContainer& container) { + return SequenceToJsonString(container.size(), [&](ui64 i, NJson::TJsonValue& value) { + value = NJson::TJsonValue(container[i]); + }); +} + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/kqp_tx.cpp b/ydb/core/kqp/common/kqp_tx.cpp index 5fb07bf759eb..be0671f3f332 100644 --- a/ydb/core/kqp/common/kqp_tx.cpp +++ b/ydb/core/kqp/common/kqp_tx.cpp @@ -392,6 +392,7 @@ bool HasUncommittedChangesRead(THashSet& modifiedTables, cons case NKqpProto::TKqpPhyConnection::kResult: case NKqpProto::TKqpPhyConnection::kValue: case NKqpProto::TKqpPhyConnection::kMerge: + case NKqpProto::TKqpPhyConnection::kDqSourceStreamLookup: case NKqpProto::TKqpPhyConnection::TYPE_NOT_SET: break; } diff --git a/ydb/core/kqp/common/ya.make b/ydb/core/kqp/common/ya.make 
index d7dd98bb8a51..2478eaefd191 100644 --- a/ydb/core/kqp/common/ya.make +++ b/ydb/core/kqp/common/ya.make @@ -27,31 +27,34 @@ SRCS( ) PEERDIR( + library/cpp/json/writer + library/cpp/lwtrace + library/cpp/protobuf/json ydb/core/base ydb/core/engine - ydb/core/protos - ydb/core/scheme - ydb/core/kqp/expr_nodes - ydb/core/kqp/common/simple + ydb/core/grpc_services/cancelation ydb/core/kqp/common/compilation ydb/core/kqp/common/events ydb/core/kqp/common/shutdown + ydb/core/kqp/common/simple + ydb/core/kqp/expr_nodes ydb/core/kqp/provider + ydb/core/protos + ydb/core/scheme ydb/core/tx/long_tx_service/public ydb/core/tx/sharding - ydb/library/yql/dq/expr_nodes ydb/library/aclib - yql/essentials/core/issue - yql/essentials/core/services ydb/library/yql/dq/actors ydb/library/yql/dq/common - yql/essentials/core/dq_integration - yql/essentials/parser/pg_wrapper/interface + ydb/library/yql/dq/expr_nodes + ydb/public/api/protos ydb/public/sdk/cpp/src/library/operation_id ydb/public/sdk/cpp/src/library/operation_id/protos - ydb/core/grpc_services/cancelation - library/cpp/lwtrace - #library/cpp/lwtrace/protos + yql/essentials/core/dq_integration + yql/essentials/core/issue + yql/essentials/core/services + yql/essentials/parser/pg_wrapper/interface + yql/essentials/public/issue ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp index 361f8094d979..3c1ea590f5f2 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp @@ -6,19 +6,21 @@ #include #include #include -#include #include #include #include #include -#include +#include #include +#include +#include +#include #include -#include +#include #include #include -#include -#include +#include +#include namespace NKikimr { namespace NMiniKQL { @@ -90,6 +92,7 @@ NYql::NDq::IDqAsyncIoFactory::TPtr CreateKqpAsyncIoFactory( RegisterKqpWriteActor(*factory, counters); 
RegisterSequencerActorFactory(*factory, counters); RegisterKqpVectorResolveActor(*factory, counters); + NYql::NDq::RegisterDqInputTransformLookupActorFactory(*factory); if (federatedQuerySetup) { auto s3HttpRetryPolicy = NYql::GetHTTPDefaultRetryPolicy(NYql::THttpRetryPolicyOptions{.RetriedCurlCodes = NYql::FqRetriedCurlCodes()}); @@ -101,6 +104,7 @@ NYql::NDq::IDqAsyncIoFactory::TPtr CreateKqpAsyncIoFactory( } NYql::NDq::RegisterDQSolomonReadActorFactory(*factory, federatedQuerySetup->CredentialsFactory); + NYql::NDq::RegisterDQSolomonWriteActorFactory(*factory, federatedQuerySetup->CredentialsFactory); NYql::NDq::RegisterDqPqReadActorFactory(*factory, *federatedQuerySetup->Driver, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->PqGateway, nullptr); NYql::NDq::RegisterDqPqWriteActorFactory(*factory, *federatedQuerySetup->Driver, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->PqGateway, nullptr); } diff --git a/ydb/core/kqp/compute_actor/ya.make b/ydb/core/kqp/compute_actor/ya.make index 95544a251b24..bf56cc428a19 100644 --- a/ydb/core/kqp/compute_actor/ya.make +++ b/ydb/core/kqp/compute_actor/ya.make @@ -25,12 +25,13 @@ PEERDIR( ydb/library/formats/arrow/protos ydb/library/formats/arrow/common ydb/library/yql/dq/actors/compute + ydb/library/yql/dq/actors/input_transforms + ydb/library/yql/dq/comp_nodes ydb/library/yql/providers/generic/actors ydb/library/yql/providers/pq/async_io ydb/library/yql/providers/s3/actors_factory ydb/library/yql/providers/solomon/actors yql/essentials/public/issue - ydb/library/yql/dq/comp_nodes ) GENERATE_ENUM_SERIALIZATION(kqp_compute_state.h) diff --git a/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp b/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp index fed87977e4fc..14c5f630352d 100644 --- a/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp +++ b/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp @@ -517,6 +517,47 @@ void TKqpTasksGraph::BuildVectorResolveChannels(const TStageInfo& stageInfo, ui3 
inputStageInfo, outputIndex, enableSpilling, logFunc); } +void TKqpTasksGraph::BuildDqSourceStreamLookupChannels(const TStageInfo& stageInfo, ui32 inputIndex, const TStageInfo& inputStageInfo, + ui32 outputIndex, const NKqpProto::TKqpPhyCnDqSourceStreamLookup& dqSourceStreamLookup, const TChannelLogFunc& logFunc) { + YQL_ENSURE(stageInfo.Tasks.size() == 1); + + auto* settings = GetMeta().Allocate(); + settings->SetLeftLabel(dqSourceStreamLookup.GetLeftLabel()); + settings->SetRightLabel(dqSourceStreamLookup.GetRightLabel()); + settings->SetJoinType(dqSourceStreamLookup.GetJoinType()); + settings->SetNarrowInputRowType(dqSourceStreamLookup.GetConnectionInputRowType()); + settings->SetNarrowOutputRowType(dqSourceStreamLookup.GetConnectionOutputRowType()); + settings->SetCacheLimit(dqSourceStreamLookup.GetCacheLimit()); + settings->SetCacheTtlSeconds(dqSourceStreamLookup.GetCacheTtlSeconds()); + settings->SetMaxDelayedRows(dqSourceStreamLookup.GetMaxDelayedRows()); + settings->SetIsMultiget(dqSourceStreamLookup.GetIsMultiGet()); + + const auto& leftJointKeys = dqSourceStreamLookup.GetLeftJoinKeyNames(); + settings->MutableLeftJoinKeyNames()->Assign(leftJointKeys.begin(), leftJointKeys.end()); + + const auto& rightJointKeys = dqSourceStreamLookup.GetRightJoinKeyNames(); + settings->MutableRightJoinKeyNames()->Assign(rightJointKeys.begin(), rightJointKeys.end()); + + auto& streamLookupSource = *settings->MutableRightSource(); + streamLookupSource.SetSerializedRowType(dqSourceStreamLookup.GetLookupRowType()); + const auto& compiledSource = dqSourceStreamLookup.GetLookupSource(); + streamLookupSource.SetProviderName(compiledSource.GetType()); + *streamLookupSource.MutableLookupSource() = compiledSource.GetSettings(); + + TTransform dqSourceStreamLookupTransform = { + .Type = "StreamLookupInputTransform", + .InputType = dqSourceStreamLookup.GetInputStageRowType(), + .OutputType = dqSourceStreamLookup.GetOutputStageRowType(), + }; + 
YQL_ENSURE(dqSourceStreamLookupTransform.Settings.PackFrom(*settings)); + + for (const auto taskId : stageInfo.Tasks) { + GetTask(taskId).Inputs[inputIndex].Transform = dqSourceStreamLookupTransform; + } + + BuildUnionAllChannels(*this, stageInfo, inputIndex, inputStageInfo, outputIndex, /* enableSpilling */ false, logFunc); +} + void TKqpTasksGraph::BuildKqpStageChannels(TStageInfo& stageInfo, ui64 txId, bool enableSpilling, bool enableShuffleElimination) { auto& stage = stageInfo.Meta.GetStage(stageInfo.Id); @@ -709,6 +750,12 @@ void TKqpTasksGraph::BuildKqpStageChannels(TStageInfo& stageInfo, ui64 txId, boo break; } + case NKqpProto::TKqpPhyConnection::kDqSourceStreamLookup: { + BuildDqSourceStreamLookupChannels(stageInfo, inputIdx, inputStageInfo, outputIdx, + input.GetDqSourceStreamLookup(), log); + break; + } + default: YQL_ENSURE(false, "Unexpected stage input type: " << (ui32)input.GetTypeCase()); } @@ -968,6 +1015,8 @@ void TKqpTasksGraph::FillChannelDesc(NDqProto::TChannel& channelDesc, const TCha channelDesc.SetSrcTaskId(channel.SrcTask); channelDesc.SetDstTaskId(channel.DstTask); channelDesc.SetEnableSpilling(enableSpilling); + channelDesc.SetCheckpointingMode(channel.CheckpointingMode); + channelDesc.SetWatermarksMode(channel.WatermarksMode); const auto& resultChannelProxies = GetMeta().ResultChannelProxies; @@ -1369,6 +1418,8 @@ void TKqpTasksGraph::FillInputDesc(NYql::NDqProto::TTaskInput& inputDesc, const } transformProto->MutableSettings()->PackFrom(*input.Meta.VectorResolveSettings); + } else { + *transformProto->MutableSettings() = input.Transform->Settings; } } } @@ -1724,6 +1775,7 @@ bool TKqpTasksGraph::BuildComputeTasks(TStageInfo& stageInfo, const ui32 nodesCo case NKqpProto::TKqpPhyConnection::kMap: case NKqpProto::TKqpPhyConnection::kParallelUnionAll: case NKqpProto::TKqpPhyConnection::kVectorResolve: + case NKqpProto::TKqpPhyConnection::kDqSourceStreamLookup: break; default: YQL_ENSURE(false, "Unexpected connection type: " << 
(ui32)input.GetTypeCase() << Endl); diff --git a/ydb/core/kqp/executer_actor/kqp_tasks_graph.h b/ydb/core/kqp/executer_actor/kqp_tasks_graph.h index b559526ec559..7ae57f5df01f 100644 --- a/ydb/core/kqp/executer_actor/kqp_tasks_graph.h +++ b/ydb/core/kqp/executer_actor/kqp_tasks_graph.h @@ -422,6 +422,8 @@ class TKqpTasksGraph : public NYql::NDq::TDqTasksGraph + +#include + +namespace NKikimr::NKqp { + +TStreamingQuerySettings& TStreamingQuerySettings::FromProto(const NKikimrSchemeOp::TStreamingQueryProperties& info) { + for (const auto& [name, value] : info.GetProperties()) { + if (name == TStreamingQueryMeta::TSqlSettings::QUERY_TEXT_FEATURE) { + QueryText = value; + } else if (name == TStreamingQueryMeta::TProperties::Run) { + Run = value == "true"; + } else if (name == TStreamingQueryMeta::TProperties::ResourcePool) { + ResourcePool = value; + } + } + + return *this; +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/streaming_query/common/utils.h b/ydb/core/kqp/gateway/behaviour/streaming_query/common/utils.h new file mode 100644 index 000000000000..5a295a4f30f3 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/streaming_query/common/utils.h @@ -0,0 +1,44 @@ +#pragma once + +#include + +#include + +namespace NKikimrSchemeOp { + +class TStreamingQueryProperties; + +} // namespace NKikimrSchemeOp + +namespace NKikimr::NKqp { + +class TStreamingQueryMeta { +public: + struct TColumns { + static inline constexpr char DatabaseId[] = "database_id"; + static inline constexpr char QueryPath[] = "query_path"; + static inline constexpr char State[] = "state"; + }; + + // Properties which are created during query translation + using TSqlSettings = NSQLTranslationV1::TStreamingQuerySettings; + + struct TProperties { + static inline constexpr char Run[] = "run"; + static inline constexpr char ResourcePool[] = "resource_pool"; + static inline constexpr char Force[] = "force"; + }; +}; + +// Used for properties parsing after describing streaming query 
+class TStreamingQuerySettings { +public: + TStreamingQuerySettings& FromProto(const NKikimrSchemeOp::TStreamingQueryProperties& info); + +public: + TString QueryText; + bool Run = false; + TString ResourcePool; +}; + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/streaming_query/common/ya.make b/ydb/core/kqp/gateway/behaviour/streaming_query/common/ya.make new file mode 100644 index 000000000000..43bbfbd1b390 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/streaming_query/common/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + utils.cpp +) + +PEERDIR( + ydb/core/protos + yql/essentials/sql/v1 +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/kqp/gateway/behaviour/streaming_query/object.h b/ydb/core/kqp/gateway/behaviour/streaming_query/object.h index f7fa9da23aff..9b3115bd70c6 100644 --- a/ydb/core/kqp/gateway/behaviour/streaming_query/object.h +++ b/ydb/core/kqp/gateway/behaviour/streaming_query/object.h @@ -1,31 +1,16 @@ #pragma once +#include #include #include -#include namespace NKikimr::NKqp { -class TStreamingQueryConfig { +class TStreamingQueryConfig : public TStreamingQueryMeta { public: using TStatus = NMetadata::NModifications::IOperationsManager::TYqlConclusionStatus; using TAsyncStatus = NThreading::TFuture; - struct TColumns { - static inline constexpr char DatabaseId[] = "database_id"; - static inline constexpr char QueryPath[] = "query_path"; - static inline constexpr char State[] = "state"; - }; - - // Properties which crated during query translation - using TSqlSettings = NSQLTranslationV1::TStreamingQuerySettings; - - struct TProperties { - static inline constexpr char Run[] = "run"; - static inline constexpr char ResourcePool[] = "resource_pool"; - static inline constexpr char Force[] = "force"; - }; - static NMetadata::IClassBehaviour::TPtr GetBehaviour(); static TString GetTypeId(); diff --git a/ydb/core/kqp/gateway/behaviour/streaming_query/optimization.cpp 
b/ydb/core/kqp/gateway/behaviour/streaming_query/optimization.cpp index 450d2132af40..5192c2ae7183 100644 --- a/ydb/core/kqp/gateway/behaviour/streaming_query/optimization.cpp +++ b/ydb/core/kqp/gateway/behaviour/streaming_query/optimization.cpp @@ -39,8 +39,12 @@ bool ExploreStreamingQueryNode(TExprNode::TPtr node, TStreamingExploreCtx& res) const auto providerArg = node->ChildPtr(1); if (const auto maybeDataSource = TMaybeNode(providerArg)) { const auto dataSourceCategory = maybeDataSource.Cast().Category().Value(); - if (IsIn({NYql::PqProviderName, NYql::S3ProviderName, NYql::GenericProviderName}, dataSourceCategory)) { - res.StreamingReads += dataSourceCategory == NYql::PqProviderName; + if (dataSourceCategory == NYql::PqProviderName) { + ++res.StreamingReads; + return true; + } + + if (IsIn({NYql::S3ProviderName, NYql::GenericProviderName}, dataSourceCategory)) { return true; } @@ -60,6 +64,10 @@ bool ExploreStreamingQueryNode(TExprNode::TPtr node, TStreamingExploreCtx& res) return true; } + if (dataSinkCategory == NYql::SolomonProviderName) { + return true; + } + if (dataSinkCategory == NYql::ResultProviderName) { res.Ctx.AddError(NYql::TIssue(res.Ctx.GetPosition(node->Pos()), "Results is not allowed for streaming queries, please use INSERT to record the query result")); } else if (dataSinkCategory == NYql::KikimrProviderName) { diff --git a/ydb/core/kqp/gateway/behaviour/streaming_query/queries.cpp b/ydb/core/kqp/gateway/behaviour/streaming_query/queries.cpp index 86ae38dd3b83..cf753872e435 100644 --- a/ydb/core/kqp/gateway/behaviour/streaming_query/queries.cpp +++ b/ydb/core/kqp/gateway/behaviour/streaming_query/queries.cpp @@ -195,23 +195,6 @@ struct TEvPrivate { //// Common -NYql::TIssues AddRootIssue(const TString& message, const NYql::TIssues& issues, bool addEmptyRoot = true) { - if (!issues && !addEmptyRoot) { - return {}; - } - - NYql::TIssue rootIssue(message); - for (const auto& issue : issues) { - rootIssue.AddSubIssue(MakeIntrusive(issue)); - } - 
- return {rootIssue}; -} - -NOperationId::TOperationId OperationIdFromExecutionId(const TString& executionId) { - return NOperationId::TOperationId(ScriptExecutionOperationFromExecutionId(executionId)); -} - TString LogQueryState(const NKikimrKqp::TStreamingQueryState& state) { return TStringBuilder() << "{Status: " << NKikimrKqp::TStreamingQueryState::EStatus_Name(state.GetStatus()) @@ -327,36 +310,6 @@ class TPropertyValidator { TProperties Dst; }; -// Used for properties parsing after describing streaming query -class TStreamingQuerySettings { -public: - TStreamingQuerySettings& FromProto(const NKikimrSchemeOp::TStreamingQueryProperties& info) { - for (const auto& [name, value] : info.GetProperties()) { - if (name == TStreamingQueryConfig::TSqlSettings::QUERY_TEXT_FEATURE) { - QueryText = value; - } else if (name == TStreamingQueryConfig::TProperties::Run) { - Run = value == "true"; - } else if (name == TStreamingQueryConfig::TProperties::ResourcePool) { - ResourcePool = value; - } else { - LOG_E("Ignored unexpected property: " << name); - } - } - - return *this; - } - -private: - static TString LogPrefix() { - return TStringBuilder() << "[TStreamingQuerySettings] "; - } - -public: - TString QueryText; - bool Run = false; - TString ResourcePool; -}; - template class TActionActorBase : public TActorBootstrapped { using TBase = TActorBootstrapped; @@ -491,7 +444,7 @@ class TSchemeActorBase : public TActionActorBase { bool ScheduleRetry(NYql::TIssues issues, bool longDelay = false) { if (!RetryState) { RetryState = TRetryPolicy::GetExponentialBackoffPolicy( - [](bool longDelay){ + [](bool longDelay) { return longDelay ? 
ERetryErrorClass::LongRetry : ERetryErrorClass::ShortRetry; }, TDuration::MilliSeconds(100), @@ -885,7 +838,7 @@ class TQueryBase : public NKikimr::TQueryBase { ExecuteQuery(__func__, sql, ¶ms, txControl); } - void PersistQueryInfo(NKikimrKqp::TStreamingQueryState state, const TTxControl& txControl) { + void PersistQueryInfo(const NKikimrKqp::TStreamingQueryState& state, const TTxControl& txControl) { const TString sql = fmt::format(R"( DECLARE $database_id AS Text; DECLARE $query_path AS Text; diff --git a/ydb/core/kqp/gateway/behaviour/streaming_query/ya.make b/ydb/core/kqp/gateway/behaviour/streaming_query/ya.make index a8d98143cb60..87374b4cc62f 100644 --- a/ydb/core/kqp/gateway/behaviour/streaming_query/ya.make +++ b/ydb/core/kqp/gateway/behaviour/streaming_query/ya.make @@ -17,6 +17,7 @@ PEERDIR( ydb/core/cms/console ydb/core/kqp/common ydb/core/kqp/common/events + ydb/core/kqp/gateway/behaviour/streaming_query/common ydb/core/kqp/gateway/utils ydb/core/kqp/provider ydb/core/protos @@ -38,3 +39,7 @@ PEERDIR( YQL_LAST_ABI_VERSION() END() + +RECURSE( + common +) diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index e923c15f697f..3f920aad6c92 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -1864,6 +1864,8 @@ class TKqpHost : public IKqpHost { auto solomonState = MakeIntrusive(); + solomonState->SupportRtmrMode = false; + solomonState->WriteThroughDqIntegration = true; solomonState->Types = TypesCtx.Get(); solomonState->Gateway = FederatedQuerySetup->SolomonGateway; solomonState->CredentialsFactory = FederatedQuerySetup->CredentialsFactory; @@ -1942,6 +1944,7 @@ class TKqpHost : public IKqpHost { if (FederatedQuerySetup->PqGateway) { InitPqProvider(); } + TypesCtx->StreamLookupJoin = true; } InitPgProvider(); diff --git a/ydb/core/kqp/node_service/kqp_node_service.cpp b/ydb/core/kqp/node_service/kqp_node_service.cpp index 258ebd005655..07e88d79b359 100644 --- 
a/ydb/core/kqp/node_service/kqp_node_service.cpp +++ b/ydb/core/kqp/node_service/kqp_node_service.cpp @@ -77,7 +77,6 @@ class TKqpNodeService : public TActorBootstrapped { , CaFactory_(std::move(caFactory)) , AsyncIoFactory(std::move(asyncIoFactory)) , FederatedQuerySetup(federatedQuerySetup) - , State_(std::make_shared()) , AccountDefaultPoolInScheduler(config.GetComputeSchedulerSettings().GetAccountDefaultPool()) { if (config.HasIteratorReadsRetrySettings()) { @@ -94,6 +93,8 @@ class TKqpNodeService : public TActorBootstrapped { void Bootstrap() { LOG_I("Starting KQP Node service"); + State_ = std::make_shared(ActorContext().ActorSystem()); + // Subscribe for TableService config changes ui32 tableServiceConfigKind = (ui32) NKikimrConsole::TConfigItem::TableServiceConfigItem; Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()), diff --git a/ydb/core/kqp/node_service/kqp_node_service.h b/ydb/core/kqp/node_service/kqp_node_service.h index 3bb0e7b98f8f..b22c873b3d84 100644 --- a/ydb/core/kqp/node_service/kqp_node_service.h +++ b/ydb/core/kqp/node_service/kqp_node_service.h @@ -74,13 +74,18 @@ struct TEvKqpNode { struct TNodeServiceState : public NKikimr::NKqp::NComputeActor::IKqpNodeState { - TNodeServiceState() = default; static constexpr ui64 BucketsCount = 64; + const TActorSystem* ActorSystem = nullptr; + public: + explicit TNodeServiceState(const TActorSystem* actorSystem) + : ActorSystem(actorSystem) + {} + void OnTaskTerminate(ui64 txId, ui64 taskId, bool success) override { auto& bucket = GetStateBucketByTx(txId); - bucket.RemoveTask(txId, taskId, success); + bucket.RemoveTask(txId, taskId, success, ActorSystem); } NKqpNode::TState& GetStateBucketByTx(ui64 txId) { diff --git a/ydb/core/kqp/node_service/kqp_node_state.h b/ydb/core/kqp/node_service/kqp_node_state.h index c3c7d0b16f07..3b2fa708a81e 100644 --- a/ydb/core/kqp/node_service/kqp_node_state.h +++ b/ydb/core/kqp/node_service/kqp_node_state.h @@ -81,8 +81,7 @@ class TState { } } - TMaybe 
RemoveTask(ui64 txId, ui64 taskId, bool success) - { + TMaybe RemoveTask(ui64 txId, ui64 taskId, bool success, const TActorSystem* actorSystem) { TWriteGuard guard(RWLock); YQL_ENSURE(Requests.size() == SenderIdsByTxId.size()); const auto senders = SenderIdsByTxId.equal_range(txId); @@ -107,8 +106,7 @@ class TState { if (auto query = requestIt->second.Query) { auto removeQueryEvent = MakeHolder(); removeQueryEvent->Query = query; - const auto& actorCtx = NActors::TActorContext::AsActorContext(); - actorCtx.Send(MakeKqpSchedulerServiceId(actorCtx.SelfID.NodeId()), removeQueryEvent.Release()); + actorSystem->Send(MakeKqpSchedulerServiceId(actorSystem->NodeId), removeQueryEvent.Release()); } Requests.erase(*senderIt); diff --git a/ydb/core/kqp/opt/kqp_opt_effects.cpp b/ydb/core/kqp/opt/kqp_opt_effects.cpp index 5fa0e0a2cec9..9e2d5a103e01 100644 --- a/ydb/core/kqp/opt/kqp_opt_effects.cpp +++ b/ydb/core/kqp/opt/kqp_opt_effects.cpp @@ -525,20 +525,14 @@ bool BuildEffects(TPositionHandle pos, const TVector& effects, TKqlExternalEffect externalEffect = maybeExt.Cast(); TExprBase input = externalEffect.Input(); auto maybeStage = input.Maybe(); - if (!maybeStage) { - return false; - } + YQL_ENSURE(maybeStage, "External effect should be a DQ stage"); auto stage = maybeStage.Cast(); const auto outputsList = stage.Outputs(); - if (!outputsList) { - return false; - } + YQL_ENSURE(outputsList, "External effect DQ stage should have at least one output"); TDqStageOutputsList outputs = outputsList.Cast(); YQL_ENSURE(outputs.Size() == 1, "Multiple sinks are not supported yet"); TDqOutputAnnotationBase output = outputs.Item(0); - if (!output.Maybe()) { - return false; - } + YQL_ENSURE(TDqSink::Match(output.Raw()), "External effect DQ stage should have DQ sink as first output"); newEffect = Build(ctx, effect.Pos()) .Stage(maybeStage.Cast().Ptr()) .SinkIndex().Build("0") diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 
cb44be02dfab..22fb922a0dcd 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -41,6 +41,7 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { AddHandler(0, &TCoTake::Match, HNDL(RewriteTakeSortToTopSort)); AddHandler(0, &TCoFlatMap::Match, HNDL(RewriteSqlInToEquiJoin)); AddHandler(0, &TCoFlatMap::Match, HNDL(RewriteSqlInCompactToJoin)); + AddHandler(0, &TCoEquiJoin::Match, HNDL(RewriteStreamEquiJoinWithLookup)); AddHandler(0, &TCoEquiJoin::Match, HNDL(OptimizeEquiJoinWithCosts)); AddHandler(0, &TCoEquiJoin::Match, HNDL(RewriteEquiJoin)); AddHandler(0, &TDqJoin::Match, HNDL(JoinToIndexLookup)); @@ -167,6 +168,12 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { return output; } + TMaybeNode RewriteStreamEquiJoinWithLookup(TExprBase node, TExprContext& ctx) { + TExprBase output = DqRewriteStreamEquiJoinWithLookup(node, ctx, TypesCtx); + DumpAppliedRule("KqpRewriteStreamEquiJoinWithLookup", node.Ptr(), output.Ptr(), ctx); + return output; + } + TMaybeNode OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) { auto maxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize); auto optLevel = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel); diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp index a130f5eb9ef6..32b99a6f1a36 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp @@ -71,6 +71,7 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { AddHandler(0, &TCoExtendBase::Match, HNDL(BuildExtendStage)); AddHandler(0, &TDqJoin::Match, HNDL(RewriteRightJoinToLeft)); AddHandler(0, &TDqJoin::Match, HNDL(RewriteLeftPureJoin)); + AddHandler(0, &TDqJoin::Match, HNDL(RewriteStreamLookupJoin)); AddHandler(0, &TDqJoin::Match, HNDL(BuildJoin)); AddHandler(0, 
&TDqPrecompute::Match, HNDL(BuildPrecompute)); AddHandler(0, &TCoLMap::Match, HNDL(PushLMapToStage)); @@ -507,6 +508,14 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { return output; } + TMaybeNode RewriteStreamLookupJoin(TExprBase node, TExprContext& ctx) { + TMaybeNode output = DqRewriteStreamLookupJoin(node, ctx); + if (output) { + DumpAppliedRule("RewriteStreamLookupJoin", node.Ptr(), output.Cast().Ptr(), ctx); + } + return output; + } + template TMaybeNode BuildJoin(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) diff --git a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp index b276eacd8919..3d64d66313fa 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp @@ -200,9 +200,8 @@ class TKiSinkIntentDeterminationTransformer: public TKiSinkVisitorTransformer { } TStatus HandleModifyPermissions(TKiModifyPermissions node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "ModifyPermissions is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleCreateBackupCollection(TKiCreateBackupCollection node, TExprContext& ctx) override { @@ -242,27 +241,23 @@ class TKiSinkIntentDeterminationTransformer: public TKiSinkVisitorTransformer { } TStatus HandleCreateUser(TKiCreateUser node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "CreateUser is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleAlterUser(TKiAlterUser node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "AlterUser is not yet implemented for intent determination transformer")); - return 
TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleDropUser(TKiDropUser node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "DropUser is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleUpsertObject(TKiUpsertObject node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "UpsertObject is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleCreateObject(TKiCreateObject node, TExprContext& ctx) override { @@ -284,50 +279,42 @@ class TKiSinkIntentDeterminationTransformer: public TKiSinkVisitorTransformer { } TStatus HandleCreateGroup(TKiCreateGroup node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "CreateGroup is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleAlterGroup(TKiAlterGroup node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "AlterGroup is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleRenameGroup(TKiRenameGroup node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "RenameGroup is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleDropGroup(TKiDropGroup node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "DropGroup is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, 
node); + return TStatus::Ok; } TStatus HandlePgDropObject(TPgDropObject node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "PgDropObject is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(ctx, node); + return TStatus::Ok; } TStatus HandleCreateSecret(TKiCreateSecret node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "CreateSecret is not yet implemented for intent determination transformer")); + Y_UNUSED(ctx, node); return TStatus::Ok; } TStatus HandleAlterSecret(TKiAlterSecret node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "AlterSecret is not yet implemented for intent determination transformer")); + Y_UNUSED(ctx, node); return TStatus::Ok; } TStatus HandleDropSecret(TKiDropSecret node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "DropSecret is not yet implemented for intent determination transformer")); + Y_UNUSED(ctx, node); return TStatus::Ok; } diff --git a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp index 87e92b3e3851..f24e34923a9f 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp @@ -666,7 +666,8 @@ class TKikimrDataSource : public TDataProviderBase { node.IsCallable(TDqReadWrap::CallableName()) || node.IsCallable(TDqReadWideWrap::CallableName()) || node.IsCallable(TDqReadBlockWideWrap::CallableName()) || - node.IsCallable(TDqSource::CallableName()) + node.IsCallable(TDqSource::CallableName()) || + node.IsCallable(TDqLookupSourceWrap::CallableName()) ) ) { diff --git a/ydb/core/kqp/proxy_service/kqp_script_executions.cpp b/ydb/core/kqp/proxy_service/kqp_script_executions.cpp index 292a56c3e58a..4c19e619269c 100644 --- 
a/ydb/core/kqp/proxy_service/kqp_script_executions.cpp +++ b/ydb/core/kqp/proxy_service/kqp_script_executions.cpp @@ -9,26 +9,28 @@ #include #include #include -#include +#include +#include +#include +#include #include +#include #include -#include #include #include -#include #include #include +#include + +#include + +#include -#include -#include -#include -#include #include #include #include #include -#include #include #include @@ -51,18 +53,6 @@ constexpr TDuration DEADLINE_OFFSET = TDuration::Minutes(20); constexpr TDuration BRO_RUN_INTERVAL = TDuration::Minutes(60); constexpr ui64 MAX_TRANSIENT_ISSUES_COUNT = 10; -TString SerializeIssues(const NYql::TIssues& issues) { - NYql::TIssue root; - for (const NYql::TIssue& issue : issues) { - root.AddSubIssue(MakeIntrusive(issue)); - } - Ydb::Issue::IssueMessage rootMessage; - if (issues) { - NYql::IssueToMessage(root, &rootMessage); - } - return NProtobufJson::Proto2Json(rootMessage, NProtobufJson::TProto2JsonConfig()); -} - NYql::TIssues DeserializeIssues(const std::string& issuesSerialized) { Ydb::Issue::IssueMessage rootMessage = NProtobufJson::Json2Proto(issuesSerialized); NYql::TIssue root = NYql::IssueFromMessage(rootMessage); @@ -310,19 +300,6 @@ NKikimrKqp::EQueryAction GetActionFromExecMode(Ydb::Query::ExecMode execMode) { } } -NYql::TIssues AddRootIssue(const TString& message, const NYql::TIssues& issues, bool force = false) { - if (!issues && !force) { - return {}; - } - - NYql::TIssue rootIssue(message); - for (const auto& issue : issues) { - rootIssue.AddSubIssue(MakeIntrusive(issue)); - } - - return {rootIssue}; -} - class TCreateScriptOperationQuery : public TQueryBase { public: TCreateScriptOperationQuery(const TString& executionId, const TActorId& runScriptActorId, @@ -432,7 +409,7 @@ class TCreateScriptOperationQuery : public TQueryBase { .Utf8(token.GetUserSID()) .Build() .AddParam("$user_group_sids") - .JsonDocument(SerializeGroupSids(token.GetGroupSIDs())) + 
.JsonDocument(SequenceToJsonString(token.GetGroupSIDs())) .Build() .AddParam("$parameters") .String(SerializeParameters()) @@ -466,22 +443,6 @@ class TCreateScriptOperationQuery : public TQueryBase { } private: - static TString SerializeGroupSids(const TVector& groupSids) { - NJson::TJsonValue value; - value.SetType(NJson::EJsonValueType::JSON_ARRAY); - - NJson::TJsonValue::TArray& jsonArray = value.GetArraySafe(); - jsonArray.resize(groupSids.size()); - for (size_t i = 0; i < groupSids.size(); ++i) { - jsonArray[i] = NJson::TJsonValue(groupSids[i]); - } - - NJsonWriter::TBuf serializedGroupSids; - serializedGroupSids.WriteJsonValue(&value, false, PREC_NDIGITS, 17); - - return serializedGroupSids.Str(); - } - TString SerializeParameters() const { NJson::TJsonValue value; value.SetType(NJson::EJsonValueType::JSON_MAP); @@ -2134,7 +2095,8 @@ class TGetScriptExecutionOperationQueryActor : public TQueryBase { ast, ast_compressed, ast_compression_method, - graph_compressed IS NOT NULL AS has_graph + graph_compressed IS NOT NULL AS has_graph, + retry_state FROM `.metadata/script_executions` WHERE database = $database AND execution_id = $execution_id AND (expire_at > CurrentUtcTimestamp() OR expire_at IS NULL); @@ -2245,6 +2207,16 @@ class TGetScriptExecutionOperationQueryActor : public TQueryBase { ScriptExecutionRunnerActorIdFromString(*runScriptActorIdString, Response->RunScriptActorId); } + if (const auto& serializedRetryState = result.ColumnParser("retry_state").GetOptionalJsonDocument()) { + NJson::TJsonValue value; + if (!NJson::ReadJsonTree(*serializedRetryState, &value)) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Retry state is corrupted"); + return; + } + + NProtobufJson::Json2Proto(value, Response->RetryState); + } + Response->StateSaved = result.ColumnParser("has_graph").GetBool(); } @@ -2263,14 +2235,14 @@ class TGetScriptExecutionOperationQueryActor : public TQueryBase { return; } + Response->LeaseDeadline = *leaseDeadline; Response->LeaseGeneration = 
*leaseGenerationInDatabase; LeaseStatus = static_cast(result.ColumnParser("lease_state").GetOptionalInt32().value_or(static_cast(ELeaseState::ScriptRunning))); + Response->WaitRetry = *LeaseStatus == ELeaseState::WaitRetry; if (*leaseDeadline < StartActorTime) { Response->LeaseExpired = true; - if (*LeaseStatus == ELeaseState::WaitRetry) { - Response->RetryRequired = true; - } else { + if (*LeaseStatus != ELeaseState::WaitRetry) { Response->FinalizationStatus = EFinalizationStatus::FS_ROLLBACK; } } @@ -2391,16 +2363,23 @@ class TGetScriptExecutionOperationActor : public TCheckLeaseStatusActorBase { << ", Issues: " << Response->Get()->Issues.ToOneLineString() << ", Ready: " << Response->Get()->Ready << ", LeaseExpired: " << Response->Get()->LeaseExpired - << ", RetryRequired: " << Response->Get()->RetryRequired + << ", WaitRetry: " << Response->Get()->WaitRetry << (Response->Get()->FinalizationStatus ? (TStringBuilder() << ", FinalizationStatus: " << static_cast(*Response->Get()->FinalizationStatus)) : TStringBuilder()) << ", RunScriptActorId: " << Response->Get()->RunScriptActorId << ", LeaseGeneration: " << Response->Get()->LeaseGeneration); + if (!Request->Get()->CheckLeaseState) { + Reply(); + return; + } + const auto& event = *Response->Get(); - if (event.RetryRequired) { - RestartScriptExecution(event.LeaseGeneration); - } else if (event.LeaseExpired) { - StartLeaseChecking(event.RunScriptActorId, event.LeaseGeneration); + if (event.LeaseExpired) { + if (event.WaitRetry) { + RestartScriptExecution(event.LeaseGeneration); + } else { + StartLeaseChecking(event.RunScriptActorId, event.LeaseGeneration); + } } else if (const auto finalizationStatus = event.FinalizationStatus) { TMaybe execStatus; if (Response->Get()->Ready) { @@ -2419,11 +2398,16 @@ class TGetScriptExecutionOperationActor : public TCheckLeaseStatusActorBase { TMaybe metadata; metadata.ConstructInPlace().PackFrom(event.Metadata); + const auto& retryState = event.RetryState; + Send(Request->Sender, 
new TEvGetScriptExecutionOperationResponse(event.Status, { .Metadata = std::move(metadata), .Ready = event.Ready, .StateSaved = event.StateSaved, - }, event.Issues)); + .RetryCount = retryState.GetRetryCounter(), + .LastFailAt = NProtoInterop::CastFromProto(retryState.GetRetryCounterUpdatedAt()), + .SuspendedUntil = event.WaitRetry ? event.LeaseDeadline : TInstant::Zero(), + }, event.Issues), 0, Request->Cookie); PassAway(); } @@ -2763,8 +2747,15 @@ class TResetScriptExecutionRetriesQueryActor : public TQueryBase { // Script execution info if (NYdb::TResultSetParser result(ResultSets[0]); result.TryNextRow()) { - if (result.ColumnParser("retry_state").GetOptionalJsonDocument()) { - ResetRetryState = true; + if (const auto& serializedRetryState = result.ColumnParser("retry_state").GetOptionalJsonDocument()) { + NJson::TJsonValue value; + if (!NJson::ReadJsonTree(*serializedRetryState, &value)) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Retry state is corrupted"); + return; + } + + NProtobufJson::Json2Proto(value, RetryState.emplace()); + RetryState->ClearRetryPolicyMapping(); } } @@ -2775,7 +2766,7 @@ class TResetScriptExecutionRetriesQueryActor : public TQueryBase { } } - if (ResetRetryState || DropLease) { + if (RetryState || DropLease) { DropRetryState(); } else { Finish(); @@ -2787,14 +2778,15 @@ class TResetScriptExecutionRetriesQueryActor : public TQueryBase { -- TResetScriptExecutionRetriesQueryActor::DropRetryState DECLARE $database AS Text; DECLARE $execution_id AS Text; + DECLARE $retry_state AS Optional; )"; - if (ResetRetryState) { + if (RetryState) { sql += R"( UPSERT INTO `.metadata/script_executions` ( database, execution_id, retry_state ) VALUES ( - $database, $execution_id, NULL + $database, $execution_id, $retry_state ); )"; } @@ -2813,6 +2805,9 @@ class TResetScriptExecutionRetriesQueryActor : public TQueryBase { .Build() .AddParam("$execution_id") .Utf8(ExecutionId) + .Build() + .AddParam("$retry_state") + .OptionalJsonDocument(RetryState ? 
std::optional(NProtobufJson::Proto2Json(*RetryState, NProtobufJson::TProto2JsonConfig())) : std::nullopt) .Build(); SetQueryResultHandler(&TResetScriptExecutionRetriesQueryActor::OnQueryResult, "Drop retry state"); @@ -2830,7 +2825,7 @@ class TResetScriptExecutionRetriesQueryActor : public TQueryBase { private: const TString Database; const TString ExecutionId; - bool ResetRetryState = false; + std::optional RetryState; bool DropLease = false; }; @@ -3606,10 +3601,12 @@ class TSaveScriptExternalEffectActor : public TQueryBase { .Utf8(Request.CustomerSuppliedId) .Build() .AddParam("$script_sinks") - .JsonDocument(SerializeSinks(Request.Sinks)) + .JsonDocument(SequenceToJsonString(Request.Sinks.size(), [&](ui64 i, NJson::TJsonValue& value) { + SerializeBinaryProto(Request.Sinks[i], value); + })) .Build() .AddParam("$script_secret_names") - .JsonDocument(SerializeSecretNames(Request.SecretNames)) + .JsonDocument(SequenceToJsonString(Request.SecretNames)) .Build() .AddParam("$lease_generation") .Int64(LeaseGeneration) @@ -3626,39 +3623,6 @@ class TSaveScriptExternalEffectActor : public TQueryBase { Send(Owner, new TEvSaveScriptExternalEffectResponse(status, std::move(issues))); } -private: - static TString SerializeSinks(const std::vector& sinks) { - NJson::TJsonValue value; - value.SetType(NJson::EJsonValueType::JSON_ARRAY); - - NJson::TJsonValue::TArray& jsonArray = value.GetArraySafe(); - jsonArray.resize(sinks.size()); - for (size_t i = 0; i < sinks.size(); ++i) { - SerializeBinaryProto(sinks[i], jsonArray[i]); - } - - NJsonWriter::TBuf serializedSinks; - serializedSinks.WriteJsonValue(&value, false, PREC_NDIGITS, 17); - - return serializedSinks.Str(); - } - - static TString SerializeSecretNames(const std::vector& secretNames) { - NJson::TJsonValue value; - value.SetType(NJson::EJsonValueType::JSON_ARRAY); - - NJson::TJsonValue::TArray& jsonArray = value.GetArraySafe(); - jsonArray.resize(secretNames.size()); - for (size_t i = 0; i < secretNames.size(); ++i) { - 
jsonArray[i] = NJson::TJsonValue(secretNames[i]); - } - - NJsonWriter::TBuf serializedSecretNames; - serializedSecretNames.WriteJsonValue(&value, false, PREC_NDIGITS, 17); - - return serializedSecretNames.Str(); - } - private: const TEvSaveScriptExternalEffectRequest::TDescription Request; const i64 LeaseGeneration; @@ -3678,7 +3642,7 @@ LeaseFinalizationInfo GetLeaseFinalizationSql(TInstant now, Ydb::StatusIds::Stat retryState.GetRetryRate() ); - const bool retry = retryLimiter.UpdateOnRetry(TInstant::Now(), policy); + const bool retry = policy && retryLimiter.UpdateOnRetry(TInstant::Now(), *policy); retryState.SetRetryCounter(retryLimiter.RetryCount); *retryState.MutableRetryCounterUpdatedAt() = NProtoInterop::CastToProto(now); retryState.SetRetryRate(retryLimiter.RetryRate); @@ -3701,10 +3665,10 @@ LeaseFinalizationInfo GetLeaseFinalizationSql(TInstant now, Ydb::StatusIds::Stat .NewLeaseState = ELeaseState::WaitRetry }; } else { - if (retryState.RetryPolicyMappingSize()) { + if (policy) { TStringBuilder finalIssue; finalIssue << "Script execution operation failed with code " << Ydb::StatusIds::StatusCode_Name(status); - if (policy.RetryCount) { + if (policy->RetryCount) { finalIssue << " (" << retryLimiter.LastError << ")"; } issues = AddRootIssue(finalIssue << " at " << now, issues); diff --git a/ydb/core/kqp/proxy_service/kqp_script_executions_ut.cpp b/ydb/core/kqp/proxy_service/kqp_script_executions_ut.cpp index ae25400a6016..b580319e4676 100644 --- a/ydb/core/kqp/proxy_service/kqp_script_executions_ut.cpp +++ b/ydb/core/kqp/proxy_service/kqp_script_executions_ut.cpp @@ -649,15 +649,19 @@ Y_UNIT_TEST_SUITE(TestScriptExecutionsUtils) { mapping.MutableBackoffPolicy()->SetRetryRateLimit(84); } - const auto checkStatus = [&](Ydb::StatusIds::StatusCode status, ui64 expectedRateLimit) { + const auto checkStatus = [&](Ydb::StatusIds::StatusCode status, std::optional expectedRateLimit) { const auto policy = TRetryPolicyItem::FromProto(status, retryState); - 
UNIT_ASSERT_VALUES_EQUAL(policy.RetryCount, expectedRateLimit); + if (expectedRateLimit) { + UNIT_ASSERT_VALUES_EQUAL(policy->RetryCount, *expectedRateLimit); + } else { + UNIT_ASSERT(!policy); + } }; checkStatus(Ydb::StatusIds::SCHEME_ERROR, 42); checkStatus(Ydb::StatusIds::UNAVAILABLE, 84); checkStatus(Ydb::StatusIds::INTERNAL_ERROR, 84); - checkStatus(Ydb::StatusIds::BAD_REQUEST, 0); + checkStatus(Ydb::StatusIds::BAD_REQUEST, std::nullopt); } Y_UNIT_TEST(TestRetryLimiter) { diff --git a/ydb/core/kqp/proxy_service/script_executions_utils/kqp_script_execution_retries.cpp b/ydb/core/kqp/proxy_service/script_executions_utils/kqp_script_execution_retries.cpp index 924d5d1355bc..54c9534d89fb 100644 --- a/ydb/core/kqp/proxy_service/script_executions_utils/kqp_script_execution_retries.cpp +++ b/ydb/core/kqp/proxy_service/script_executions_utils/kqp_script_execution_retries.cpp @@ -11,7 +11,7 @@ TRetryPolicyItem::TRetryPolicyItem(ui64 retryCount, ui64 retryLimit, TDuration r , BackoffPeriod(backoffPeriod) {} -TRetryPolicyItem TRetryPolicyItem::FromProto(Ydb::StatusIds::StatusCode status, const NKikimrKqp::TScriptExecutionRetryState& retryState) { +std::optional TRetryPolicyItem::FromProto(Ydb::StatusIds::StatusCode status, const NKikimrKqp::TScriptExecutionRetryState& retryState) { for (const auto& mapping : retryState.GetRetryPolicyMapping()) { for (const auto mappingStatus : mapping.GetStatusCode()) { if (mappingStatus != status) { @@ -35,7 +35,7 @@ TRetryPolicyItem TRetryPolicyItem::FromProto(Ydb::StatusIds::StatusCode status, } } - return TRetryPolicyItem(); + return std::nullopt; } TRetryLimiter::TRetryLimiter(ui64 retryCount, TInstant retryCounterUpdatedAt, double retryRate) diff --git a/ydb/core/kqp/proxy_service/script_executions_utils/kqp_script_execution_retries.h b/ydb/core/kqp/proxy_service/script_executions_utils/kqp_script_execution_retries.h index b7b8a61b5608..663219e6f13c 100644 --- 
a/ydb/core/kqp/proxy_service/script_executions_utils/kqp_script_execution_retries.h +++ b/ydb/core/kqp/proxy_service/script_executions_utils/kqp_script_execution_retries.h @@ -14,7 +14,7 @@ class TRetryPolicyItem { TRetryPolicyItem(ui64 retryCount, ui64 retryLimit, TDuration retryPeriod, TDuration backoffPeriod); - static TRetryPolicyItem FromProto(Ydb::StatusIds::StatusCode status, const NKikimrKqp::TScriptExecutionRetryState& mapping); + static std::optional FromProto(Ydb::StatusIds::StatusCode status, const NKikimrKqp::TScriptExecutionRetryState& mapping); ui64 RetryCount = 0; ui64 RetryLimit = 0; diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 76b7c3a63f43..84fc1a5d2c3d 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -1,5 +1,6 @@ #include "kqp_query_compiler.h" +#include #include #include #include @@ -8,24 +9,23 @@ #include #include #include -#include - -#include #include +#include #include - -#include #include #include #include -#include -#include -#include -#include +#include #include #include + +#include #include #include +#include +#include +#include +#include namespace NKikimr { @@ -589,6 +589,14 @@ TIssues ApplyOverridePlannerSettings(const TString& overridePlannerJson, NKqpPro return issues; } +TStringBuf RemoveJoinAliases(TStringBuf keyName) { + if (const auto idx = keyName.find_last_of('.'); idx != TString::npos) { + return keyName.substr(idx + 1); + } + + return keyName; +} + class TKqpQueryCompiler : public IKqpQueryCompiler { public: TKqpQueryCompiler(const TString& cluster, const TIntrusivePtr tablesData, @@ -795,7 +803,7 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { auto connection = input.Cast(); auto& protoInput = *stageProto.AddInputs(); - FillConnection(connection, stagesMap, protoInput, ctx, tablesMap, physicalStageByID); + FillConnection(connection, stagesMap, protoInput, ctx, 
tablesMap, physicalStageByID, &stage, inputIndex); protoInput.SetInputIndex(inputIndex); } } @@ -1017,7 +1025,7 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { auto& resultProto = *txProto.AddResults(); auto& connectionProto = *resultProto.MutableConnection(); - FillConnection(connection, stagesMap, connectionProto, ctx, tablesMap, physicalStageByID); + FillConnection(connection, stagesMap, connectionProto, ctx, tablesMap, physicalStageByID, nullptr, 0); const TTypeAnnotationNode* itemType = nullptr; switch (connectionProto.GetTypeCase()) { @@ -1452,7 +1460,9 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { NKqpProto::TKqpPhyConnection& connectionProto, TExprContext& ctx, THashMap>& tablesMap, - THashMap& physicalStageByID + THashMap& physicalStageByID, + const TDqPhyStage* stage, + ui32 inputIndex ) { auto inputStageIndex = stagesMap.FindPtr(connection.Output().Stage().Ref().UniqueId()); YQL_ENSURE(inputStageIndex, "stage #" << connection.Output().Stage().Ref().UniqueId() << " not found in stages map: " @@ -1819,6 +1829,59 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { return; } + if (auto maybeDqSourceStreamLookup = connection.Maybe()) { + const auto streamLookup = maybeDqSourceStreamLookup.Cast(); + const auto lookupSourceWrap = streamLookup.RightInput().Cast(); + + const TStringBuf dataSourceCategory = lookupSourceWrap.DataSource().Category(); + const auto provider = TypesCtx.DataSourceMap.find(dataSourceCategory); + YQL_ENSURE(provider != TypesCtx.DataSourceMap.end(), "Unsupported data source category: \"" << dataSourceCategory << "\""); + NYql::IDqIntegration* dqIntegration = provider->second->GetDqIntegration(); + YQL_ENSURE(dqIntegration, "Unsupported dq source for provider: \"" << dataSourceCategory << "\""); + + auto& dqSourceLookupCn = *connectionProto.MutableDqSourceStreamLookup(); + auto& lookupSource = *dqSourceLookupCn.MutableLookupSource(); + auto& lookupSourceSettings = *lookupSource.MutableSettings(); + auto& 
lookupSourceType = *lookupSource.MutableType(); + dqIntegration->FillLookupSourceSettings(lookupSourceWrap.Ref(), lookupSourceSettings, lookupSourceType); + YQL_ENSURE(!lookupSourceSettings.type_url().empty(), "Data source provider \"" << dataSourceCategory << "\" did't fill dq source settings for its dq source node"); + YQL_ENSURE(lookupSourceType, "Data source provider \"" << dataSourceCategory << "\" did't fill dq source settings type for its dq source node"); + + const auto& streamLookupOutput = streamLookup.Output(); + const auto connectionInputRowType = GetSeqItemType(streamLookupOutput.Ref().GetTypeAnn()); + YQL_ENSURE(connectionInputRowType->GetKind() == ETypeAnnotationKind::Struct); + const auto connectionOutputRowType = GetSeqItemType(streamLookup.Ref().GetTypeAnn()); + YQL_ENSURE(connectionOutputRowType->GetKind() == ETypeAnnotationKind::Struct); + YQL_ENSURE(stage); + dqSourceLookupCn.SetConnectionInputRowType(NYql::NCommon::GetSerializedTypeAnnotation(connectionInputRowType)); + dqSourceLookupCn.SetConnectionOutputRowType(NYql::NCommon::GetSerializedTypeAnnotation(connectionOutputRowType)); + dqSourceLookupCn.SetLookupRowType(NYql::NCommon::GetSerializedTypeAnnotation(lookupSourceWrap.RowType().Ref().GetTypeAnn())); + dqSourceLookupCn.SetInputStageRowType(NYql::NCommon::GetSerializedTypeAnnotation(GetSeqItemType(streamLookupOutput.Stage().Program().Ref().GetTypeAnn()))); + dqSourceLookupCn.SetOutputStageRowType(NYql::NCommon::GetSerializedTypeAnnotation(GetSeqItemType(stage->Program().Args().Arg(inputIndex).Ref().GetTypeAnn()))); + + const TString leftLabel(streamLookup.LeftLabel()); + dqSourceLookupCn.SetLeftLabel(leftLabel); + dqSourceLookupCn.SetRightLabel(streamLookup.RightLabel().StringValue()); + dqSourceLookupCn.SetJoinType(streamLookup.JoinType().StringValue()); + dqSourceLookupCn.SetCacheLimit(FromString(streamLookup.MaxCachedRows())); + dqSourceLookupCn.SetCacheTtlSeconds(FromString(streamLookup.TTL())); + 
dqSourceLookupCn.SetMaxDelayedRows(FromString(streamLookup.MaxDelayedRows())); + + if (const auto maybeMultiget = streamLookup.IsMultiget()) { + dqSourceLookupCn.SetIsMultiGet(FromString(maybeMultiget.Cast())); + } + + for (const auto& key : streamLookup.LeftJoinKeyNames()) { + *dqSourceLookupCn.AddLeftJoinKeyNames() = leftLabel ? RemoveJoinAliases(key) : key; + } + + for (const auto& key : streamLookup.RightJoinKeyNames()) { + *dqSourceLookupCn.AddRightJoinKeyNames() = RemoveJoinAliases(key); + } + + return; + } + YQL_ENSURE(false, "Unexpected connection type: " << connection.CallableName()); } diff --git a/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp b/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp index b2818d2f3787..d62f031529be 100644 --- a/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp +++ b/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp @@ -2,17 +2,18 @@ #include #include +#include #include #include #include #include #include -#include -#include #include #include #include #include +#include +#include #include #include @@ -39,19 +40,6 @@ constexpr ui64 MIN_SAVE_RESULT_BATCH_SIZE = 5_MB; constexpr i32 MIN_SAVE_RESULT_BATCH_ROWS = 5000; constexpr ui64 RUN_SCRIPT_ACTOR_BUFFER_SIZE = 40_MB; -NYql::TIssues AddRootIssue(const TString& message, const NYql::TIssues& issues, bool addEmptyRoot = false) { - if (!issues && !addEmptyRoot) { - return {}; - } - - NYql::TIssue rootIssue(message); - for (const auto& issue : issues) { - rootIssue.AddSubIssue(MakeIntrusive(issue)); - } - - return {rootIssue}; -} - struct TProducerState { TMaybe LastSeqNo; i64 AckedFreeSpaceBytes = 0; @@ -581,6 +569,8 @@ class TRunScriptActor : public NActors::TActorBootstrapped { void Handle(TEvKqpExecuter::TEvExecuterProgress::TPtr& ev) { LOG_T("Got script progress from " << ev->Sender); const auto& record = ev->Get()->Record; + QueryPlan = record.GetQueryPlan(); + QueryAst = record.GetQueryAst(); UpdateScriptProgress(record.GetQueryPlan(), 
record.GetQueryAst()); } diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index e10a49ed5956..010f2fa4d02c 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -129,6 +129,11 @@ TVector SyntaxV1Settings() { return {setting}; } +TTestLogSettings& TTestLogSettings::AddLogPriority(NKikimrServices::EServiceKikimr service, NLog::EPriority priority) { + LogPriorities.emplace(service, priority); + return *this; +} + TKikimrRunner::TKikimrRunner(const TKikimrSettings& settings) { EnableYDBBacktraceFormat(); @@ -651,18 +656,22 @@ static TMaybe ParseLogLevel(const TString& level) { } } -void TKikimrRunner::SetupLogLevelFromTestParam(NKikimrServices::EServiceKikimr service) { +bool TKikimrRunner::SetupLogLevelFromTestParam(NKikimrServices::EServiceKikimr service) { if (const TString paramForService = GetTestParam(TStringBuilder() << "KQP_LOG_" << NKikimrServices::EServiceKikimr_Name(service))) { if (const TMaybe level = ParseLogLevel(paramForService)) { Server->GetRuntime()->SetLogPriority(service, *level); - return; + return true; } } + if (const TString commonParam = GetTestParam("KQP_LOG")) { if (const TMaybe level = ParseLogLevel(commonParam)) { Server->GetRuntime()->SetLogPriority(service, *level); + return true; } } + + return false; } void TKikimrRunner::Initialize(const TKikimrSettings& settings) { @@ -672,39 +681,21 @@ void TKikimrRunner::Initialize(const TKikimrSettings& settings) { // For example: // --test-param KQP_LOG=TRACE // --test-param KQP_LOG_FLAT_TX_SCHEMESHARD=debug - SetupLogLevelFromTestParam(NKikimrServices::FLAT_TX_SCHEMESHARD); - SetupLogLevelFromTestParam(NKikimrServices::KQP_YQL); - SetupLogLevelFromTestParam(NKikimrServices::TX_DATASHARD); - SetupLogLevelFromTestParam(NKikimrServices::TX_COORDINATOR); - SetupLogLevelFromTestParam(NKikimrServices::KQP_COMPUTE); - SetupLogLevelFromTestParam(NKikimrServices::KQP_TASKS_RUNNER); - 
SetupLogLevelFromTestParam(NKikimrServices::KQP_EXECUTER); - SetupLogLevelFromTestParam(NKikimrServices::TX_PROXY_SCHEME_CACHE); - SetupLogLevelFromTestParam(NKikimrServices::TX_PROXY); - SetupLogLevelFromTestParam(NKikimrServices::SCHEME_BOARD_REPLICA); - SetupLogLevelFromTestParam(NKikimrServices::KQP_WORKER); - SetupLogLevelFromTestParam(NKikimrServices::KQP_SESSION); - SetupLogLevelFromTestParam(NKikimrServices::TABLET_EXECUTOR); - SetupLogLevelFromTestParam(NKikimrServices::KQP_SLOW_LOG); - SetupLogLevelFromTestParam(NKikimrServices::KQP_PROXY); - SetupLogLevelFromTestParam(NKikimrServices::KQP_COMPILE_SERVICE); - SetupLogLevelFromTestParam(NKikimrServices::KQP_COMPILE_ACTOR); - SetupLogLevelFromTestParam(NKikimrServices::KQP_COMPILE_REQUEST); - SetupLogLevelFromTestParam(NKikimrServices::KQP_GATEWAY); - SetupLogLevelFromTestParam(NKikimrServices::RPC_REQUEST); - SetupLogLevelFromTestParam(NKikimrServices::KQP_RESOURCE_MANAGER); - SetupLogLevelFromTestParam(NKikimrServices::KQP_NODE); - SetupLogLevelFromTestParam(NKikimrServices::KQP_BLOBS_STORAGE); - SetupLogLevelFromTestParam(NKikimrServices::KQP_WORKLOAD_SERVICE); - SetupLogLevelFromTestParam(NKikimrServices::TX_COLUMNSHARD); - SetupLogLevelFromTestParam(NKikimrServices::TX_COLUMNSHARD_SCAN); - SetupLogLevelFromTestParam(NKikimrServices::LOCAL_PGWIRE); - SetupLogLevelFromTestParam(NKikimrServices::SSA_GRAPH_EXECUTION); - SetupLogLevelFromTestParam(NKikimrServices::STREAMS_CHECKPOINT_COORDINATOR); - SetupLogLevelFromTestParam(NKikimrServices::STREAMS_STORAGE_SERVICE); - SetupLogLevelFromTestParam(NKikimrServices::YDB_SDK); - SetupLogLevelFromTestParam(NKikimrServices::DISCOVERY); - SetupLogLevelFromTestParam(NKikimrServices::DISCOVERY_CACHE); + auto descriptor = NKikimrServices::EServiceKikimr_descriptor(); + for (i32 i = 0; i < descriptor->value_count(); ++i) { + const auto service = static_cast(descriptor->value(i)->number()); + if (SetupLogLevelFromTestParam(service)) { + continue; + } + + if (const auto& 
logSettings = settings.LogSettings) { + if (const auto it = logSettings->LogPriorities.find(service); it != logSettings->LogPriorities.end()) { + Server->GetRuntime()->SetLogPriority(service, it->second); + } else { + Server->GetRuntime()->SetLogPriority(service, settings.LogSettings->DefaultLogPriority); + } + } + } RunCall([this, domain = settings.DomainRoot]{ this->Client->InitRootScheme(domain); diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.h b/ydb/core/kqp/ut/common/kqp_ut_common.h index c5433611cdcd..f7dea52cb984 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.h +++ b/ydb/core/kqp/ut/common/kqp_ut_common.h @@ -40,6 +40,13 @@ extern const TString EXPECTED_EIGHTSHARD_VALUE1; TVector SyntaxV1Settings(); +struct TTestLogSettings { + NLog::EPriority DefaultLogPriority = NLog::PRI_WARN; + std::unordered_map LogPriorities; + + TTestLogSettings& AddLogPriority(NKikimrServices::EServiceKikimr service, NLog::EPriority priority); +}; + struct TKikimrSettings: public TTestFeatureFlagsHolder { private: void InitDefaultConfig() { @@ -88,6 +95,7 @@ struct TKikimrSettings: public TTestFeatureFlagsHolder { TMaybe GrpcServerOptions; bool EnableStorageProxy = false; TDuration CheckpointPeriod = TDuration::MilliSeconds(200); + std::optional LogSettings; TKikimrSettings() { InitDefaultConfig(); @@ -128,6 +136,7 @@ struct TKikimrSettings: public TTestFeatureFlagsHolder { TKikimrSettings& SetGrpcServerOptions(const NYdbGrpc::TServerOptions& grpcServerOptions) { GrpcServerOptions = grpcServerOptions; return *this; }; TKikimrSettings& SetEnableStorageProxy(bool value) { EnableStorageProxy = value; return *this; }; TKikimrSettings& SetCheckpointPeriod(TDuration value) { CheckpointPeriod = value; return *this; }; + TKikimrSettings& SetLogSettings(TTestLogSettings value) { LogSettings = value; return *this; }; }; class TKikimrRunner { @@ -207,7 +216,7 @@ class TKikimrRunner { void Initialize(const TKikimrSettings& settings); void WaitForKqpProxyInit(); void CreateSampleTables(); - 
void SetupLogLevelFromTestParam(NKikimrServices::EServiceKikimr service); + bool SetupLogLevelFromTestParam(NKikimrServices::EServiceKikimr service); private: THolder ServerSettings; diff --git a/ydb/core/kqp/ut/federated_query/common/common.cpp b/ydb/core/kqp/ut/federated_query/common/common.cpp index 5c1b13d09495..85383f128ddb 100644 --- a/ydb/core/kqp/ut/federated_query/common/common.cpp +++ b/ydb/core/kqp/ut/federated_query/common/common.cpp @@ -86,30 +86,40 @@ namespace NKikimr::NKqp::NFederatedQueryTest { auto settings = TKikimrSettings(*appConfig); NYql::IHTTPGateway::TPtr httpGateway; + const auto& queryServiceConfig = appConfig->GetQueryServiceConfig(); if (initializeHttpGateway) { - httpGateway = MakeHttpGateway(appConfig->GetQueryServiceConfig().GetHttpGateway(), settings.CountersRoot); + httpGateway = MakeHttpGateway(queryServiceConfig.GetHttpGateway(), settings.CountersRoot); } auto driver = std::make_shared(NYdb::TDriverConfig()); + const auto& s3Config = queryServiceConfig.GetS3(); + const auto& solomonConfig = queryServiceConfig.GetSolomon(); auto federatedQuerySetupFactory = std::make_shared( httpGateway, connectorClient, options.CredentialsFactory, databaseAsyncResolver, - appConfig->GetQueryServiceConfig().GetS3(), - appConfig->GetQueryServiceConfig().GetGeneric(), - appConfig->GetQueryServiceConfig().GetYt(), + s3Config, + queryServiceConfig.GetGeneric(), + queryServiceConfig.GetYt(), nullptr, - appConfig->GetQueryServiceConfig().GetSolomon(), + solomonConfig, + NYql::CreateSolomonGateway(solomonConfig), nullptr, - nullptr, - NYql::NDq::CreateReadActorFactoryConfig(appConfig->GetQueryServiceConfig().GetS3()), + NYql::NDq::CreateReadActorFactoryConfig(s3Config), nullptr, NYql::TPqGatewayConfig{}, options.PqGateway ? 
options.PqGateway : NKqp::MakePqGateway(driver, NYql::TPqGatewayConfig{}), nullptr, driver); + auto logSettings = options.LogSettings; + logSettings.DefaultLogPriority = std::max(NLog::PRI_NOTICE, logSettings.DefaultLogPriority); + logSettings + .AddLogPriority(NKikimrServices::KQP_EXECUTER, NLog::PRI_INFO) + .AddLogPriority(NKikimrServices::KQP_PROXY, NLog::PRI_DEBUG) + .AddLogPriority(NKikimrServices::KQP_COMPUTE, NLog::PRI_INFO); + const auto& kqpSettings = appConfig->GetKQPConfig().GetSettings(); settings .SetFeatureFlags(featureFlags) @@ -120,26 +130,12 @@ namespace NKikimr::NKqp::NFederatedQueryTest { .SetDomainRoot(options.DomainRoot) .SetNodeCount(options.NodeCount) .SetEnableStorageProxy(true) - .SetCheckpointPeriod(options.CheckpointPeriod); + .SetCheckpointPeriod(options.CheckpointPeriod) + .SetLogSettings(std::move(logSettings)); settings.EnableScriptExecutionBackgroundChecks = options.EnableScriptExecutionBackgroundChecks; - auto kikimr = std::make_shared(settings); - - if (GetTestParam("DEFAULT_LOG", "enabled") == "enabled") { - auto& runtime = *kikimr->GetTestServer().GetRuntime(); - - const auto descriptor = NKikimrServices::EServiceKikimr_descriptor(); - for (i64 i = 0; i < descriptor->value_count(); ++i) { - runtime.SetLogPriority(static_cast(descriptor->value(i)->number()), NLog::PRI_NOTICE); - } - - runtime.SetLogPriority(NKikimrServices::KQP_EXECUTER, NLog::PRI_INFO); - runtime.SetLogPriority(NKikimrServices::KQP_PROXY, NLog::PRI_DEBUG); - runtime.SetLogPriority(NKikimrServices::KQP_COMPUTE, NLog::PRI_INFO); - } - - return kikimr; + return std::make_shared(settings); } class TStaticCredentialsProvider: public NYdb::ICredentialsProvider { diff --git a/ydb/core/kqp/ut/federated_query/common/common.h b/ydb/core/kqp/ut/federated_query/common/common.h index 6012996d5555..886c64dcc6ec 100644 --- a/ydb/core/kqp/ut/federated_query/common/common.h +++ b/ydb/core/kqp/ut/federated_query/common/common.h @@ -25,6 +25,7 @@ namespace 
NKikimr::NKqp::NFederatedQueryTest { bool EnableScriptExecutionBackgroundChecks = true; TIntrusivePtr PqGateway; TDuration CheckpointPeriod = TDuration::MilliSeconds(200); + TTestLogSettings LogSettings; }; std::shared_ptr MakeKikimrRunner( diff --git a/ydb/core/kqp/ut/federated_query/datastreams/datastreams_ut.cpp b/ydb/core/kqp/ut/federated_query/datastreams/datastreams_ut.cpp index e607a32f960f..2783f12c1dc6 100644 --- a/ydb/core/kqp/ut/federated_query/datastreams/datastreams_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/datastreams/datastreams_ut.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,27 @@ struct TScriptQuerySettings { TDuration Timeout = TDuration::Seconds(30); }; +struct TColumn { + TString Name; + Ydb::Type::PrimitiveTypeId Type; +}; + +struct TMockConnectorTableDescriptionSettings { + TString TableName; + std::vector Columns; + ui64 DescribeCount = 1; + ui64 ListSplitsCount = 1; + bool ValidateListSplitsArgs = true; +}; + +struct TMockConnectorReadSplitsSettings { + TString TableName; + std::vector Columns; + ui64 NumberReadSplits; + bool ValidateReadSplitsArgs = true; + std::function()> ResultFactory; +}; + class TStreamingTestFixture : public NUnitTest::TBaseFixture { using TBase = NUnitTest::TBaseFixture; @@ -85,16 +107,23 @@ class TStreamingTestFixture : public NUnitTest::TBaseFixture { AppConfig.emplace(); } - AppConfig->MutableFeatureFlags()->SetEnableStreamingQueries(true); + auto& featureFlags = *AppConfig->MutableFeatureFlags(); + featureFlags.SetEnableStreamingQueries(true); + featureFlags.SetEnableSchemaSecrets(UseSchemaSecrets()); auto& queryServiceConfig = *AppConfig->MutableQueryServiceConfig(); queryServiceConfig.SetEnableMatchRecognize(true); queryServiceConfig.SetProgressStatsPeriodMs(1000); + LogSettings + .AddLogPriority(NKikimrServices::STREAMS_STORAGE_SERVICE, NLog::PRI_DEBUG) + .AddLogPriority(NKikimrServices::STREAMS_CHECKPOINT_COORDINATOR, NLog::PRI_DEBUG); + Kikimr = 
MakeKikimrRunner(true, ConnectorClient, nullptr, AppConfig, NYql::NDq::CreateS3ActorsFactory(), { .CredentialsFactory = CreateCredentialsFactory(), .PqGateway = PqGateway, .CheckpointPeriod = CheckpointPeriod, + .LogSettings = LogSettings, }); if (GetTestParam("DEFAULT_LOG", "enabled") == "enabled") { @@ -221,16 +250,16 @@ class TStreamingTestFixture : public NUnitTest::TBaseFixture { } } - void ReadTopicMessage(const TString& topicName, const TString& expectedMessage, TDuration disposition = TDuration::Seconds(100)) { - ReadTopicMessages(topicName, {expectedMessage}, disposition); + void ReadTopicMessage(const TString& topicName, const TString& expectedMessage, TInstant disposition = TInstant::Now() - TDuration::Seconds(100), bool sort = false) { + ReadTopicMessages(topicName, {expectedMessage}, disposition, sort); } - void ReadTopicMessages(const TString& topicName, const TVector& expectedMessages, TDuration disposition = TDuration::Seconds(100)) { + void ReadTopicMessages(const TString& topicName, const TVector& expectedMessages, TInstant disposition = TInstant::Now() - TDuration::Seconds(100), bool sort = false) { NTopic::TReadSessionSettings readSettings; readSettings .WithoutConsumer() .AppendTopics( - NTopic::TTopicReadSettings(topicName).ReadFromTimestamp(TInstant::Now() - disposition) + NTopic::TTopicReadSettings(topicName).ReadFromTimestamp(disposition) .AppendPartitionIds(0) ); @@ -259,12 +288,19 @@ class TStreamingTestFixture : public NUnitTest::TBaseFixture { } } - UNIT_ASSERT_GE(expectedMessages.size(), received.size()); + UNIT_ASSERT_C(expectedMessages.size() >= received.size(), TStringBuilder() + << "expected #" << expectedMessages.size() << " messages (" + << JoinSeq(", ", expectedMessages) << "), got #" << received.size() << " messages (" + << JoinSeq(", ", received) << ")"); error = TStringBuilder() << "got new event, received #" << received.size() << " / " << expectedMessages.size() << " messages"; return false; }); + if (sort) { + 
std::sort(received.begin(), received.end()); + } + UNIT_ASSERT_VALUES_EQUAL(received.size(), expectedMessages.size()); for (size_t i = 0; i < received.size(); ++i) { UNIT_ASSERT_VALUES_EQUAL(received[i], expectedMessages[i]); @@ -348,6 +384,24 @@ class TStreamingTestFixture : public NUnitTest::TBaseFixture { )); } + void CreateYdbSource(const TString& ydbSourceName) { + ExecQuery(fmt::format(R"( + UPSERT OBJECT ydb_source_secret (TYPE SECRET) WITH (value = "{token}"); + CREATE EXTERNAL DATA SOURCE `{ydb_source}` WITH ( + SOURCE_TYPE = "Ydb", + LOCATION = "{ydb_location}", + DATABASE_NAME = "{ydb_database_name}", + AUTH_METHOD = "TOKEN", + TOKEN_SECRET_NAME = "ydb_source_secret", + USE_TLS = "FALSE" + );)", + "ydb_source"_a = ydbSourceName, + "ydb_location"_a = YDB_ENDPOINT, + "ydb_database_name"_a = YDB_DATABASE, + "token"_a = BUILTIN_ACL_ROOT + )); + } + // Script executions (using query client SDK) TOperation::TOperationId ExecScript(const TString& query, std::optional settings = std::nullopt, bool waitRunning = true) { @@ -562,7 +616,7 @@ class TStreamingTestFixture : public NUnitTest::TBaseFixture { }); } - // Utils + // Mock PQ utils static IMockPqReadSession::TPtr WaitMockPqReadSession(IMockPqGateway::TPtr gateway, const TString& topic) { return WaitForPqMockSession(TEST_OPERATION_TIMEOUT, "read", [gateway, topic]() { @@ -610,6 +664,93 @@ class TStreamingTestFixture : public NUnitTest::TBaseFixture { ReadMockPqMessages(session, {message}); } + // Mock Connector utils + + static NYql::TGenericDataSourceInstance GetMockConnectorSourceInstance() { + NYql::TGenericDataSourceInstance dataSourceInstance; + dataSourceInstance.set_kind(NYql::YDB); + dataSourceInstance.set_database(YDB_DATABASE); + dataSourceInstance.set_use_tls(false); + dataSourceInstance.set_protocol(NYql::NATIVE); + + auto& endpoint = *dataSourceInstance.mutable_endpoint(); + TIpPort port; + NHttp::CrackAddress(YDB_ENDPOINT, *endpoint.mutable_host(), port); + endpoint.set_port(port); + + auto& 
iamToken = *dataSourceInstance.mutable_credentials()->mutable_token(); + iamToken.set_type("IAM"); + iamToken.set_value(BUILTIN_ACL_ROOT); + + return dataSourceInstance; + } + + template + static void FillMockConnectorRequestColumns(TRequestBuilder& builder, const std::vector& columns) { + for (const auto& column : columns) { + builder.Column(column.Name, column.Type); + } + } + + // Should be called at most once + static void SetupMockConnectorTableDescription(std::shared_ptr mockClient, const TMockConnectorTableDescriptionSettings& settings) { + TTypeMappingSettings typeMappingSettings; + typeMappingSettings.set_date_time_format(STRING_FORMAT); + + auto describeTableBuilder = mockClient->ExpectDescribeTable(); + describeTableBuilder + .Table(settings.TableName) + .DataSourceInstance(GetMockConnectorSourceInstance()) + .TypeMappingSettings(typeMappingSettings); + + auto listSplitsBuilder = mockClient->ExpectListSplits(); + listSplitsBuilder + .ValidateArgs(settings.ValidateListSplitsArgs) + .Select() + .DataSourceInstance(GetMockConnectorSourceInstance()) + .Table(settings.TableName); + + for (ui64 i = 0; i < settings.DescribeCount; ++i) { + auto responseBuilder = describeTableBuilder.Response(); + FillMockConnectorRequestColumns(responseBuilder, settings.Columns); + } + + for (ui64 i = 0; i < settings.ListSplitsCount; ++i) { + auto responseBuilder = listSplitsBuilder.Result() + .AddResponse(NYql::NConnector::NewSuccess()) + .Description("some binary description") + .Select() + .DataSourceInstance(GetMockConnectorSourceInstance()) + .What(); + FillMockConnectorRequestColumns(responseBuilder, settings.Columns); + } + } + + // Should be called at most once + static void SetupMockConnectorTableData(std::shared_ptr mockClient, const TMockConnectorReadSplitsSettings& settings) { + auto readSplitsBuilder = mockClient->ExpectReadSplits(); + + { + auto columnsBuilder = readSplitsBuilder + .Filtering(TReadSplitsRequest::FILTERING_OPTIONAL) + 
.ValidateArgs(settings.ValidateReadSplitsArgs) + .Split() + .Description("some binary description") + .Select() + .Table(settings.TableName) + .DataSourceInstance(GetMockConnectorSourceInstance()) + .What(); + FillMockConnectorRequestColumns(columnsBuilder, settings.Columns); + } + + for (ui64 i = 0; i < settings.NumberReadSplits; ++i) { + readSplitsBuilder.Result() + .AddResponse(settings.ResultFactory(), NYql::NConnector::NewSuccess()); + } + } + + // Utils + static void WaitFor(TDuration timeout, const TString& description, std::function callback) { TInstant start = TInstant::Now(); TString errorString; @@ -641,6 +782,7 @@ class TStreamingTestFixture : public NUnitTest::TBaseFixture { protected: TDuration CheckpointPeriod = TDuration::MilliSeconds(200); + TTestLogSettings LogSettings; private: std::optional AppConfig; @@ -1112,6 +1254,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQueryDatastreams) { WaitCheckpointUpdate(executionId); auto readSession = WaitMockPqReadSession(pqGateway, inputTopicName); + auto writeSession = WaitMockPqWriteSession(pqGateway, outputTopicName); readSession->AddDataReceivedEvent(1, R"({"key": "key1", "value": "value1"})"); readSession->AddDataReceivedEvent(2, R"({"key": "key2", "value": "value2"})"); readSession->AddDataReceivedEvent(3, R"({"key": "key3", "value": "value3"})"); @@ -1122,7 +1265,8 @@ Y_UNIT_TEST_SUITE(KqpFederatedQueryDatastreams) { WaitCheckpointUpdate(executionId); WaitMockPqReadSession(pqGateway, inputTopicName)->AddDataReceivedEvent(4, R"({"key": "key4", "value": "value4"})"); - ReadMockPqMessage(WaitMockPqWriteSession(pqGateway, outputTopicName), "key4value4"); + writeSession = WaitMockPqWriteSession(pqGateway, outputTopicName); + ReadMockPqMessage(writeSession, "key4value4"); CancelScriptExecution(operationId); } @@ -1253,7 +1397,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQueryDatastreams) { } Y_UNIT_TEST_SUITE(KqpStreamingQueriesDdl) { - Y_UNIT_TEST_F(CreateAndAlterStreamingQuery, TStreamingTestFixture) { + 
Y_UNIT_TEST_F(CreateAndAlterStreamingQuery, TStreamingWithSchemaSecretsTestFixture) { constexpr char inputTopicName[] = "createAndAlterStreamingQueryInputTopic"; constexpr char outputTopicName[] = "createAndAlterStreamingQueryOutputTopic"; CreateTopic(inputTopicName); @@ -1264,7 +1408,9 @@ Y_UNIT_TEST_SUITE(KqpStreamingQueriesDdl) { constexpr char queryName[] = "streamingQuery"; ExecQuery(fmt::format(R"( + CREATE SECRET test_secret WITH (value = "1234"); CREATE TABLE test_table1 (Key Int32 NOT NULL, PRIMARY KEY (Key)); + GRANT ALL ON `/Root/test_table1` TO `test@builtin`; CREATE STREAMING QUERY `{query_name}` AS DO BEGIN INSERT INTO `{pq_source}`.`{output_topic}` @@ -1283,6 +1429,13 @@ Y_UNIT_TEST_SUITE(KqpStreamingQueriesDdl) { "output_topic"_a = outputTopicName )); + { + const auto tableDesc = Navigate(GetRuntime(), GetRuntime().AllocateEdgeActor(), "/Root/test_table1", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); + const auto& table = tableDesc->ResultSet.at(0); + UNIT_ASSERT_VALUES_EQUAL(table.Kind, NSchemeCache::TSchemeCacheNavigate::EKind::KindTable); + UNIT_ASSERT(table.SecurityObject->CheckAccess(NACLib::GenericFull, NACLib::TUserToken("test@builtin", {}))); + } + CheckScriptExecutionsCount(1, 1); Sleep(TDuration::Seconds(1)); @@ -1587,6 +1740,104 @@ Y_UNIT_TEST_SUITE(KqpStreamingQueriesDdl) { ReadTopicMessage(outputTopicName, "test message"); } + Y_UNIT_TEST_F(StreamingQueryWithSolomonInsert, TStreamingTestFixture) { + const auto pqGateway = SetupMockPqGateway(); + + constexpr char inputTopicName[] = "createAndAlterStreamingQueryInputTopic"; + CreateTopic(inputTopicName); + + constexpr char pqSourceName[] = "sourceName"; + CreatePqSource(pqSourceName); + + constexpr char solomonSinkName[] = "sinkName"; + ExecQuery(fmt::format(R"( + CREATE EXTERNAL DATA SOURCE `{solomon_source}` WITH ( + SOURCE_TYPE = "Solomon", + LOCATION = "localhost:{solomon_port}", + AUTH_METHOD = "NONE", + USE_TLS = "false" + );)", + "solomon_source"_a = solomonSinkName, + 
"solomon_port"_a = getenv("SOLOMON_HTTP_PORT") + )); + + constexpr char queryName[] = "streamingQuery"; + const TSolomonLocation soLocation = { + .ProjectId = "cloudId1", + .FolderId = "folderId1", + .Service = "custom", + .IsCloud = false, + }; + ExecQuery(fmt::format(R"( + CREATE STREAMING QUERY `{query_name}` AS + DO BEGIN + INSERT INTO `{solomon_sink}`.`{solomon_project}/{solomon_folder}/{solomon_service}` + SELECT + Unwrap(CAST(Data AS Uint64)) AS value, + "test-solomon-insert" AS sensor, + Timestamp("2025-03-12T14:40:39Z") AS ts + FROM `{pq_source}`.`{input_topic}` + END DO;)", + "query_name"_a = queryName, + "pq_source"_a = pqSourceName, + "solomon_sink"_a = solomonSinkName, + "solomon_project"_a = soLocation.ProjectId, + "solomon_folder"_a = soLocation.FolderId, + "solomon_service"_a = soLocation.Service, + "input_topic"_a = inputTopicName + )); + + CheckScriptExecutionsCount(1, 1); + + CleanupSolomon(soLocation); + auto readSession = WaitMockPqReadSession(pqGateway, inputTopicName); + readSession->AddDataReceivedEvent(0, "1234"); + + Sleep(TDuration::Seconds(2)); + + TString expectedMetrics = R"([ + { + "labels": [ + [ + "name", + "value" + ], + [ + "sensor", + "test-solomon-insert" + ] + ], + "ts": 1741790439, + "value": 1234 + } +])"; + UNIT_ASSERT_STRINGS_EQUAL(GetSolomonMetrics(soLocation), expectedMetrics); + CleanupSolomon(soLocation); + + readSession->AddCloseSessionEvent(EStatus::UNAVAILABLE, {NIssue::TIssue("Test pq session failure")}); + + WaitMockPqReadSession(pqGateway, inputTopicName)->AddDataReceivedEvent(1, "4321"); + Sleep(TDuration::Seconds(2)); + + expectedMetrics = R"([ + { + "labels": [ + [ + "name", + "value" + ], + [ + "sensor", + "test-solomon-insert" + ] + ], + "ts": 1741790439, + "value": 4321 + } +])"; + UNIT_ASSERT_STRINGS_EQUAL(GetSolomonMetrics(soLocation), expectedMetrics); + } + Y_UNIT_TEST_F(StreamingQueryWithS3Join, TStreamingTestFixture) { // Test that defaults are overridden for streaming queries auto& setting = 
*SetupAppConfig().MutableKQPConfig()->AddSettings(); @@ -1683,24 +1934,9 @@ Y_UNIT_TEST_SUITE(KqpStreamingQueriesDdl) { CreateTopic(outputTopicName); constexpr char pqSourceName[] = "pqSourceName"; - CreatePqSource(pqSourceName); - constexpr char ydbSourceName[] = "ydbSourceName"; - ExecQuery(fmt::format(R"( - CREATE OBJECT secret_name (TYPE SECRET) WITH (value = "{token}"); - CREATE EXTERNAL DATA SOURCE `{ydb_source}` WITH ( - SOURCE_TYPE = "Ydb", - LOCATION = "{ydb_location}", - DATABASE_NAME = "{ydb_database_name}", - AUTH_METHOD = "TOKEN", - TOKEN_SECRET_NAME = "secret_name", - USE_TLS = "FALSE" - );)", - "ydb_source"_a = ydbSourceName, - "ydb_location"_a = YDB_ENDPOINT, - "ydb_database_name"_a = YDB_DATABASE, - "token"_a = BUILTIN_ACL_ROOT - )); + CreatePqSource(pqSourceName); + CreateYdbSource(ydbSourceName); constexpr char ydbTable[] = "lookup"; ExecExternalQuery(fmt::format(R"( @@ -1713,75 +1949,30 @@ Y_UNIT_TEST_SUITE(KqpStreamingQueriesDdl) { )); { // Prepare connector mock - NYql::TGenericDataSourceInstance dataSourceInstance; - dataSourceInstance.set_kind(NYql::YDB); - dataSourceInstance.set_database(YDB_DATABASE); - dataSourceInstance.set_use_tls(false); - dataSourceInstance.set_protocol(NYql::NATIVE); - - auto& endpoint = *dataSourceInstance.mutable_endpoint(); - TIpPort port; - NHttp::CrackAddress(YDB_ENDPOINT, *endpoint.mutable_host(), port); - endpoint.set_port(port); - - auto& iamToken = *dataSourceInstance.mutable_credentials()->mutable_token(); - iamToken.set_type("IAM"); - iamToken.set_value(BUILTIN_ACL_ROOT); - - TTypeMappingSettings typeMappingSettings; - typeMappingSettings.set_date_time_format(STRING_FORMAT); - - auto describeTableBuilder = connectorClient->ExpectDescribeTable(); - describeTableBuilder - .Table(ydbTable) - .DataSourceInstance(dataSourceInstance) - .TypeMappingSettings(typeMappingSettings); - - auto listSplitsBuilder = connectorClient->ExpectListSplits(); - listSplitsBuilder.Select() - .DataSourceInstance(dataSourceInstance) 
- .Table(ydbTable); + const std::vector columns = { + {"fqdn", Ydb::Type::STRING}, + {"payload", Ydb::Type::STRING} + }; + SetupMockConnectorTableDescription(connectorClient, { + .TableName = ydbTable, + .Columns = columns, + .DescribeCount = 2, + .ListSplitsCount = 2 + }); const std::vector fqdnColumn = {"host1.example.com", "host2.example.com", "host3.example.com"}; const std::vector payloadColumn = {"P1", "P2", "P3"}; - auto readSplitsBuilder = connectorClient->ExpectReadSplits(); - readSplitsBuilder - .Filtering(TReadSplitsRequest::FILTERING_OPTIONAL) - .Split() - .Description("some binary description") - .Select() - .Table(ydbTable) - .DataSourceInstance(dataSourceInstance) - .What() - .Column("fqdn", Ydb::Type::STRING) - .Column("payload", Ydb::Type::STRING); - - const auto builtResults = [&]() { - describeTableBuilder.Response() - .Column("fqdn", Ydb::Type::STRING) - .Column("payload", Ydb::Type::STRING); - - listSplitsBuilder.Result() - .AddResponse(NYql::NConnector::NewSuccess()) - .Description("some binary description") - .Select() - .DataSourceInstance(dataSourceInstance) - .What() - .Column("fqdn", Ydb::Type::STRING) - .Column("payload", Ydb::Type::STRING); - - readSplitsBuilder.Result() - .AddResponse( - MakeRecordBatch( - MakeArray("fqdn", fqdnColumn, arrow::binary()), - MakeArray("payload", payloadColumn, arrow::binary()) - ), - NYql::NConnector::NewSuccess() + SetupMockConnectorTableData(connectorClient, { + .TableName = ydbTable, + .Columns = columns, + .NumberReadSplits = 2, + .ResultFactory = [&]() { + return MakeRecordBatch( + MakeArray("fqdn", fqdnColumn, arrow::binary()), + MakeArray("payload", payloadColumn, arrow::binary()) ); - }; - - builtResults(); - builtResults(); // Streaming queries compiled twice, also in test results requested twice due to retry + } + }); } constexpr char queryName[] = "streamingQuery"; @@ -1834,6 +2025,259 @@ Y_UNIT_TEST_SUITE(KqpStreamingQueriesDdl) { WaitMockPqReadSession(pqGateway, 
inputTopicName)->AddDataReceivedEvent(sampleMessages); ReadMockPqMessages(WaitMockPqWriteSession(pqGateway, outputTopicName), sampleResult); } + + Y_UNIT_TEST_F(StreamingQueryWithStreamLookupJoin, TStreamingTestFixture) { + const auto connectorClient = SetupMockConnectorClient(); + const auto pqGateway = SetupMockPqGateway(); + + constexpr char inputTopicName[] = "sljInputTopicName"; + constexpr char outputTopicName[] = "sljOutputTopicName"; + CreateTopic(inputTopicName); + CreateTopic(outputTopicName); + + constexpr char pqSourceName[] = "pqSourceName"; + constexpr char ydbSourceName[] = "ydbSourceName"; + CreatePqSource(pqSourceName); + CreateYdbSource(ydbSourceName); + + constexpr char ydbTable[] = "lookup"; + ExecExternalQuery(fmt::format(R"( + CREATE TABLE `{table}` ( + fqdn String, + payload String, + PRIMARY KEY (fqdn) + ))", + "table"_a = ydbTable + )); + + { // Prepare connector mock + const std::vector columns = { + {"fqdn", Ydb::Type::STRING}, + {"payload", Ydb::Type::STRING} + }; + SetupMockConnectorTableDescription(connectorClient, { + .TableName = ydbTable, + .Columns = columns, + .DescribeCount = 2, + .ListSplitsCount = 5, + .ValidateListSplitsArgs = false + }); + + ui64 readSplitsCount = 0; + const std::vector fqdnColumn = {"host1.example.com", "host2.example.com", "host3.example.com"}; + SetupMockConnectorTableData(connectorClient, { + .TableName = ydbTable, + .Columns = columns, + .NumberReadSplits = 3, + .ValidateReadSplitsArgs = false, + .ResultFactory = [&]() { + readSplitsCount += 1; + const auto payloadColumn = readSplitsCount < 3 + ? 
std::vector{"P1", "P2", "P3"} + : std::vector{"P4", "P5", "P6"}; + + return MakeRecordBatch( + MakeArray("fqdn", fqdnColumn, arrow::binary()), + MakeArray("payload", payloadColumn, arrow::binary()) + ); + } + }); + } + + constexpr char queryName[] = "streamingQuery"; + ExecQuery(fmt::format(R"( + CREATE STREAMING QUERY `{query_name}` AS + DO BEGIN + $ydb_lookup = SELECT * FROM `{ydb_source}`.`{ydb_table}`; + + $pq_source = SELECT * FROM `{pq_source}`.`{input_topic}` WITH ( + FORMAT = "json_each_row", + SCHEMA ( + time Int32 NOT NULL, + event String, + host String + ) + ); + + $joined = SELECT l.payload AS payload, p.* FROM $pq_source AS p + LEFT JOIN /*+ streamlookup(TTL 1) */ ANY $ydb_lookup AS l + ON (l.fqdn = p.host); + + INSERT INTO `{pq_source}`.`{output_topic}` + SELECT Unwrap(event || "-" || payload) FROM $joined + END DO;)", + "query_name"_a = queryName, + "pq_source"_a = pqSourceName, + "ydb_source"_a = ydbSourceName, + "ydb_table"_a = ydbTable, + "input_topic"_a = inputTopicName, + "output_topic"_a = outputTopicName + )); + + CheckScriptExecutionsCount(1, 1); + + auto readSession = WaitMockPqReadSession(pqGateway, inputTopicName); + const std::vector sampleMessages = { + {0, R"({"time": 0, "event": "A", "host": "host1.example.com"})"}, + {1, R"({"time": 1, "event": "B", "host": "host3.example.com"})"}, + {2, R"({"time": 2, "event": "A", "host": "host1.example.com"})"}, + }; + readSession->AddDataReceivedEvent(sampleMessages); + + auto writeSession = WaitMockPqWriteSession(pqGateway, outputTopicName); + const std::vector sampleResult = {"A-P1", "B-P3", "A-P1"}; + ReadMockPqMessages(writeSession, sampleResult); + + readSession->AddCloseSessionEvent(EStatus::UNAVAILABLE, {NIssue::TIssue("Test pq session failure")}); + + readSession = WaitMockPqReadSession(pqGateway, inputTopicName); + readSession->AddDataReceivedEvent(sampleMessages); + writeSession = WaitMockPqWriteSession(pqGateway, outputTopicName); + ReadMockPqMessages(writeSession, sampleResult); + + 
Sleep(TDuration::Seconds(2)); + readSession->AddDataReceivedEvent(sampleMessages); + ReadMockPqMessages(writeSession, {"A-P4", "B-P6", "A-P4"}); + + CheckScriptExecutionsCount(1, 1); + const auto results = ExecQuery( + "SELECT ast_compressed FROM `.metadata/script_executions`;" + ); + UNIT_ASSERT_VALUES_EQUAL(results.size(), 1); + CheckScriptResult(results[0], 1, 1, [](TResultSetParser& result) { + const auto& ast = result.ColumnParser(0).GetOptionalString(); + UNIT_ASSERT(ast); + UNIT_ASSERT_STRING_CONTAINS(*ast, "DqCnStreamLookup"); + }); + } + + Y_UNIT_TEST_F(OffsetsAndStateRecoveryOnInternalRetry, TStreamingTestFixture) { + // Join with S3 used for introducing temporary failure and force retry on specific key + + constexpr char sourceBucket[] = "test_streaming_query_recovery_on_internal_retry"; + constexpr char objectContent[] = R"( +{"fqdn": "host1.example.com", "payload": "P1"} +{"fqdn": "host2.example.com" })"; + constexpr char objectPath[] = "path/test_object.json"; + CreateBucketWithObject(sourceBucket, objectPath, objectContent); + + constexpr char inputTopicName[] = "internalRetryInputTopicName"; + constexpr char outputTopicName[] = "internalRetryOutputTopicName"; + CreateTopic(inputTopicName); + CreateTopic(outputTopicName); + + constexpr char pqSourceName[] = "pqSourceName"; + constexpr char s3SourceName[] = "s3Source"; + CreatePqSource(pqSourceName); + CreateS3Source(sourceBucket, s3SourceName); + + constexpr char queryName[] = "streamingQuery"; + ExecQuery(fmt::format(R"( + CREATE STREAMING QUERY `{query_name}` AS + DO BEGIN + PRAGMA ydb.HashJoinMode = "map"; + $s3_lookup = SELECT * FROM `{s3_source}`.`path/` WITH ( + FORMAT = "json_each_row", + SCHEMA ( + fqdn String NOT NULL, + payload String + ) + ); + + -- Test that offsets are recovered + $pq_source = SELECT * FROM `{pq_source}`.`{input_topic}` WITH ( + FORMAT = "json_each_row", + SCHEMA ( + time String NOT NULL, + event String, + host String + ) + ); + + $joined = SELECT + Unwrap(l.payload) AS 
payload, -- Test failure here + p.* + FROM $pq_source AS p + LEFT JOIN $s3_lookup AS l + ON (l.fqdn = p.host); + + -- Test that state also recovered + $grouped = SELECT + event, + CAST(SOME(time) AS String) AS time, + SOME(payload) AS payload, + CAST(COUNT(*) AS String) AS count + FROM $joined + GROUP BY + HOP (CAST(time AS Timestamp), "PT1H", "PT1H", "PT0H"), + event; + + INSERT INTO `{pq_source}`.`{output_topic}` + SELECT Unwrap(event || "-" || time || "-" || payload || "-" || count) FROM $grouped + END DO;)", + "query_name"_a = queryName, + "pq_source"_a = pqSourceName, + "s3_source"_a = s3SourceName, + "input_topic"_a = inputTopicName, + "output_topic"_a = outputTopicName + )); + + CheckScriptExecutionsCount(1, 1); + Sleep(TDuration::Seconds(1)); + + // Fill HOP state for key A + WriteTopicMessages(inputTopicName, { + R"({"time": "2025-08-24T00:00:00.000000Z", "event": "A", "host": "host1.example.com"})", + R"({"time": "2025-08-25T00:00:00.000000Z", "event": "A", "host": "host1.example.com"})", + }); + ReadTopicMessage(outputTopicName, "A-2025-08-24T00:00:00.000000Z-P1-1"); + + Sleep(TDuration::Seconds(2)); + auto readDisposition = TInstant::Now(); + + // Write failure message for key B + WriteTopicMessage(inputTopicName, R"({"time": "2025-08-24T00:00:00.000000Z", "event": "B", "host": "host2.example.com"})"); + + // Wait script execution retry + WaitFor(TDuration::Seconds(10), "wait retry", [&](TString& error) { + const auto& results = ExecQuery(R"( + SELECT MAX(lease_generation) AS generation FROM `.metadata/script_executions`; + )"); + UNIT_ASSERT_VALUES_EQUAL(results.size(), 1); + + std::optional generation; + CheckScriptResult(results[0], 1, 1, [&](TResultSetParser& result) { + generation = result.ColumnParser(0).GetOptionalInt64(); + }); + + if (!generation || *generation < 2) { + error = TStringBuilder() << "generation is: " << (generation ? 
ToString(*generation) : "null"); + return false; + } + + return true; + }); + + // Resolve query failure + UploadObject(sourceBucket, objectPath, R"( +{"fqdn": "host1.example.com", "payload": "P1"} +{"fqdn": "host2.example.com", "payload": "P2" })"); + Sleep(TDuration::Seconds(2)); + + // Check that offset is restored + WriteTopicMessage(inputTopicName, R"({"time": "2025-08-25T00:00:00.000000Z", "event": "B", "host": "host2.example.com"})"); + ReadTopicMessage(outputTopicName, "B-2025-08-24T00:00:00.000000Z-P2-1", readDisposition); + + Sleep(TDuration::Seconds(1)); + readDisposition = TInstant::Now(); + + // Check that HOP state is restored + WriteTopicMessage(inputTopicName, R"({"time": "2025-08-26T00:00:00.000000Z", "event": "A", "host": "host1.example.com"})"); + ReadTopicMessages(outputTopicName, { + "A-2025-08-25T00:00:00.000000Z-P1-1", + "B-2025-08-25T00:00:00.000000Z-P2-1" + }, readDisposition, /* sort */ true); + } } } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/federated_query/datastreams/ya.make b/ydb/core/kqp/ut/federated_query/datastreams/ya.make index 575dc3a287e3..b5ca1ec31e87 100644 --- a/ydb/core/kqp/ut/federated_query/datastreams/ya.make +++ b/ydb/core/kqp/ut/federated_query/datastreams/ya.make @@ -16,6 +16,7 @@ PEERDIR( ydb/core/kqp/ut/federated_query/common ydb/library/testlib/pq_helpers ydb/library/testlib/s3_recipe_helper + ydb/library/testlib/solomon_helpers ydb/library/yql/providers/generic/connector/libcpp ydb/library/yql/providers/generic/connector/libcpp/ut_helpers yql/essentials/sql/pg @@ -24,6 +25,7 @@ PEERDIR( INCLUDE(${ARCADIA_ROOT}/ydb/public/tools/ydb_recipe/recipe.inc) INCLUDE(${ARCADIA_ROOT}/ydb/tests/tools/s3_recipe/recipe.inc) +INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/tools/solomon_emulator/recipe/recipe.inc) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp index 
e6b66e85a3ba..abc51cb194a0 100644 --- a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp +++ b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp @@ -250,16 +250,21 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { private: TAppConfig GetAppConfig() const { TAppConfig appConfig; - appConfig.MutableFeatureFlags()->SetEnableResourcePools(Settings_.EnableResourcePools_); - appConfig.MutableFeatureFlags()->SetEnableResourcePoolsOnServerless(Settings_.EnableResourcePoolsOnServerless_); - appConfig.MutableFeatureFlags()->SetEnableMetadataObjectsOnServerless(Settings_.EnableMetadataObjectsOnServerless_); - appConfig.MutableFeatureFlags()->SetEnableExternalDataSourcesOnServerless(Settings_.EnableExternalDataSourcesOnServerless_); - appConfig.MutableFeatureFlags()->SetEnableExternalDataSources(true); - appConfig.MutableFeatureFlags()->SetEnableResourcePoolsCounters(true); - appConfig.MutableFeatureFlags()->SetEnableStreamingQueries(true); - appConfig.MutableQueryServiceConfig()->SetAllExternalDataSourcesAreAvailable(true); *appConfig.MutableWorkloadManagerConfig() = Settings_.WorkloadManagerConfig_; + auto& featureFlags = *appConfig.MutableFeatureFlags(); + featureFlags.SetEnableResourcePools(Settings_.EnableResourcePools_); + featureFlags.SetEnableResourcePoolsOnServerless(Settings_.EnableResourcePoolsOnServerless_); + featureFlags.SetEnableMetadataObjectsOnServerless(Settings_.EnableMetadataObjectsOnServerless_); + featureFlags.SetEnableExternalDataSourcesOnServerless(Settings_.EnableExternalDataSourcesOnServerless_); + featureFlags.SetEnableExternalDataSources(true); + featureFlags.SetEnableResourcePoolsCounters(true); + featureFlags.SetEnableStreamingQueries(true); + + auto& queryServiceConfig = *appConfig.MutableQueryServiceConfig(); + queryServiceConfig.SetAllExternalDataSourcesAreAvailable(true); + queryServiceConfig.SetProgressStatsPeriodMs(1000); + 
appConfig.MutableQueryServiceConfig()->AddAvailableExternalDataSources("ObjectStorage"); return appConfig; } diff --git a/ydb/core/protos/kqp_physical.proto b/ydb/core/protos/kqp_physical.proto index 2573510b6d70..6f4c37f84a47 100644 --- a/ydb/core/protos/kqp_physical.proto +++ b/ydb/core/protos/kqp_physical.proto @@ -335,6 +335,34 @@ message TKqpPhyCnSequencer { bytes OutputType = 5; } +message TKqpPhyCnDqSourceStreamLookup { + // + // |<- InputStageRowType + // [maybe wide DQ channel] + // |<- ConnectionInputRowType |<- LookupRowType + // ------------------+ + // |<- ConnectionOutputRowType + // [maybe wide DQ channel] + // |<- OutputStageRowType + // + + bytes InputStageRowType = 1; + bytes OutputStageRowType = 2; + bytes LookupRowType = 3; + bytes ConnectionInputRowType = 4; + bytes ConnectionOutputRowType = 5; + TKqpExternalSource LookupSource = 6; + string LeftLabel = 7; + string RightLabel = 8; + string JoinType = 9; + repeated string LeftJoinKeyNames = 10; + repeated string RightJoinKeyNames = 11; + uint64 CacheLimit = 12; + uint64 CacheTtlSeconds = 13; + uint64 MaxDelayedRows = 14; + bool IsMultiGet = 15; +} + message TKqpPhyConnection { uint32 StageIndex = 1; uint32 OutputIndex = 2; @@ -354,6 +382,7 @@ message TKqpPhyConnection { TKqpPhyCnSequencer Sequencer = 14; TKqpPhyCnParallelUnionAll ParallelUnionAll = 15; TKqpPhyCnVectorResolve VectorResolve = 16; + TKqpPhyCnDqSourceStreamLookup DqSourceStreamLookup = 17; }; } diff --git a/ydb/library/testlib/solomon_helpers/solomon_emulator_helpers.cpp b/ydb/library/testlib/solomon_helpers/solomon_emulator_helpers.cpp new file mode 100644 index 000000000000..4172b63912fe --- /dev/null +++ b/ydb/library/testlib/solomon_helpers/solomon_emulator_helpers.cpp @@ -0,0 +1,43 @@ +#include "solomon_emulator_helpers.h" + +#include +#include + +#include + +namespace NTestUtils { + +namespace { + +TSimpleHttpClient CreateHttpClient() { + return TSimpleHttpClient("localhost", std::stoi(getenv("SOLOMON_HTTP_PORT"))); +} + 
+TString BuildLocationUrl(const TSolomonLocation& location, const TString& url) { + auto builder = TStringBuilder() << url; + + if (location.IsCloud) { + builder << "?folderId=" << location.FolderId << "&service=" << location.Service; + } else { + builder << "?project=" << location.ProjectId << "&cluster=" << location.FolderId << "&service=" << location.Service; + } + + return builder; +} + +} // anonymous namespace + +void CleanupSolomon(const TSolomonLocation& location) { + DoWithRetry([url = BuildLocationUrl(location, "/cleanup"), client = CreateHttpClient()]() { + TStringStream str; + client.DoPost(url, "", &str); + }, TRetryOptions(3, TDuration::Seconds(1)), true); +} + +TString GetSolomonMetrics(const TSolomonLocation& location) { + TStringStream str; + CreateHttpClient().DoGet(BuildLocationUrl(location, "/metrics/get"), &str); + return str.Str(); +} + +} // namespace NTestUtils diff --git a/ydb/library/testlib/solomon_helpers/solomon_emulator_helpers.h b/ydb/library/testlib/solomon_helpers/solomon_emulator_helpers.h new file mode 100644 index 000000000000..9871f43cdbc0 --- /dev/null +++ b/ydb/library/testlib/solomon_helpers/solomon_emulator_helpers.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +namespace NTestUtils { + +struct TSolomonLocation { + TString ProjectId; // Used only if IsCloud=false + TString FolderId; // Used as cluster ID if IsCloud=false + TString Service; + bool IsCloud = false; +}; + +void CleanupSolomon(const TSolomonLocation& location); + +TString GetSolomonMetrics(const TSolomonLocation& location); + +} // namespace NTestUtils diff --git a/ydb/library/testlib/solomon_helpers/ya.make b/ydb/library/testlib/solomon_helpers/ya.make new file mode 100644 index 000000000000..d4993cfd6f19 --- /dev/null +++ b/ydb/library/testlib/solomon_helpers/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + solomon_emulator_helpers.cpp +) + +PEERDIR( + library/cpp/http/simple + library/cpp/retry +) + +YQL_LAST_ABI_VERSION() + +END() diff --git 
a/ydb/library/testlib/ya.make b/ydb/library/testlib/ya.make index 6e00632a333f..1d3ebf3c33f6 100644 --- a/ydb/library/testlib/ya.make +++ b/ydb/library/testlib/ya.make @@ -2,6 +2,7 @@ RECURSE( pq_helpers s3_recipe_helper service_mocks + solomon_helpers ) RECURSE_FOR_TESTS( diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp index 5b481d95d70d..292a0325c052 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp @@ -586,13 +586,28 @@ NDqProto::ECheckpointingMode GetTaskCheckpointingMode(const TDqTaskSettings& tas } bool IsIngress(const TDqTaskSettings& task) { - // No inputs at all or the only inputs are sources. - for (const auto& input : task.GetInputs()) { - if (!input.HasSource()) { - return false; + // No inputs at all or there is no input channels with checkpoints. + + const auto& inputs = task.GetInputs(); + if (inputs.empty()) { + return true; + } + + bool hasSource = false; + for (const auto& input : inputs) { + if (input.HasSource()) { + hasSource = true; + continue; + } + + for (const auto& channel : input.GetChannels()) { + if (channel.GetCheckpointingMode() != NDqProto::CHECKPOINTING_MODE_DISABLED) { + return false; + } } } - return true; + + return hasSource; } bool IsEgress(const TDqTaskSettings& task) { diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h index 7618a3119cc0..dbab28c3c08e 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h @@ -779,7 +779,9 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped void ResumeInputsByCheckpoint() override final { for (auto& [id, channelInfo] : InputChannelsMap) { - channelInfo.ResumeByCheckpoint(); + if (channelInfo.PendingCheckpoint) { + 
channelInfo.ResumeByCheckpoint(); + } } } diff --git a/ydb/library/yql/dq/opt/dq_opt_join.cpp b/ydb/library/yql/dq/opt/dq_opt_join.cpp index 8b4f8856b748..63fbb13b38ea 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join.cpp @@ -1,12 +1,14 @@ #include "dq_opt_join.h" #include "dq_opt_phy.h" +#include +#include +#include #include #include -#include -#include -#include #include +#include +#include namespace NYql::NDq { @@ -2067,4 +2069,119 @@ TExprBase DqBuildHashJoin( .Done(); } +namespace { + +bool IsStreamLookup(const TCoEquiJoinTuple& joinTuple) { + for (const auto& outer : joinTuple.Options()) { + for (const auto& inner : outer.Cast()) { + if (auto maybeForceStreamLookupOption = inner.Maybe()) { + if (maybeForceStreamLookupOption.Cast().StringValue() == "forceStreamLookup") { + return true; + } + } + } + } + return false; +} + +IDqOptimization* GetDqOptCallback(const TExprBase& providerRead, TTypeAnnotationContext& typeCtx) { + if (providerRead.Ref().ChildrenSize() > 1 && TCoDataSource::Match(providerRead.Ref().Child(1))) { + auto dataSourceName = providerRead.Ref().Child(1)->Child(0)->Content(); + auto datasource = typeCtx.DataSourceMap.FindPtr(dataSourceName); + YQL_ENSURE(datasource); + return (*datasource)->GetDqOptimization(); + } + return nullptr; +} + +TDqLookupSourceWrap LookupSourceFromSource(TDqSourceWrap source, TExprContext& ctx) { + return Build(ctx, source.Pos()) + .Input(source.Input()) + .DataSource(source.DataSource()) + .RowType(source.RowType()) + .Settings(source.Settings()) + .Done(); +} + +TDqLookupSourceWrap LookupSourceFromRead(TDqReadWrap read, TExprContext& ctx, TTypeAnnotationContext& typeCtx) { // temp replace with yt source + IDqOptimization* dqOptimization = GetDqOptCallback(read.Input(), typeCtx); + YQL_ENSURE(dqOptimization); + auto lookupSourceWrap = dqOptimization->RewriteLookupRead(read.Input().Ptr(), ctx); + YQL_ENSURE(lookupSourceWrap, "Lookup read is not supported"); + return 
TDqLookupSourceWrap(lookupSourceWrap); +} + +// Recursively walk join tree and replace right-side of StreamLookupJoin +ui32 RewriteStreamJoinTuple(ui32 idx, const TCoEquiJoin& equiJoin, const TCoEquiJoinTuple& joinTuple, std::vector& args, TExprContext& ctx, TTypeAnnotationContext& typeCtx, bool& changed) { + // recursion depth O(args.size()) + Y_ENSURE(idx < args.size()); + + // handle left side + if (!joinTuple.LeftScope().Maybe()) { + idx = RewriteStreamJoinTuple(idx, equiJoin, joinTuple.LeftScope().Cast(), args, ctx, typeCtx, changed); + } else { + ++idx; + } + + // handle right side + if (!joinTuple.RightScope().Maybe()) { + return RewriteStreamJoinTuple(idx, equiJoin, joinTuple.RightScope().Cast(), args, ctx, typeCtx, changed); + } + + Y_ENSURE(idx < args.size()); + + if (!IsStreamLookup(joinTuple)) { + return idx + 1; + } + + auto right = equiJoin.Arg(idx).Cast(); + auto rightList = right.List(); + if (auto maybeExtractMembers = rightList.Maybe()) { + rightList = maybeExtractMembers.Cast().Input(); + } + + TExprNode::TPtr lookupSourceWrap; + if (auto maybeSource = rightList.Maybe()) { + lookupSourceWrap = LookupSourceFromSource(maybeSource.Cast(), ctx).Ptr(); + } else if (auto maybeRead = rightList.Maybe()) { + lookupSourceWrap = LookupSourceFromRead(maybeRead.Cast(), ctx, typeCtx).Ptr(); + } else { + return idx + 1; + } + + changed = true; + args[idx] = + Build(ctx, joinTuple.Pos()) + .List(lookupSourceWrap) + .Scope(right.Scope()) + .Done().Ptr(); + + return idx + 1; +} + +} // anonymous namespace + +TExprBase DqRewriteStreamEquiJoinWithLookup(const TExprBase& node, TExprContext& ctx, TTypeAnnotationContext& typeCtx) { + const auto equiJoin = node.Cast(); + auto argCount = equiJoin.ArgCount(); + const auto joinTuple = equiJoin.Arg(argCount - 2).Cast(); + std::vector args(argCount); + bool changed = false; + auto rightIdx = RewriteStreamJoinTuple(0u, equiJoin, joinTuple, args, ctx, typeCtx, changed); + Y_ENSURE(rightIdx + 2 == argCount); + + if (!changed) { 
+ return node; + } + + // fill copies of remaining args + for (ui32 i = 0; i < argCount; ++i) { + if (!args[i]) { + args[i] = equiJoin.Arg(i).Ptr(); + } + } + + return Build(ctx, node.Pos()).Add(std::move(args)).Done(); +} + } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/dq_opt_join.h b/ydb/library/yql/dq/opt/dq_opt_join.h index b3eea0b87f15..57047bad61f1 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join.h +++ b/ydb/library/yql/dq/opt/dq_opt_join.h @@ -49,5 +49,7 @@ bool DqCollectJoinRelationsWithStats( const NNodes::TCoEquiJoin& equiJoin, const std::function>&, TStringBuf, const TExprNode::TPtr, const std::shared_ptr&)>& collector); +NNodes::TExprBase DqRewriteStreamEquiJoinWithLookup(const NNodes::TExprBase& node, TExprContext& ctx, TTypeAnnotationContext& typeCtx); + } // namespace NDq } // namespace NYql diff --git a/ydb/library/yql/dq/opt/dq_opt_phy.cpp b/ydb/library/yql/dq/opt/dq_opt_phy.cpp index eddd9b265120..9874fc525681 100644 --- a/ydb/library/yql/dq/opt/dq_opt_phy.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_phy.cpp @@ -3463,5 +3463,132 @@ TMaybeNode DqUnorderedOverStageInput(TExprBase node, TExprContext& ct return TExprBase(res); } +namespace { + +bool ValidateStreamLookupJoinFlags(const TDqJoin& join, TExprContext& ctx) { + bool leftAny = false; + bool rightAny = false; + if (const auto maybeFlags = join.Flags()) { + for (auto&& flag: maybeFlags.Cast()) { + auto&& name = flag.StringValue(); + if (name == "LeftAny"sv) { + leftAny = true; + continue; + } else if (name == "RightAny"sv) { + rightAny = true; + continue; + } + } + if (leftAny) { + ctx.AddError(TIssue(ctx.GetPosition(maybeFlags.Cast().Pos()), "Streamlookup ANY LEFT join is not implemented")); + return false; + } + } + + if (!rightAny) { + if (false) { // Temporary change to waring to allow for smooth transition + ctx.AddError(TIssue(ctx.GetPosition(join.Pos()), "Streamlookup: must be LEFT JOIN /*+streamlookup(...)*/ ANY")); + return false; + } else { + 
ctx.AddWarning(TIssue(ctx.GetPosition(join.Pos()), "(Deprecation) Streamlookup: must be LEFT JOIN /*+streamlookup(...)*/ ANY")); + } + } + + return true; +} + +} // anonymous namespace + +TMaybeNode DqRewriteStreamLookupJoin(TExprBase node, TExprContext& ctx) { + const auto join = node.Cast(); + if (join.JoinAlgo().StringValue() != "StreamLookupJoin") { + return node; + } + + const auto left = join.LeftInput().Maybe(); + if (!left) { + return node; + } + + if (!ValidateStreamLookupJoinFlags(join, ctx)) { + return {}; + } + + TExprNode::TPtr ttl; + TExprNode::TPtr maxCachedRows; + TExprNode::TPtr maxDelayedRows; + TExprNode::TPtr isMultiget; + if (const auto maybeOptions = join.JoinAlgoOptions()) { + for (auto&& option: maybeOptions.Cast()) { + auto&& name = option.Name().Value(); + if (name == "TTL"sv) { + ttl = option.Value().Cast().Ptr(); + } else if (name == "MaxCachedRows"sv) { + maxCachedRows = option.Value().Cast().Ptr(); + } else if (name == "MaxDelayedRows"sv) { + maxDelayedRows = option.Value().Cast().Ptr(); + } else if (name == "MultiGet"sv) { + isMultiget = option.Value().Cast().Ptr(); + } + } + } + + const auto pos = node.Pos(); + + if (!ttl) { + ttl = ctx.NewAtom(pos, 300); + } + + if (!maxCachedRows) { + maxCachedRows = ctx.NewAtom(pos, 1'000'000); + } + + if (!maxDelayedRows) { + maxDelayedRows = ctx.NewAtom(pos, 1'000'000); + } + + auto rightInput = join.RightInput().Ptr(); + if (auto maybe = TExprBase(rightInput).Maybe()) { + rightInput = maybe.Cast().Input().Ptr(); + } + + auto leftLabel = join.LeftLabel().Maybe() ? 
join.LeftLabel().Cast().Ptr() : ctx.NewAtom(pos, ""); + Y_ENSURE(join.RightLabel().Maybe()); + auto cn = Build(ctx, pos) + .Output(left.Output().Cast()) + .LeftLabel(leftLabel) + .RightInput(rightInput) + .RightLabel(join.RightLabel().Cast()) + .JoinKeys(join.JoinKeys()) + .JoinType(join.JoinType()) + .LeftJoinKeyNames(join.LeftJoinKeyNames()) + .RightJoinKeyNames(join.RightJoinKeyNames()) + .TTL(ttl) + .MaxCachedRows(maxCachedRows) + .MaxDelayedRows(maxDelayedRows); + + if (isMultiget) { + cn.IsMultiget(isMultiget); + } + + auto lambda = Build(ctx, pos) + .Args({"stream"}) + .Body("stream") + .Done(); + const auto stage = Build(ctx, pos) + .Inputs() + .Add(cn.Done()) + .Build() + .Program(lambda) + .Settings(TDqStageSettings().BuildNode(ctx, pos)) + .Done(); + + return Build(ctx, pos) + .Output() + .Stage(stage) + .Index().Build("0") + .Build() + .Done(); +} } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/dq_opt_phy.h b/ydb/library/yql/dq/opt/dq_opt_phy.h index 70fea30826d3..aa1d63366866 100644 --- a/ydb/library/yql/dq/opt/dq_opt_phy.h +++ b/ydb/library/yql/dq/opt/dq_opt_phy.h @@ -180,5 +180,6 @@ NNodes::TExprBase DqPushUnorderedToStage(NNodes::TExprBase node, TExprContext& c NNodes::TMaybeNode DqUnorderedOverStageInput(NNodes::TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TTypeAnnotationContext& typeAnnCtx, const TParentsMap& parentsMap, bool allowStageMultiUsage); +NNodes::TMaybeNode DqRewriteStreamLookupJoin(NNodes::TExprBase node, TExprContext& ctx); } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/tasks/dq_tasks_graph.h b/ydb/library/yql/dq/tasks/dq_tasks_graph.h index dfc7c14a82d7..89cc82f124ce 100644 --- a/ydb/library/yql/dq/tasks/dq_tasks_graph.h +++ b/ydb/library/yql/dq/tasks/dq_tasks_graph.h @@ -325,12 +325,27 @@ class TDqTasksGraph : private TMoveOnly { } bool IsIngress(const TTaskType& task) const { + // No inputs at all or there is no input channels with checkpoints. 
+ + if (!task.Inputs) { + return true; + } + + bool hasSource = false; for (const auto& input : task.Inputs) { - if (!input.SourceType) { - return false; + if (input.SourceType) { + hasSource = true; + continue; + } + + for (ui64 channelId : input.Channels) { + if (GetChannel(channelId).CheckpointingMode != NDqProto::CHECKPOINTING_MODE_DISABLED) { + return false; + } } } - return true; + + return hasSource; } static bool IsInfiniteSourceType(const TString& sourceType) { diff --git a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp index 1457a800c673..70a5f6e7c4fb 100644 --- a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp +++ b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp @@ -26,23 +26,6 @@ using namespace NYql; using namespace NYql::NDq; using namespace NYql::NNodes; -namespace { - -bool IsStreamLookup(const TCoEquiJoinTuple& joinTuple) { - for (const auto& outer: joinTuple.Options()) { - for (const auto& inner: outer.Cast()) { - if (auto maybeForceStreamLookupOption = inner.Maybe()) { - if (maybeForceStreamLookupOption.Cast().StringValue() == "forceStreamLookup") { - return true; - } - } - } - } - return false; -} - -} - /** * DQ Specific cost function and join applicability cost function */ @@ -223,81 +206,8 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { return node; } - TDqLookupSourceWrap LookupSourceFromSource(TDqSourceWrap source, TExprContext& ctx) { - return Build(ctx, source.Pos()) - .Input(source.Input()) - .DataSource(source.DataSource()) - .RowType(source.RowType()) - .Settings(source.Settings()) - .Done(); - } - - TDqLookupSourceWrap LookupSourceFromRead(TDqReadWrap read, TExprContext& ctx){ //temp replace with yt source - IDqOptimization* dqOptimization = GetDqOptCallback(read.Input()); - YQL_ENSURE(dqOptimization); - auto lookupSourceWrap = dqOptimization->RewriteLookupRead(read.Input().Ptr(), ctx); - YQL_ENSURE(lookupSourceWrap, "Lookup read 
is not supported"); - return TDqLookupSourceWrap(lookupSourceWrap); - } - - // Recursively walk join tree and replace right-side of StreamLookupJoin - ui32 RewriteStreamJoinTuple(ui32 idx, const TCoEquiJoin& equiJoin, const TCoEquiJoinTuple& joinTuple, std::vector& args, TExprContext& ctx, bool& changed) { - // recursion depth O(args.size()) - Y_ENSURE(idx < args.size()); - // handle left side - if (!joinTuple.LeftScope().Maybe()) { - idx = RewriteStreamJoinTuple(idx, equiJoin, joinTuple.LeftScope().Cast(), args, ctx, changed); - } else { - ++idx; - } - // handle right side - if (!joinTuple.RightScope().Maybe()) { - return RewriteStreamJoinTuple(idx, equiJoin, joinTuple.RightScope().Cast(), args, ctx, changed); - } - Y_ENSURE(idx < args.size()); - if (!IsStreamLookup(joinTuple)) { - return idx + 1; - } - auto right = equiJoin.Arg(idx).Cast(); - auto rightList = right.List(); - if (auto maybeExtractMembers = rightList.Maybe()) { - rightList = maybeExtractMembers.Cast().Input(); - } - TExprNode::TPtr lookupSourceWrap; - if (auto maybeSource = rightList.Maybe()) { - lookupSourceWrap = LookupSourceFromSource(maybeSource.Cast(), ctx).Ptr(); - } else if (auto maybeRead = rightList.Maybe()) { - lookupSourceWrap = LookupSourceFromRead(maybeRead.Cast(), ctx).Ptr(); - } else { - return idx + 1; - } - changed = true; - args[idx] = - Build(ctx, joinTuple.Pos()) - .List(lookupSourceWrap) - .Scope(right.Scope()) - .Done().Ptr(); - return idx + 1; - } - TMaybeNode RewriteStreamEquiJoinWithLookup(TExprBase node, TExprContext& ctx) { - const auto equiJoin = node.Cast(); - auto argCount = equiJoin.ArgCount(); - const auto joinTuple = equiJoin.Arg(argCount - 2).Cast(); - std::vector args(argCount); - bool changed = false; - auto rightIdx = RewriteStreamJoinTuple(0u, equiJoin, joinTuple, args, ctx, changed); - Y_ENSURE(rightIdx + 2 == argCount); - if (!changed) { - return node; - } - // fill copies of remaining args - for (ui32 i = 0; i < argCount; ++i) { - if (!args[i]) { - args[i] = 
equiJoin.Arg(i).Ptr(); - } - } - return Build(ctx, node.Pos()).Add(std::move(args)).Done(); + return DqRewriteStreamEquiJoinWithLookup(node, ctx, TypesCtx); } TMaybeNode OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) { @@ -446,16 +356,6 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { "Distinct is not supported for aggregation with hop"); } - IDqOptimization* GetDqOptCallback(const TExprBase& providerRead) const { - if (providerRead.Ref().ChildrenSize() > 1 && TCoDataSource::Match(providerRead.Ref().Child(1))) { - auto dataSourceName = providerRead.Ref().Child(1)->Child(0)->Content(); - auto datasource = TypesCtx.DataSourceMap.FindPtr(dataSourceName); - YQL_ENSURE(datasource); - return (*datasource)->GetDqOptimization(); - } - return nullptr; - } - private: TDqConfiguration::TPtr Config; TTypeAnnotationContext& TypesCtx; diff --git a/ydb/library/yql/providers/dq/opt/physical_optimize.cpp b/ydb/library/yql/providers/dq/opt/physical_optimize.cpp index 8792d4979e25..f305be8c72e3 100644 --- a/ydb/library/yql/providers/dq/opt/physical_optimize.cpp +++ b/ydb/library/yql/providers/dq/opt/physical_optimize.cpp @@ -264,121 +264,8 @@ class TDqsPhysicalOptProposalTransformer : public TOptimizeTransformerBase { return DqRewriteLeftPureJoin(node, ctx, *getParents(), IsGlobal); } - bool ValidateStreamLookupJoinFlags(const TDqJoin& join, TExprContext& ctx) { - bool leftAny = false; - bool rightAny = false; - if (const auto maybeFlags = join.Flags()) { - for (auto&& flag: maybeFlags.Cast()) { - auto&& name = flag.StringValue(); - if (name == "LeftAny"sv) { - leftAny = true; - continue; - } else if (name == "RightAny"sv) { - rightAny = true; - continue; - } - } - if (leftAny) { - ctx.AddError(TIssue(ctx.GetPosition(maybeFlags.Cast().Pos()), "Streamlookup ANY LEFT join is not implemented")); - return false; - } - } - if (!rightAny) { - if (false) { // Tempoarily change to waring to allow for smooth transition - 
ctx.AddError(TIssue(ctx.GetPosition(join.Pos()), "Streamlookup: must be LEFT JOIN /*+streamlookup(...)*/ ANY")); - return false; - } else { - ctx.AddWarning(TIssue(ctx.GetPosition(join.Pos()), "(Deprecation) Streamlookup: must be LEFT JOIN /*+streamlookup(...)*/ ANY")); - } - } - return true; - } - TMaybeNode RewriteStreamLookupJoin(TExprBase node, TExprContext& ctx) { - const auto join = node.Cast(); - if (join.JoinAlgo().StringValue() != "StreamLookupJoin") { - return node; - } - - const auto pos = node.Pos(); - const auto left = join.LeftInput().Maybe(); - if (!left) { - return node; - } - - if (!ValidateStreamLookupJoinFlags(join, ctx)) { - return {}; - } - - TExprNode::TPtr ttl; - TExprNode::TPtr maxCachedRows; - TExprNode::TPtr maxDelayedRows; - TExprNode::TPtr isMultiget; - if (const auto maybeOptions = join.JoinAlgoOptions()) { - for (auto&& option: maybeOptions.Cast()) { - auto&& name = option.Name().Value(); - if (name == "TTL"sv) { - ttl = option.Value().Cast().Ptr(); - } else if (name == "MaxCachedRows"sv) { - maxCachedRows = option.Value().Cast().Ptr(); - } else if (name == "MaxDelayedRows"sv) { - maxDelayedRows = option.Value().Cast().Ptr(); - } else if (name == "MultiGet"sv) { - isMultiget = option.Value().Cast().Ptr(); - } - } - } - - if (!ttl) { - ttl = ctx.NewAtom(pos, 300); - } - if (!maxCachedRows) { - maxCachedRows = ctx.NewAtom(pos, 1'000'000); - } - if (!maxDelayedRows) { - maxDelayedRows = ctx.NewAtom(pos, 1'000'000); - } - auto rightInput = join.RightInput().Ptr(); - if (auto maybe = TExprBase(rightInput).Maybe()) { - rightInput = maybe.Cast().Input().Ptr(); - } - auto leftLabel = join.LeftLabel().Maybe() ? 
join.LeftLabel().Cast().Ptr() : ctx.NewAtom(pos, ""); - Y_ENSURE(join.RightLabel().Maybe()); - auto cn = Build(ctx, pos) - .Output(left.Output().Cast()) - .LeftLabel(leftLabel) - .RightInput(rightInput) - .RightLabel(join.RightLabel().Cast()) - .JoinKeys(join.JoinKeys()) - .JoinType(join.JoinType()) - .LeftJoinKeyNames(join.LeftJoinKeyNames()) - .RightJoinKeyNames(join.RightJoinKeyNames()) - .TTL(ttl) - .MaxCachedRows(maxCachedRows) - .MaxDelayedRows(maxDelayedRows); - - if (isMultiget) { - cn.IsMultiget(isMultiget); - } - - auto lambda = Build(ctx, pos) - .Args({"stream"}) - .Body("stream") - .Done(); - const auto stage = Build(ctx, pos) - .Inputs() - .Add(cn.Done()) - .Build() - .Program(lambda) - .Settings(TDqStageSettings().BuildNode(ctx, pos)) - .Done(); - - return Build(ctx, pos) - .Output() - .Stage(stage) - .Index().Build("0") - .Build() - .Done(); + return DqRewriteStreamLookupJoin(node, ctx); } template diff --git a/ydb/library/yql/providers/dq/task_runner_actor/task_runner_actor.cpp b/ydb/library/yql/providers/dq/task_runner_actor/task_runner_actor.cpp index 8b8d1addad05..4ad197329ad1 100644 --- a/ydb/library/yql/providers/dq/task_runner_actor/task_runner_actor.cpp +++ b/ydb/library/yql/providers/dq/task_runner_actor/task_runner_actor.cpp @@ -521,8 +521,8 @@ class TTaskRunnerActor ev->Get()->Index, std::move(batch), std::move(ev->Get()->Checkpoint), - ev->Get()->CheckpointSize, ev->Get()->Size, + ev->Get()->CheckpointSize, ev->Get()->Finished, ev->Get()->Changed); } diff --git a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/connector_client_mock.h b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/connector_client_mock.h index 152125db4e27..897a4a156aba 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/connector_client_mock.h +++ b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/connector_client_mock.h @@ -68,6 +68,11 @@ namespace NYql::NConnector::NTest { return 
google::protobuf::util::MessageDifferencer::Equals(arg, expected); } + MATCHER_P(RequestRelaxedMatcher, expected, "") { + Y_UNUSED(arg); + return true; + } + #define MATCH_RESULT_WITH_INPUT(INPUT, RESULT_SET, GETTER) \ { \ for (const auto& val : INPUT) { \ @@ -689,13 +694,21 @@ namespace NYql::NConnector::NTest { return *this; } + auto& ValidateArgs(bool validate) { + ValidateArgs_ = validate; + return *this; + } + private: void SetExpectation() { if (ResponseResults_.empty()) { Result(); } - auto& expectBuilder = EXPECT_CALL(*Mock_, ListSplitsImpl(ProtobufRequestMatcher(*Result_))); + auto& expectBuilder = ValidateArgs_ + ? EXPECT_CALL(*Mock_, ListSplitsImpl(ProtobufRequestMatcher(*Result_))) + : EXPECT_CALL(*Mock_, ListSplitsImpl(RequestRelaxedMatcher(*Result_))); + for (auto response : ResponseResults_) { expectBuilder.WillOnce(Return(TIteratorResult{ResponseStatus_, response})); } @@ -705,6 +718,7 @@ namespace NYql::NConnector::NTest { TConnectorClientMock* Mock_ = nullptr; std::vector ResponseResults_; NYdbGrpc::TGrpcStatus ResponseStatus_ {}; + bool ValidateArgs_ = true; }; template @@ -767,6 +781,11 @@ namespace NYql::NConnector::NTest { return *this; } + auto& ValidateArgs(bool validate) { + ValidateArgs_ = validate; + return *this; + } + void FillWithDefaults() { Format(NApi::TReadSplitsRequest::ARROW_IPC_STREAMING); } @@ -777,7 +796,10 @@ namespace NYql::NConnector::NTest { Result(); } - auto& expectBuilder = EXPECT_CALL(*Mock_, ReadSplitsImpl(ProtobufRequestMatcher(*Result_))); + auto& expectBuilder = ValidateArgs_ + ? 
EXPECT_CALL(*Mock_, ReadSplitsImpl(ProtobufRequestMatcher(*Result_))) + : EXPECT_CALL(*Mock_, ReadSplitsImpl(RequestRelaxedMatcher(*Result_))); + for (auto response : ResponseResults_) { expectBuilder.WillOnce(Return(TIteratorResult{ResponseStatus_, response})); } @@ -787,6 +809,7 @@ namespace NYql::NConnector::NTest { TConnectorClientMock* Mock_ = nullptr; std::vector ResponseResults_; NYdbGrpc::TGrpcStatus ResponseStatus_ {}; + bool ValidateArgs_ = true; }; TDescribeTableExpectationBuilder ExpectDescribeTable() { diff --git a/ydb/library/yql/providers/solomon/actors/ut/dq_solomon_write_actor_ut.cpp b/ydb/library/yql/providers/solomon/actors/ut/dq_solomon_write_actor_ut.cpp index 17468601434d..be49e1eac46f 100644 --- a/ydb/library/yql/providers/solomon/actors/ut/dq_solomon_write_actor_ut.cpp +++ b/ydb/library/yql/providers/solomon/actors/ut/dq_solomon_write_actor_ut.cpp @@ -1,22 +1,25 @@ #include "ut_helpers.h" +#include + #include #include #include -#include - namespace NYql::NDq { using namespace NKikimr::NMiniKQL; +using namespace NTestUtils; constexpr TDuration WaitTimeout = TDuration::Seconds(10); namespace { - void TestWriteBigBatch(bool isCloud) { + +void TestWriteBigBatch(bool isCloud) { const int batchSize = 7500; - CleanupSolomon("cloudId1", "folderId1", "custom", isCloud); + const TSolomonLocation location = {.ProjectId = "cloudId1", .FolderId = "folderId1", .Service = "custom", .IsCloud = isCloud}; + CleanupSolomon(location); TFakeCASetup setup; InitAsyncOutput(setup, BuildSolomonShardSettings(isCloud)); @@ -25,25 +28,27 @@ namespace { setup.AsyncOutputWrite([](NKikimr::NMiniKQL::THolderFactory& holderFactory){ TUnboxedValueBatch res; for (int i = 0; i < batchSize; i++) { - res.emplace_back(CreateStruct(holderFactory, { - NUdf::TUnboxedValuePod(static_cast::TLayout>(i + 200000)), - NKikimr::NMiniKQL::MakeString(std::to_string(i)), - NUdf::TUnboxedValuePod(678) - })); + res.emplace_back(CreateStruct(holderFactory, { + 
NUdf::TUnboxedValuePod(static_cast::TLayout>(i + 200000)), + NKikimr::NMiniKQL::MakeString(std::to_string(i)), + NUdf::TUnboxedValuePod(678) + })); } return res; }); UNIT_ASSERT_C(!issue.Wait(WaitTimeout), issue.GetValue().ToString()); - const auto metrics = GetSolomonMetrics("folderId1", "custom"); - UNIT_ASSERT_EQUAL(GetMetricsCount(metrics), batchSize); - } + const auto metrics = GetSolomonMetrics(location); + UNIT_ASSERT_VALUES_EQUAL(GetMetricsCount(metrics), batchSize); } +} // anonymous namespace + Y_UNIT_TEST_SUITE(TDqSolomonWriteActorTest) { Y_UNIT_TEST(TestWriteFormat) { - CleanupSolomon("cloudId1", "folderId1", "custom", true); + const TSolomonLocation location = {.ProjectId = "cloudId1", .FolderId = "folderId1", .Service = "custom", .IsCloud = true}; + CleanupSolomon(location); TFakeCASetup setup; InitAsyncOutput(setup, BuildSolomonShardSettings(true)); @@ -60,7 +65,7 @@ Y_UNIT_TEST_SUITE(TDqSolomonWriteActorTest) { }); UNIT_ASSERT_C(!issue.Wait(WaitTimeout), issue.GetValue().ToString()); - const auto metrics = GetSolomonMetrics("folderId1", "custom"); + const auto metrics = GetSolomonMetrics(location); const auto expected = R"([ { "labels": [ @@ -81,46 +86,47 @@ Y_UNIT_TEST_SUITE(TDqSolomonWriteActorTest) { } Y_UNIT_TEST(TestWriteBigBatchMonitoring) { - TestWriteBigBatch(true); + TestWriteBigBatch(true); } Y_UNIT_TEST(TestWriteBigBatchSolomon) { - //TestWriteBigBatch(false); + TestWriteBigBatch(false); } Y_UNIT_TEST(TestWriteWithTimeseries) { - const int batchSize = 10; - CleanupSolomon("cloudId1", "folderId1", "custom", true); + const int batchSize = 10; + const TSolomonLocation location = {.ProjectId = "cloudId1", .FolderId = "folderId1", .Service = "custom", .IsCloud = true}; + CleanupSolomon(location); - TFakeCASetup setup; - InitAsyncOutput(setup, BuildSolomonShardSettings(true)); + TFakeCASetup setup; + InitAsyncOutput(setup, BuildSolomonShardSettings(true)); - auto issue = setup.AsyncOutputPromises.Issue.GetFuture(); - 
setup.AsyncOutputWrite([](NKikimr::NMiniKQL::THolderFactory& holderFactory){ - TUnboxedValueBatch res; + auto issue = setup.AsyncOutputPromises.Issue.GetFuture(); + setup.AsyncOutputWrite([](NKikimr::NMiniKQL::THolderFactory& holderFactory){ + TUnboxedValueBatch res; - for (int i = 0; i < batchSize; i++) { - res.emplace_back(CreateStruct(holderFactory, { - NUdf::TUnboxedValuePod(static_cast::TLayout>(i + 200000)), - NKikimr::NMiniKQL::MakeString("123"), - NUdf::TUnboxedValuePod(678) - })); - } + for (int i = 0; i < batchSize; i++) { + res.emplace_back(CreateStruct(holderFactory, { + NUdf::TUnboxedValuePod(static_cast::TLayout>(i + 200000)), + NKikimr::NMiniKQL::MakeString("123"), + NUdf::TUnboxedValuePod(678) + })); + } - return res; - }); - UNIT_ASSERT_C(!issue.Wait(WaitTimeout), issue.GetValue().ToString()); + return res; + }); + UNIT_ASSERT_C(!issue.Wait(WaitTimeout), issue.GetValue().ToString()); - const auto metrics = GetSolomonMetrics("folderId1", "custom"); - UNIT_ASSERT_EQUAL(GetMetricsCount(metrics), batchSize); + const auto metrics = GetSolomonMetrics(location); + UNIT_ASSERT_VALUES_EQUAL(GetMetricsCount(metrics), batchSize); } Y_UNIT_TEST(TestCheckpoints) { - const int batchSize = 2400; - - { + const int batchSize = 2400; TFakeCASetup setup; - CleanupSolomon("cloudId1", "folderId1", "custom", true); + + const TSolomonLocation location = {.ProjectId = "cloudId1", .FolderId = "folderId1", .Service = "custom", .IsCloud = true}; + CleanupSolomon(location); InitAsyncOutput(setup, BuildSolomonShardSettings(true)); auto stateSaved = setup.AsyncOutputPromises.StateSaved.GetFuture(); @@ -128,26 +134,26 @@ Y_UNIT_TEST_SUITE(TDqSolomonWriteActorTest) { TUnboxedValueBatch res; for (int i = 0; i < batchSize; i++) { - res.emplace_back(CreateStruct(holderFactory, { - NUdf::TUnboxedValuePod(static_cast::TLayout>(i + 200000)), - NKikimr::NMiniKQL::MakeString(std::to_string(i)), - NUdf::TUnboxedValuePod(678) - })); + res.emplace_back(CreateStruct(holderFactory, { + 
NUdf::TUnboxedValuePod(static_cast::TLayout>(i + 200000)), + NKikimr::NMiniKQL::MakeString(std::to_string(i)), + NUdf::TUnboxedValuePod(678) + })); } return res; }, CreateCheckpoint(1)); UNIT_ASSERT(stateSaved.Wait(WaitTimeout)); - const auto metrics = GetSolomonMetrics("folderId1", "custom"); - UNIT_ASSERT_EQUAL(GetMetricsCount(metrics), batchSize); - } + const auto metrics = GetSolomonMetrics(location); + UNIT_ASSERT_VALUES_EQUAL(GetMetricsCount(metrics), batchSize); } Y_UNIT_TEST(TestShouldReturnAfterCheckpoint) { - { TFakeCASetup setup; - CleanupSolomon("cloudId1", "folderId1", "custom", true); + + const TSolomonLocation location = {.ProjectId = "cloudId1", .FolderId = "folderId1", .Service = "custom", .IsCloud = true}; + CleanupSolomon(location); InitAsyncOutput(setup, BuildSolomonShardSettings(true)); auto stateSaved = setup.AsyncOutputPromises.StateSaved.GetFuture(); @@ -169,14 +175,13 @@ Y_UNIT_TEST_SUITE(TDqSolomonWriteActorTest) { NUdf::TUnboxedValuePod(static_cast::TLayout>(200001)), NKikimr::NMiniKQL::MakeString("cba"), NUdf::TUnboxedValuePod(678) - })); - return res; + })); + return res; }); UNIT_ASSERT_C(!issue.Wait(WaitTimeout), issue.GetValue().ToString()); - const auto metrics = GetSolomonMetrics("folderId1", "custom"); - UNIT_ASSERT_EQUAL(GetMetricsCount(metrics), 2); - } + const auto metrics = GetSolomonMetrics(location); + UNIT_ASSERT_VALUES_EQUAL(GetMetricsCount(metrics), 2); } } diff --git a/ydb/library/yql/providers/solomon/actors/ut/ut_helpers.cpp b/ydb/library/yql/providers/solomon/actors/ut/ut_helpers.cpp index 971dbd1af781..a37b378abff8 100644 --- a/ydb/library/yql/providers/solomon/actors/ut/ut_helpers.cpp +++ b/ydb/library/yql/providers/solomon/actors/ut/ut_helpers.cpp @@ -60,32 +60,6 @@ void InitAsyncOutput( }); } -void CleanupSolomon(TString cloudId, TString folderId, TString service, bool isCloud) { - const auto solomonPort = TString(getenv("SOLOMON_HTTP_PORT")); - TSimpleHttpClient httpClient("localhost", std::stoi(solomonPort)); - 
TStringStream str; - TStringBuilder builder; - builder << "/cleanup"; - if (isCloud) { - builder << "?folderId=" << folderId << "&service=" << service; - } else { - builder << "?project=" << cloudId << "&cluster=" << folderId << "&service=" << service; - } - - DoWithRetry( - [&]{ httpClient.DoPost(builder, "", &str); }, - TRetryOptions(3, TDuration::Seconds(1)), - true); -} - -TString GetSolomonMetrics(TString folderId, TString service) { - const auto solomonPort = TString(getenv("SOLOMON_HTTP_PORT")); - TSimpleHttpClient httpClient("localhost", std::stoi(solomonPort)); - TStringStream str; - httpClient.DoGet("/metrics/get?folderId=" + folderId + "&service=" + service, &str); - return TString(str.Str()); -} - NSo::NProto::TDqSolomonShard BuildSolomonShardSettings(bool isCloud) { NSo::NProto::TDqSolomonShard settings; settings.SetEndpoint(TString(getenv("SOLOMON_HTTP_ENDPOINT"))); diff --git a/ydb/library/yql/providers/solomon/actors/ut/ut_helpers.h b/ydb/library/yql/providers/solomon/actors/ut/ut_helpers.h index b858a2d95128..174ba76aa35c 100644 --- a/ydb/library/yql/providers/solomon/actors/ut/ut_helpers.h +++ b/ydb/library/yql/providers/solomon/actors/ut/ut_helpers.h @@ -21,10 +21,6 @@ void InitAsyncOutput( NSo::NProto::TDqSolomonShard&& settings, i64 freeSpace = 100000); -void CleanupSolomon(TString cloudId, TString folderId, TString service, bool isCloud); - -TString GetSolomonMetrics(TString folderId, TString service); - NSo::NProto::TDqSolomonShard BuildSolomonShardSettings(bool isCloud); NUdf::TUnboxedValue CreateStruct( diff --git a/ydb/library/yql/providers/solomon/actors/ut/ya.make b/ydb/library/yql/providers/solomon/actors/ut/ya.make index 0fd0a6a037c4..05f63bff0e1c 100644 --- a/ydb/library/yql/providers/solomon/actors/ut/ya.make +++ b/ydb/library/yql/providers/solomon/actors/ut/ya.make @@ -11,13 +11,14 @@ PEERDIR( library/cpp/http/simple library/cpp/retry ydb/core/testlib/basics + ydb/library/testlib/solomon_helpers + 
ydb/library/yql/providers/common/ut_helpers yql/essentials/minikql/computation/llvm16 yql/essentials/minikql/comp_nodes/llvm16 + yql/essentials/providers/common/comp_nodes yql/essentials/public/udf/service/exception_policy yql/essentials/sql yql/essentials/sql/pg_dummy - yql/essentials/providers/common/comp_nodes - ydb/library/yql/providers/common/ut_helpers ) YQL_LAST_ABI_VERSION() diff --git a/ydb/library/yql/providers/solomon/expr_nodes/yql_solomon_expr_nodes.json b/ydb/library/yql/providers/solomon/expr_nodes/yql_solomon_expr_nodes.json index a23eb6c22289..182394ea97cb 100644 --- a/ydb/library/yql/providers/solomon/expr_nodes/yql_solomon_expr_nodes.json +++ b/ydb/library/yql/providers/solomon/expr_nodes/yql_solomon_expr_nodes.json @@ -104,6 +104,17 @@ {"Index": 3, "Name": "Input", "Type": "TExprBase"} ] }, + { + "Name": "TSoInsert", + "Base": "TCallable", + "Match": {"Type": "Callable", "Name": "SoInsert"}, + "Children": [ + {"Index": 0, "Name": "World", "Type": "TExprBase"}, + {"Index": 1, "Name": "DataSink", "Type": "TSoDataSink"}, + {"Index": 2, "Name": "Shard", "Type": "TCoAtom"}, + {"Index": 3, "Name": "Input", "Type": "TExprBase"} + ] + }, { "Name": "TSoShard", "Base": "TCallable", diff --git a/ydb/library/yql/providers/solomon/provider/yql_solomon_datasink_type_ann.cpp b/ydb/library/yql/providers/solomon/provider/yql_solomon_datasink_type_ann.cpp index 5414d7e6d764..b0f6831bebad 100644 --- a/ydb/library/yql/providers/solomon/provider/yql_solomon_datasink_type_ann.cpp +++ b/ydb/library/yql/providers/solomon/provider/yql_solomon_datasink_type_ann.cpp @@ -18,6 +18,7 @@ class TSolomonDataSinkTypeAnnotationTransformer : public TVisitorTransformerBase AddHandler({TSoWriteToShard::CallableName()}, Hndl(&TSelf::HandleWriteToShard)); AddHandler({TSoShard::CallableName()}, Hndl(&TSelf::HandleSoShard)); AddHandler({TCoCommit::CallableName()}, Hndl(&TSelf::HandleCommit)); + AddHandler({TSoInsert::CallableName()}, Hndl(&TSelf::HandleInsert)); } private: @@ -25,85 
+26,29 @@ class TSolomonDataSinkTypeAnnotationTransformer : public TVisitorTransformerBase if (!EnsureArgsCount(input.Ref(), 4, ctx)) { return TStatus::Error; } + TSoWriteToShard write = input.Cast(); if (!EnsureWorldType(write.World().Ref(), ctx)) { return TStatus::Error; } + if (!EnsureSpecificDataSink(write.DataSink().Ref(), SolomonProviderName, ctx)) { return TStatus::Error; } + if (!EnsureAtom(write.Shard().Ref(), ctx)) { return TStatus::Error; } if (!State_->IsRtmrMode()) { + const auto& writeInput = write.Input().Ref(); + const auto inputPos = writeInput.Pos(); const TTypeAnnotationNode* inputItemType = nullptr; - if (!EnsureNewSeqType(write.Input().Pos(), *write.Input().Ref().GetTypeAnn(), ctx, &inputItemType)) { + if (!EnsureNewSeqType(inputPos, *writeInput.GetTypeAnn(), ctx, &inputItemType)) { return TStatus::Error; } - if (!EnsureStructType(write.Input().Pos(), *inputItemType, ctx)) { - return TStatus::Error; - } - - auto structType = inputItemType->Cast(); - - bool hasTimestampMember = false; - ui32 labelMembers = 0; - ui32 sensorMembers = 0; - - for (auto* structItem : structType->GetItems()) { - const auto itemName = structItem->GetName(); - const TDataExprType* itemType = nullptr; - - bool isOptional = false; - if (!IsDataOrOptionalOfData(structItem->GetItemType(), isOptional, itemType)) { - return TStatus::Error; - } - - const auto dataType = NUdf::GetDataTypeInfo(itemType->GetSlot()); - - if (dataType.Features & NUdf::DateType || dataType.Features & NUdf::TzDateType) { - if (hasTimestampMember) { - ctx.AddError(TIssue(ctx.GetPosition(write.Input().Pos()), "Multiple timestamps should not used when writing into Monitoring")); - return TStatus::Error; - } - hasTimestampMember = true; - continue; - } - - if (isOptional) { - ctx.AddError(TIssue(ctx.GetPosition(write.Input().Pos()), TStringBuilder() << "Optional types for labels and metric values are not supported in writing into Monitoring. 
FieldName: " << itemName)); - return TStatus::Error; - } - - if (dataType.Features & NUdf::StringType) { - labelMembers++; - } else if (dataType.Features & NUdf::NumericType) { - sensorMembers++; - } else { - ctx.AddError(TIssue(ctx.GetPosition(write.Input().Pos()), TStringBuilder() << "Field " << itemName << " of type " << dataType.Name << " could not be written into Monitoring")); - return TStatus::Error; - } - } - - if (!hasTimestampMember) { - ctx.AddError(TIssue(ctx.GetPosition(write.Input().Pos()), "Timestamp wasn't provided for Monitoring")); - return TStatus::Error; - } - - if (!sensorMembers) { - ctx.AddError(TIssue(ctx.GetPosition(write.Input().Pos()), "No sensors were provided for Monitoring")); - return TStatus::Error; - } - - if (labelMembers > SolomonMaxLabelsCount) { - ctx.AddError(TIssue(ctx.GetPosition(write.Input().Pos()), TStringBuilder() << "Max labels count is " << SolomonMaxLabelsCount << " but " << labelMembers << " were provided")); - return TStatus::Error; - } - - if (sensorMembers > SolomonMaxSensorsCount) { - ctx.AddError(TIssue(ctx.GetPosition(write.Input().Pos()), TStringBuilder() << "Max sensors count is " << SolomonMaxSensorsCount << " but " << sensorMembers << " were provided")); + if (!ValidateWriteTypeAnnotation(inputPos, inputItemType, ctx)) { return TStatus::Error; } } @@ -164,6 +109,110 @@ class TSolomonDataSinkTypeAnnotationTransformer : public TVisitorTransformerBase return TStatus::Ok; } + static TStatus HandleInsert(TExprBase input, TExprContext& ctx) { + if (!EnsureArgsCount(input.Ref(), 4U, ctx)) { + return TStatus::Error; + } + + const auto insert = input.Cast(); + if (!EnsureWorldType(insert.World().Ref(), ctx)) { + return TStatus::Error; + } + + if (!EnsureSpecificDataSink(insert.DataSink().Ref(), SolomonProviderName, ctx)) { + return TStatus::Error; + } + + if (!EnsureAtom(insert.Shard().Ref(), ctx)) { + return TStatus::Error; + } + + const auto& insertInput = insert.Input().Ref(); + const auto inputPos = 
insertInput.Pos(); + const TTypeAnnotationNode* inputItemType = nullptr; + if (!EnsureNewSeqType(inputPos, *insertInput.GetTypeAnn(), ctx, &inputItemType)) { + return TStatus::Error; + } + + if (!ValidateWriteTypeAnnotation(inputPos, inputItemType, ctx)) { + return TStatus::Error; + } + + input.Ptr()->SetTypeAnn(ctx.MakeType(TTypeAnnotationNode::TListType{ + ctx.MakeType(inputItemType) + })); + return TStatus::Ok; + } + + static bool ValidateWriteTypeAnnotation(TPositionHandle position, const TTypeAnnotationNode* inputItemType, TExprContext& ctx) { + if (!EnsureStructType(position, *inputItemType, ctx)) { + return false; + } + + auto structType = inputItemType->Cast(); + + bool hasTimestampMember = false; + ui32 labelMembers = 0; + ui32 sensorMembers = 0; + + for (auto* structItem : structType->GetItems()) { + const auto itemName = structItem->GetName(); + const TDataExprType* itemType = nullptr; + + bool isOptional = false; + if (!IsDataOrOptionalOfData(structItem->GetItemType(), isOptional, itemType)) { + return false; + } + + const auto dataType = NUdf::GetDataTypeInfo(itemType->GetSlot()); + + if (dataType.Features & NUdf::DateType || dataType.Features & NUdf::TzDateType) { + if (hasTimestampMember) { + ctx.AddError(TIssue(ctx.GetPosition(position), "Multiple timestamps should not be used when writing into Monitoring")); + return false; + } + hasTimestampMember = true; + continue; + } + + if (isOptional) { + ctx.AddError(TIssue(ctx.GetPosition(position), TStringBuilder() << "Optional types for labels and metric values are not supported in writing into Monitoring. 
FieldName: " << itemName)); + return false; + } + + if (dataType.Features & NUdf::StringType) { + labelMembers++; + } else if (dataType.Features & NUdf::NumericType) { + sensorMembers++; + } else { + ctx.AddError(TIssue(ctx.GetPosition(position), TStringBuilder() << "Field " << itemName << " of type " << dataType.Name << " could not be written into Monitoring")); + return false; + } + } + + if (!hasTimestampMember) { + ctx.AddError(TIssue(ctx.GetPosition(position), "Timestamp wasn't provided for Monitoring")); + return false; + } + + if (!sensorMembers) { + ctx.AddError(TIssue(ctx.GetPosition(position), "No sensors were provided for Monitoring")); + return false; + } + + if (labelMembers > SolomonMaxLabelsCount) { + ctx.AddError(TIssue(ctx.GetPosition(position), TStringBuilder() << "Max labels count is " << SolomonMaxLabelsCount << " but " << labelMembers << " were provided")); + return false; + } + + if (sensorMembers > SolomonMaxSensorsCount) { + ctx.AddError(TIssue(ctx.GetPosition(position), TStringBuilder() << "Max sensors count is " << SolomonMaxSensorsCount << " but " << sensorMembers << " were provided")); + return false; + } + + return true; + } + TSolomonState::TPtr State_; }; diff --git a/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp b/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp index 34fb99a3a999..e80cef667195 100644 --- a/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp +++ b/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp @@ -280,7 +280,26 @@ class TSolomonDqIntegration: public TDqIntegrationBase { } TMaybe CanWrite(const TExprNode& write, TExprContext&) override { - return TSoWrite::Match(&write); + if (!State_->WriteThroughDqIntegration) { + return TSoWrite::Match(&write); + } + + return TSoWriteToShard::Match(&write); + } + + TExprNode::TPtr WrapWrite(const TExprNode::TPtr& writeNode, TExprContext& ctx) override { + if 
(!State_->WriteThroughDqIntegration) { + return writeNode; + } + + const auto write = TMaybeNode(writeNode).Cast(); + return Build(ctx, write.Pos()) + .World(write.World()) + .DataSink(write.DataSink()) + .Shard(write.Shard()) + .Input(write.Input()) + .Done() + .Ptr(); } void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t maxTasksPerStage, TExprContext&) override { diff --git a/ydb/library/yql/providers/solomon/provider/yql_solomon_physical_optimize.cpp b/ydb/library/yql/providers/solomon/provider/yql_solomon_physical_optimize.cpp index f372b50b9257..5f3ee78b3a49 100644 --- a/ydb/library/yql/providers/solomon/provider/yql_solomon_physical_optimize.cpp +++ b/ydb/library/yql/providers/solomon/provider/yql_solomon_physical_optimize.cpp @@ -62,7 +62,10 @@ class TSoPhysicalOptProposalTransformer : public TOptimizeTransformerBase { , State_(std::move(state)) { #define HNDL(name) "PhysicalOptimizer-"#name, Hndl(&TSoPhysicalOptProposalTransformer::name) - AddHandler(0, &TSoWriteToShard::Match, HNDL(SoWriteToShard)); + if (!State_->WriteThroughDqIntegration) { + AddHandler(0, &TSoWriteToShard::Match, HNDL(SoWriteToShard)); + } + AddHandler(0, &TSoInsert::Match, HNDL(SoInsert)); AddHandler(0, &TCoLeft::Match, HNDL(TrimReadWorld)); #undef HNDL @@ -80,42 +83,38 @@ class TSoPhysicalOptProposalTransformer : public TOptimizeTransformerBase { return TExprBase(maybeRead.Cast().World().Ptr()); } - TMaybeNode SoWriteToShard(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) const { - if (State_->IsRtmrMode()) { - return node; - } - - auto write = node.Cast(); - if (!TDqCnUnionAll::Match(write.Input().Raw())) { + TMaybe BuildSinkStage(TPositionHandle writePos, TSoDataSink dataSink, TCoAtom writeShard, TExprBase input, TExprContext& ctx, const TGetParents& getParents) const { + const auto maybeDqUnion = input.Maybe(); + if (!maybeDqUnion) { // If input is not DqCnUnionAll, it 
means not all dq optimizations are done yet - return node; + return {}; } - const TParentsMap* parentsMap = getParents(); - auto dqUnion = write.Input().Cast(); + const auto dqUnion = maybeDqUnion.Cast(); + const auto* parentsMap = getParents(); if (!NDq::IsSingleConsumerConnection(dqUnion, *parentsMap)) { - return node; + return {}; } - YQL_CLOG(INFO, ProviderSolomon) << "Optimize SoWriteToShard"; + YQL_CLOG(INFO, ProviderSolomon) << "Optimize insert into solomon (SoWriteToShard / SoInsert)"; - const auto solomonCluster = TString(write.DataSink().Cluster().Value()); - auto* typeAnn = write.Input().Ref().GetTypeAnn(); + const auto* typeAnn = input.Ref().GetTypeAnn(); const TTypeAnnotationNode* inputItemType = nullptr; - if (!EnsureNewSeqType(write.Input().Pos(), *typeAnn, ctx, &inputItemType)) { + if (!EnsureNewSeqType(input.Pos(), *typeAnn, ctx, &inputItemType)) { return {}; } - auto rowTypeNode = ExpandType(write.Pos(), *inputItemType, ctx); - auto shard = BuildSolomonShard(write.Shard().Cast(), TExprBase(rowTypeNode), ctx, solomonCluster); + const auto rowTypeNode = ExpandType(writePos, *inputItemType, ctx); + const TString solomonCluster(dataSink.Cluster()); + const auto shard = BuildSolomonShard(writeShard, TExprBase(rowTypeNode), ctx, solomonCluster); - auto dqSink = Build(ctx, write.Pos()) - .DataSink(write.DataSink()) + const auto dqSink = Build(ctx, writePos) + .DataSink(dataSink) .Settings(shard) .Index(dqUnion.Output().Index()) .Done(); - TDqStage inputStage = dqUnion.Output().Stage().Cast(); + const auto inputStage = dqUnion.Output().Stage().Cast(); auto sinksBuilder = Build(ctx, inputStage.Pos()); if (inputStage.Outputs()) { @@ -123,18 +122,38 @@ class TSoPhysicalOptProposalTransformer : public TOptimizeTransformerBase { } sinksBuilder.Add(dqSink); - auto dqStageWithSink = Build(ctx, inputStage.Pos()) + return Build(ctx, inputStage.Pos()) .InitFrom(inputStage) .Outputs(sinksBuilder.Done()) .Done(); + } - auto dqQueryBuilder = Build(ctx, write.Pos()); - 
dqQueryBuilder.World(write.World()); - dqQueryBuilder.SinkStages().Add(dqStageWithSink).Build(); + TMaybeNode SoWriteToShard(TExprBase node, TExprContext& ctx, const TGetParents& getParents) const { + if (State_->IsRtmrMode()) { + return node; + } + + const auto write = node.Cast(); + const auto stage = BuildSinkStage(write.Pos(), write.DataSink(), write.Shard(), write.Input(), ctx, getParents); + if (!stage) { + return node; + } - optCtx.RemapNode(inputStage.Ref(), dqStageWithSink.Ptr()); + return Build(ctx, write.Pos()) + .World(write.World()) + .SinkStages() + .Add(*stage) + .Build() + .Done(); + } + + TMaybeNode SoInsert(TExprBase node, TExprContext& ctx, const TGetParents& getParents) const { + const auto insert = node.Cast(); + if (const auto stage = BuildSinkStage(insert.Pos(), insert.DataSink(), insert.Shard(), insert.Input(), ctx, getParents)) { + return *stage; + } - return dqQueryBuilder.Done(); + return node; } private: diff --git a/ydb/library/yql/providers/solomon/provider/yql_solomon_provider.h b/ydb/library/yql/providers/solomon/provider/yql_solomon_provider.h index 3d89dc4916e6..5fa979236612 100644 --- a/ydb/library/yql/providers/solomon/provider/yql_solomon_provider.h +++ b/ydb/library/yql/providers/solomon/provider/yql_solomon_provider.h @@ -22,6 +22,7 @@ struct TSolomonState : public TThrRefBase public: bool SupportRtmrMode = true; + bool WriteThroughDqIntegration = false; ISolomonGateway::TPtr Gateway; TTypeAnnotationContext* Types = nullptr; diff --git a/ydb/tests/fq/generic/streaming/test_join.py b/ydb/tests/fq/generic/streaming/test_join.py index 78fdbba9038a..d86034c65a46 100644 --- a/ydb/tests/fq/generic/streaming/test_join.py +++ b/ydb/tests/fq/generic/streaming/test_join.py @@ -539,7 +539,7 @@ def freeze(json): e.Data as data, u.id as lookup from $input as e - left join {streamlookup} ydb_conn_{table_name}.{table_name} as u + left join {streamlookup} any ydb_conn_{table_name}.{table_name} as u on(AsList(e.Data) = u.data) -- MultiGet true 
; @@ -582,7 +582,7 @@ def freeze(json): u.data as lookup from $input as e - left join {streamlookup} ydb_conn_{table_name}.{table_name} as u + left join {streamlookup} any ydb_conn_{table_name}.{table_name} as u on(e.user = u.id) -- MultiGet true ; @@ -656,7 +656,7 @@ def freeze(json): u.data as lookup from $input as e - left join {streamlookup} ydb_conn_{table_name}.{table_name} as u + left join {streamlookup} any ydb_conn_{table_name}.{table_name} as u on(e.user = u.id) -- MultiGet true ; @@ -713,7 +713,7 @@ def freeze(json): $enriched = select a, b, c, d, e, f, za, yb, yc, zd from $input as e - left join {streamlookup} $listified as u + left join {streamlookup} any $listified as u on(e.za = u.a AND e.yb = u.b) -- MultiGet true ; @@ -760,9 +760,9 @@ def freeze(json): $enriched = select u.a as la, u.b as lb, u.c as lc, u2.a as sa, u2.b as sb, u2.c as sc, lza, lyb, sza, syb, yc from $input as e - left join {streamlookup} $listified as u + left join {streamlookup} any $listified as u on(e.lza = u.a AND e.lyb = u.b) - left join /*+streamlookup()*/ $listified as u2 + left join /*+streamlookup()*/ any $listified as u2 on(e.sza = u2.a AND e.syb = u2.b) -- MultiGet true ; diff --git a/ydb/tests/tools/kqprun/configuration/app_config.conf b/ydb/tests/tools/kqprun/configuration/app_config.conf index 94cd16d5ad98..d746387c5b3a 100644 --- a/ydb/tests/tools/kqprun/configuration/app_config.conf +++ b/ydb/tests/tools/kqprun/configuration/app_config.conf @@ -66,6 +66,7 @@ FeatureFlags { EnableStreamingQueries: true EnableMetadataObjectsOnServerless: true EnableExternalDataSourcesOnServerless: true + EnableSchemaSecrets: true } KQPConfig {