Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions config/tests/nestedConfig.data.xml
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@
<file path="config/daqsystemtest/hosts.data.xml"/>
</include>

<comments>
<comment creation-time="20240826T084126" created-by="maroda" created-on="np04-srv-015.cern.ch" author="maroda" text="Correct cern stream"/>
<comment creation-time="20240909T081245" created-by="glehmann" created-on="np04-srv-024.cern.ch" author="glehmann" text=" "/>
<comment creation-time="20240909T082108" created-by="glehmann" created-on="np04-srv-024.cern.ch" author="glehmann" text=" "/>
<comment creation-time="20240918T124331" created-by="maroda" created-on="np04-srv-015.cern.ch" author="maroda" text="change local session opmon URI"/>
</comments>

<obj class="Variable" id="drunc-simulate-failure-long-command-execution">
<attr name="name" type="string" val="DRUNC_FAILURE_TESTING_CMD_DELAY"/>
<attr name="value" type="string" val="70"/>
</obj>
<obj class="Variable" id="drunc-simulate-failure-long-command-execution-app-name">
<attr name="name" type="string" val="DRUNC_FAILURE_TESTING_CMD_DELAY_APP_NAME"/>
<attr name="value" type="string" val="bottom-segment-2-application"/>
</obj>

<obj class="Service" id="controller_control">
<attr name="protocol" type="string" val="grpc"/>
Expand All @@ -104,6 +106,16 @@
<rel name="modules">
</rel>
</obj>
<obj class="DaqApplication" id="bottom-segment-2.1-application">
<attr name="application_name" type="string" val="fake_daq_application"/>
<rel name="runs_on" class="VirtualHost" id="vlocalhost"/>
<rel name="exposes_service">
<ref class="Service" id="daqapp_control"/>
</rel>
<rel name="opmon_conf" class="OpMonConf" id="slow-all-monitoring"/>
<rel name="modules">
</rel>
</obj>
<obj class="DaqApplication" id="nested-segment-application">
<attr name="application_name" type="string" val="fake_daq_application"/>
<rel name="runs_on" class="VirtualHost" id="vlocalhost"/>
Expand Down Expand Up @@ -166,6 +178,7 @@
<obj class="Segment" id="bottom-segment-2">
<rel name="applications">
<ref class="DaqApplication" id="bottom-segment-2-application"/>
<ref class="DaqApplication" id="bottom-segment-2.1-application"/>
</rel>
<rel name="controller" class="RCApplication" id="bottom-segment-2-controller"/>
</obj>
Expand Down Expand Up @@ -220,4 +233,28 @@
</obj>


<obj class="Session" id="test-config-long-timeouts">
<attr name="data_request_timeout_ms" type="u32" val="1000"/>
<attr name="data_rate_slowdown_factor" type="u32" val="1"/>
<attr name="controller_log_level" type="enum" val="INFO"/>
<rel name="connectivity_service" class="ConnectivityService" id="local-connectivity-service-config"/>
<rel name="environment">
<ref class="Variable" id="local-env-ers-verb"/>
<ref class="Variable" id="local-env-ers-info"/>
<ref class="Variable" id="local-env-ers-warning"/>
<ref class="Variable" id="local-env-ers-error"/>
<ref class="Variable" id="local-env-ers-fatal"/>
<ref class="Variable" id="local-env-connectivity-port"/>
<ref class="Variable" id="drunc-simulate-failure-long-command-execution"/>
<ref class="Variable" id="drunc-simulate-failure-long-command-execution-app-name"/>
</rel>
<rel name="segment" class="Segment" id="top-segment"/>
<rel name="infrastructure_applications">
<ref class="ConnectionService" id="local-connection-server"/>
</rel>
<rel name="detector_configuration" class="DetectorConfig" id="dummy-detector"/>
<rel name="opmon_uri" class="OpMonURI" id="local-opmon-uri"/>
</obj>


</oks-data>
23 changes: 22 additions & 1 deletion src/drunc/apps/fake_daqapp_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import argparse
import copy as cp
import os
import random
import threading
import time
Expand All @@ -22,7 +23,7 @@

__version__ = "1.0.0"
get_root_logger("info")
log = get_logger("fake_daqapp_rest", rich_handler=True)
log = get_logger("fake_daqapp_rest", stream_handler=True)


class AppState:
Expand Down Expand Up @@ -58,6 +59,25 @@ def send_response_to_response_listener(
def execute_command(
self, req_data, answer_port, answer_host, remote_host
) -> Response:

self.log.critical(f"{req_data=}")
self.log.critical(f"{answer_port=}")
self.log.critical(f"{answer_host=}")
self.log.critical(f"{remote_host=}")

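# For testing purposes, the execution of the command can be delayed to simulate a
# long-running command and exercise timeout handling in the run control. The delay
# (in seconds) is read from DRUNC_FAILURE_TESTING_CMD_DELAY and is applied only when
# DRUNC_FAILURE_TESTING_CMD_DELAY_APP_NAME matches this application's name.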
drunc_testing_app_timeout_delay = os.getenv("DRUNC_FAILURE_TESTING_CMD_DELAY")
drunc_testing_app_timeout_delay_app_name = os.getenv("DRUNC_FAILURE_TESTING_CMD_DELAY_APP_NAME")
if drunc_testing_app_timeout_delay and not drunc_testing_app_timeout_delay_app_name:
self.log.error("DRUNC_FAILURE_TESTING_CMD_DELAY is set but DRUNC_FAILURE_TESTING_CMD_DELAY_APP_NAME is not set, not delaying execution")
if not drunc_testing_app_timeout_delay and drunc_testing_app_timeout_delay_app_name:
self.log.error("DRUNC_FAILURE_TESTING_CMD_DELAY_APP_NAME is set but DRUNC_FAILURE_TESTING_CMD_DELAY is not set, not delaying execution")
if drunc_testing_app_timeout_delay and drunc_testing_app_timeout_delay_app_name == self.appname:
delay = int(os.getenv("DRUNC_FAILURE_TESTING_CMD_DELAY"))
self.log.info(f"Delaying execution by {delay} seconds for testing purposes")
time.sleep(delay)

reply_address = (
f"http://{answer_host}:{answer_port}/response"
if answer_host
Expand Down Expand Up @@ -303,6 +323,7 @@ def main():
response = requests.get(flask_url + "/")
log.info(f"Response: {response.status_code}")
if response.status_code == 200:
log.critical("Fake DAQ app started successfully and is responding to requests")
break
if i == 9:
log.error("Failed to start fake DAQ app")
Expand Down
7 changes: 5 additions & 2 deletions src/drunc/controller/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,16 +702,19 @@ def execute_fsm_command(
flag=ResponseFlag.EXECUTED_SUCCESSFULLY,
)

# Parse and validate target.
try:
# Parse and validate target.
request.target = self.parse_target_string(request.target)
except ValueError:
response.flag = ResponseFlag.NOT_EXECUTED_BAD_REQUEST_FORMAT
return response

# Extract command information.
command = request.command
command_name = command.command_name
self.log.debug(f"FSM command: {command_name}")

# Extract FSM transition.
transition = self.stateful_node.get_fsm_transition(command_name)
self.log.debug(f"FSM transition: {transition}")

Expand Down Expand Up @@ -748,7 +751,7 @@ def execute_fsm_command(
response.fsm_flag = FSMResponseFlag.FSM_INVALID_TRANSITION
return response

# This node.
# Execute FSM transition on this node.
if request.target == self.name or request.execute_along_path:
fsm_args = self.stateful_node.decode_fsm_arguments(command)
fsm_data = self.stateful_node.prepare_transition(
Expand Down
2 changes: 2 additions & 0 deletions src/drunc/controller/interface/shell_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,8 @@ class DummyCommand:
) # TODO: Manually raise exception, see if the str declaration is needed with rich handling
return
except ServerTimeout as e:
import traceback
traceback.print_exc()
log.error(e)
log.error(
"The command timed out, unfortunately this means the server is in undefined state, and [red]your best option at this stage is to [bold]terminate[/bold] and [bold]boot[/bold][/]."
Expand Down
Loading