Commit 6f511e73 authored by Zhang Xiaoli
Browse files

Change the text format to LF

parent e06f46dc
FROM hub.cstcloud.cn/scalebox/agent:dev
FROM hub.cstcloud.cn/scalebox/agent:0.9
LABEL maintainer="Xiaoli Zhang<zhangxiaoli@cnic.cn>"
......
IMAGE_NAME:=hub.cstcloud.cn/csst/adml1:dev
IMAGE_NAME:=csst/adml1
build:
docker build --network=host -t $(IMAGE_NAME) .
......@@ -12,3 +12,5 @@ run:
docker run -it --entrypoint bash $(IMAGE_NAME)
down:
docker stop $(IMAGE_NAME)
scp:
scp -r ./ csst-zjs:/root/csst/admL1/
\ No newline at end of file
......@@ -40,7 +40,7 @@ class admL1Api():
test = control_pb2.JobKey()
test.cross_app_job_id = int(jobId)
test.key_text = obsid
reflag = stub.SendJobMessage(test)
reflag = stub.SendMessage(test)
print(reflag.value)
return reflag.value
if __name__ == '__main__':
......
......@@ -11,98 +11,95 @@ import "google/protobuf/timestamp.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/wrappers.proto";
service ControlService {
// //////////////////////////////////////////////////////////////////
// agent client
// actuator client
// //////////////////////////////////////////////////////////////////
// parameter : inline_cluster_name
rpc GetInlineSlotList(google.protobuf.StringValue) returns (InlineSlotList);
// input : slot id('ON')
// return : job-key of the task, id==0(NULL)
// task id : 'READY/-1' -> 'QUEUED/-2'
rpc GetNextTask(google.protobuf.Int32Value) returns (TaskItem);
// input : task id('QUEUED/-2')
// return : ret_code : 0(OK), -1(task NOT FOUND), -2(WRONG STATUS)
// task_status_code:'QUEUED'/-2 -> 'RUNNING'/-3
rpc SetTaskStarted(google.protobuf.Int64Value) returns (google.protobuf.Int32Value);
// input : TaskExecMessage
// return : ret_code : 0(OK), -1(task NOT FOUND), -2(task WRONG STATUS)
// task_status_code:'RUNNING'/-3 -> 'OK'/0, ...
rpc SetTaskFinished(TaskExecMessage) returns (google.protobuf.Int32Value);
// parameter : external_cluster_name
rpc GetExternalSlotList(google.protobuf.StringValue) returns (ExternalSlotList);
// slot exit automatically, called in agent side
// input : slot id
// return : ret_code : 0(OK), -1(slot NOT FOUND), -2(slot WRONG STATUS)
// slot : 'ON' -> 'OFF'
rpc SetSlotTerminated(google.protobuf.Int32Value) returns (google.protobuf.Int32Value);
// 'slot_run' only for the inline cluster,called by actuator
// 'agent_setup'/'agent_teardown', called by agent
// rpc SetSlotRunning(SlotExecMessage) returns (google.protobuf.Empty);
rpc SaveClientExecInfo(ClientExecMessage) returns (google.protobuf.Empty);
// //////////////////////////////////////////////////////////////////
// actuator client
// agent client
// //////////////////////////////////////////////////////////////////
// input: token_string ?
// job:'RUNNING' && slot:'READY'
rpc GetRunnableSlotList(google.protobuf.Empty) returns (CommandList);
// input : slot id
// return : ret_code : 0(OK), -1(slot NOT FOUND), -2(slot WRONG STATUS)
// parameter : slot_id
// slot: 'READY' -> 'ON'
rpc SetSlotInitialized(google.protobuf.Int32Value) returns (google.protobuf.Int32Value);
rpc SetSlotInitialized(google.protobuf.Int32Value) returns (google.protobuf.Empty);
// job:'PAUSED'/'ARCHIVED' && slot:'ON'
rpc GetTerminableSlotList(google.protobuf.Empty) returns (CommandList);
// slot exit automatically, slot : 'ON' -> 'OFF'/'READY'
// parameter : slot_id
rpc SetSlotTerminated(google.protobuf.Int32Value) returns (google.protobuf.Empty);;
// parameter : job_id/host_ip, sep=','
// return : slot id
rpc RegisterSlot(google.protobuf.StringValue) returns (google.protobuf.Int32Value);
// parameter : slot_id
rpc DeregisterSlot(google.protobuf.Int32Value) returns (google.protobuf.Empty);
// job:'RUNNING' && worker:'PAUSED'
rpc GetRunnableWorkerList(google.protobuf.Empty) returns (CommandList);
// input : worker id
// return : ret_code : 0(OK), -1(worker NOT FOUND), -2(worker WRONG STATUS)
// worker: 'PAUSED' -> 'RUNNING'
rpc SetWorkerInitialized(google.protobuf.Int32Value) returns (google.protobuf.Int32Value);
// task id : 'READY/-1' -> 'QUEUED/-2'
// parameter : slot_id('ON')
// return : job-key of the task, id==0(NULL)
rpc GetNextTask(google.protobuf.Int32Value) returns (TaskItem);
// job:'PAUSED' && worker:'RUNNING'
rpc GetTerminableWorkerList(google.protobuf.Empty) returns (CommandList);
// task_status_code:'QUEUED'/-2 -> 'RUNNING'/-3
// parameter : task_id('QUEUED/-2')
rpc SetTaskStarted(google.protobuf.Int64Value) returns (google.protobuf.Empty);
// input : worker id, return ret_code;
// return : ret_code : 0(OK), -1(worker NOT FOUND), -2(worker WRONG STATUS)
// worker : 'RUNNING' -> 'PAUSED'
rpc SetWorkerTerminated(google.protobuf.Int32Value) returns (google.protobuf.Int32Value);
// task_status_code:'RUNNING'/-3 -> 'OK'/0, ...
// parameter : TaskExecMessage
rpc SetTaskFinished(TaskExecMessage) returns (google.protobuf.Empty);
// //////////////////////////////////////////////////////////////////
// app client , called by user app.
// //////////////////////////////////////////////////////////////////
// input : slot id
// return : ret_code < 0 exit
rpc CheckSlotHeartbeat(google.protobuf.Int32Value) returns (google.protobuf.Int32Value);
// send task-key to next job in current pipeline
// return : ret_code : 0(OK), -1(job NOT FOUND)
// send job-key to next job in current pipeline
// return : task_id(OK), <0 (error)
// task_status_code : 'INITIAL'/-9
rpc SendJobMessage(JobKey) returns (google.protobuf.Int32Value);
// rpc SendToNextJob(JobKey) returns (google.protobuf.Int32Value);
rpc SendMessage(JobKey) returns (google.protobuf.Int64Value);
rpc SendJobMessages(JobKeys) returns (google.protobuf.Int32Value);
// send 'START' message to head job, and set its status to running
// rpc SetAppRunning(google.protobuf.Int32Value) returns (google.protobuf.Int32Value);
}
message JobKey {
message JobIdRef{
// qualified name of job
string sink_job_name=1;
// for sink-job in the same app
int32 current_job_id=2;
}
message JobIdAppRef{
// qualified name of job
string sink_job_name=1;
// for sink-job in remote server (app id)
int32 app_id=2;
// <ip-addr:port> for controld/grpc-server
string remote_server=3;
}
oneof jobId {
int32 cross_app_job_id=1;
JobIdRef builtin_job_id=2;
// for the same app
JobIdRef builtin_job_id=1;
// for different app in the same cluster
int32 cross_app_job_id=2;
// for cross-cluster app
JobIdAppRef cross_server_job_id=3;
}
// 1. multi-messages : comma-sep
// 2. customized sep : json-format
// label: "multi-messages", text: "abc:134:345ß"
string key_text=10;
}
message JobKeys {
// qualified name of job
string next_job_name=1;
repeated string key_texts=2;
int32 current_job_id=3;
// OR worker_id ?
bool async_task_creation=20;
map<string, string> headers = 21;
}
message TaskItem {
......@@ -110,17 +107,30 @@ message TaskItem {
string key=2;
}
message Command {
// primary key of slot/worker table
message InlineSlotInfo {
// primary key of slot table
int32 id=1;
string host=2;
string command_text=3;
int32 port=3;
string uname=4;
string command_text=5;
}
message CommandList {
repeated Command command=1;
message InlineSlotList {
repeated InlineSlotInfo slots=1;
}
message ExternalSlotList {
repeated ExternalSlotInfo slots=1;
}
message ExternalSlotInfo {
int32 job_id=1;
string command_text=2;
int32 num_slots=3;
map<string,string> resource_req=4;
}
message TaskExecMessage {
int32 slot=1;
int32 status_code=2;
......@@ -132,7 +142,23 @@ message TaskExecMessage {
google.protobuf.Timestamp t2=8;
google.protobuf.Timestamp t3=9;
google.protobuf.Timestamp t4=10;
repeated google.protobuf.Timestamp time_arr=11;
string sys_out=12;
string app_out=13;
repeated google.protobuf.Timestamp tc_arr=11;
string stdout=12;
string stderr=13;
string userText=14;
}
message ClientExecMessage {
// message SlotExecMessage {
int32 slot=1;
// return code
int32 code=2;
string stdout=3;
string stderr=4;
// client start time
google.protobuf.Timestamp tc0=5;
// client end time
google.protobuf.Timestamp tc1=6;
// 'slot_run', 'agent_setup', 'agent_teardown'
string action=7;
}
......@@ -19,42 +19,57 @@ class admL1Api():
suser = os.getenv('CSST_SCALEBOX_USER')
spwd = os.getenv('CSST_SCALEBOX_PWD')
sdb = os.getenv('CSST_SCALEBOX_DATABASE')
admL1Api.sum_numbers(body,obsid,shost,sport,suser,spwd,sdb)
#取环境变量中模块id,需预先设定
#current_job_id=os.getenv('CSST_ADML1_APPID')
# conn = psycopg2.connect(host="10.255.2.12",port=5433,user="scalebox",password="changeme",database="scalebox")
conn = psycopg2.connect(host=shost,port=sport,user=suser,password=spwd,database=sdb)
cursor = conn.cursor()
#sql ="SELECT id,name FROM t_app ;"
#根据admL1的名字查找它的jobid
admsql = "SELECT id FROM t_job where name = 'admL1'"
cursor.execute(admsql)
admrows = cursor.fetchone()
current_job_id = admrows[0]
sink_job_name=""
if obsid==1:
admL1Api.sum_numbers(body,"mbi",current_job_id)
admL1Api.sum_numbers(body,"sls2d",current_job_id)
elif obsid==2:
sink_job_name="mci"
elif obsid==3:
sink_job_name="ifs"
elif obsid==4:
sink_job_name="cpic"
elif obsid==5:
sink_job_name="hstdm"
else:
sink_job_name=""
if sink_job_name:
admL1Api.sum_numbers(body,sink_job_name,current_job_id)
else:
print('等待模块传输')
print("执行完毕")
@classmethod
def sum_numbers(self,body,obsid,shost,sport,suser,spwd,sdb):
def sum_numbers(self,body,sink_job_name,current_job_id):
#取环境变量中模块id,需预先设定
#current_job_id=os.getenv('CSST_ADML1_APPID')
#调用grpc的SendJobMessage
channel = grpc.insecure_channel(os.getenv('CSST_PIPELINE_GRPC_SERVER'))
stub = control_pb2_grpc.ControlServiceStub(channel)
test = control_pb2.JobKey()
# conn = psycopg2.connect(host="10.255.2.12",port=5433,user="scalebox",password="changeme",database="scalebox")
conn = psycopg2.connect(host=shost,port=sport,user=suser,password=spwd,database=sdb)
cursor = conn.cursor()
current_job_id=os.getenv('JOB_ID')
sql ="SELECT job_name FROM t_obs where obs_x ='{}' ;".format(obsid)
cursor.execute(sql)
rows = cursor.fetchall()
for rowname in rows:
#当前模块的id
test.builtin_job_id.current_job_id = int(current_job_id)
#下级模块的名字
test.builtin_job_id.sink_job_name = rowname[0]
test.builtin_job_id.sink_job_name =sink_job_name
test.key_text = body
reflag = stub.SendJobMessage(test)
print("rowname : %s" %(rowname[0]))
reflag = stub.SendMessage(test)
print("rowname : %s" %(sink_job_name))
print("reflag : %d " %(reflag.value))
conn.commit()
cursor.close()
conn.close()
return "执行成功"
return reflag.value
if __name__ == '__main__':
parameter = sys.argv
body=parameter[1]
......
......@@ -19,57 +19,42 @@ class admL1Api():
suser = os.getenv('CSST_SCALEBOX_USER')
spwd = os.getenv('CSST_SCALEBOX_PWD')
sdb = os.getenv('CSST_SCALEBOX_DATABASE')
#取环境变量中模块id,需预先设定
#current_job_id=os.getenv('CSST_ADML1_APPID')
# conn = psycopg2.connect(host="10.255.2.12",port=5433,user="scalebox",password="changeme",database="scalebox")
conn = psycopg2.connect(host=shost,port=sport,user=suser,password=spwd,database=sdb)
cursor = conn.cursor()
#sql ="SELECT id,name FROM t_app ;"
#根据admL1的名字查找它的jobid
admsql = "SELECT id FROM t_job where name = 'admL1'"
cursor.execute(admsql)
admrows = cursor.fetchone()
current_job_id = admrows[0]
sink_job_name=""
if obsid==1:
admL1Api.sum_numbers(body,"mbi",current_job_id)
admL1Api.sum_numbers(body,"sls2d",current_job_id)
elif obsid==2:
sink_job_name="mci"
elif obsid==3:
sink_job_name="ifs"
elif obsid==4:
sink_job_name="cpic"
elif obsid==5:
sink_job_name="hstdm"
else:
sink_job_name=""
if sink_job_name:
admL1Api.sum_numbers(body,sink_job_name,current_job_id)
else:
print('等待模块传输')
admL1Api.sum_numbers(body,obsid,shost,sport,suser,spwd,sdb)
print("执行完毕")
@classmethod
def sum_numbers(self,body,sink_job_name,current_job_id):
def sum_numbers(self,body,obsid,shost,sport,suser,spwd,sdb):
#取环境变量中模块id,需预先设定
#current_job_id=os.getenv('CSST_ADML1_APPID')
#调用grpc的SendJobMessage
channel = grpc.insecure_channel(os.getenv('CSST_PIPELINE_GRPC_SERVER'))
stub = control_pb2_grpc.ControlServiceStub(channel)
test = control_pb2.JobKey()
# conn = psycopg2.connect(host="10.255.2.12",port=5433,user="scalebox",password="changeme",database="scalebox")
conn = psycopg2.connect(host=shost,port=sport,user=suser,password=spwd,database=sdb)
cursor = conn.cursor()
current_job_id=os.getenv('JOB_ID')
sql ="SELECT job_name FROM t_obs where obs_x ='{}' ;".format(obsid)
cursor.execute(sql)
rows = cursor.fetchall()
for rowname in rows:
#当前模块的id
test.builtin_job_id.current_job_id = int(current_job_id)
#下级模块的名字
test.builtin_job_id.sink_job_name =sink_job_name
test.builtin_job_id.sink_job_name = rowname[0]
test.key_text = body
reflag = stub.SendJobMessage(test)
print("rowname : %s" %(sink_job_name))
print("rowname : %s" %(rowname[0]))
print("reflag : %d " %(reflag.value))
return reflag.value
conn.commit()
cursor.close()
conn.close()
return "执行成功"
if __name__ == '__main__':
parameter = sys.argv
body=parameter[1]
......
AUX_DIR=/sharewcl/L1Pipeline/aux
AUX_DIR=/sharewcl/pipeline/aux
CRDS_DIR=/sharewcl/OnOrbitCal/SimData/ref_202211/products_ref20_3hdr
DFS_ROOT=/sharewcl/dfs
# Image definition: scalebox agent base image plus Python 3 and the
# scripts found in the build context.
FROM hub.cstcloud.cn/scalebox/agent

LABEL maintainer="Xiaoli Zhang<zhangxiaoli@cnic.cn>"

# Install Python 3 and pip, then drop the apt caches in the same layer.
RUN apt-get update \
    && apt-get install -y python3 python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy every shell and Python script from the build context into /app/bin/.
COPY *.sh *.py /app/bin/

# Make the run script executable.
RUN chmod +x /app/bin/run.sh
# Image tag used by every target below.
IMAGE_NAME := hub.cstcloud.cn/csst/module1:dev

# All targets are commands, not files; declare them phony so a stray file
# named e.g. "build" cannot make a target look up to date.
.PHONY: build dist push run down

# Build the image from the Dockerfile in the current directory.
build:
	docker build --network=host -t $(IMAGE_NAME) .

# Stream the saved image, zstd-compressed, over ssh to host c0 and load it there.
dist:
	docker save $(IMAGE_NAME) | zstdmt | pv | ssh c0 'zstd -d | docker load'

# Push the image to its registry.
push:
	docker push $(IMAGE_NAME)

# Start a new container from the image with an interactive bash shell.
run:
	docker run -it --entrypoint bash $(IMAGE_NAME)

# Stop every running container created from the image. `docker stop` expects
# container names/IDs, not an image reference, so the containers are looked up
# by ancestor first; `xargs -r` skips the stop when none are running.
down:
	docker ps -q --filter ancestor=$(IMAGE_NAME) | xargs -r docker stop
import os
import sys
if __name__ == '__main__':
    # Echo the first command-line argument to stdout.
    body = sys.argv[1]
    print(body)
\ No newline at end of file
#!/bin/bash
# Run the module script on the first argument, then pass the same argument to
# `send-message` (not defined here; presumably provided by the base agent
# image - confirm).
# "$1" is quoted so that a value containing spaces or glob characters reaches
# both commands as one unmodified argument.
python3 /app/bin/module1.py "$1"
send-message "$1"
# Image definition: scalebox agent base image plus Python 3 and the
# scripts found in the build context.
FROM hub.cstcloud.cn/scalebox/agent

LABEL maintainer="Xiaoli Zhang<zhangxiaoli@cnic.cn>"

# Install Python 3 and pip, then drop the apt caches in the same layer.
RUN apt-get update \
    && apt-get install -y python3 python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy every shell and Python script from the build context into /app/bin/.
COPY *.sh *.py /app/bin/

# Make the run script executable.
RUN chmod +x /app/bin/run.sh
# Image tag used by every target below.
IMAGE_NAME := hub.cstcloud.cn/csst/module2:dev

# All targets are commands, not files; declare them phony so a stray file
# named e.g. "build" cannot make a target look up to date.
.PHONY: build dist push run down

# Build the image from the Dockerfile in the current directory.
build:
	docker build --network=host -t $(IMAGE_NAME) .

# Stream the saved image, zstd-compressed, over ssh to host c0 and load it there.
dist:
	docker save $(IMAGE_NAME) | zstdmt | pv | ssh c0 'zstd -d | docker load'

# Push the image to its registry.
push:
	docker push $(IMAGE_NAME)

# Start a new container from the image with an interactive bash shell.
run:
	docker run -it --entrypoint bash $(IMAGE_NAME)

# Stop every running container created from the image. `docker stop` expects
# container names/IDs, not an image reference, so the containers are looked up
# by ancestor first; `xargs -r` skips the stop when none are running.
down:
	docker ps -q --filter ancestor=$(IMAGE_NAME) | xargs -r docker stop
import os
import sys
if __name__ == '__main__':
    # Echo the first command-line argument to stdout.
    body = sys.argv[1]
    print(body)
\ No newline at end of file
#!/bin/bash
# Run the module script on the first argument, then pass the same argument to
# `send-message` (not defined here; presumably provided by the base agent
# image - confirm).
# "$1" is quoted so that a value containing spaces or glob characters reaches
# both commands as one unmodified argument.
python3 /app/bin/module2.py "$1"
send-message "$1"
FROM hub.cstcloud.cn/scalebox/agent
FROM hub.cstcloud.cn/scalebox/agent:0.9
# 安装redis-cli
RUN apt-get update \
&& apt-get install -y redis
ENV REDIS_SERVER=10.0.0.9
#ENV REDIS_SERVER=192.169.23.2
#ENV REDIS_SERVER=10.0.0.9
ENV REDIS_SERVER=192.169.23.2
COPY run.sh /app/bin/
RUN chmod +x /app/bin/run.sh
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment