ZTd!ZUdZVd"ZWd#ZXd$ZYd%ZZd&Z[d'Z\d(Z]d)Z^d*Z_d+Z`d,e^Zad,e`Zbd,e_Zcd-Zdd.Zed/Zfd0ZgdZhdZidZjd5ZkdZld6Zmd7Znd8ZodZpdaqejrqdase$etejud9d:d;Zve$eetee"d<d=d>ZwGd?d@d@ZxGdAdBdBZyGdCdDdDeZzGdEdFdFZ{GdGdHdHZ|GdIdJdJZ}GdKdLdLe}Z~GdMdNdNe}Ze~efZd2d0dOdPdQiidRdSdTdUdVdOdWeXdXdYdZdUd[d\gd]id^ZGd_d`d`ejZd2dadbeiid0dOdPdQiidRdSdagejdcdddSejdedUdVdOdWeXdXdfdZdUdgdhd\gd]idiZGdjdkdkZGdldmdmeZGdndodoeZGdpdqdqeZGdrdsdseZGdtdudueZGdvdwdweZGdxdydyZGdzd{d{eZe3defdd|e!eteBfe eeefe ee ete,e%ddfd}d~dZddZGdddeZGdddeZGdddeZGdddeZGdddeZGdddeZGdddeZGdddeZdete ee eddddZdeete"ee"dddZeete"fete"dddZddZGdddeZddZe|exedddZddZGdddeZGdddeZGdddeZGdddeZejdkrnddlZddl8Z8ddlZddlmZGdddejZejeyddlmZWnek rddZYnXdejelfe|eete etee ee"eetetefdddZdejelfe|eete etee ee"eetetefdddZddZe$getfe"dddZddZdd„ZddĄZddƄZddȄZddʄZdd̄Zdd΄ZddЄZeteeeteetfdќddӄZddՄZddׄZddلZddۄZdd݄Zdd߄ZeCeCdddZeCeCdddZeCeCdddZe|etdddZeCeCdddZeCeCdddZeCeCdddZe|ddddZeCeCdddZeCeCdddZe|etee eydddZe|ete etdddZddZddZאddZؐddZِddZڐdddZېddd Zܐd d Zݐdd d ZސdddZߐdddZeteeddddZdete ete etdddZddZe|e e}dddZe|e}dddZdddZe|etetetd d!d"Zd#d$Zdd%d&Ze|d'ed(d)d*Ze|d'e etd(d+d,Zdd-d.Zdd/d0Ze|edd1d2Zd3d4Zdd5d6Ze|eteteteedd7d8d9Zeteetetfd:d;d<Zeteete"fd:d=d>Ze|eete"fdd?d@Ze|eetetfddAdBZe|eeete"fddCdDZe|eexddEdFZdGdHZdIdJZddKdLZd0dZidgdfe|eeteetetfe eeeteete eetd'dMdNdOZe|eetddPdQdRZde|etete!eetfeee eetd'dSdTdUZddVdWZdddezjdfe|etete!eetfe d'eee ete ete eteze eexddX dYdZZdd[d\Ze|etetdd]d^d_Zde|eteeete!eetfd'eee ete eexdd` dadbZGdcddddeZdedfZe|etetddgdhdiZe|ete!etdfdjdkdlZe|eeteetdmdndoZe|etddpdqdrZ dsdtZ dudvZ Gdwd'd'Z Gdxdydye9Z Gdzd{d{ZGd|d}d}e9Ze|ddd~dZeҐddZeАddZdddZeeΐddZetetdddZddZeteeeeetfdddZddZddZddZetetedddZeteexdddZeteexdddZeexetdddZe|e etdddZe|eexe etdddZ e|eetee etfdddZ!e|eetefdddZ"e|eeeteetetete"e"fdddZ#e|eeetetete"dddZ$e|eeeteteteteetetfdddZ%e|eeetetddddZ&e|etetetetddddZ'e|eeetetetete$dd ddZ(e|e$e$ddddZ)e$e$dddd„Z*e|eee$e$ddÜdĐdńZ+e|etetetetdƜdǐdȄZ,e|etetetete etee$e eteddɜ 
dʐd˄Z-e*eteeetetfd̜d͐d΄Z.eeteetetfdϜdАdфZ/e|eetetfetedҜdӐdԄZ0e|eeetdd՜d֐dׄZ1eCeCddؐdلZ2e2eАdڐdۄZ3e|eddܐd݄Z4e|e ete ete etddޜdߐdZ5ddZ6de|etete!eetfeee eetd'dSddZ7e|etetezdddZ8eeӐddZ9e|eete"fdddZ:eete"fe|ddddZ;e|ddddZeΐddZ?eeeeʐddZ@eːddZAeeeʐddZBeːddZCeːddZDddZEe|eeteete)etffdddZFeteeteete)etffdddZGe|eeteete)etffdd d ZHeteteeteete)etffd d d ZIddZJddZKetedddZLdddZMeeteeeeteffdddZNeeteeeetetffdddZOdddZPe|eteteteteetetefddd ZQeАd!d"ZRGd#d$d$eZSd%d&ZTd'd(ZUd)d*ZVd+d,ZWd-d.ZXd/d0ZYd1d2ZZe|etdd3d4d5Z[ee|ddd6d7Z\e|ddd8d9Z]e|edd:d;Z^e|ddd<d=Z_e|eedd>d?d@Z`ddAdBZae|dddCdDZbeteeeetfdEdFdGZcetetedHdIdJZdetetddKdLdMZee|dddNdOZfe|dddPdQZgGdRdSdSejhZidTdUZjGdVdWdWeZkGdXdYdYekZlGdZd[d[ekZmGd\d]d]ekZnde|e ete ete ete etekd^d_d`Zoe|dddadbZpe|dddcddZqe|dddedfZre|etddgdhZsdidjZtdkdlZuddndoZvddpdqZwGdrdsdsZxGdtduduZye|dddvdwZzde|etetedydzd{Z{e|edd|d}Z|ee|etdd~dZ}GdddZ~e!eje~fddddZddZeetejdddZeete|dddZe|eetddddZddddZddddZedkredS(N) dictConfig)Enum)DictListTupleOptionalUnionAnyNoReturnCallableIOSequenceTypeVarcastSetIterableTextIO Generator) ConfigParser)redirect_stdoutcontextmanager)wraps)glob)StringIO)ThreadEvent) HTTPErrorURLError)urlopenRequest)PathFuncT)Zboundzquay.io/ceph/ceph:v18FZreefz%quay.io/prometheus/prometheus:v2.43.0zdocker.io/grafana/loki:2.4.0z docker.io/grafana/promtail:2.4.0z'quay.io/prometheus/node-exporter:v1.5.0z'quay.io/prometheus/alertmanager:v0.25.0zquay.io/ceph/ceph-grafana:9.4.7zquay.io/ceph/haproxy:2.3zquay.io/ceph/keepalived:2.2.4zquay.io/ceph/nvmeof:0.0.1z$docker.io/maxwo/snmp-notifier:v1.2.1z'quay.io/omrizeneva/elasticsearch:6.8.23z+quay.io/jaegertracing/jaeger-collector:1.29z'quay.io/jaegertracing/jaeger-agent:1.29z'quay.io/jaegertracing/jaeger-query:1.29z docker.io /var/lib/cephz /var/log/cephz /run/cephadmz/etc/logrotate.dz /etc/sysctl.dz/etc/systemd/systemconfigz ceph.confzceph.pubzceph.client.admin.keyringz /etc/ceph/iiiTz[ceph: \u@\h \W]\$ z%Y-%m-%dT%H:%M:%S.%fZ )funccmdreturncs$tjd|jd|||}|S)Nzrunning function z, with parms: 
)loggerdebug__name__)r*r+responser1*/tmp/tmpyrtrv2e9.cephadm.build/__main__.pyrun_func{sr3)r*cmd_listr,cs6g}x|D]}|jt||q Wtj|IdH}|S)N)appendr3asyncioZgather)r*r4Ztasksr+datar1r1r2concurrent_taskss  r8c@s>eZdZdZeeddddZedddZedd d ZdS) EndPointz'EndPoint representing an ip:port formatN)ipportr,cCs||_||_dS)N)r:r;)selfr:r;r1r1r2__init__szEndPoint.__init__)r,cCs|jd|jS)N:)r:r;)r<r1r1r2__str__szEndPoint.__str__cCs|jd|jS)Nr>)r:r;)r<r1r1r2__repr__szEndPoint.__repr__) r/ __module__ __qualname____doc__strintr=r?r@r1r1r1r2r9sr9c@s4eZdZeeeeeddddZeedddZdS) ContainerInfoN) container_id image_nameimage_idstartversionr,cCs"||_||_||_||_||_dS)N)rGrHrIrJrK)r<rGrHrIrJrKr1r1r2r=s zContainerInfo.__init__)otherr,cCsJt|tstS|j|jkoH|j|jkoH|j|jkoH|j|jkoH|j|jkS)N) isinstancerFNotImplementedrGrHrIrJrK)r<rLr1r1r2__eq__s     zContainerInfo.__eq__)r/rArBrDr=r boolrOr1r1r1r2rFs rFc@seZdZdZdZdZdS)DeploymentTypeZDeployZRedeployZReconfigN)r/rArBDEFAULTREDEPLOYRECONFIGr1r1r1r2rQsrQc@s,eZdZddddZejddddZdS) BaseConfigN)r,cCsdd|_d|_t|_t|_t|_t|_ t |_ d|_ t |_t|_g|_d|_d|_d|_t|_d|_dS)NF)imagedockerDATA_DIRdata_dirLOG_DIRlog_dir LOGROTATE_DIR logrotate_dir SYSCTL_DIR sysctl_dirUNIT_DIRunit_dirverboseDEFAULT_TIMEOUTtimeout DEFAULT_RETRYretryenvmemory_request memory_limitlog_to_journaldCONTAINER_INITcontainer_initcontainer_engine)r<r1r1r2r=s zBaseConfig.__init__)argsr,cCs8t|}x*|jD]\}}t||rt|||qWdS)N)varsitemshasattrsetattr)r<roZargdictkvr1r1r2 set_from_argss zBaseConfig.set_from_args)r/rArBr=argparse Namespacervr1r1r1r2rUsrUcsxeZdZddddZejddddZeddd Ze ed d d Z e e d fd d Z e e ddfdd Z ZS)CephadmContextN)r,cCsd|jd<t|jd<dS)N_args_conf)__dict__rU)r<r1r1r2r=s zCephadmContext.__init__)ror,cCs|jj|||_dS)N)r{rvrz)r<ror1r1r2set_argss zCephadmContext.set_argscCs d|jkS)Nr*)rz)r<r1r1r2 has_functionszCephadmContext.has_function)namer,cCs t||S)N)rr)r<rr1r1r2 __contains__szCephadmContext.__contains__csTd|jkr"t|j|r"t|j|Sd|jkrDt|j|rDt|j|Stj|SdS)Nr{rz)r|rrr{getattrrzsuper__getattribute__)r<r) __class__r1r2 __getattr__s   
zCephadmContext.__getattr__)rvaluer,csJt|j|rt|j||n*t|j|r8t|j||ntj||dS)N)rrr{rsrzr __setattr__)r<rr)rr1r2rs   zCephadmContext.__setattr__)r/rArBr=rwrxr}rPr~rDrr rr __classcell__r1r1)rr2rys ryc@s:eZdZddddZeedddZedddZdS) ContainerEngineN)r,cCst|j|_dS)N) find_programEXEpath)r<r1r1r2r=szContainerEngine.__init__cCs tdS)N)NotImplementedError)r<r1r1r2rszContainerEngine.EXEcCs|jd|jdS)Nz ())rr)r<r1r1r2r?szContainerEngine.__str__)r/rArBr=propertyrDrr?r1r1r1r2rsrcs^eZdZdZddfdd ZeeedfdddZe dd d d Z e dd d Z Z S)PodmanpodmanN)r,cstjd|_dS)N)rr=_version)r<)rr1r2r=s zPodman.__init__.cCs|jdkrtd|jS)NzPlease call `get_version` first)r RuntimeError)r<r1r1r2rK s zPodman.version)ctxr,cCs.t||jdddgtjd\}}}t||_dS)NrKz--formatz{{.Client.Version}}) verbosity) call_throwsr CallVerbosityQUIET_parse_podman_versionr)r<rout_r1r1r2 get_versions zPodman.get_versioncCs*djtt|j}|jd|jd|S)N.z (z ) version )joinmaprDrKrr)r<rKr1r1r2r?szPodman.__str__)r/rArBrr=rrrErKryrrDr?rr1r1)rr2rs rc@seZdZdZdS)DockerrXN)r/rArBrr1r1r1r2rsrcephadmformatz0%(asctime)s %(thread)x %(levelname)s %(message)sINFOzlogging.StreamHandler)levelclassDEBUGz#logging.handlers.WatchedFileHandlerz%s/cephadm.log)rr formatterfilename)consolelog_filerVrr)rhandlers)rKdisable_existing_loggers formattersrloggersc@seZdZejedddZdS)ExcludeErrorsFilter)recordr,cCs |jtjkS)z=Only lets through log messages with log level below WARNING .)levelnologgingWARNING)r<rr1r1r2filterAszExcludeErrorsFilter.filterN)r/rArBr LogRecordrPrr1r1r1r2r@srZexclude_errorsz())rrfiltersstreamr)rrr)console_stdoutconsole_stderrrrr)rKrrrrrc@seZdZdZdZdZdS) termcolorzzzN)r/rArByellowredendr1r1r1r2rrsrc@s eZdZdS)ErrorN)r/rArBr1r1r1r2rxsrc@s eZdZdS)ClusterAlreadyExistsN)r/rArBr1r1r1r2r|src@s eZdZdS)TimeoutExpiredN)r/rArBr1r1r1r2rsrc@s eZdZdS)UnauthorizedRegistryErrorN)r/rArBr1r1r1r2rsrc @seZdZdZdZd S)Cephmonmgrosdmdsrgw rbd-mirrorcrash cephfs-mirror ceph-exporteriscsinfsnvmeofN) rrrrrrrrr)rrr)r/rArBdaemonsgatewaysr1r1r1r2rsrc@s"eZdZeeedddZdS)OSD)r,cCs dddgS)Nz# allow a large number 
of OSDszfs.aio-max-nr = 1048576zkernel.pid_max = 4194304r1r1r1r1r2get_sysctl_settingsszOSD.get_sysctl_settingsN)r/rArB staticmethodrrDrr1r1r1r2rsrc@seZdZdZdZddgZeZdZdZ de e e e e fee efee ddd d Zee e e e e fdd d d Zee e e ee d ddZee dddZee dddZee dddZee dddZddddZddddZdS) SNMPGatewayzIDefines an SNMP gateway between Prometheus and SNMP monitoring Frameworksz snmp-gatewayV2cV3i$zsnmp-gateway.confN)rfsid daemon_id config_jsonrWr,cCs||_||_||_|ptj|_|jdd|_|jdd|_|jdd|_ |jdd|_ |jdd |_ |jd d |_ |jd d|_ |jd d|_|jdd|_|jdd|_|jdd|_|jdd|_|jdS)Nuidrgid destinationrV snmp_versionrsnmp_communityZpublic log_levelinfosnmp_v3_auth_usernamesnmp_v3_auth_passwordsnmp_v3_auth_protocolsnmp_v3_priv_protocolsnmp_v3_priv_passwordsnmp_v3_engine_id)rrrr default_imagerWgetrrrrrrrrrrrrvalidate)r<rrrrrWr1r1r2r=s" zSNMPGateway.__init__)rrrr,cCs"t|}|st||||||jS)N) fetch_configsAssertionErrorrW)clsrrrZcfgsr1r1r2initszSNMPGateway.initc Cs tjj|j|d|d}y(t|d}tj|j}WdQRXWnttj fk r\dSX|j dg}|srdSy8t d|dd}|jj d j d }WdQRXWnttfk rdSXx@|D]8} | j} | jdo| jdr| j ddj ddSqWdS)z:Return the version of the notifier from it's http endpointz snmp-gateway.z unit.metarNportszhttp:// 

,zversion=r%)rr)rr)osrrrZopenjsonloadsreadOSErrorJSONDecodeErrorrrdecodesplitrrstrip startswithendswith) rrrrrhmetadatarrZhtmlhstrippedr1r1r2rs(  "   zSNMPGateway.get_version)r,cCst|j}|s|jS|djS)Nr)fetch_tcp_portsr DEFAULT_PORTr;)r< endpointsr1r1r2r;s zSNMPGateway.portcCsg}d|jd|jd|jd|jdg}|jdkrx|jdd|jd |jg|jrx|jd d |jg||S) Nz--web.listen-address=:z--snmp.destination=z--snmp.version=z --log.level=zL--snmp.trap-description-template=/etc/snmp_notifier/description-template.tplrz--snmp.authentication-enabledz--snmp.authentication-protocol=z--snmp.security-engine-id=z--snmp.private-enabledz--snmp.private-protocol=)r;rrrextendrrr)r<Zv3_argsZ base_argsr1r1r2get_daemon_argss       zSNMPGateway.get_daemon_argscCs&tjj|jj|jj|jd|jS)Nr)rrrrrZr daemon_typer)r<r1r1r2rZszSNMPGateway.data_dircCstjj|j|jS)N)rrrrZ env_filename)r<r1r1r2conf_file_path szSNMPGateway.conf_file_pathc Cs|t|jh}|jdkr,|jd|jdnB|jd|jd|jd|jd|jrn|jd|jdWdQRXdS)zQCreates the environment file holding 'secrets' passed to the snmp-notifier daemonrzSNMP_NOTIFIER_COMMUNITY=rzSNMP_NOTIFIER_AUTH_USERNAME=zSNMP_NOTIFIER_AUTH_PASSWORD=zSNMP_NOTIFIER_PRIV_PASSWORD=N) write_newr rwriterrrr)r<fr1r1r2create_daemon_confs  zSNMPGateway.create_daemon_confcCsHt|jstd|j|jtjkr6td|j|jsDtddS)zValidate the settings Raises: Error: if the fsid doesn't look like an fsid Error: if the snmp version is not supported Error: destination IP and port address missing znot a valid fsid: znot a valid snmp version: zPconfig is missing destination attribute(:) of the target SNMP listenerN)is_fsidrrrrSUPPORTED_VERSIONSr)r<r1r1r2rs   zSNMPGateway.validate)N)r/rArBrCr rDEFAULT_SNMP_GATEWAY_IMAGErrr ryrDrrErr rr= classmethodrrrrr;rr rZr rrr1r1r1r2rs*. 
rc @seZdZdZdgdgdgddgdgdgd Zed d d d gdgdedddgdgdedddgdgdedddgde d d gddddgde d ddj eddgd gd!gd"d#Z e d$d%Zd&S)' Monitoringz0Define the configs for the monitoring containersi#i#i i#i#i ix#) prometheusz node-exportergrafana alertmanagerlokipromtail2Z4GBz,--config.file=/etc/prometheus/prometheus.ymlz--storage.tsdb.path=/prometheuszprometheus.yml)rWcpusmemoryrozconfig-json-files1Z1GBz --config.file=/etc/loki/loki.ymlzloki.ymlz(--config.file=/etc/promtail/promtail.ymlz promtail.ymlz--no-collector.timex)rWrrroz grafana.iniz+provisioning/datasources/ceph-dashboard.ymlzcerts/cert_filezcerts/cert_keyZ2GBz--cluster.listen-address=:{}rr%zalertmanager.ymlpeers)rWrrrozconfig-json-fileszconfig-json-args)rrrz node-exporterrrcCs|dks t|jdd}d}d }d }d }|dkrrx:dD]2}t||jjd ||d gtjd \}}}|dkr6Pq6Wd}n$t||jjd ||d gtjd \}}}|dkr|jd|r|jdd}n|jd|r|jdd}|S)zp :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter" rr node-exporterrr-rr%rVprometheus-alertmanagerexecz --version)rrz %s, version  r$)rrr!rr)rr#) rreplacecallrnrrrrr)rrGr r+codeerrrrKr1r1r2rs0    zMonitoring.get_versionN)r/rArBrCport_mapDEFAULT_PROMETHEUS_IMAGEDEFAULT_LOKI_IMAGEDEFAULT_PROMTAIL_IMAGEDEFAULT_NODE_EXPORTER_IMAGEDEFAULT_GRAFANA_IMAGEDEFAULT_ALERT_MANAGER_IMAGEr componentsrrr1r1r1r2r-sT r)ownerpermsencoding)rr3r4r5r,ccstjj|}|d}i}|r&||d<ynt|df|V}|V|jtj|j|dk rptj|jf||dk rtj|j|WdQRXWn t k rtj |YnXtj ||dS)aWrite a new file in a robust manner, optionally specifying the owner, permissions, or encoding. This function takes care to never leave a file in a partially-written state due to a crash or power outage by writing to temporary file and then renaming that temp file over to the final destination once all data is written. Note that the temporary files can be leaked but only for a "crash" or power outage - regular exceptions will clean up the temporary file. 
z.newr5wN) rrabspathrflushfsyncfilenofchownfchmod Exceptionunlinkrename)rr3r4r5ZtempnameZ open_kwargsfhr1r1r2rs$   rc Cs`xZ|D]R}tjj||}t||}tjd|t|||fdd}|j|WdQRXqWdS)z*create config files for different serviceszWrite file: %szutf-8)r3r5N)rrr dict_get_joinr-rrr) config_dirZ config_filesrrfname config_fileZconfig_contentrr1r1r2populate_filess   rEc@seZdZdZdZdZdddgZdgZddiZe fd d Z e d d Z d dZ eddZeddZddZddZdddZddZddZdS) NFSGaneshazDefines a NFS-Ganesha containerrz/usr/bin/ganesha.nfsdz-Fz-LZSTDERRz ganesha.conficCsv||_||_||_||_t|ddd|_t|d|_t|d|_t|dg|_t|di|_ t|di|_ |j dS) NpoolT)require namespaceuserid extra_argsfilesr) rrrrWdict_getrGrIrJrKrLrr)r<rrrrrWr1r1r2r=s  zNFSGanesha.__init__cCs||||t||jS)N)rrW)rrrrr1r1r2rszNFSGanesha.initcCs|t}d|tjj|d<d|tjj|d<d|tjj|d<|jrx|jjdd}|jjd d }d ||f|tjj|d <|S) Nz/etc/ceph/ceph.conf:zr#z/etc/ceph/keyring:zkeyringz/etc/ganesha:zz etc/ganeshaclustercephuseradminz%/var/lib/ceph/radosgw/%s-%s/keyring:zz keyring.rgw)dictrrrrr)r<rZmountsrOZrgw_userr1r1r2get_container_mountsszNFSGanesha.get_container_mountscCsdtg}|S)Nz CEPH_CONF=%s)CEPH_DEFAULT_CONF)envsr1r1r2get_container_envss zNFSGanesha.get_container_envscCsPd}t||jjd|tjdgtjd\}}}|dkrLtjd|}|rL|j d}|S)Nr$z-v)rrz&NFS-Ganesha Release\s*=\s*[V]*([\d.]+)r%) r(rnrrF entrypointrrresearchgroup)rrGrKrr*r)matchr1r1r2rs   zNFSGanesha.get_versioncCst|jstd|j|js,td|j|js@td|j|jrlx$|jD]}||jkrNtd|qNW|jr|jjdstd|jjdstddS) Nznot an fsid: %szinvalid daemon_id: %szinvalid image: %sz*required file missing from config-json: %srNzRGW keyring is missingrQzRGW user is missing) rrrrrWrequired_filesrLrr)r<rCr1r1r2rs     zNFSGanesha.validatecCsd|j|jfS)Nz%s.%s)r r)r<r1r1r2get_daemon_name4szNFSGanesha.get_daemon_nameNcCs&d|j|jf}|r"d||f}|S)Nz ceph-%s-%sz%s-%s)rr_)r<desccnamer1r1r2get_container_name8s zNFSGanesha.get_container_namecCs |j|jS)N) daemon_argsrK)r<r1r1r2r ?szNFSGanesha.get_daemon_argsc Cstjj|std|tjdtjj|d}t|||dt||j |||j rtjj|d}t |||fd}|j |j j ddWd QRXd S) z)Create files under the container data dirzdata_dir is not a 
directory: %szCreating ganesha config...z etc/ganeshaiz keyring.rgw)r3rNrVN)rrisdirrr-rrmakedirsrErLrrrr)r<rZrrrB keyring_pathrr1r1r2create_daemon_dirsCs   zNFSGanesha.create_daemon_dirs)N)r/rArBrCr rYrcr^r+ DEFAULT_IMAGEr=rrrUrrXrrr_rbr rgr1r1r1r2rFs       rFc@seZdZdZdZdZdgZefddZe ddZ e d d Z e d d Z e d dZddZddZd ddZddZe d!ddZe edddZddZdS)" CephIscsizDefines a Ceph-Iscsi containerrz/usr/bin/rbd-target-apiziscsi-gateway.cfgcCs2||_||_||_||_t|di|_|jdS)NrL)rrrrWrMrLr)r<rrrrrWr1r1r2r=cs zCephIscsi.__init__cCs||||t||jS)N)rrW)rrrrr1r1r2ruszCephIscsi.initcCstt}d|tjj|d<d|tjj|d<d|tjj|d<d|tjj|d<d |tjj|d <d ||<d |d <|S) Nz/etc/ceph/ceph.conf:zr#z/etc/ceph/keyring:zrNz/etc/ceph/iscsi-gateway.cfg:zziscsi-gateway.cfgz/sys/kernel/configconfigfsz,/usr/local/scripts/tcmu-runner-entrypoint.shztcmu-runner-entrypoint.shz /var/log:zz/dev)rSrrr)rZr\rTr1r1r2rU{szCephIscsi.get_container_mountscCsg}ddddg}|j||S)Nz type=bindzsource=/lib/moduleszdestination=/lib/moduleszro=true)r5)binds lib_modulesr1r1r2get_container_bindss zCephIscsi.get_container_bindscCs>d}t||jjd|dddgtjd\}}}|dkr:|j}|S)Nr$z/usr/bin/python3z-czKimport pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version))rr)r(rnrrrr)rrGrKrr*r)r1r1r2rs zCephIscsi.get_versioncCspt|jstd|j|js,td|j|js@td|j|jrlx$|jD]}||jkrNtd|qNWdS)Nznot an fsid: %szinvalid daemon_id: %szinvalid image: %sz*required file missing from config-json: %s)rrrrrWr^rL)r<rCr1r1r2rs   zCephIscsi.validatecCsd|j|jfS)Nz%s.%s)r r)r<r1r1r2r_szCephIscsi.get_daemon_nameNcCs&d|j|jf}|r"d||f}|S)Nz ceph-%s-%sz%s-%s)rr_)r<r`rar1r1r2rbs zCephIscsi.get_container_namecCsvtjj|std|tjdtjj|d}t|||d|j|j d<t ||j ||tj tjj|dddS)z)Create files under the container data dirzdata_dir is not a directory: %szCreating ceph-iscsi config...rjiztcmu-runner-entrypoint.shiN) rrrdrr-rrretcmu_runner_entrypoint_scriptrLrEchmod)r<rZrr configfs_dirr1r1r2rgs   zCephIscsi.create_daemon_dirsTcCs0tjj|d}|rdj|}n dj|}|jS)NrjzCif ! 
grep -qs {0} /proc/mounts; then mount -t configfs none {0}; fiz1if grep -qs {0} /proc/mounts; then umount {0}; fi)rrrrr)rZmount mount_pathr+r1r1r2configfs_mount_umountszCephIscsi.configfs_mount_umount)r,cCsdS)Na#!/bin/bash RUN_DIR=/var/run/tcmu-runner if [ ! -d "${RUN_DIR}" ] ; then mkdir -p "${RUN_DIR}" fi rm -rf "${RUN_DIR}"/* while true do touch "${RUN_DIR}"/start-up-$(date -Ins) /usr/bin/tcmu-runner # If we got around 3 kills/segfaults in the last minute, # don't start anymore if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then exit 0 fi sleep 1 done r1r1r1r1r2rns#z'CephIscsi.tcmu_runner_entrypoint_scriptcCs6t|j|j|jt|jd}d|_|jdd|_|S)Nz.tcmuz,/usr/local/scripts/tcmu-runner-entrypoint.shZtcmu)r`) get_deployment_containerrrr rDrrYrbra)r<tcmu_containerr1r1r2get_tcmu_runner_containersz#CephIscsi.get_tcmu_runner_container)N)T)r/rArBrCr rYr^rhr=rrrrUrmrrr_rbrgrsrDrnrvr1r1r1r2ri[s$    $ric@seZdZdZdZdgZeZefddZe ddZ e e e e e fdd d Ze d d Ze ee ee d ddZddZddZdddZddZe d ddZe ee dddZdS)! 
CephNvmeofzDefines a Ceph-Nvmeof containerrzceph-nvmeof.confcCs2||_||_||_||_t|di|_|jdS)NrL)rrrrWrMrLr)r<rrrrrWr1r1r2r=s zCephNvmeof.__init__cCs||||t||jS)N)rrW)rrrrr1r1r2r,szCephNvmeof.init)rZr,cCsbt}d|tjj|d<d|tjj|d<d|tjj|d<d|tjj|d<d |d <d |d <|S) Nz/etc/ceph/ceph.conf:zr#z/etc/ceph/keyring:zrNz/src/ceph-nvmeof.conf:zzceph-nvmeof.confz/sys/kernel/configrjz/dev/hugepagesz/dev/vfio/vfio)rSrrr)rZrTr1r1r2rU2szCephNvmeof.get_container_mountscCsg}ddddg}|j||S)Nz type=bindzsource=/lib/moduleszdestination=/lib/moduleszro=true)r5)rkrlr1r1r2rm=s zCephNvmeof.get_container_binds)rrGr,cCs8t||jjddd|jg\}}}d}|dkr4|j}|S)Ninspectz--formatz*{{index .Config.Labels "io.ceph.version"}}r)r(rnrrWr)rrGrr*retrKr1r1r2rHszCephNvmeof.get_versioncCspt|jstd|j|js,td|j|js@td|j|jrlx$|jD]}||jkrNtd|qNWdS)Nznot an fsid: %szinvalid daemon_id: %szinvalid image: %sz*required file missing from config-json: %s)rrrrrWr^rL)r<rCr1r1r2rSs   zCephNvmeof.validatecCsd|j|jfS)Nz%s.%s)r r)r<r1r1r2r_bszCephNvmeof.get_daemon_nameNcCs&d|j|jf}|r"d||f}|S)Nz%s-%s)rr_)r<r`rar1r1r2rbfs zCephNvmeof.get_container_namecCsRtjj|std|tjdtjj|d}t|||dt||j ||dS)z)Create files under the container data dirzdata_dir is not a directory: %szCreating ceph-nvmeof config...rjiN) rrrdrr-rrrerErL)r<rZrrrpr1r1r2rgms    zCephNvmeof.create_daemon_dirsTcCs0tjj|d}|rdj|}n dj|}|jS)NrjzCif ! 
grep -qs {0} /proc/mounts; then mount -t configfs none {0}; fiz1if grep -qs {0} /proc/mounts; then umount {0}; fi)rrrrr)rZrqrrr+r1r1r2rszsz CephNvmeof.configfs_mount_umount)r,cCsdgS)Nzvm.nr_hugepages = 4096r1r1r1r1r2rszCephNvmeof.get_sysctl_settings)N)T)r/rArBrCr r^DEFAULT_NVMEOF_IMAGErr=rrrrDrrUrmryrrrr_rbrgrsrrr1r1r1r2rws&      rwc@seZdZdZdZdZdZdeiZefe e e e e fe e efe ddddZee e e e e fdd d d Zee e e fd d dZee d ddZdd ddZdS) CephExporterz!Defines a Ceph exporter containerz ceph-exporterz/usr/bin/ceph-exporteri&N)rrrrrWr,c Cs||_||_||_||_|jdd|_tt\}}|rs   zKeepalived.create_daemon_dirscCspt|jstd|j|js,td|j|js@td|j|jrlx$|jD]}||jkrNtd|qNWdS)Nznot an fsid: %szinvalid daemon_id: %szinvalid image: %sz*required file missing from config-json: %s)rrrrrWr^rL)r<rCr1r1r2rJs   zKeepalived.validatecCsd|j|jfS)Nz%s.%s)r r)r<r1r1r2r_YszKeepalived.get_daemon_namecCs&d|j|jf}|r"d||f}|S)Nz ceph-%s-%sz%s-%s)rr_)r<r`rar1r1r2rb]s zKeepalived.get_container_namecCsddddg}|S)NzKEEPALIVED_AUTOCONF=falsez/KEEPALIVED_CONF=/etc/keepalived/keepalived.confzLKEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.confzKEEPALIVED_DEBUG=falser1)rWr1r1r2rXds zKeepalived.get_container_envs)r,cCs dddgS)Nz"# IP forwarding and non-local bindznet.ipv4.ip_forward = 1znet.ipv4.ip_nonlocal_bind = 1r1r1r1r1r2roszKeepalived.get_sysctl_settingscCst|jddS)Nz/var/lib)r)rr)r<r1r1r2extract_uid_gid_keepalivedwsz%Keepalived.extract_uid_gid_keepalived)rZr,cCst}d|tjj|d<|S)Nz/etc/keepalived/keepalived.confzkeepalived.conf)rSrrr)rZrTr1r1r2rU{szKeepalived.get_container_mounts)N)r/rArBrCr r^DEFAULT_KEEPALIVED_IMAGErryrDrrErr=rrrgrr_rbrrXrrrrrUr1r1r1r2r$s(     rc@sfeZdZUdZedgddeideideidZe e e e e ffe e e e fe dddd Z dS) Tracingz4Define the configs for the jaeger tracing containerszdiscovery.type=single-node)rWrWrW)Z elasticsearchz jaeger-agentzjaeger-collectorz jaeger-queryN)r#r r,cCsd|d kr0d|kstdd|dgtj|d<|dkr`d|ksDtd |dd gtj|d <dS) Njaeger-collector 
jaeger-queryZelasticsearch_nodeszSPAN_STORAGE_TYPE=elasticsearchzES_SERVER_URLS=rWz jaeger-agentZcollector_nodesz--reporter.grpc.host-port=z0--processor.jaeger-compact.server-host-port=6799rc)rr)rrr2)r#r r1r1r2set_configurations   zTracing.set_configuration)r/rArBrCDEFAULT_ELASTICSEARCH_IMAGEDEFAULT_JAEGER_AGENT_IMAGEDEFAULT_JAEGER_COLLECTOR_IMAGEDEFAULT_JAEGER_QUERY_IMAGEr2rrDr rrr1r1r1r2rs  rc@seZdZdZdZeeeefeeddddZ e e eeeefdddd Z eeedd d d Z eed ddZeed ddZeed ddZeeeefdddZeeeedddZdS)CustomContainerzDefines a custom container containerN)rrrrWr,cCs||_||_||_t|d|_t|dd|_t|dd|_t|di|_t|dg|_t|dg|_ t|dd |_ t|d g|_ t|d g|_ t|d g|_ t|d i|_dS)NrYrir volume_mountsrorW privilegedF bind_mountsrdirsrL)rrrWrMrYrrrrorWrrrrrL)r<rrrrWr1r1r2r=s zCustomContainer.__init__)rrrr,cCs|||t||jS)N)rrW)rrrrr1r1r2rszCustomContainer.init)rZrrr,c Cstjdj|tjj|s(td|x@|jD]6}tjdj|tjj||j d}t |||dq0Wxf|j D]\}tjdj|t |j |}tjj||j d}t |||fdd}|j|Wd QRXqrWd S) zG Create dirs/files below the container data directory. z/`. Example: { /foo/conf: /conf foo/conf: /conf } becomes { /foo/conf: /conf /var/lib/ceph///foo/conf: /conf } )rrqrrr)r<rZrTsourcerr1r1r2rUs  z$CustomContainer.get_container_mountscCs`|jj}xP|D]H}xBt|D]6\}}tjd|}|rdjtjj||j d||<qWqW|S)a Get the bind mounts. Relative `source=...` paths will be located below `/var/lib/ceph//`. Example: [ 'type=bind', 'source=lib/modules', 'destination=/lib/modules', 'ro=true' ] becomes [ ... 'source=/var/lib/ceph///lib/modules', ... ] z ^source=(.+)$z source={}r%) rcopy enumeraterZr]rrrrr\)r<rZrkbindindexrr]r1r1r2rms    z#CustomContainer.get_container_binds)r/rArBrCr rDrrErr=rryrrgrr rrXrUrmr1r1r1r2rs r)rrrr,cCs&t|j|r"|r"tj|||dS)N)r touchrchown)rrrr1r1r2rs r)dkeydefaultrHr,cCs*|r||jkrtdj||j||S)a  Helper function to get a key from a dictionary. :param d: The dictionary to process. :param key: The name of the key to get. :param default: The default value in case the key does not exist. Default is `None`. :param require: Set to `True` if the key is required. 
An exception will be raised if the key does not exist in the given dictionary. :return: Returns the value of the given key. :raises: :exc:`self.Error` if the given key does not exist and `require` is set to `True`. z{} missing from dict)keysrrr)rrrrHr1r1r2rM#srM)rrr,cCs(|j|}t|tr$djtt|}|S)ak Helper function to get the value of a given key from a dictionary. `List` values will be converted to a string by joining them with a line break. :param d: The dictionary to process. :param key: The name of the key to get. :return: Returns the value of the given key. If it was a `list`, it will be joining with a line break. r)rrMlistrrrD)rrrr1r1r2rA8s  rAcCsttj}|jtj|jtj|jt j|jt j|jt j|jt j|jt j|jtj|jtj|jtjt|tt|kst|S)N)rrrr rr2r5rFr rirwrrr CephadmAgentrrlensetr)Zsupported_daemonsr1r1r2get_supported_daemonsJs           rc@s eZdZdS)PortOccupiedErrorN)r/rArBr1r1r1r2r]srcCszy$|jtjtjd|j||fWn|tk rz}z8|jtjkrfd|||f}tj |t |n|WYdd}~Xn*t k r}zt |WYdd}~XnXWd|j XdS)Nr%z Cannot bind to IP %s port %d: %s) setsockoptsocket SOL_SOCKET SO_REUSEADDRrrerrnoZ EADDRINUSEr-warningrr=rclose)rsaddressr;emsgr1r1r2 attempt_bindas   r)rendpointr,cstjdttjttdfdd jdkrfjdkrftjrXtjjStj jSt fddtj dftjdffDS) zDDetect whether a port is in use on the local machine - IPv4 and IPv6zVerifying port %s ...)afrr,csvy"tj|tj}t||jWnNtk r6dStk rp}z |jtjtjfkr\dS|WYdd}~XnXdS)NTF) r SOCK_STREAMrr;rrrZ EAFNOSUPPORTZ EADDRNOTAVAIL)rrrr)rrr1r2 _port_in_usewsz!port_in_use.._port_in_usez0.0.0.0z::c3s|]\}}||VqdS)Nr1).0rr)rr1r2 szport_in_use..) r-rrDr AddressFamilyrPr:is_ipv6AF_INET6AF_INETany)rrr1)rrrr2 port_in_usess  rcCsr|jsntjd|jd|jdt|jrHtjtjtj}t |j}ntjtj tj}|j}t ||||jdS)Nz Verifying IP z port z ...) Zskip_ping_checkr-rr:r;rrrr unwrap_ipv6rr)reprr:r1r1r2 check_ip_ports  rc@s.eZdZdZeddddZedddZdS) TimeoutzN Raised when the lock could not be acquired in *timeout* seconds. 
N) lock_filer,cCs ||_dS)z N)r)r<rr1r1r2r=szTimeout.__init__)r,cCsdj|j}|S)Nz)The file lock '{}' could not be acquired.)rr)r<Ztempr1r1r2r?szTimeout.__str__)r/rArBrCrDr=r?r1r1r1r2rsrc@s>eZdZdddddZddddZeeedd d d ZdS) _Acquire_ReturnProxyFileLockN)lockr,cCs ||_dS)N)r)r<rr1r1r2r=sz_Acquire_ReturnProxy.__init__)r,cCs|jS)N)r)r<r1r1r2 __enter__sz_Acquire_ReturnProxy.__enter__)exc_type exc_value tracebackr,cCs|jjdS)N)rrelease)r<rrrr1r1r2__exit__s z_Acquire_ReturnProxy.__exit__)r/rArBr=rr rr1r1r1r2rsrc@seZdZdeeeddddZeedddZ de ee e d d d Z deddddZddddZeeeddddZddddZddddZddddZdS) rr%N)rrrer,cCsHtjjtstjtdtjjt|d|_||_d|_||_ d|_ dS)Niz.lockr) rrexistsLOCK_DIRmkdirr _lock_filer _lock_file_fdre _lock_counter)r<rrrer1r1r2r=s  zFileLock.__init__)r,cCs |jdk S)N)r)r<r1r1r2 is_lockedszFileLock.is_locked皙?)repoll_intervallr,c Cs|dkr|j}|jd7_t|}|j}tj}yx|jsTtjtd|||j |jrntjtd||Pq6|dkrtj||krtj d||t |jq6tjtd|||tj |q6WWn(t k rtd|jd|_YnXt|dS) aQ Acquires the file lock or fails with a :exc:`Timeout` error. .. code-block:: python # You can use this method in the context manager (recommended) with lock.acquire(): pass # Or use an equivalent try-finally construct: lock.acquire() try: pass finally: lock.release() :arg float timeout: The maximum time waited for the file lock. If ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired. If ``timeout`` is None, the default :attr:`~timeout` is used. :arg float poll_intervall: We check once in *poll_intervall* seconds if we can acquire the file lock. :raises Timeout: if the lock could not be acquired in *timeout* seconds. .. versionchanged:: 2.0.0 This method returns now a *proxy* object instead of *self*, so that it can be used in a with statement without side effects. 
Nr%zAcquiring lock %s on %szLock %s acquired on %srzTimeout acquiring lock %s on %sz2Lock %s not acquired on %s, waiting %s seconds ...)r)reridrtimerr-logQUIET_LOG_LEVEL_acquirerrsleepr=maxr)r<rerZlock_idZ lock_filename start_timer1r1r2acquires:    zFileLock.acquireF)forcer,cCs4|jr0|jd8_|jdks"|r0|jd|_dS)aT Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. Also note, that the lock file itself is not automatically deleted. :arg bool force: If true, the lock counter is ignored and the lock is released in every case. r%rN)rr_release)r<rr1r1r2r!s zFileLock.releasecCs |j|S)N)r)r<r1r1r2r<szFileLock.__enter__)rrrr,cCs |jdS)N)r)r<rrrr1r1r2r@szFileLock.__exit__cCs|jdddS)NT)r)r)r<r1r1r2__del__Ds zFileLock.__del__c CsftjtjBtjB}tj|j|}ytj|tjtj BWn"t t fk rZtj |YnX||_ dS)N)rO_RDWRO_CREATO_TRUNCrrfcntlflockZLOCK_EXZLOCK_NBIOErrorrrr)r<Z open_modefdr1r1r2rHszFileLock._acquirecCs(|j}d|_tj|tjtj|dS)N)rrrZLOCK_UNrr)r<rr1r1r2rTs  zFileLock._releaser&)r&)Nr)F)r/rArBryrDrEr=rrPrrfloatrrrrr rrrrr1r1r1r2rsD rc@s@eZdZdZdZdZdZdZdZe ddd Z e dd d Z d S) rrr%r$r&r'r})r,c Cs6|jd|jt|jtj|jt|jtj|jtji}||S)Nr) SILENTrrrrQUIET_UNLESS_ERRORVERBOSE_ON_FAILUREVERBOSEr)r<_verbosity_level_to_log_levelr1r1r2success_log_levelxs zCallVerbosity.success_log_levelc Cs8|jd|jt|jtj|jtj|jtj|jtji}||S)Nr) rrrrrrrrr)r<rr1r1r2error_log_levels zCallVerbosity.error_log_levelN) r/rArBrrrrrrrErrr1r1r1r2rcs  r)eventsc@seZdZdZddddZedddZdddd Zddd d Ze dd d Z e e e ddddZ e j fe ddddZe e e ddddZe edddZe ddddZe e e e ddddZdS) ThreadedChildWatcheraZThreaded child watcher implementation. The watcher uses a thread per process for waiting for the process finish. It doesn't require subscription on POSIX signal but a thread creation is not free. The watcher has O(1) complexity, its performance doesn't depend on amount of spawn processes. 
N)r,cCstjd|_i|_dS)Nr) itertoolscount _pid_counter_threads)r<r1r1r2r=s zThreadedChildWatcher.__init__cCsdS)NTr1)r<r1r1r2 is_activeszThreadedChildWatcher.is_activecCs |jdS)N) _join_threads)r<r1r1r2rszThreadedChildWatcher.closecCs2ddt|jjD}x|D] }|jqWdS)z%Internal: Join all non-daemon threadscSs g|]}|jr|j r|qSr1)is_alivedaemon)rthreadr1r1r2 sz6ThreadedChildWatcher._join_threads..N)rrvaluesr)r<threadsrr1r1r2rs z"ThreadedChildWatcher._join_threadscCs|S)Nr1)r<r1r1r2rszThreadedChildWatcher.__enter__)rexc_valexc_tbr,cCsdS)Nr1)r<rr r r1r1r2rszThreadedChildWatcher.__exit__)_warnr,cCs6ddt|jjD}|r2||jdt|ddS)NcSsg|]}|jr|qSr1)r)rrr1r1r2rsz0ThreadedChildWatcher.__del__..z0 has registered but not finished child processes)r)rrrrResourceWarning)r<r rr1r1r2rs  zThreadedChildWatcher.__del__)pidcallbackror,cGsFtj}tj|jdt|j||||fdd}||j|<|jdS)Nzwaitpid-T)targetrror) rZget_event_loop threadingr _do_waitpidnextrrrJ)r<r rrolooprr1r1r2add_child_handlers  z&ThreadedChildWatcher.add_child_handler)r r,cCsdS)NTr1)r<r r1r1r2remove_child_handlersz)ThreadedChildWatcher.remove_child_handler)rr,cCsdS)Nr1)r<rr1r1r2 attach_loopsz ThreadedChildWatcher.attach_loop)r expected_pidrror,c Cs|dks tytj|d\}}Wn(tk rH|}d}tjd|YnTXtj|r`tj|}n&tj|rxtj | }nt d||j rtj d|||j rtjd||n|j|||f||jj|dS)Nrz8Unknown child process pid %d, will report returncode 255zunknown wait status z$process %s exited with returncode %sz%Loop %r that handles pid %r is closed)rrwaitpidChildProcessErrorr-r WIFEXITED WEXITSTATUS WIFSIGNALEDWTERMSIG ValueErrorZ get_debugr.Z is_closedZcall_soon_threadsaferpop)r<rrrror status returncoder1r1r2rs*     z ThreadedChildWatcher._do_waitpid)r/rArBrCr=rPrrrr rrwarningswarnrrrrrr1r1r1r2rs r)runcCsLtj}ztj||j|Sz|j|jWdtjd|jXXdS)N)r6Znew_event_loopZset_event_loopZrun_until_completeZshutdown_asyncgensr)cororr1r1r2 async_runs   r')rcommandr`rrekwargsr,c  s|dkrdn|r d7p(|jttttfdfdd }t|\}}} |j} | dkr|j} tj| d| dd j x$|j D]} tj| d | qWx$|j D]} tj| d | qW||| fS) z Wrap subprocess.Popen to - log stdout/stderr to a logger, - 
decode utf-8 - cleanly return out, err, returncode :param timeout: timeout in seconds Nrz: )r,c stjtjjtjjtjjdIdH}|js2t|j s.run_with_timeoutzNon-zero exit code z from r%zstdout zstderr ) rerrDrEr'rrr-rr splitlines) rr(r`rrer)r7r*r+r"rliner1)r(r6rer2r( s  !r(c Ks~t|||||f|\}}}|rtxB||fD]6} | jr(t| jdkr(tddj|d| q(Wtddj||||fS)Nr$zFailed command: r%z: zFailed command: %s)r(rrr8rr) rr(r`rrer)rr*ryrr1r1r2rLsrc Cs\tjd|dj|fdd}ytj||tjjdStjk rV|||YnXdS)Nz Running command (timeout=%s): %sr%cSs"d||f}tj|t|dS)Nz'Command `%s` timed out after %s seconds)r-r.r)r(rerr1r1r2 raise_timeoutas  z#call_timeout..raise_timeout)rerh) r-r.rr-r(rr/rr)rr(rer:r1r1r2 call_timeout\sr;)cli_funcr,c CsVxHdD]@}y tj|Stjk rDtjd|tj|YqXqWtj|S)Nr%r'z'Invalid JSON. Retrying in %s seconds...)r%r'r')rrrr-r.rr)r< sleep_secsr1r1r2json_loads_retryos  r>cCsx|j}tjd|d}xZ|r2tjd|Pn||krJtd||ftjd|||f|d7}tjdqWdS)z Wait for a service to become available :param what: the name of the service :param func: the callable object that determines availability zWaiting for %s...r%z%s is availablez%s not available after %s triesz$%s not available, waiting (%s/%s)...r$N)rgr-rrrr)rwhatr*rgnumr1r1r2 is_availableys rAcCst}|r|j||S)N)rr)fncpr1r1r2 read_configs rDcCstjj|}tjj|S)N)rr expanduserr7)pr1r1r2pathifys rGc Cs@y&tjj|}tjj|tjjdjtSt k r:dSXdS)N)tz) rrgetmtimedatetimeZ fromtimestamptimezoneutcstrftimeDATEFMTr=)rBmtr1r1r2get_file_timestamps rPc Cstjd}|jd|}|r6|d dkr6|dd d}|jd}dj|dd}d d g}xB|D]:}y tjj||jtjj d j t St k rYq`Xq`WdS)Nz(\.[\d]{6})[\d]*z\1r%Zz-0000r%rr&z%Y-%m-%dT%H:%M:%S.%f%zz%Y-%m-%d %H:%M:%S.%f %z)rHr&r&) rZcompilesubrrrJstrptimeZ astimezonerKrLrMrNr)rrFruZfmtsrr1r1r2try_convert_datetimes      rUcs0dttttdfdd tt|jdS)N)valorg_er,csN| r|r|yt|Stk rH}z|dd|p:|Sd}~XnXdS)Nrr%r&)rEr)rVrWr)to_intr1r2rXs  z%_parse_podman_version..to_intr)N)rDrr=rEtuplerr)Z version_strr1)rXr2rsrcCstjS)N)r 
gethostnamer1r1r1r2rsrcCstjdddS)Nrr%r)rrr1r1r1r2get_short_hostnamesr[cCstjptjS)N)rgetfqdnrZr1r1r1r2get_fqdnsr])hostnamer,cCs:tj|dtjtjd}dd|D}dd|D}||fS)N)flagstypecSs&g|]}|dtjkr|ddqS)rr')rr)rir1r1r2rsz$get_ip_addresses..cSs&g|]}|dtjkr|ddqS)rr')rr)rrar1r1r2rs)r getaddrinfo AI_CANONNAMEr)r^rqZipv4_addressesZipv6_addressesr1r1r2rs  rcCs tjjS)N)platformunamemachiner1r1r1r2get_archsrgcCs"tddjddtdDS)NrrVcss|]}tjtjVqdS)N)randomchoicestringascii_lowercase)rrr1r1r2r sz&generate_service_id..)r[rranger1r1r1r2generate_service_id srncCsdjddtdDS)NrVcss |]}tjtjtjVqdS)N)rhrirjrkdigits)rrar1r1r2r sz$generate_password.. )rrmr1r1r1r2generate_password s rqcCs"d}|j|r|t|d}|S)Nzsha256:)rr)rar6r1r1r2normalize_container_id s rrcCs ttjS)N)rDuuiduuid1r1r1r1r2 make_fsid sruc Cs(ytj|Wntk r"dSXdS)NFT)rsZUUIDr)rr1r1r2r s r)r*r,cs&tttdfdd }tt|S)N)rr,cs.d|kr&|jr&t|js&td|j|S)Nrznot an fsid: %s)rrr)r)r*r1r2_validate_fsid' s z%validate_fsid.._validate_fsid)rryr rr!)r*rvr1)r*r2 validate_fsid& srwcs*ttttdfdd }tt|S)zD If we only find a single fsid in /var/lib/ceph/*, use that )rr,csd|kr&|jr&tjd|j|St}t|j}|jddrT|j|jddt |dd}xX|D]P}t |dszqfqfd|ks|j r|j|dqf|d|j krf|j|dqfWt |}|sn6t |dkrtjd|d |d |_n td ||S) NrzUsing specified fsid: %sglobalF)detailrr%zInferring fsid %srz>Cannot infer an fsid, one must be specified (using --fsid): %s)rr-r.rrDr# has_optionaddr list_daemonsrrsortedrrr)rZfsidsrC daemon_listr)r*r1r2 _infer_fsid4 s.        
zinfer_fsid.._infer_fsid) infer_configrryr rr!)r*rr1)r*r2 infer_fsid0 s"rcs&tttdfdd }tt|S)a Infer the cluster configuration using the following priority order: 1- if the user has provided custom conf file (-c option) use it 2- otherwise if daemon --name has been provided use daemon conf 3- otherwise find the mon daemon conf file and use it (if v1) 4- otherwise if {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it 5- finally: fallback to the default file /etc/ceph/ceph.conf )rr,cs@tttdfdd tttdfdd }dkr>jnd_jrnjtkrntjdjSd krjrd krjrjn|j}|dk rȈ|jd d d |jd d d _n0j djdt dt }t j j|r|_jrtjdjn$t j jtr8tjdtt_S)N)r  daemon_namer,cs tjj||}tjj|dS)Nr#) get_data_dirrrZrrr)r rrZ)rr1r2 config_pathg sz8infer_config.._infer_config..config_path)rr,csztdd}xh|D]`}|jddjdr|jdd|kr|jdddkrtjjd |djd d d r|dSqWdS) NF)ryrrVzmon.rstylez cephadm:v1rrr%)r|rrrrrr)rr~r)rrr1r2get_mon_daemon_namek s  " z@infer_config.._infer_config..get_mon_daemon_namer#zUsing specified config: rrrr%rrzInferring config zUsing default config )rDrr#rVr-r.rrrrZ CEPH_CONF_DIR CEPH_CONFrrrr)rrrZ ceph_conf)r*)rrr2 _infer_configd s& & z#infer_config.._infer_config)rryr rr!)r*rr1)r*r2r[ s *r)rr,cCs<tr8djt}x(|jD]}tjdjtj|tjqWt S)NzThis is a development version of cephadm. 
For information regarding the latest stable release: https://docs.ceph.com/docs/{}/cephadm/install z{}{}{}) DEFAULT_IMAGE_IS_MAINrLATEST_STABLE_RELEASEr8r-rrrrrh)rr$r9r1r1r2_get_default_image s  rcs&tttdfdd }tt|S)z( Use the most recent ceph image )rr,csB|jstjjd|_|js*t||jj|_|js:t||_|S)N CEPHADM_IMAGE)rWrr/rinfer_local_ceph_imagernrr)r)r*r1r2 _infer_image s z!infer_image.._infer_image)rryr rr!)r*rr1)r*r2 infer_image s rcs&tttdfdd }tt|S)z3 Require the global --image flag to be set )rr,cs|jstd|S)Nz9This command requires the global --image option to be set)rWr)r)r*r1r2_require_image sz%require_image.._require_image)rryr rr!)r*rr1)r*r2 require_image srcs&tttdfdd }tt|S)N)rr,cst||S)N)update_default_image)r)r*r1r2_default_image sz%default_image.._default_image)rryr rr!)r*rr1)r*r2r srcCst|ddrdSd|_t|dd}|r|jddd}|tjkrPtj|d|_|dkr`tj|_|dkrptj|_|tj krtj|_|t j krt j|_|t jkrt j|d|_|jst j jd|_|jst||_dS) NrWrrr%rrrr)rrWrrr2rrrrr rwrrr/rr)rrZtype_r1r1r2r s,      rcCsdtt|_|S)aDecorator that indicates the command function is meant to have no dependencies and no environmental requirements and can therefore be executed as non-root and with no logging, etc. Commands that have this decorator applied must be simple and self-contained. 
T)rr _execute_early)r*r1r1r2executes_early s rcs&tttdfdd }tt|S)N)rr,cs$tjdtrtd|S)NzDeprecated command used: z$running deprecated commands disabled)r-r NO_DEPRECATEDr)r)r*r1r2_deprecated_command sz/deprecated_command.._deprecated_command)rryr rr!)r*rr1)r*r2deprecated_command sr)r daemon_filterby_namer,cstttftdfdd r.daemon_name_or_typerz7Trying to get container info using invalid daemon name NF)rycs*g|]"}|kr|djkr|qS)r)r)rr)rrrr1r2r sz&get_container_info..rrr%r) rrDr-rr|rget_container_statsrnrrrrF)rrrrZmatching_daemonsZd_typeZd_idrrr)rGrHrIrJrKr1)rrrrr2get_container_info s  r)rcontainer_pathr,c Cst||dddddddg\}}}d}d|krB|jrBd |jkrB|jnd}|dk rT|gntj}x8|D]0}t|||dk }|dk r`tjd |d Pq`Wx~|jD]r}|o|j r|j j d \} } } } |dk r| |j krq| o| j d  rtj d| d| d| d| | SqWdS)a, Infer the local ceph image based on the following priority criteria: 1- the image specified by --image arg (if provided). 2- the same image as the daemon container specified by --name arg (if provided). 3- image used by any ceph container running on the host. In this case we use daemon types. 4- if no container is found then we use the most ceph recent image on the host. Note: any selected container must have the same fsid inferred previously. 
:return: The most recent local ceph image (already pulled) Zimagesz--filterzlabel=ceph=Truezdangling=falsez--formatz;{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}Nrrz!Using container info for daemon ''|@zUsing ceph image with id 'z ' and tag 'z ' created on r)rrrrrr-r.r8isspacelstriprrIrr) rrrrZcontainer_inforZ daemons_lsrrWdigestrItagZ created_dater1r1r2r s," "rcCs6tjddd}tj|j|||j||j|S)Nr6zceph-tmp)moder6)tempfileNamedTemporaryFilerr;r:rr8)rrrZtmp_fr1r1r2 write_tmp5 s  rcCsFtjj|stj||dn tj||tj|||tj||dS)N)r)rrrreror)dirrrrr1r1r2re@ s   recCstjj||d||fS)Nz%s.%s)rrr)rrZtnr1r1r2rJ srcCstjj||S)N)rrr)rr\r1r1r2 get_log_dirO srcCsRtjj||}t|||tttjj|d||tttjj|dd||t|S)NrZposted)rrrrer)rrZrr data_dir_baser1r1r2make_data_dir_baseT s rcCsN|dks|dkrt|\}}t||j||t||j||}t|||t|S)N)rrrZrrer)rrr rrrrZr1r1r2 make_data_dir^ s  rcCs:|dks|dkrt|\}}t||j}t|||t|S)N)rrr\re LOG_DIR_MODE)rrrrr\r1r1r2 make_log_dirh s   rc Cs*t|ddddt|dt|d|gdS)Ninstallz-dz-m0770z-oz-gz/var/run/ceph/%s)rrD)rrrrr1r1r2 make_var_runq src Cs|dks|dkrt|\}}x|D]}|}tjj|rLtjj|tjj|}tjd||ftj |ddtj ||xttj |D]f\}}} tjd|||ftj |||x8| D]0} tjd||| ftj tjj|| ||qWqWq"WdS)z/ Copy a directory tree from src to dst Nzcopy directory `%s` -> `%s`T) ignore_errorszchown %s:%s `%s`) rrrrdrbasenamer-r.shutilrmtreecopytreewalkr) rsrcdstrrZsrc_dirZdst_dirdirpathdirnames filenamesrr1r1r2 copy_treew s     rcCs|dks|dkrt|\}}xr|D]j}|}tjj|rLtjj|tjj|}tjd||ftj ||tjd|||ftj |||q"WdS)z& Copy a files from src to dst Nzcopy file `%s` -> `%s`zchown %s:%s `%s`) rrrrdrrr-r.rcopyfiler)rrrrrsrc_filedst_filer1r1r2 copy_files s    rcCs|dks|dkrt|\}}x|D]}|}tjj|rLtjj|tjj|}tjj|rtj|}tj d||ftj ||tj |q"tj d||ft j ||tj d|||ftj|||q"WdS)z$ Move files from src to dst Nzsymlink '%s' -> '%s'zmove file '%s' -> '%s'zchown %s:%s `%s`)rrrrdrrislinkreadlinkr-r.symlinkr>rmover)rrrrrrrZsrc_rlr1r1r2 move_files s        r)rrrr,cCsRxLtj|D]>\}}}tj|||x$|D]}tjtjj||||q*Wq WdS)N)rrrrr)rrrrrrrr1r1r2recursive_chown s r) executablerr,c Cstjj|\}}tjdkr*|dkr*|d}tjj|r:|S|dkrtjjdd}|dkrytjd}Wnt t 
fk rtj }YnX|sdS|j tj }x*|D]"}tjj||}tjj|r|SqWdS)zTries to find 'executable' in the directories listed in 'path'. A string listing directories separated by 'os.pathsep'; defaults to os.environ['PATH']. Returns the complete filename or None if not found. win32z.exeNPATHCS_PATH)rrsplitextsysrdisfiler/rconfstrAttributeErrorrdefpathrpathsepr)rrrextpathsrFrr1r1r2find_executable s(     rcCs t|}|dkrtd||S)Nz %s not found)rr)rrr1r1r2r s rc Cs:|jr tSx(tD] }y|Stk r0YqXqWdS)N)rXrCONTAINER_PREFERENCEr=)rrar1r1r2find_container_engine s  rcCsb|j}t|ts4ddtD}tdjdj|n*t|tr^|j||jt kr^tdt |S)NcSsg|] }|jqSr1)r)rrar1r1r2r sz*check_container_engine..z^No container engine binary found ({}). Try run `apt/dnf/yum/zypper install `z or z,podman version %d.%d.%d or later is required) rnrMrrrrrrrKMIN_PODMAN_VERSION)rengineZexesr1r1r2check_container_engine s     rcCs&|dk rd|||fSd||fSdS)Nz ceph-%s@%s.%sz ceph-%s@%sr1)rr rr1r1r2 get_unit_name sr)rrrr,c Cs<t|||}y|dStk r6tdj|YnXdS)N systemd_unitzFailed to get unit name for {})get_daemon_descriptionKeyErrorrr)rrrrr1r1r2get_unit_name_by_daemon_name s  rc Csd}d}y>t|dd|gtjd\}}}|dkr8d}d}n d|krDd}Wn8tk r~}ztjd|d}d}WYdd}~XnXd }yVt|dd |gtjd\}}}|j}|dkrd }n |dkrd}n|dkrd}nd }Wn6tk r}ztjd|d }WYdd}~XnX|||fS)NF systemctlz is-enabled)rrTdisabledzunable to run systemctl: %sunknownz is-activeactiverunninginactivestoppedfailed auto-restarterror)r)r)rr)r(rrr=r-rr) r unit_nameenabled installedrr*r)rstater1r1r2 check_unit s<  rcCsdx^|D]V}t||\}}}|r8|dkr8tjd|dS|dk r|rtjd||j|qWdS)NrzUnit %s is enabled and runningTzEnabling unit %sF)rr-renable_service)runitsenablerurrrr1r1r2 check_unitsC s  r CephContainer)rcr,cCs(|jjddddkrdStt||S)Nrr%ragentcephadm-exporterF)rr)rrrPget_running_container_name)rrr1r1r2is_container_runningQ src CsJxD|j|jgD]4}t||jjdddd|g\}}}|jdkr|SqWdS)Nrrxz--formatz{{.State.Status}}r)ra old_cnamer(rnrr)rrrrr*ryr1r1r2rX s  rcCsZd|}|dk r tjj||}tjj|rVt|}|jdrV|jddrV|jddSdS)Nz/etc/ceph/%s.confrxr)rrr7rrD has_sectionrzr)rO legacy_dirrDr#r1r1r2get_legacy_config_fsidc s  
rcCsd}|dkrxyVtjj|j|d|d}|dk r>tjj||}t|d}|jj}WdQRXWntk rvYnX|st ||d}|S)Nrzceph-%sZ ceph_fsidr)r) rrrrZr7rrrrr)rrOr rrrZ fsid_filerr1r1r2get_legacy_daemon_fsidp s    rcCs(|jdk r|jSt|jto&|jjtkS)N)rkrMrnrrKCGROUPS_SPLIT_PODMAN_VERSION)rr1r1r2should_log_to_journald s  rcCst}|tjkr|d2kr|dddddg7}t|}|rD|ddg7}n |d d g7}|d kr~|d g7}|rt|d dg7}n |dg7}n\|tjkrtj|}||jdt7}|d3krd}tj|d}t|} | r d| kr| dr| d}d| kr | dr | dd}|d|d|g7}|dkrt |} | jdd} | jdd} |d| g7}|d | g7}d!} t }d"|krt t \}}t t|p|d}|rt|n|}|d#| d$|d|g7}|d%krJt |} | jd&t}x|D]}|d'j|g7}qWy|d(| d)g7}Wntk r>YnX|d*g7}|dkr^|d+g7}|dkrt |} y|d(| d)g7}Wntk rYnX|d,krt |} y|d(| d)g7}Wntk rYnX|d-d.d/g7}n|d0kr|jtj|d1n|tjkr:tj|||}||j7}n|tjkrdtj|||}|j|jnz|tjkrtj|||}||j7}nR|tjkrtj|||}|j|jn(|tjkrtj|||}|j|j|S)4Nr ceph-exporterz --setuserrPz --setgroupz--default-log-to-file=falsez--default-log-to-journald=truez--default-log-to-stderr=falsez--default-log-to-stderr=truez"--default-log-stderr-prefix=debug rz'--default-mon-cluster-log-to-file=falsez*--default-mon-cluster-log-to-journald=truez)--default-mon-cluster-log-to-stderr=falsez(--default-mon-cluster-log-to-stderr=truerorrrrVrr:rz--web.listen-address=r>rretention_timeZ15dretention_size0z--storage.tsdb.retention.time=z--storage.tsdb.retention.size=httprz--web.external-url=z://rr z--cluster.peer={}z--web.config.file=Z web_configz0--config.file=/etc/alertmanager/alertmanager.ymlz--config.expand-envz node-exporterz--path.procfs=/host/procz--path.sysfs=/host/sysz--path.rootfs=/rootfsz jaeger-agentrc)rr)rrr)rrrrrr2rr+ fetch_metarr]rrriter wrap_ipv6rrr rrFr rr r{rrr)rrr rrrkrr:r;metar#rrschemehostrZ ipv6_addrsaddrr Zpeer nfs_ganeshaZ ceph_exporterrccscr1r1r2r  s                        r cCs t||||||d}|tjkr.t||||d|rftjj|d} t| ||fd} | j|WdQRX|rtjj|d} t| ||fd} | j|WdQRX|t j j krt |} d} d}|dkrVt ||j||}d} ttjj|| ||dttjj|| d ||dttjj|d ||dttjj|d ||ttjj|d ||n|d krt ||j||}d } ttjj|| ||dttjj|| d||dttjj|| d||dttjj|d ||dttjj|d d||n>|dkrFt ||j||}d} ttjj|| ||dttjj|| d ||dn|dkrt ||j||}d} ttjj|| ||dttjj|d ||dn|dkrt 
||j||}d} ttjj|| ||dttjj|d ||dnL|dkr2t ||j||}d} ttjj|| ||dttjj|d ||d| krx| dD]}ttttf| d}t||}tjj|rtjj||jtjj}ntjj|| |}t|||fdd} | j|WdQRXqFWn|tjkrtj|||}|j|||n|tjkr(tj|||}|j|||n|tjkrRtj|||}|j|||n|t jkr|t j|||}|j|||nv|t!jkrt!j|||}|j|||nL|t"jkrt"j|||}|j|||n"|t#jkrt#j|||}|j$t%||t||||dS)N)rrr#)r3rNrVrzetc/prometheusiZalertingr7Zetcrz etc/grafanaZcertszprovisioning/datasourcesz grafana.dbrzetc/alertmanagerrz etc/promtailrzetc/lokiz node-exporterzetc/node-exporterrLzutf-8)r3r5)&rrrrrrrrrrr2rrrrZrerrrrrDr rAisabsrseprFr rrgrirwrrrrr_write_custom_conf_files)rrr rrrr#rNrZrrrfrrBZ data_dir_rootrCZcfgrZfpathr ceph_iscsiZ ceph_nvmeofrrrsgr1r1r2rg s                rg)rr rrrrr,c s.t|}|sdStjj|j|d|d|}tjj|sHt|||dddg}x|D]Љtfdd|DrVtjj|tjjd} t | ||fdd } | j dWdQRX|d krV|d } tjj| st| ||dtjj| tjjd} t | ||fdd } | j dWdQRXqVWdS) Ncustom_config_filesrirrrc3s|]}|kVqdS)Nr1)rrt)ccfr1r2ru sz+_write_custom_conf_files..zutf-8)r3r5rz.tcmu) fetch_custom_config_filesrrrrZrreallrrr) rr rrrrccfilesZcustom_config_dirmandatory_keysrrZtcmu_config_dirZtcmu_file_pathr1)rr2rk s&   r)optionr,cCst|}|jdd|S)Nr)_get_config_jsonr )rjsr1r1r2get_parm s r"cCs|s tS|dkr0tdk r t}qtjj}|anX|ddkrN|d dkrN|}n:tjj|rzt|d}|j}WdQRXnt dj |yt j |}Wn2t k r}zt dj ||WYdd}~XnX|SdS) Nr"r{r%}rzConfig file {} not foundzInvalid JSON in {}: {}r&)rS cached_stdinrstdinrrrrrrrrrr)rjrr!rr1r1r2r  s$   "r cCsHt|dd}|dk r|St|dd}|dk rDtj|p8i}||_|SiS)z:Return a dict containing metadata about a deployment. meta_propertiesNZ meta_json)rrrr()rr Zmjsonr1r1r2r  s  r cCsXt|dd}|r(t|}|jdd|St|dd}|rTt|pBi}|jdd|SiS)aReturn a dict containing arbitrary configuration parameters. This function filters out the key 'custom_config_files' which must not be part of a deployment's configuration key-value pairs. To access custom configuration file data, use `fetch_custom_config_files`. 
config_blobsNrr)rrSr r )r cfg_blobscfg_jsonjdatar1r1r2r s     rcCsDt|dd}|r|jdgSt|dd}|r@t|}|jdgSgS)zoReturn a list containing dicts that can be used to populate custom configuration files for containers. r)Nrr)rrr )rr*r+r,r1r1r2r s    rcCst|dd}|dkrg}t|tr4ttt|j}i}t|dd}t|trZtj|}n |dk rf|}g}xB|D]:}t||kr|j t |t||qp|j t d|qpW|S)zCReturn a list of Endpoints, which have a port and ip attribute Z tcp_portsNport_ipsz0.0.0.0) rrMrDrrrErrrr5r9)rrr-Z port_ips_attrrr;r1r1r2r s"       rcCs d}d}t|}|r8|jd}|jd}|r8|r8||fSd|kr|jry$t|jd}|j}WdQRXWn*tk r}zt|WYdd}~XnXd|kr|jrd|j|jf}nbd|ko|j ry$t|j d}|j}WdQRXWn,tk r}zt|WYdd}~XnX||fS)Nr#rNrrz[%s] key = %s ) rrr#rrFileNotFoundErrorrrrrN)rr#rNrrrr1r1r2get_config_and_keyring s.  r/cCs|t}|tjkr|jtj|tjkr8|jtjn@|tjkrx|sJttj|||}t ||j ||}|j|j||S)N) rrir r rmrwrrrrrZ)rrr rrkrrZr1r1r2rm s   rmc Cst}|tjkr|rtjjd|}tjj|r6d||<t||j}d||<d|}tjj|rfd||<|dkrt |rd} | || <|tjko|rt ||j ||} |dkrd |} n d ||f} |dkr| d || <|sd || d <|dVkrd||f|| d<|dWkrd|d<d|d<|dXkr>d|d<d|d<d|d<|dkrt |j r|j d|} | d} tjj| rtjj| stj| ddd|| <ntjd | d!d"|d<y|jr@t|j}tjj|r(d#||d$<d%||d&<d'||d(<d)||d*<d+||d,<d-||d.<ntjd/jtjd0tjWntk rXYnX|tjkr|rt ||j ||} t||j}|d1krd2|tjj| d3<d4|tjj| d5<n|d6krd7|tjj| d8<d9|tjj| d5<n|d:kr"d;|tjj| d<<d||<d=|tjj| d5<n|d>krXd?|tjj| d@<dA|dB<dC|d<dD|d<np|dEkrdF|tjj| dG<dH|tjj| dI<dJ|tjj| dK<dL|tjj| dM<n|dNkrdO|tjj| dP<|tjkr |stt ||j ||} tj|||}|j|j| |t jkrB|s"tt ||j ||} |jt j| |t!jkrx|sXtt ||j ||} |jt!j| |t"jkr|stt ||j ||} | j#dQrt$j%dRdS| } t||j}|jt"j| ||t&jkr |stt ||j ||} |jt&j| |t'jkrP|s"tt'j|||}t ||j ||} |j|j| t(|j)t*r~tjjdTr~dT|kr~dU|dT<|S)YNz /var/run/cephz/var/run/ceph:zz/var/log/ceph:zz/var/lib/ceph/%s/crashz/var/lib/ceph/crash:zrz/run/systemd/journalrz!/var/lib/ceph/radosgw/ceph-rgw.%sz/var/lib/ceph/%s/ceph-%sz:zz/etc/ceph/ceph.conf:zz/config rbd-mirror cephfs-mirror ceph-exporterz#/etc/ceph/ceph.client.%s.%s.keyringz/keyringrrclusterless-ceph-volumez/devz /run/udevz/sysz/run/lvmz /run/lock/lvmrz/selinuxi)rz/sys/fs/selinux:rozCluster direcotry z does not 
exist.z/rootfsz,/usr/lib/python3.6/site-packages/ceph_volumez/src/ceph-volume/ceph_volumez/usr/sbin/cephadmz/src/cephadm/cephadm.pyz/usr/share/ceph/mgrz/src/pybind/mgrz%/usr/lib/python3.6/site-packages/cephz/src/python-common/cephz&/etc/grafana/dashboards/ceph-dashboardz%/monitoring/ceph-mixin/dashboards_outz,/etc/prometheus/ceph/ceph_default_alerts.ymlz,/monitoring/ceph-mixin/prometheus_alerts.ymlz{}{}{}z)Ceph shared source folder does not exist.rz/etc/prometheus:Zzetc/prometheusz /prometheus:Zr7rz /etc/loki:Zzetc/lokiz/loki:Zrz/etc/promtail:Zz etc/promtailz /promtail:Zz node-exporterz/etc/node-exporter:Zzetc/node-exporterz /host/proc:roz/procz /host/sys:roz /rootfs:rorz/etc/grafana/grafana.ini:Zzetc/grafana/grafana.iniz'/etc/grafana/provisioning/datasources:Zz$etc/grafana/provisioning/datasourcesz/etc/grafana/certs:Zzetc/grafana/certsz/var/lib/grafana/grafana.db:Zzdata/grafana.dbrz/etc/alertmanager:Zzetc/alertmanagerz.tcmuz\.tcmu$rVz /etc/hostsz /etc/hosts:ro)r0r1rr2)rrr3)rr3)+rSrrrrrrrr\rrrZ HostFactsselinux_enabledrer-rZshared_ceph_folderrGrrrrrrr2rFr rrupdaterUrrwrirrZrSrrrMrnr)rrr r no_configrTZrun_pathr\Z crash_dirZjournald_sock_dirrZZ cdata_dirZ cluster_dirZselinux_folderZ ceph_folderrrr1r1r2rU- s                                              rU)rrrarrrorWr,c CsD|dkr g}|jd|jdt||jd|||||||j|jd S)NzCEPH_VOLUME_SKIP_RESTORECON=yeszCEPH_VOLUME_DEBUG=1z/usr/sbin/ceph-volume) rWrYrorrrWrrarirj)r5rrWrirj)rrrarrrorWr1r1r2get_ceph_volume_container s   r8)rcontainer_argsr,cCs2t|jtr$|jjtkr$|jdn |jddS)Nz--pids-limit=-1z--pids-limit=0)rMrnrrK#PIDS_LIMIT_UNLIMITED_PODMAN_VERSIONr5)rr9r1r1r2set_pids_limit_unlimited s   r;)rrr rrptracer9r,cCsd}d}g} g} d} |tjkr(| jd|dkr4g}|tjksH|tjkrRt|||d4kr^d}|dkrvd}d|}n|d krd }d |}n|d krd }d|}n|dkrd}d|}n|d5krd|}d||f}n|tjkrd}n|tjkrt||ddd|gdjtjd}|jd\}}t|t|fSt k r} z|| f}WYdd} ~ Xq*Xq*W|rt d|dd |d t d dS) Nstatz-cz%u %g)rWrYro)rr%z#Failed to extract uid/gid for path rz: r%zuid/gid not found) rWrMrDrr%rrrrErr) rZimgrrexfprrrrr1r1r2rts*   r) 
rrr rrrrr#rNosd_fsiddeployment_typerr,c s | pg} | tjkrltfdd| Drl|dkrPtjddjtt| dntdj djtt| |t |j ||} | tj krt jj|  rtd| |d kot jj|  r|st|stt|||} t|||}t|||||t |j d |}t|j}tjd d d t|d |ddddg t|d ||d|d|| jd|jdidjt|d||fd}|j|WdQRXnt|||||||| tj kr|tjkrt}t|t stt||}|j!|n*|rt"||||||| | d nt#dt jj| dsRt| d||fd}|jdWdQRXt| d||fd}|jdWdQRXt$|| rt%}|j&dd| D|j'j(|g|j)| tj kr|t*j+krt,d d!t-|||gt,d d"t-|||gdS)#Ncsg|]}t|qSr1)r)rr)rr1r2rsz!deploy_daemon..rzceph-mgr TCP port(s) rz already in usez/TCP Port(s) '{}' required for {} already in usez,cannot reconfig, data path %s does not existrz/usr/bin/ceph-monz--mkfsz-iz--fsidz-cz /tmp/configz --keyringz /tmp/keyringz/var/log/ceph:zz/var/lib/ceph/mon/ceph-%s:zz/tmp/keyring:zz /tmp/config:z)rWrYrorz/config)r3)rErz7attempting to deploy a daemon without a container imagez /unit.createdz0mtime is time the daemon deployment was created z/unit.configuredz&mtime is time we were last configured cSsg|] }|jqSr1)r;)rrr1r1r2rsrz reset-failedrestart).rQrRrr-rrrrDrrrrZrTrrrrrrgrr\rrWr rr%rrrr rrMrSdeploy_daemon_unitdeploy_daemon_unitsrupdate_firewalld Firewalld open_portsexternal_portsr apply_rulesrrrr)rrr rrrrr#rNrErFrrZ tmp_keyring tmp_configmon_dirr\rZ config_jsZ cephadm_agentfwr1)rr2 deploy_daemons          "rScCs|r|jd|d|jddj|jddd|jddj|jdt|jtr|jddjdd |jdd Dd|jddjd d |jddd Dd|jdjd d |jD|rdndddS)Nz# rz! 
r%T)rz 2> /dev/null cSsg|]}tj|qSr1)shlexquote)rar1r1r2r sz0_write_container_cmd_to_bash..)storagecSsg|]}tj|qSr1)rTrU)rrVr1r1r2rs)rrWcSsg|]}tj|qSr1)rTrU)rrVr1r1r2rsz &rV)rrrm_cmdrMrnrrun_cmd)rZfile_objrcomment backgroundr1r1r2_write_container_cmd_to_bashs  &(r\)rrrr,c std}|djsdSdj|jdd}|d||d}|jsLdStddfdd y |Wn$tk rtjd |YnXdS) Nz/sys/fs/cgroupz system.slicezsystem-ceph\x2d{}.slicer"z\x2dz.service)rr,cs.x |jD]}|jr |q W|jdS)N)iterdiris_dirrmdir)rrF)cg_trimr1r2r`(s zclean_cgroup..cg_trimzFailed to trim old cgroups )r rrr'rr-r)rrrZ CGROUPV2_PATH slice_nameZcg_pathr1)r`r2 clean_cgroups  rb) rrrrr rrenablerJrErr,c $/s8tttddfdd } t|j||} | d} | d}t| t}t|^}|jd|tjkrt d}|jdj ||||d |d kr| st t j jd d || f}t j j|r|jd xdDD](}t j j| |}|jdj |||dqWntddgt|||t|||d|||fd}t|jtjd\}}}d|krhddt|| dg}nddt|d| ddg}t|t|||t|||d|||fd}t||dnR|tjkr|jd jtj| d!d"d#tj||}|j}t||d$d!d%t|d&|t|ft}|jj rHtj ndj!r\tj!ndd'|j"d(s| rd)d*| D|d(<ng|d(<|jt#j$|d+d,d#WdQRXWdQRX|d krd-nd}t| d.&}| t%t|||d krR| st tdd/t|| gt|||t|||d0|||fd}t||d1n|tjkrd2} tj||}|j}|jd3d j|j&d#|jdE| d5||t|d6fd#|jdF| d7||t|d6fd#|jd jtj| d8d"d#WdQRXt| d9}| t%t||WdQRXrbt| d:}|jj'd#WdQRXt(||t)|t*|}!d;|}"tj+d<|"dd=}|j|!WdQRXt,d>d?gt-|||}#td>d@|#gtj.dtd>dA|#gtj.d|rt,d>dB|#g|r4t/||#t,d>dC|#gdS)GN)rrer,c sljjd}|jd|jddjjd|dd|jd|jddjj|dddS) Nz inspect %s &>/dev/nullz! z || r%T)rrez )re)rnrrrrstop_cmdra)rreZcontainer_exists)rrr1r2add_stop_actionsBs.z-deploy_daemon_units..add_stop_actionsz /unit.runz /unit.metazset -e rz@{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid} ) install_pathrrrrz /etc/ceph/osdz%s-%s.json.adopted-by-cephadmz%# Simple OSDs need chown on startup: blockblock.db block.walz&[ ! 
-L {p} ] || chown {uid}:{gid} {p} )rFrrZactivatez --bad-optionzceph-%s-%s.%s-activate-test)rorrra)rz unrecognized arguments: activatelvmz --no-systemdz--osd-idz --osd-uuidz --no-tmpfszceph-%s-%s.%s-activatez%LVM OSDs use ceph-volume lvm activater%T)rqrziscsi tcmu-runner container)r[z%s.%s)rirjrcSsg|] }|jqSr1)r;)rrr1r1r2rsz'deploy_daemon_units..r')indentz/unit.poststopZ deactivatezceph-%s-%s.%s-deactivatezdeactivate osdz/runz! zrm z/ceph-%s@%s.%s.service-pidz.tcmuz/ceph-%s@%s.%s.service-cidFz /unit.stopz /unit.imagezceph-%s@.servicer)r4rz daemon-reloadstopz reset-failedrcrJ)rgrhriz! rm z! rm )0rrrErrZrrrrrrrrrrrr8rUrmr(rYrrrDr\rir rsrrvr r6rirjrrdumpsrrdrWinstall_sysctlinstall_base_units get_unit_filerbrrrrb)$rrrrr rrrcrJrErrerZZ run_file_pathmeta_file_pathrZmetafrf simple_fnrrFZtest_cvrr*ryr+Zprestartrrur reZpoststopr@unit unit_filerr1)rrr2rI3s            ,       (((         rIc@sXeZdZUddgiZeeeefddZddZ ddZ d d Z d d Z d dZ dS)rKri cCs||_|j|_dS)N)rcheck available)r<rr1r1r2r=szFirewalld.__init__cCsftd|_|jstjddSt|jd\}}}|sBtjddS|dkrXtjddStjdd S) Nz firewall-cmdz'firewalld does not appear to be presentFzfirewalld.servicez firewalld.service is not enabledrz firewalld.service is not runningzfirewalld readyT)rr+r-r.rrr)r<rrrr1r1r2rvs     zFirewalld.checkcCs|jstjd|dS|dkr&d}n"|dkr4d}n|tjkrDd}ndS|jsVtd t|j|jd d |gt j d \}}}|rtj d |t|j|jd d|g\}}}|rtd||fntjd|dS)NzGNot possible to enable service <%s>. firewalld.service is not availablerzceph-monrrrrPrzcommand not definedz --permanentz--query-service)rz0Enabling firewalld service %s in current zone...z --add-servicez,unable to add service %s to current zone: %sz/firewalld service %s is enabled in current zone)rrr) rwr-r.rFr r+rr(rrrr)r<r Zsvcrr*ryr1r1r2enable_service_fors( "zFirewalld.enable_service_forcCs|jstjd|dS|js&tdx|D]}t|d}t|j|jdd|gtj d\}}}|rtj d|t|j|jdd|g\}}}|rtd ||fq,tjd |q,WdS) NzCNot possible to open ports <%s>. 
firewalld.service is not availablezcommand not definedz/tcpz --permanentz --query-port)rz-Enabling firewalld port %s in current zone...z --add-portz)unable to add port %s to current zone: %sz,firewalld port %s is enabled in current zone) rwr-r.r+rrDr(rrrr)r<fw_portsr;tcp_portrr*ryr1r1r2rLs  "zFirewalld.open_portscCs|jstjd|dS|js&tdx|D]}t|d}t|j|jdd|gtj d\}}}|stj d|t|j|jdd|g\}}}|rtd ||fqtj d |d q,tj d |d q,WdS)NzDNot possible to close ports <%s>. firewalld.service is not availablezcommand not definedz/tcpz --permanentz --query-port)rz$Disabling port %s in current zone...z --remove-portz.unable to remove port %s from current zone: %szPort z disabledzfirewalld port z already closed) rwr-r.r+rrDr(rrrr)r<ryr;rzrr*ryr1r1r2 close_ports+s   "zFirewalld.close_portscCs.|js dS|jstdt|j|jdgdS)Nzcommand not definedz--reload)rwr+rrr)r<r1r1r2rNBs zFirewalld.apply_rulesN)r/rArBrMrrDrrEr=rvrxrLr{rNr1r1r1r2rKs rKcCs,d|ko |js(t|}|j||jdS)Nskip_firewalld)r|rKrxrN)rr Zfirewallr1r1r2rJMs rJ)rrr r,cCstttdddd}t|jjd|d|d}g}|dkrJtj}n6|d kr\tj}n$|d krntj}n|t j krt j}t ||}|rt|jj d d d |||t |ddgdS)z Set up sysctl settings N)conflinesr,c Ss8d|d}t|ddd}|jdj|WdQRXdS)N# created by cephadmrV)r3r4r)rrV)rV)rrr)r}r~rr1r1r2_writeYs zinstall_sysctl.._writez90-ceph-r"z.confrrriT)rexist_oksysctlz--system)r rrDr`joinpathrrrrrwr filter_sysctl_settingsrr)rrr rr}r~r1r1r2roUs        ro)rvariabler,cCst|dd|g\}}}|pdS)zC Read a sysctl setting by executing 'sysctl -b {variable}' rz-bN)r()rrrr*r)r1r1r2 sysctl_getwsr)rr~r,cs"ttdfdd }tt||S)z Given a list of sysctl settings, examine the system's current configuration and return those which are not currently set as described. 
) desired_liner,csr|jd}|dkr|d|}|j}| s4|jr8dStdd|jd\}}| s\| r`dSt|}||kS)N#r%FcSs|jS)N)r)rr1r1r2sz>filter_sysctl_settings..test_setting..=r&)findrrrrr)rZ comment_startZsettingZ desired_valueZ actual_value)rr1r2 test_settings    z,filter_sysctl_settings..test_setting)rDrPrr)rr~rr1)rr2rs r)rrr,c Csd}t|d|d}|s dSt|}tjd|d|dx|D]}ytj||j|d8}WqFtjk r"}zt|j d rtj d |d yt j ||d8}Wn:t k r}ztj d |d |d WYdd}~XnXn$tj d|d|d|jd |WYdd}~XqFXqFW|dkrJtjd|jd dSt|}||krpd|d|d} n||kr~d} tj d| ddS)zw Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration. This moves it to '/etc/sysctl.d'. z/usr/lib/sysctl.dz /90-ceph-z-*.confNz Found sysctl z files in deprecated location z. Starting Migration.r%zalready existsz*Destination file already exists. Deleting rzCould not remove z: zCould not move z from z to rz'Successfully migrated sysctl config to zpartially successful (failed rrZ unsuccessfulz"Migration of sysctl configuration z/. You may want to perform a migration manually.)rrr-rrrr`rrDrrrr>r) rrZdeprecated_locationZdeprecated_confsZ file_countr}r*Zdel_errZfiles_processedr!r1r1r2migrate_sysctl_dirs8    ,:   rcCsdtjj|jd}t|jddd}|jdWdQRX|snt|dddgt|dddgt|dd dgtjj|jd |}t|jd |d dd}|jd j|dWdQRX|st|ddd|gt|dd d|gtjj|jd |rdSt|jd |dd<}dddddddddg }|jd|dj |dj |fWdQRXdS)z9 Set up ceph.target and ceph-$fsid.target units. 
z /ceph.targetN)r4zX[Unit] Description=All Ceph clusters and services [Install] WantedBy=multi-user.target rdisablez ceph.targetrcrJz/ceph-%s.targetz/ceph-z.targetz[Unit] Description=Ceph cluster {fsid} PartOf=ceph.target Before=ceph.target [Install] WantedBy=multi-user.target ceph.target )rzceph-%s.targetzceph-monzceph-mgrzceph-mdszceph-osdz ceph-fuseZradosgwz rbd-mirrorz cephfs-mirrorz tcmu-runnerz# created by cephadm /var/log/ceph/%s/*.log { rotate 7 daily compress sharedscripts postrotate killall -q -1 %s || pkill -1 -x '%s' || true endscript missingok notifempty su root root } r%r) rrrrbrrrrr^r)rrZexistedrZtargetsr1r1r2rps:  rpcCs^d}t|jtr(d}|jjtkr(|d7}t|jt}dj||j||rHdnd|rRdndd}|S)NrVzyExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid Type=forking PIDFile=%t/%n-pid z Delegate=yes ai# generated by cephadm [Unit] Description=Ceph %i for {fsid} # According to: # http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget # these can be removed once ceph-mon will dynamically change network # configuration. After=network-online.target local-fs.target time-sync.target{docker_after} Wants=network-online.target local-fs.target time-sync.target {docker_requires} PartOf=ceph-{fsid}.target Before=ceph-{fsid}.target [Service] LimitNOFILE=1048576 LimitNPROC=1048576 EnvironmentFile=-/etc/environment ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop' ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop KillMode=none Restart=on-failure RestartSec=10s TimeoutStartSec=200 TimeoutStopSec=120 StartLimitInterval=30min StartLimitBurst=5 {extra_args} [Install] WantedBy=ceph-{fsid}.target z docker.servicezRequires=docker.service )rrZrKZ docker_afterZdocker_requires)rMrnrrKrrrrZ)rrrKrXrr1r1r2rqs   ! 
rqc@seZdZgidgddddddddf eeeeeeeefeeeeeeeeeeeeeeeeeeeddddZ e gigddddddddf eeeeeeeeeefeeeeeeeeeeeeeeeeeeddd d Z e ed d d Z e jedddd Z e ed ddZeed ddZeeeedddZddZd!eeeedddZd"eeeeedddZeejfdd ZdS)#rrVNFT)rrWrYrorrar9rWrr<rrr=rirjr,cCsp||_||_||_||_||_||_||_||_| |_| |_ | rD| ng|_ | rR| n|j |_ | |_ ||_||_dS)N)rrWrYror_cnamer9rWrr<rrmrr=rirj)r<rrWrYrorrar9rWrr<rrr=rirjr1r1r2r=HszCephContainer.__init__)rrr rrYrorr9rWrr<rrr=rirjr,cCs2|||j|||d|||f|| | | | | |||dS)Nz ceph-%s-%s.%s)rWrYrorrar9rWrr<rrr=rirj)rW)rrrr rrYrorr9rWrr<rrr=rirjr1r1r2r?is  zCephContainer.for_daemon)r,cCs|jjddS)a podman adds the current container name to the /etc/hosts file. Turns out, python's `socket.getfqdn()` differs from `hostname -f`, when we have the container names containing dots in it.: # podman run --name foo.bar.baz.com ceph/ceph /bin/bash [root@sebastians-laptop /]# cat /etc/hosts localhost ::1 localhost sebastians-laptop foo.bar.baz.com [root@sebastians-laptop /]# hostname -f sebastians-laptop [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())' foo.bar.baz.com Fascinatingly, this doesn't happen when using dashes. 
rr")rr')r<r1r1r2raszCephContainer.cname)rVr,cCs ||_dS)N)r)r<rVr1r1r2rascCs|jS)N)r)r<r1r1r2rszCephContainer.old_cnamecCst|jjjddddg}t|jjtr.cSs g|]}ddjdj|gqS)z--mountz{}r)rr)rrr1r1r2rs)rDrrnrrMrrrr5rr rWrrirjr=rYrr<rrarWsumrrqrr9ro)r<cmd_argsrWvolsrkrhr1r1r2rYsV         zCephContainer.run_cmd)r+r,cCst|jjjdddg}dd|jddtg}g}g}|jrF|jd|jjrX|jd|j rl|j d d g|j r|jd |dd g7}|j rx|j D]}|j d|gqWt d d|jjDg}t dd|jDg}||j|||d|d|jg|ddS)Nr%z--rmz --ipc=hostz-ezCONTAINER_IMAGE=%sz NODE_NAME=%sz --net=hostz --no-hostsz --privilegedz--group-add=diskz--initzCEPH_USE_RANDOM_NONCE=1cSs g|]\}}dd||fgqS)z-vz%s:%sr1)rrrr1r1r2rsz+CephContainer.shell_cmd..cSs g|]}ddjdj|gqS)z--mountz{}r)rr)rrr1r1r2rsz --entrypointrr%)rDrrnrrWrr=r5Zno_hostsrr rrWrrrqrr9)r<r+rrWrrkrhr1r1r2 shell_cmds8       zCephContainer.shell_cmdcCsDt|j|}|s tdj|jt|jjjdg|j|jg|S)Nzunable to find container "{}"r$) rrrrrarDrnrr9)r<r+rar1r1r2exec_cmds zCephContainer.exec_cmd)rrWr,cCsDt|jjjddg}|r"|jd|r4|j|jn |j|j|S)Nrmz-fz --storage)rDrrnrr5rra)r<rrWryr1r1r2rX$s   zCephContainer.rm_cmd)rrer,cCsT|dkr*t|jjjd|r |jn|jg}n&t|jjjdd||rH|jn|jg}|S)Nrmz-t)rDrrnrrra)r<rreryr1r1r2rd1s  zCephContainer.stop_cmdcCs$t|j|j|j||d\}}}|S)N)r`rer)rrrYrY)r<rerrrr1r1r2r%?s zCephContainer.run)FF)FN)r/rArBryrDrrrrPr=rr?rrasetterrrYrrrXrErdrdrrr%r1r1r1r2rGsLRTB*  csXeZdZdddfdd ZddddZddd d Zeeefdd d d ZZ S) MgrListenerrN)rr,cs$||_d|_tt|j|jddS)NF)r)rrmrrr=r%)r<r)rr1r2r=IszMgrListener.__init__)r,c ,Cstjtjtj}|jdt|jjf|jd|jdt j t j j }t j |_|j|jj|jj|j|jj|j|dd}x|jsyRy|j\}}Wntjk rwYnXyt|jdj}WnHtk r}z*d|}|j|jtj|wWYdd}~XnXx|j|j} | s2Pytj | } |j!| WnFtk r}z(d|}|j|jtj|WYdd}~XnHX|jd d | kr|jj"|jj#j"|jj$j"tj%d | qWWqtk r}ztjd |WYdd}~XqXqWdS) Nz0.0.0.0<r%T)Z server_siderpz2Failed to extract length of payload from message: z-Failed to extract json payload from message: sACKr#zGot mgr message z$Mgr Listener encountered exception: )&rrrrrEr listener_port settimeoutlistensslcreate_default_contextZPurposeZ CLIENT_AUTH CERT_REQUIRED 
verify_modeZload_cert_chainlistener_cert_pathlistener_key_pathload_verify_locationsca_pathZ wrap_socketrmacceptrerecvrr=sendencoder-rrrhandle_json_payloadwakeup ls_gatherervolume_gathererr.) r<Z listenSocketssl_ctxZsecureListenSocketconnrlengthrerr_strZpayloadr7r1r1r2r%NsR            zMgrListener.runcCs d|_dS)NT)rm)r<r1r1r2shutdownzszMgrListener.shutdown)r7r,c Cst|d|j_d|krtjd|d}xL|D]D}||jjkr0tjj|jj |}t |}|j ||WdQRXq0W|jj |jj dS)NZcounterr#zReceived new config from mgr)rErackr-rr^rrr daemon_dirrrpull_conf_settingsr)r<r7r#rrrr1r1r2r}s     zMgrListener.handle_json_payload) r/rArBr=r%rrr rrr1r1)rr2rHs,rc@sTeZdZdZdZdZdZddddd gZd-ee e e e fd d d Z ife e e fddddZife e e fddddZe dddZe dddZe dddZddddZddddZdddd Zddd!d"Zd.eee efd#d$d%Ze e e e effdd&d'Ze e e e e fd(d)d*Zeee e e fefdd+d,ZdS)/rri2!rlFz agent.jsonrNz root_cert.pemz listener.crtz listener.keyrV)rrrcs|_|_|_d_d_d_d_tjj |j jj dj_ tjj j d_ tjj j d_tjj j d_tjj j d_tjj j d_d_d _t_t_tfd d d _tfd d d_d_dddg_d_i_dS)Ni:rVrz agent.jsonrNz root_cert.pemz listener.crtz listener.keyr%csjS)N)_get_lsr1)r<r1r2rsz'CephadmAgent.__init__..ZLscs jddS)NF)enhanced) _ceph_volumer1)r<r1r2rsZVolumeFgr)rrr starting_port target_ip target_portrrrrrZr rrrfrrrrrreventr mgr_listener AgentGathererrrdevice_enhanced_scanrecent_iteration_run_timesrecent_iteration_indexcached_ls_values)r<rrrr1)r<r2r=s.$  zCephadmAgent.__init__N)r#r,cCs(x"|jD]}||krtd|qWdS)Nz%required file missing from config: %s)r^r)r<r#rCr1r1r2rs zCephadmAgent.validatec Cs|s tdt|tst|j|xH|D]@}||jkr*tjj|j |}t |}|j ||WdQRXq*Wtjj|j d}t |}|j |j WdQRXt |j}tjj|j d}t |}|j tj|dddWdQRXtjj|jj|j}t |}|j |jWdQRXt|jddgt|jdd |jgtjd t|jdd |jgtjd t|jdd d |jgdS)NzAgent needs a configzunit.runz unit.metar')rkrrz daemon-reloadrm)rz reset-failedrcz--now)rrMrSrrr^rrrrrrunit_runr rrrnrbrrurr(rr) r<r#rrrZ unit_run_pathr rrZunit_file_pathr1r1r2rHs2       "   zCephadmAgent.deploy_daemon_unit)r,cCsdjt|j|j|jS)Nz {}.service)rrrr r)r<r1r1r2rszCephadmAgent.unit_namec Cs@tjd}tjjtjd}d|d|d|jd|jdS)NZpython3rzset -e r%z 
agent --fsid z --daemon-id z & ) rwhichrrrealpathrargvrr)r<Zpy3Z binary_pathr1r1r2rs zCephadmAgent.unit_runcCsdj|j|jdS)Na#generated by cephadm [Unit] Description=cephadm agent for cluster {fsid} PartOf=ceph-{fsid}.target Before=ceph-{fsid}.target [Service] Type=forking ExecStart=/bin/bash {data_dir}/unit.run Restart=on-failure RestartSec=10s [Install] WantedBy=ceph-{fsid}.target )rrZ)rrr)r<r1r1r2ruszCephadmAgent.unit_filecCsFd|_|jjr|jj|jjr.|jj|jjrB|jjdS)NT)rmrrrrr)r<r1r1r2rs     zCephadmAgent.shutdowncCs|jjdS)N)rr)r<r1r1r2rszCephadmAgent.wakeupc sJyhtjdR}tj|}|d_|d_t|d_t|d_|d_ |d}WdQRXWn8t k r}zj t d|WYdd}~XnXy&tj d}|j_WdQRXWn:t k r}zj t d |WYdd}~XnXjrjstd _|jd kr2d _jjfd ddS)NrrrZrefresh_periodrrrz4Failed to get agent target ip and port from config: zFailed to get agent keyring: FtrueTcsjjdS)N)r)rrr1)r<r1r2rsz1CephadmAgent.pull_conf_settings..)rrrloadrrrE loop_intervalrrr=rrrfrrNrrlowerr update_func)r<rr#Zuse_lsmrr1)r<r2rs.      zCephadmAgent.pull_conf_settingsc!CsB|jyfx@tdD]4}t|jtd|js:t|j|_P|jd7_qW|jsltdt|jddWn0t k r}ztd|WYdd}~XnX|j j s|j j |j j s|j j |jj s|jj tj}d|_tj|_|j|jx<|jsz/data/z Content-Typezapplication/json)context)secondszReceived mgr response: "resultz" z seconds after sending request.z Failed to send metadata to mgr: r&gcSsg|] }|r|qSr1r1)rrr1r1r2rdsz$CephadmAgent.run..r)tjdt|j}x|D]}||j|d<q W|dfS|j}d}d}t|jjt|jkrtjdi|_t|j}x|D]}||j|d<qW|dfSx|jjD]\}}|ddkr|||j|<q|d||dkrd}P|j|d ||d ks(|j|d ||d kr,d}||d |j|d <||d |j|d <d ||kr||d |j|d <qW|rtjdt|j}i|_x|D]}||j|d<qW|dfSd d |jjD}||fSdS)Nz+No cached ls output. Running full daemon lsrTFz;Change detected in state of daemons. Running full daemon lsrrrGrrrcSsg|] \}}|qSr1r1)rrrr1r1r2rsz(CephadmAgent._get_ls..) rr-rr|rrrrrq)r<rrZ ls_subsetZ need_full_lsZ state_changerrr1r1r2rsL           zCephadmAgent._get_ls)rV)F)r/rArBr Z default_portrrmr^ryrDrrEr=rrrHrrrurrrr%rPrrr rrrrr1r1r1r2rs.! 
J@ rcsfeZdZddeeeddfdd Zddd d Zddd d Zddd dZ eddddZ Z S)rUnnamedrrN)rr* gatherer_type initial_ackr,csV||_||_||_||_t|_d|_d|_dddg|_d|_ t t |j |j ddS)NFgr)r)rr*rrrrr7rmrrrrr=r%)r<rr*rr)rr1r2r=s zAgentGatherer.__init__)r,cCsZxR|jsTytj}|jj}d}y|j\|_}Wn>tk rt}z"tj |j d|d|_WYdd}~XnX||jks|r||_|jj tj}t j ||d}|j|j|j<|jdd|_t|jdtdd|jD}|jjt|jjt|d |jjWqtk rP}ztj |j d |WYdd}~XqXqWdS) NFz0 Gatherer encountered exception gathering data: )rr%r&gcSsg|] }|r|qSr1r1)rrr1r1r2rsz%AgentGatherer.run..rz! Gatherer encountered exception: )rmrrrrr*r7r=r-rrrrJrrrrrrrr4rrrEr)r<rrZchangerrrrr1r1r2r%s,   zAgentGatherer.runcCs d|_dS)NT)rm)r<r1r1r2r#szAgentGatherer.shutdowncCs|jjdS)N)rr)r<r1r1r2r&szAgentGatherer.wakeup)r*r,cCs ||_dS)N)r*)r<r*r1r1r2r)szAgentGatherer.update_func)rr) r/rArBr rDrEr=r%rrrrr1r1)rr2rs  rcCs<t||j|j}tjj|js0td|jd|jdS)NzAgent daemon directory z2 does not exist. Perhaps agent was never deployed?) rrrrrrdrrr%)rrr1r1r2 command_agent-srcCspddl}y|jd}Wntk r2tddSXd}tdjt|d|t|d|t|d |t|d |dS) Nrrzcephadm version UNKNOWNr%zz#cephadm version {0} ({1}) {2} ({3})CEPH_GIT_NICE_VER CEPH_GIT_VERCEPH_RELEASE_NAMECEPH_RELEASE_TYPE) importlib import_module ImportErrorprintrr)rrZvmodZ_unsetr1r1r2command_version8s   rc CsNyt||j|jWn0tk rDd}tjd|t|YnXt|S)NzOFailed to pull container image. Check that host(s) are logged into the registryz)Pulling image for `command_pull` failed: ) _pull_imagerWinsecurerr-r.rcommand_inspect_image)rrr1r1r2 command_pullNsrc stjd|dddg}|jjd|g}t|jtrV|r@|jdtjjdrV|jdd j |}xvdD]n}t ||t j d \}}|sdSdkrt tfdd|Dstd|tjd||ftj|qfWtd|dS)NzPulling container image %s...z'error creating read-write layer with IDznet/http: TLS handshake timeoutzDigest did not match, expectedpullz--tls-verify=falsez/etc/ceph/podman-auth.jsonz%--authfile=/etc/ceph/podman-auth.jsonr%r%r')rZ unauthorizedc3s|]}|kVqdS)Nr1)rpattern)r*r1r2rvsz_pull_image..zFailed command: %sz8`%s` failed transiently. Retrying. 
waiting %s seconds...z+Failed command: %s: maximum retries reached)r%r'r)r-rrnrrMrr5rrrr(rrrrrrr) rrWrZ ignorelistr+Zcmd_strr=rryr1)r*r2r[s,       rcCstt||jjddd|jg\}}}|r*tjSt|j|j}t||jddgj j}||d<t t j |ddd d S) Nrxz--formatz{{.ID}},{{.RepoDigests}}rPz --versionZ ceph_versionr'T)rk sort_keysr) rrnrrWrENOENTget_image_info_from_inspectrrr%rrrn)rrr*ryZ info_fromverr1r1r2rsr)rr,cCs4dddg}x$|D]}|j|rtd|SqW|S)a Normal case: >>> normalize_image_digest('ceph/ceph', 'docker.io') 'docker.io/ceph/ceph' No change: >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io') 'quay.ceph.io/ceph/ceph' >>> normalize_image_digest('docker.io/ubuntu', 'docker.io') 'docker.io/ubuntu' >>> normalize_image_digest('localhost/ceph', 'docker.io') 'localhost/ceph' z ceph/cephz ceph/daemonzceph/daemon-baser)rDEFAULT_REGISTRY)rZknown_shortnamesrWr1r1r2normalize_image_digests  rcCsV|jdd\}}|s"tdj|dt|i}|rRttt|ddjd|d<|S)Nrr%zinspect {}: empty resultrIr%Z repo_digestsr&)rrrrrrrr)rrWrIZdigestsrr1r1r2rs  r)subnetsr,cCsd}t}g}|jd}x|D]}|j}tjd|sLd}|j|dqytj|j}|j |Wqt k r}z"d}|j|dt |WYdd}~XqXqW|t |dj |fS) aDetermine whether the given string is a valid subnet :param subnets: subnet string, a single definition or comma separated list of CIDR subnets :returns: return code, IP version list of the subnets and msg describing any errors validation errors rrz\/\d+$r%z( is not in CIDR format (address/netmask)z invalid: Nz, )rrrrZr[r5 ipaddress ip_networkrKr{rrDrr)rrcversionserrors subnet_listsubnetrurr1r1r2 check_subnets"    .rcCs$|jdr |jdr |ddS|S)N[]r%r&)rr)rr1r1r2rs rc Cs:y tj|jdkrd|dSWntk r4YnX|S)Nrlrr)r ip_addressrKr)rr1r1r2r s r c CsBt|}ytj|jdkStk r<tjdj|dSXdS)Nrlz%Address: {} is not a valid IP addressF)rrrrKrr-rr)rr1r1r2rs r)ip_addrrr,cCsRdd|jdD}x8|D]0}t|r.t|n|}tj|tj|krdSqWdS)z.rTF)rrrrrr )rrr rrr1r1r2 ip_in_subnetss  r) addrv_argr,cCstjd}g}|}|ddks*|d dkr:td|dxx|dd jdD]b}|j|}|sptd|d |d}tjd d |}|dt| d}|jt|t |qNW|S)z4Parse mon-addrv param into a list of mon end points.z:(\d+)$rrr%rz--mon-addrv value z must use 
square bracketsrz must include port numberz^v\d+:rVr&r&) rZrRrrfindallrSrr5r9rE)rr addrv_argsaddr_argrhasportport_strbase_ipr1r1r2parse_mon_addrvs  r)mon_ipr,cCsttjd}g}|j|}|rP|d}|dt| d}|jt|t|n |jt|d|jt|d|S)z1Parse mon-ip param into a list of mon end points.z:(\d+)$rr%i i)rZrRrrr5r9rE)rrrrrrr1r1r2 parse_mon_ips  r)addrvr,cCst|dkrtdddd}g}xT|D]L}|j|krB||j}nd}tjd||j|d|jd|jq(Wdjd j|}|S) zCConvert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]r$z9Detected a local mon-addrv list with more than 2 entries.Zv1Zv2)ii z+Using msgr2 protocol for unrecognized port r>z[{0}]r) rrr;r-rr5r:rr)r Z port_to_verZ addr_arg_listrrrr1r1r2build_addrv_params)s     "r!c CsXt|j}|jddsdS|jddjdjd}t|\}}}|rXtd|d|tdd |jd D}td d t |j D}d }x,|D]$} | |krd }qt j d| dqW|std|d|j rt|j |sDtd|j d|nF|jrDt|j} x2| D]*} t| j|std| jd|qWt jd||S)z/Get mon public network from configuration file.rxpublic_networkN"rzInvalid public_network z parameter: cSsg|] }|jqSr1)r)rrr1r1r2rHsz+get_public_net_from_cfg..rcSsg|] }|dqS)rr1)rrr1r1r2rIsFTzThe public CIDR network z/ (from -c conf file) is not configured locally.z#None of the public CIDR network(s) z+ (from -c conf file) is configured locally.zThe provided --mon-ip z* does not belong to any public_network(s) zThe provided --mon-addrv z- ip does not belong to any public_network(s) z1Using mon public network from configuration file )rDr#rzrrrrrrrrqr-rrr mon_addrvrr:r.) rrCr"r rerr_msgZconfigured_subnets local_subnetsZvalid_public_netnetrr r1r1r2get_public_net_from_cfg;s4     r()rmon_epsr,c Csg}xt|jD]\}}g}x(|jD]\}}|jdd|Dq(Wx|D]|}y.zMon IP `z` is in CIDR network ``z&Cannot infer CIDR network for mon IP `z` : NzHCannot infer CIDR network. 
Pass --skip-mon-network to configure it laterz:Inferred mon public CIDR from local network configuration r)rrqr rrrr:r5r-rrrrr.rrr) rr)Z mon_networksr'ZifacesZ local_ipsrrZmon_eprr1r1r2infer_mon_networkas"  2  r+cCsd}g}d}|jr@t|j}|r,t|j|_t|j}t|}n0|jrh|jjddk}t|j}|j}ntd|rx|D]}t ||qzWt j d|d|d}|j st |pt||}|||fS) z%Get mon public network configuration.FrVrr%z$must specify --mon-ip or --mon-addrvzBase mon IP(s) is z, mon addrv is N)rrr rr!r$rrrrr-r.Zskip_mon_networkr(r+)ripv6rr$Z end_point mon_networkr1r1r2prepare_mon_addresses}s,      r.c Csd}t|j}|j}|dkr@|jddr@|jddjdjd}|rtdd|jdD}td dt|j D}x&|D]}||kr|t j d |d q|Wt |\}}} |rt d | d |krdnd}n t jd||fS)NFrxcluster_networkr#rcSsg|] }|jqSr1)r)rrr1r1r2rsz+prepare_cluster_network..rcSsg|] }|dqS)rr1)rrr1r1r2rszThe cluster CIDR network z is not configured locally.z%Invalid --cluster-network parameter: rlTznInternal network (--cluster-network) has not been provided, OSD replication will default to the public_network)rDr#r/rzrrrrrrqr-rrrr) ripv6_cluster_networkrCr/Z cluster_netsr&r'r r r%r1r1r2prepare_cluster_networks"   r1)rrrmgr_idr,c Cs|j}tjdt||ddgdjj}t||ddgdjj}t||ddgdjj}d||||f}td|d||} t|||} |||| | fS)NzCreating initial keys...z/usr/bin/ceph-authtoolz--gen-print-key)rWrYroz[mon.] 
key = %s caps mon = allow * [client.admin] key = %s caps mon = allow * caps mds = allow * caps mgr = allow * caps osd = allow * [mgr.%s] key = %s caps mon = profile mgr caps mds = allow * caps osd = allow * z[client.admin] key = r)rWr-rrr%rr) rrrr2Z_imagemon_key admin_keymgr_keyrN admin_keyringbootstrap_keyringr1r1r2create_initial_keyss2   r8)rrrrmon_idmon_addrr,c Csvtjdtddd}t||jdddd|d||d g|jd id j}tjd |d |d|tj |j |||S)NzCreating initial monmap...rVrz/usr/bin/monmaptoolz--createz --clobberz--fsidz--addvz /tmp/monmapz /tmp/monmap:z)rWrYrorzmonmaptool for r%z on ) r-rrrrWrr%r.rr;r:)rrrrr9r:monmaprr1r1r2create_initial_monmaps  r<)rrrrr9bootstrap_keyring_path monmap_pathr,c Cstjdt||d|||t||jd|}t||j}t||jddd|d|ddd d d d g t ||d||d |d||d|didj } tj d|d| ||fS)NzCreating mon...rz/usr/bin/ceph-monz--mkfsz-iz--fsidz-cz /dev/nullz--monmapz /tmp/monmapz --keyringz /tmp/keyringz/var/log/ceph:zz/var/lib/ceph/mon/ceph-%s:zz/tmp/keyring:zz /tmp/monmap:z)rWrYrorz create mon.z on ) r-rrgrrZrr\rrWr r%r.) rrrrr9r=r>rQr\rr1r1r2prepare_create_mon s*  r?)rrrrr9r,c Cs6t||d|}ddi|_t||d||||ddd dS)Nr service_name)r#rN)rAr(rS)rrrrr9Zmon_cr1r1r2 create_mon/s rA)rr9rQadmin_keyring_pathrr,c sNtjdtjddg|d||d|didfdd }td |dS) NzWaiting for mon to start...z /usr/bin/cephr!z/var/lib/ceph/mon/ceph-%s:zz%/etc/ceph/ceph.client.admin.keyring:zz/etc/ceph/ceph.conf:z)rWrYrorcs8jr jnd}tjj|tjd\}}}|dkS)Nr)r`rerr)rer(rYrYrr)rerr*ry)rrr1r2is_mon_availableNs  z&wait_for_mon..is_mon_availabler)r-rrrWrA)rr9rQrBrrCr1)rrr2 wait_for_mon:s   rD) rrrrr2r5r#clifuncr,c stjdd||f}t|d|} ddi_tddtddg} jsX| jtddt|d|| ||||| d tjd fd d } td| dS) NzCreating mgr...z[mgr.%s] key = %s rr@z0.0.0.0iC$i="i )r#rNrzWaiting for mgr to start...csxjr jnd}y4dddg|tjd}tj|}|jdijddStk rr}ztjd |dSd}~XnXdS) Nrr!z-fz json-pretty)rerZmgrmaprwFzstatus failed: %s) rerrrrrr=r-r.)rerr'r)rErr1r2is_mgr_availablens   z$create_mgr..is_mgr_available) r-rrAr(r9skip_monitoring_stackr5rSrA) rrrrr2r5r#rEZ mgr_keyringZmgr_crrFr1)rErr2 create_mgrZs      
rH)rcliwait_for_mgr_restartr,c !Csh|dd|jg|jrDtjdt|jjdi}|ddddg|d|jr|jrtjd t|jjd t|jjd i}|dd dd g|d|ddddg|d|ddg}t||jn|jo|j rtjdt|jjd t|j jdi}|dd dd g|d|ddddg|dn`tjd|ddg|ddg}t |j d}|j |WdQRXtjd|j t||jt }tjd|yXddd|g}|jr|jt|jn&|jrt|j}|jt|dj||Wn4tk r} ztd|| fWYdd} ~ XnXxRd0D]J} |jsHtjd!| |dd"| gntjd#| |dd"| d$gqW|jstjd%|dd"d&g|jsxbd1D]Z} tjd!| y|dd"| gWn2tk rtj |_tjd,| d-YnXqW|jrdxbd2D]Z} tjd!| y|dd"| gWn2tk r\tj |_tjd,| d-YnXqWdS)3Nrzset-userzUsing provided ssh config...z/tmp/cephadm-ssh-config:zzset-ssh-configz-iz/tmp/cephadm-ssh-config) extra_mountszUsing provided ssh keys...z/tmp/cephadm-ssh-key:zz/tmp/cephadm-ssh-key.pub:zz set-priv-keyz/tmp/cephadm-ssh-keyz set-pub-keyz/tmp/cephadm-ssh-key.pubz get-pub-keyz2Using provided ssh private key and signed cert ...z/tmp/cephadm-ssh-key-cert.pub:zzset-signed-certz/tmp/cephadm-ssh-key-cert.pubzGenerating ssh key...z generate-keyr6zWrote public SSH key to %szAdding host %s...orchrr{rzFailed to add host <%s>: %srrz.Deploying %s service with default placement...applyz!Deploying unmanaged %s service...z --unmanagedz1Deploying crash service with default placement...r ceph-exporterrr node-exporterrzFailed to apply service type zA. 
Perhaps the ceph version being bootstrapped does not support itrr)rr)rNrrrOr)rr)ssh_user ssh_configr-rrGrssh_private_keyssh_public_keyauthorize_ssh_keyssh_signed_certroutput_pub_ssh_keyrrrr5rr$rr:rrZorphan_initial_daemonsrGrEINVAL error_coderZwith_centralized_logging) rrIrJrTZssh_pubrrrorrrr1r1r2 prepare_ssh~s~              "      rY)rIrJr,cCs>tjd|ddddg|tjd|ddd dgdS) NzEnabling cephadm module...rmodulercrz*Setting orchestrator backend to cephadm...rLrZbackend)r-r)rIrJr1r1r2enable_cephadm_mgr_modules   r[)rrrrIrJr,c Cst|ddddt|jgtjd|ddddg||jr|jrtjd t|jjd t|jjd i}|dd d dg|d|ddd dg|dntjd|ddgtjd|jpt }t |||}dd|j d dddg}|j s|j d||t|jdidtjd|ddddg} t| } d|ko6|jsVt|} | j| g| jtjdt| |j |fdS)Nr#rrzmgr/dashboard/ssl_server_portz Enabling the dashboard module...rZrcZ dashboardz'Using provided dashboard certificate...z/tmp/dashboard.crt:zz/tmp/dashboard.key:zzset-ssl-certificatez-iz/tmp/dashboard.crt)rKzset-ssl-certificate-keyz/tmp/dashboard.keyz1Generating a dashboard self-signed certificate...zcreate-self-signed-certzCreating initial admin user...zac-user-createz/tmp/dashboard.pwZ administratorz--force-passwordz--pwd-update-requiredz/tmp/dashboard.pw:zz!Fetching dashboard port number...rr|z[Ceph Dashboard is now available at: URL: https://%s:%s/ User: %s Password: %s )rDZssl_dashboard_portr-rZ dashboard_keyZ dashboard_crtrGrZinitial_dashboard_passwordrqrZinitial_dashboard_userZdashboard_password_noupdater5rEr|rKrLrNr]) rrrrIrJrTpasswordZtmp_password_filer+rr;rRr1r1r2prepare_dashboards<            r])rrr:rWr,cCst|j}|jds|jd|jdd||jdd||jdd||jds\|jd|jdd r|jdd r|jddd|jr0tjd |jdd  r|jdd  r|jdd d |jdd  r|jdd r|jdd d|jds|jd|jdd r0|jdd r0|jddd|j r|jddd|jddd|jddd|jddd|jddd|jdddt }|j ||j }|j s|jrt||S)NrxrZmon_hostcontainer_imagerZ%auth_allow_insecure_global_id_reclaimz%auth allow insecure global id reclaimrz9Adjusting default settings to suit single-host cluster...Zosd_crush_chooseleaf_typezosd crush chooseleaf typerZosd_pool_default_sizezosd pool default sizerrZmgr_standby_moduleszmgr 
standby modules log_to_filer log_to_stderrrkZmon_cluster_log_to_fileZmon_cluster_log_to_stderrZmon_cluster_log_to_journald)rDr#rZ add_sectionrrzZsingle_host_defaultsr-rr_rrr registry_json registry_urlcommand_registry_login)rrr:rWrCZcpfr#r1r1r2prepare_bootstrap_configsJ         rd) rrr#r9rQr-r,rIr/r0r,c Cs|jstjd|dddd|g|d|itjd|ddd d|g|d|it|d d } | j}WdQRXtjd t|d dt|d|gn$d|kr|jr|dddd|jg|rt|j } | j ddrdnd} tjd|d| d|dd| d|g|r6tjd||dddd|g|sB| r^tjd|dddddgt|j d} | j |WdQRXtjd|j dS)Nz.Assimilating anything we can from ceph.conf...r#zassimilate-confz-iz /var/lib/ceph/mon/ceph-%s/configz/var/lib/ceph/mon/ceph-%s:zz#Generating new minimal ceph.conf...zgenerate-minimal-confz-oz/configrzRestarting the monitor...rrGrrWrrxr^r"zSetting public_network to z in z config sectionzSetting cluster_network to r/z$Enabling IPv6 (ms_bind_ipv6) bindingZ ms_bind_ipv6rr6zWrote config to %s) no_minimize_configr-rrrrrrWrDr#rz output_configr) rrr#r9rQr-r,rIr/r0rrCZ cfg_sectionr1r1r2finish_bootstrap_configSsF         rg)rr,c Cs>g}g}xB|D]:}tjd|r2|r,|j|g}q|j}|r|j|qW|rZ|j|g}x|D]}x|D]}d|kry,|jd\}}|j}|dkr|j|WnBtk r}z&dj|} tjd| d|WYdd}~XnXPdj|} tjd| qnWqdWg} x&|D]} t | } | r| j| qW| S) Nz^---\s+Z service_typer>rrz'Failed to pull service_type from spec: z . Got error: z)Failed to find service_type within spec: ) rZr[r5rrrrr-r_extract_host_info_from_spec) rZspecsZ current_specr9Z host_specsspecrr`rspec_str host_dictsr host_dictr1r1r2$_extract_host_info_from_applied_specsB       (  rm) host_specr,cCsi}x|D]}xd D]x}||kry"|jd\}}|j}|||<Wqtk r}z,dj|}tjd|d|d|WYdd}~XqXqWq Wd|krdj|}tjd|iS|S) Nr^rr>rzError trying to pull z from host spec: z . Got error: z&Could not find hostname in host spec: )r^r)rrrrr-r)rnrlr9fieldrZ field_valuerrjr1r1r2rhs     6 rh)r host_infobootstrap_hostnamer,c Cst}|jr|jj}||dkrd|kr0|d}n|d}t|dd|jddd|dd |j|fg \}}}|rtjd |d|fd Stjd |d|fd S)Nr^rZsudoz-uz ssh-copy-idz-fz-iz-o StrictHostKeyChecking=noz%s@%sz2 Copying ssh key to host %s at address %s failed! 
r%z&Added ssh key to host %s at address %sr)CEPH_DEFAULT_PUBKEYrSrr(rPr-rr)rrprqZssh_keyrrr*r)r1r1r2_distribute_ssh_keyss  .rs)rrrrr,cCsttdddd}|jd|dt}t|||ttjj|rtj d|d||j tjj |t ||j tjj |ttjj|jr||jtjj |tntjd|dS) z5Save cluster configuration to the per fsid directory N)rrr,cSs|rtj||dS)N)rr)rrr1r1r2 copy_filesz&save_cluster_config..copy_filerz Saving cluster configuration to z directoryz.Cannot create cluster configuration directory )rDrZrrerrrrr-rrfrroutput_keyring CEPH_KEYRINGrV CEPH_PUBKEYr)rrrrrtZconf_dirr1r1r2save_cluster_configs rxcs&tttdfdd }tt|S)z )rr,csy|Stk rYnvttfk r}zVtjt|jd||jrltjdt |dddntjd|j dWYdd}~XnXdS)Nz: a *************** Cephadm hit an issue during cluster installation. Current cluster files will be deleted automatically, to disable this behaviour do not pass the --cleanup-on-failure flag. In case of any previous broken installation user must use the following command to completely delete the broken cluster: > cephadm rm-cluster --force --zap-osds --fsid for more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster *************** F) keep_logszap_osdsa *************** Cephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically to change this behaviour you can pass the --cleanup-on-failure. 
To remove this broken cluster manually please run: > cephadm rm-cluster --force --fsid a, in case of any previous broken installation user must use the rm-cluster command to delete the broken cluster: > cephadm rm-cluster --force --zap-osds --fsid for more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster *************** ) rKeyboardInterruptr=r-rr`r/Zcleanup_on_failurer _rm_clusterr)rr)r*r1r2 _rollback s  zrollback.._rollback)rryr rr!)r*r}r1)r*r2rollbacksr~c$:sHd_jstjjjt_js6tjjjt_j sNtjjjt _ t j t j k r~t j t jk r~tdt j rt jrtdjrtjjjj}tjj|rtdjdn tjdjpt_j}t|std|xjjj gD]}js>tjj|r>td|tjj|}|rtjj| rtjj|}tjd |d |ytj|d Wn&tk rtd |d YnXqWt\}}jdkrt j!st"n tjdtjd|t#}d|kr>j$ r>td||j%ddfj&pJt'} j(pXt)} t*|} | j+t,\} } }t-\}}t.|| j/}j0syt1j/Wn8t2k rd}tj3d|d|t|YnXt4j/ddgj5j6}tjd|j7sJ|j%d}|t8t9gkrJtd|dt8dtjdt:\}}t;||| \}}}}t<|||| | }t=|||| |j>|j>\}t?|d||fd}|j@|WdQRXtA|||tB|||| tC|||itDtEjFffd d! tG| |j>j>tH||| ||| || t?j}|j@d"|d#WdQRXtjd$jtI|||| |||rtJjKdd%.}|j@|jLd&d'd(d)d*g|j>d+iWdQRXdd,fd-d. }tM|jNs tO|jPrJjQrJjRrJjPjQjRd/}d0d1d2tSjT|gd'd1d3d4tUjVd5gjWs|tX|||jtYkrjZ rj[ rtjd6y,d7d8d1d9d:gd7d;dgWn t\k rtjd?YnXj]rtjd@j]t^j]>}t_|} x.| D]&}!jr>tjdAn t`|!|q&WWdQRXi}"dB|"taj]<y"d7dCd)dDg|"dE}#tj|#Wn0t\k rtbjc _tjdFj]YnXtd|||tjdGd'd1dHdIdJgd0d1dKdLgtjdMtejfd|jjftjdNtejfdtjdOtjdPjS)QNrz--ssh-private-key must be passed with either --ssh-public-key in the case of standard pubkey authentication or with --ssh-signed-cert in the case of CA signed signed keys or not provided at all.a--ssh-public-key and --ssh-signed-cert are mututally exclusive. --ssh-public-key is intended for standard pubkey encryption where the public key is set as an authorized key on cluster hosts. 
--ssh-signed-cert is intended for the CA signed keys use case where cluster hosts are configured to trust a CA pub key and authentication during SSH is done by authenticating the signed cert, requiring no public key to be installed on the cluster hosts.zA cluster with the same fsid 'z' already exists.zkSpecifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.znot an fsid: %sz@%s already exists; delete or pass --allow-overwrite to overwritezCreating directory z for izUnable to create zS due to permissions failure. Retry with root, or sudo or preallocate the directory.rootzSkip prepare_hostzCluster fsid: %srz~hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostnamezFailed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentialszPulling image for bootstrap on z failed: rPz --versionzCeph version: r'zContainer release z != cephadm release z]; please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)z4Extracting ceph user uid/gid from container image...z/config)r3csZdjdjdi}x|jD]\}}|||<qW|p:j}tjd||dj||dS)Nz/var/log/ceph:zz%/etc/ceph/ceph.client.admin.keyring:zz/etc/ceph/ceph.conf:zz /usr/bin/ceph)rWrYror)rer)rrqrerrWr%)r+rKrerrTrtru)r6rr\rPr1r2rIs   zcommand_bootstrap..cliz[client.admin] key = rzWrote keyring to %s) bufferingzutf-8r#zassimilate-confz-iz/var/lib/ceph/user.confz/var/lib/ceph/user.conf:z)r,c snytfdd}Wn$tk r8tfdd}YnX|dtjdfdd}td|dS) NcsddgtjdS)NrrB)r)rrr1)rIr1r2rszAcommand_bootstrap..wait_for_mgr_restart..csddgtjdS)Nrr)r)rrr1)rIr1r2rsepochz!Waiting for the mgr to restart...csXy$dddg}tj|}|dkStk rR}ztjd|dSd}~XnXdS)NtellrZ mgr_statusZ mgrmap_epochztell mgr mgr_status failed: %sF)rrr=r-r.)rr'r)rIrr1r2mgr_has_latest_epochs  
zMcommand_bootstrap..wait_for_mgr_restart..mgr_has_latest_epochz mgr epoch %d)r>r=r-rrA)r'r)rIr)rr2rJs  z/command_bootstrap..wait_for_mgr_restart)rusernamer\z config-keyrz mgr/cephadm/registry_credentialsrzmgr/cephadm/container_initz--forcezBEnabling client.admin keyring and conf on hosts with "admin" labelrLzclient-keyringz client.adminz label:_adminrlabelr{Z_adminz>Unable to set up "admin" label; assuming older version of CephzApplying %s to clusterzHKey distribution is not supported for signed CA key setups. Skipping ...z/tmp/spec.yml:rorMz /tmp/spec.yml)rKz Applying %s to cluster failed! z'Enabling autotune for osd_memory_targetrZosd_memory_target_autotunerzmgr/dashboard/cluster/statusZ INSTALLEDzYou can access the Ceph CLI as following in case of multi-cluster or non-default config: sudo %s shell --fsid %s -c %s -k %s zLOr, if you are only running a single cluster on this host: sudo %s shell zPlease consider enabling telemetry to help improve Ceph: ceph telemetry on For more information see: https://docs.ceph.com/en/latest/mgr/telemetry/ zBootstrap complete.)grXrfrrrZ output_dirrrurvrVrwrPrRrSrUrrrZrrr-rrurZallow_overwritedirnamerrrePermissionErrorr/rPcheck_ssh_connectivityZskip_prepare_hostcommand_prepare_hostrZallow_fqdn_hostnamerr9r[r2rnrrr.r1rdrW skip_pullrrr.rr%rZallow_mismatched_releaseDEFAULT_IMAGE_RELEASErrr8r<r?rrrrrArrdrrrDrgrHrrrr[Zskip_sshrYrbregistry_usernameregistry_passwordrrnrDrmZskip_dashboardr]rVZskip_admin_labelrer=Z apply_specrrmrsrGrrWrxrr)$rrrrrrCZ user_confrr^r9r2rrr,r-r/r0r#rZ image_verZ image_releaserrr3r5r4r7r;rQtmprJZregistry_credentialsrkrrTrr1)r6rIrr\rPr2command_bootstrap*s                                 rcCs|jrtjd|jt|j}|jdrx|jdrx|jdrx|jd|_|jd|_|jd|_t||j|j|jqt dn0|jr|jr|jrt||j|j|jnt ddS)Nz+Pulling custom registry login info from %s.rrr\zjson provided for custom registry login did not include all necessary fields. 
Please setup json file as { "url": "REGISTRY_URL", "username": "REGISTRY_USERNAME", "password": "REGISTRY_PASSWORD" } zInvalid custom registry arguments received. To login to a custom registry include --registry-url, --registry-username and --registry-password options or --registry-json optionr) rar-rr"rrbrrregistry_loginr)rrr1r1r2rc1s     rc)rrrr\r,c CstjdyX|j}|jdd|d||g}t|tr:|jdt||\}}}t|tr`tj dt Wn(t k rt d|j |jfYnXdS)NzLogging into custom registry.Zloginz-uz-pz%--authfile=/etc/ceph/podman-auth.jsonz/etc/ceph/podman-auth.jsonzAFailed to login to custom registry @ %s as %s with given password)r-rrnrrMrr5rrro DEFAULT_MODEr=rrbr)rrrr\rr+rrr1r1r2rKs    rcCs|dkrt|dd\}}n|dkr,d\}}nz|dkrFt|dd\}}n`|dkr`t|d d\}}nF|d krzt|d d\}}n,|d krt|d dgd\}}ntdj|||fS)Nrz/etc/prometheus)rz node-exporterrz/var/lib/grafanarz /etc/lokirz /etc/promtailrz/etc/alertmanagerz{} not implemented yet)rr)rrr)rr rrr1r1r2r>]s r>c st|||||||}d|kr0|jr0|jj|jd|krL|jrL|jj|jt|}|rddg} x\|D]Ttfdd| Drfd} tj j |j |d|d|tj j | } | |j | <qfW|S) Nextra_container_argsextra_entrypoint_argsrrrc3s|]}|kVqdS)Nr1)rrt)r}r1r2rsz+get_deployment_container..rr)rArr9r rrorrrrrrZrr) rrr rrr<r9rrrrrrr1)r}r2rtqs&  rt)rr rr,cCstj}|jrtj}t|j||}t||\}}}|dksRt|tj ||j||drb|tjkrbtj }t j |j d|jd|S)Nrrz daemon z ...)rQrRZreconfigrTrrrrrr?rSr-rrr)rr rrFrrrr1r1r2get_deployment_types" rcCs t|dS)N)_common_deploy)rr1r1r2command_deploysrc Cs^d}d|kr|jr|j}|dkr.tjtj}n t|d}tj|}WdQRXtjd||S)z:Read a JSON configuration based on the `ctx.source` value.r"rrbNzLoaded deploy configuration: %r)rrrrr&rr-r.)rr config_datar@r1r1r2read_configuration_sources  r)rrr,cCs|d|_|jdd}|r ||_d|kr2|d|_d|krD|d|_d|krV|d|_t}t||j|x@|jdij D],\}}||j krt j d||t |||qWt|t jd |jd S) zBind properties taken from the config_data dictionary to our ctx, similar to how cli options on `deploy` are bound to the context. 
rrWrVrr r)paramszunexpected parameter: %r=%rzDetermined image: %rN)rrrWrr(r)ArgumentFacade_add_deploy_parser_argsrMrqdefaultsr-rrsrr.)rrrWZfacaderrr1r1r2apply_deploy_config_to_ctxs&       rcCst|}t||t|dS)zThe deploy-from command is similar to deploy but sources nearly all configuration parameters from an input JSON configuration file. N)rrr)rrr1r1r2command_deploy_froms rcCsp|jjdd\}}|tkr(td|t||j}|jt|||}t||jt |}t |||||dS)Nrr%zdaemon type %s not recognized) rrrrrrrrrr_dispatch_deploy)rr rrrFrr1r1r2rs     r)rr rdaemon_endpointsrFr,cs|tjkrt|\}t|\}}t||j||t|}t||j|||jd} |dkr|dk rd|kr|d} | j d| g| _ t ||j||| ||||j ||d n:|t j krt|t j |jdt} t j |jdt} | r& s tfdd | D r&td j|jd j| | rh sNtfd d | D rhtd j|jd j| t||\}}t||j||} t ||j||| ||||d n<|tjkr.|tjkr| rttjj} dd| D}t|\}t|\}}t||j||} t ||j||| |||||d n|tjkrt|\}t|\}}t||j||} t ||j||| |||||d n\|tjkrt|\}d\}}t||j||} t ||j||| |||||d n|tj krd\}}t||j||} t ||j||| ||||d n|t jkrvt j!||j|}|j"\}}t||j||} t ||j||| ||||d nl|t#jkrt#j!||j|}|j$\}}t||j||} t ||j||| ||||d n|t%jkrLt%j!||j|}|tjkr |j&dd|j'Dt||j|||j(|jd} t ||j||| |j)|j*dd||d n|t+jkrt,j-}t,j.}t ||j||d||||d n\|t/jkrt/j!||j|}t||j||} t ||j||| |j)|j*||d ntdj|dS)N)r<rZcrush_locationz--set-crush-location)r#rNrErFrzconfig-json-fileszconfig-json-argsc3s"|]}|jdijkVqdS)rLN)rr)rr)r#r1r2rsz#_dispatch_deploy..zI{} deployment requires config-json which must contain file content for {}z, c3s|]}|jkVqdS)N)r)rr)r#r1r2rsz@{} deployment requires config-json which must contain arg for {})rFrcSsg|]}td|qS)z0.0.0.0)r9)rrFr1r1r2r*sz$_dispatch_deploy..)r#rNrFrcSsg|]}td|qS)z0.0.0.0)r9)rrFr1r1r2r_s)rr<)rrr#rNrFrz9daemon type {} not implemented in command_deploy function)rr)rr)0rrr/rrrrrtZ allow_ptracerorSrErr2rrrrr capitalizerr>rFr rQrRr+rrirwrrArrrrrrr rrrrrrgetuidgetgidr)rr rrrFrNrrrrZc_locr^Z required_argsZ nfs_portsrrrrr1)r#r2rs       ""                              rcCs8|jjdd\}}t||j||}|j}t|||jS)Nrr%)rrrArrYr;re)rr rrr(r1r1r2 command_runsrc Cst|j}|jddr0|jdd|jkr0td|jr`d|jkrT|jjdd\}}qh|j}d}nd}d}|jr|tj krt ||j|r|j 
rtd|j s|j d|jdt dt}tjj|r||_ ntjjtrt|_ d g}t||j|||jrd nd d }t||j||}|jr.d |t|j<|j rDd|t|j <|jrx|jD]x}|jd} t| d} tjj| d} t| dkr| d} t| dkrdj| | d} | || <ndj| || <qTW|jr|j} ndg} |dddddtg7}|jrtjj|j |jd}tjj|stjd|t|dddtjjdrx@tj dD]2}|j!dr\t"j#tjjd|tjj||q\Wd||<x(|j$D]}|jdd\}}|||<qWt%||j&d g||||j'd d! }|j(| } |j)rt*d"jd#d$| DdSt+|| |j,S)%Nrxrzfsid does not match ceph.confrr%rz#must pass --fsid to specify clusterrz-iTF)r7z/etc/ceph/ceph.conf:zz/etc/ceph/ceph.keyring:zr>rr&z{}:{}r$z/mnt/{}rz-tz-ezLANG=CzPS1=%shomezCreating root home at %siz /etc/skelz.bashz/root doesnotmatter)rWrYror9rrrWrr%css|]}tj|VqdS)N)rTrU)rargr1r1r2rsz command_shell..)-rDr#rzrrrrrrrrrNrZrrvrrrCEPH_DEFAULT_KEYRINGrUrmrGrqrrrr( CUSTOM_PS1rr-r.rerrrrrrrWrhrdry_runrr;re)rrCr rZ keyring_filer9rTrkZ_mountZ split_src_dstrqrrr(rrrarVbrr1r1r2 command_shells               rc Cs|jstd|jjdd\}}dg}|jr4|j}ndg}|ddddd tg7}t||jd |d |j||fd }|j|}t |||j S) Nz#must pass --fsid to specify clusterrr%z-ishz-tz-ezLANG=CzPS1=%srz ceph-%s-%s.%s)rWrYr9ra) rrrrr(rrrWrr;re)rr rr9r(rr1r1r2 command_enters& rcCst|j}|jddr0|jdd|jkr0td|jrVt||jt||j}|jd \}}t ||jdd}d}d}t |\}} |rt |||}d||j <| rt | ||}d||j <t ||j|j|d} t|| jtjd \} } } | st| dS) Nrxrzfsid does not match ceph.confrrz/etc/ceph/ceph.conf:zz*/var/lib/ceph/bootstrap-osd/ceph.keyring:z)rWror)r)rr)rDr#rzrrrrrrrUr/rrr8rhr(rrYrrr)rrCrrrrTrPrOr#rNrrr*r)r1r1r2rs6         rcCsB|jstdt||j|j}t|d|j|gtjdd\}}}|S)Nz#must pass --fsid to specify clusterrrV)rr`)rrrrr(r(rr)rrrr)r1r1r2 command_unitAs rcCst|jstdt||j|j}tdg}|jd|g|jrH|j|jtjddj |t j |t j jddS)Nz#must pass --fsid to specify clusterZ journalctlz-uzRunning command: %sr%)rh)rrrrrr r(r-r.rr-r(rr/r)rrr+r1r1r2 command_logsTs  rcCst|}|jt||S)N)_list_ipv4_networksr6_list_ipv6_networks)rresr1r1r2ris rcCs8td}|stdt||ddgtjd\}}}t|S)Nr:zunable to find 'ip' commandrouter)r)rr.rrr_parse_ipv4_route)rexecstrrrr1r1r2rws r)rr,cCsi}tjd}x|jD]}|j|}|s,q|dd}d|krH|d7}|dd}|dd}||krpi||<|||krt|||<|||j|qW|S)Nz;^(\S+) (?:via \S+)? 
?dev (\S+) (.*)scope link (.*)src (\S+)rrz/32r%r')rZrRr8rrr{)rrrFr9mr'ifacer:r1r1r2rs"      rcCsZtd}|stdt||dddgtjd\}}}t||dddgtjd\}}}t||S)Nr:zunable to find 'ip' commandz-6rr)rr)rr.rrr_parse_ipv6_route)rrroutesripsr1r1r2rs r)rrr,c sZi}tjd}tjd}tjd}x|jD]}|j|}| s,|ddjdkrVq,|dd}d|krr|d7}|dd} | d krq,||kri||<| ||kr,t||| <q,Wd} x|jD]}|j|}|s|j|}|r|ddjd d} q|ddfd d |jD}|r| ||dkr| s.) rZrRr8rrrrrrr{) rrrZroute_pZip_pZiface_pr9rr'rr1)r:r2rs@           rcCs0t|}ttddd}ttj|d|ddS)N)objr,cSst|trt|S|S)N)rMrr)rr1r1r2serialize_setssz-command_list_networks..serialize_setsr')rkr)rr rrrn)rrrr1r1r2command_list_networkssrcCs*t||j |jd}ttj|dddS)N)ryrr')rk)r|Z no_detailrrrrn)rrr1r1r2 command_lss  r)rur,cCs|jdr|dd }n|jdr.|dd }d}|d jdkrTd}|dd }nd|djdkrvd}|dd}nB|djdkrd}|dd}n |djd krd}|dd}tt||S)NZiBr$Br%KiMGTr&r&r&r&ir&r&ii@r&r&ii@lr&)rupperrEr)ruZmultr1r1r2with_units_to_ints$    rc&"Csd}g}|jj}|j}|dk r.tjj||}i}i}i} i} t||ddddgtjd\} } } t| | \}} t||ddddgtjd\} } } t | | \}} tjj |rxntj |D]^}|dCkr|}xHtj tjj ||D]}d |krq|j d d \}}t|||||d }d||f}dd||f|dk r8|nd|d}|rt||\|d<|d<}|sy@t|ddgtjd\} } } | r| jdr| j dd}Wntk rYnX||d<|j|qWqt|rt|}x,tj tjj ||D]}d|krtjjtjj |||r|}|j dd \}}t|||}nqd|||d}|rt||\|d<|d<}d}d}d}d}d}d} t|||||\} } } | s| jj d\}}}}!}t|}|j dd d}t|!} |j|}|sJt||dd |dd!gtjd\} } } | sJtttt| jd dDj d}|||<| s\d|krh|j|d}|t j!krt j"||}|t#j!krt#j"||}|t$j!krt$j"||}q|s|t%j&krt||d"|ddgtjd\} } } | r| jdr| j dd}|||<q|d#krlt||d"|d$dgtjd\} } } | r| jd%r| j dd }|||<q|dEkrt'j"|||}|||<q|d+krt||d"|d+dgtjd\} } } | r| jd,s| jd-r| j dd}|||<n|d.krbt||d"|d.d/gtjd\} } } | r| jd0r| j dd }|dd1krX|d d}|||<n@|t(j!krpn2|t)j!krt)j"|||}|||<nt*j+d2|nVtjj |||d3}"y,t,|"d4}#|#j-jpd}WdQRXWnt.k rYnXtjj |||d5}$y2t,|$d4}#t/j0|#j-}%|j1|%WdQRXWnt.k rTYnX||d6<||d7<||d8<||d9<|r| j|d||d:<| j|d||d;<||d<<| |d=<t2tjj |||d>|d?<t2tjj |||d3|d@<t2tjj |||dA|dB<|j|qWqW|S)FNrz--formatz{{.ID}},{{.MemUsage}}z --no-stream)rz{{.ID}},{{.CPUPerc}}rrrrr"r%)rz ceph-%s@%srz%s.%sr)rrrrrrrPz-vz ceph version r%r$ host_versionrz 
cephadm:v1rrrWrxz{{.RepoDigests}}r$rzgrafana-serverzVersion rr node-exporterrrrzHA-Proxy version zHAProxy version rz --versionz Keepalived ruz"version for unknown daemon type %sz unit.imagerz unit.metarGZcontainer_image_nameZcontainer_image_idZcontainer_image_digestsrZcpu_percentagerKstartedz unit.createdcreatedZdeployedzunit.configuredZ configured)rrrrr&)rrrrr)3rnrrZrr7r(rrr_parse_cpu_percrrrrrrrr=r5rrDrdrrrrrrUrrrrrrFr rrirwrrrrrr-rrrrrrr6rP)&rryrrrrrZZ seen_versionsZ seen_digestsr seen_cpupercrr*r)rseen_cpuperc_cid_lenrar r'rOrrrrVrrrrGrHrIZ image_digestsrKZ start_stamprJZvfilerZmfiler r1r1r2r|sh     $                            r|)r)rr,c Csi}d}|s|xn|jD]b}|jd\}}|jd\}}yt|||<|sPt|}Wqtk rvtjdj|YqXqW||fS)Nrrz / z%unable to parse memory usage line >{})r8rrrrr-rr) r)rrrr9cidusageusedlimitr1r1r2rs   rc Csri}d}|sjx\|jD]P}|jd\}}y|||<|s>t|}Wqtk rdtjdj|YqXqW||fS)Nrrz'unable to parse cpu percentage line >{})r8rrrr-rr)r)rrrr9rZcpupercr1r1r2rs  rcCsHx4t|||dD]"}|d|kr"q|d|kr0q|SWtdj|dS)N)ryrrrz&Daemon not found: {}. See `cephadm ls`)r|rr)rrrryrrr1r1r2rs  r)rrrr rr,c Csftj||||d}d \}}}x>|j|jfD].} |ddd| g} t|| tjd\}}}|s*Pq*W|||fS) NrrVr%rxz--formatz\{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}})rr&)rVrVr&)rr?rarr(rr) rrrr rrrr*r)rr+r1r1r2rs rcCs(|jsHyt||jWn0tk rFd}tjd|t|YnX|jjdd\}}|j dkrrtd|j t ||j |||j d}|stdt ||}|j|tjkrt||||nd|d krt|||nN|d krt|||n8|d krtd n$|d krt|||n td|dS)NaFailed to pull container image. Host may not be logged into container registry. 
Try `cephadm registry-login --registry-url --registry-username --registry-password ` or supply login info via a json file with `cephadm registry-login --registry-json `z*Pulling image for `command_adopt` failed: rr%rz$adoption of style %s not implemented)rz3could not detect legacy fsid; set fsid in ceph.confrrz node-exporterz)adoption of node-exporter not implementedrzdaemon type %s not recognized)rrrWrr-r.rrrrrrOrrrrrcommand_adopt_cephcommand_adopt_prometheuscommand_adopt_grafanacommand_adopt_alertmanager)rrr rrrr1r1r2 command_adopts<       rc@s:eZdZddZddZddZddZd d d d Zd S) AdoptOsdcCs||_||_||_dS)N)r osd_data_dirosd_id)r<rrrr1r1r2r=4szAdoptOsd.__init__cCsd\}}tjj|jd}y4t|d}|jj}WdQRXtjd|Wn"t k rntjd|YnXtjj tjj|jdrttjj|jd}|jj}WdQRXn tjd||fS)NrrzFound online OSD at %szUnable to read OSD fsid from %sr`z$"type" file missing for OSD data dir)NN) rrrrrrrr-rrr)r<rEosd_typerrr1r1r2check_online_osd:s  zAdoptOsd.check_online_osdc Csd\}}t|jdddgd}t|j|j\}}}|sytj|}|j|krtjdj |j||jddd}x>||jD]0}|dd d krd }P|dd d krd }PqWWn2t k r} ztjdj | WYdd} ~ XnX||fS)Nrjrz --format=json)rozFound offline LVM OSD {}rZtagsz ceph.osd_fsidz ceph.typergZ bluestorer7 filestorez(Invalid JSON in ceph-volume lvm list: {})NN) r8rrrYrrrr-rrr) r<rErrrr*r)r!devicerr1r1r2check_offline_lvm_osdNs*   "zAdoptOsd.check_offline_lvm_osdcCsd \}}tdj|j}t|dkrt|dd}yXtj|j}tj dj|j|d}|d}|dkrt |j d |d d |j gWn4t k r}ztj d j||WYdd}~XnXWdQRX||fS)Nz /etc/ceph/osd/{}-[a-f0-9-]*.jsonr%rrzFound offline simple OSD {}rr`rrqr7rzInvalid JSON in {}: {})NN)rrrrrrrrr-rrrrr)r<rErZosd_filerr!rr1r1r2check_offline_simple_osdis  .z!AdoptOsd.check_offline_simple_osdN)r,c Cstjdt|jdddj|jdgd}t|j|j\}}}|rVtd|d|yt j |}|svt d |jd}x$||jD]}|d d kr|}PqW|st d |j|d }t|jddd|jj |g\}}}|rt d|jj d|jd|t|jddd|g\}}}|r}tjj||}tjj|rtjd$|tj|||qWtjjd%d&||f}tjj|r<|d'}tjd||tj||tjd(t|d dd)||fgn"tjd*t|d dd+||fgd,|j}tjj|j|}tjj|d-}t ||g|||dtjd.d/|j||f}tjj|j|}t!||||d}t|t||||dtjd0t"||||t#||||}t$|||||||d1| d kp4|j%|d2 t&||dS)4Nz/var/lib/ceph/%s/%s-%sz]{}.{} data directory '{}' 
does not exist. Incorrect ID specified, or daemon already adopted?rzUnable to find OSD {}rPzobjectstore_type is %srz%FileStore is not supported by cephadmz ceph-%s@%srzStopping old systemd unit %s...rrmz Disabling old systemd unit %s...rzMoving data...)rr*zRemove dir `%s`ZumountzChowning content...rz-cz-Rz%d.%drzstore.dbrz.ldbz.sstzRenaming %s -> %sz'Renamed %d leveldb *.ldb files to *.sstrgblock.db block.walzChowning %s...z /etc/ceph/osdz %s-%s.jsonz.adopted-by-cephadmz/Disabling host unit ceph-volume@ simple unit...z ceph-volume@simple-%s-%s.servicez,Disabling host unit ceph-volume@ lvm unit...zceph-volume@lvm-%s-%s.servicez/etc/ceph/%s.confr#zMoving logs...z/var/log/ceph/%s-%s.%s.log*zCreating new units...T)rcrJrE)rgrr)'rrOrrr7rrrrrrrrrr-rrrrrrrrr.ismountr_rrr'r?rr(rrrrArIZ force_startrJ)rr rrrrZ data_dir_srcrEZ adopt_osdrrrrr data_dir_dststoreZ num_renamedZoldfZnewfZoldpZnewprrFrsZnew_fn config_src config_dstZ log_dir_srcZ log_dir_dstrr1r1r2rs                             rc Cs d}t||\}}tjd}dd|D}t|dt||||||d}d} tjj|j| } tjj |d} t | ||dt || g| ||dd} tjj|j| } tjj |d } t || g| ||dt ||||t||||} t||||| ||tj|d t||dS) NrcSsg|]}td|qS)z0.0.0.0)r9)rrFr1r1r2rsz,command_adopt_prometheus..)rrz/etc/prometheus/prometheus.ymlzetc/prometheusiz/var/lib/prometheus/metrics/r7)rFr)r>rr+_stop_and_disablerrrr7rrrerrrrArSrQrSrJ)rrrr rrrrrrrdata_srcdata_dstrr1r1r2rs*     rc Csd}t||\}}tjd}dd|D}t|dt||||||d}d} tjj|j| } tjj |d} t | ||dt || g| ||dd } tjj|j| } tjj |d} t || g| ||dd } d }tjj | otjj |rd }tjj|j|}t tjj |d ||dtjj |d }t ||g|||dd }tjj|j|}tjj |d}t ||g|||dttjj | dntjdj| |d}tjj|j|}tjj |d}t ||g|||dt||||t||||}t|||||||tj|d t||dS)NrcSsg|]}td|qS)z0.0.0.0)r9)rrFr1r1r2rBsz)command_adopt_grafana..zgrafana-server)rrz/etc/grafana/grafana.iniz etc/grafanaiz/etc/grafana/provisioning/z/etc/grafana/grafana.crtz/etc/grafana/grafana.keyzetc/grafana/certszetc/grafana/certs/cert_filezetc/grafana/certs/cert_keyz grafana.iniz'Skipping ssl, missing cert {} or key 
{}z/var/lib/grafana/r7)rFr)r>rr+rrrrr7rrrerrr_adjust_grafana_inir-r.rrrArSrQrSrJ)rrrr rrrrrrrZprov_srcZprov_dstZcertrZcert_srcZcert_dstZkey_srcZkey_dstrrrr1r1r2r:sN     rc Cs d}t||\}}tjd}dd|D}t|dt||||||d}d} tjj|j| } tjj |d} t | ||dt || g| ||dd } tjj|j| } tjj |d } t || g| ||dt ||||t||||} t||||| ||tj|d t||dS) NrcSsg|]}td|qS)z0.0.0.0)r9)rrFr1r1r2r}sz.command_adopt_alertmanager..zprometheus-alertmanager)rrz /etc/prometheus/alertmanager.ymlzetc/alertmanageriz!/var/lib/prometheus/alertmanager/zetc/alertmanager/data)rFr)r>rr+rrrrr7rrrerrrrArSrQrSrJ)rrrr rrrrrrrrrrr1r1r2rus*     rcCsyt|d}|j}WdQRXt|dd^}d}xR|D]J}|jdrJd}|jdrXd}|rxtjdd|}tjd d |}|j|q8WWdQRXWn2tk r}ztd j ||WYdd}~XnXdS) Nr)r4Frz[server]Tz ^cert_file.*z(cert_file = /etc/grafana/certs/cert_filez ^cert_key.*z&cert_key = /etc/grafana/certs/cert_keyzCannot update {}: {}) r readlinesrrrZrSrrrr)rZ grafana_inir~Zserver_sectionr9r*r1r1r2rs$    rcCs\t||\}}}|dkr6tjd|t|dd|g|rXtjd|t|dd|gdS)NrzStopping old systemd unit %s...rrmz Disabling old systemd unit %s...r)rr-rr)rrrrrr1r1r2rsrc Cst||j}|j|jjdd\}}t||j|j}|dkrN|j rNtdt|dd|gt j dt|dd |gt j dt|dd |gt j d|dkrd |jd |jd}t|dd|gt j dt |j|j ||}|dko|j rJtjj|j |jd}tjj|st|ddtd||tjjjtf}tj|tjj||nt|dd|gt|} dd| D} | ryt|} | j| | jWn4tk r} ztj d| WYdd} ~ XnXdS)Nrr%rrzEmust pass --force to proceed: this command may destroy precious data!rrm)rz reset-failedrrz/var/run/ceph/z /ceph-client.z.*.asokrz-rfrZremovedrz%s.%s_%scSsg|] }|jqSr1)r;)rrr1r1r2rsz%command_rm_daemon..z# Error when trying to close ports: )rr)r)rrr)!rrrrrrrrr(rrrrZZforce_delete_datarrrrrerrJZutcnowrMrNr?rrrKr{rNrr-r) rrr rrZ rgw_asok_pathrZZ backup_dirrrrrRrr1r1r2command_rm_daemonsH            r)rr?r,cCsTt||jdd}t|ddd|g||jd}tjd|dt||j\}}}dS)Nzclusterless-ceph-volumerjZzapz --destroy)rorrWzZapping z...)rUrr8rhr-rrrY)rr?rTrrr*r)r1r1r2_zaps  rc stjdd}tdddg|jd}t|j\}}}|rHtdytj|}Wn0t k r}ztd|WYdd}~XnXxx|D]pfdd j d gD}t |rt |rt j d qt |rd d j d gD} tjd | qWdS)Nzclusterless-ceph-volumeZ inventoryz--formatr)rorrWzfailed to list osd 
inventoryz'Invalid JSON in ceph-volume inventory: cs&g|]}|jdjko jdqS)Z cluster_fsidZ ceph_device)rr)rlv)rrar1r2rsz_zap_osds..ZlvsrcSsg|] }|dqS)rr1)rrr1r1r2rsz#Not zapping LVs (not implemented): )rUrr8rhrrYrrrrrrrrr-r) rrTrrr*r)rrmatchesZlv_namesr1)rrar2 _zap_osdss&   rcCs.|jstdt||j}|jt|dS)NzEmust pass --force to proceed: this command may destroy precious data!)rrrrrr)rrr1r1r2command_zap_osdss  rcCstddtj|jDS)NcSsg|]}t|r|qSr1)r)rrr1r1r2r+sz*get_ceph_cluster_count..)rrrrZ)rr1r1r2get_ceph_cluster_count*srcCs6|jstdt||j}|jt||j|jdS)NzEmust pass --force to proceed: this command may destroy precious data!)rrrrrr|ryrz)rrr1r1r2command_rm_cluster.s  r)rryrzr,c s8jstdtddfdd }tjdjxFtddD]6}|djkrVqB|d d krdqB|tj|d qBWxd jgD] }||qWd jjjdd}t dd|gt j d|rt t ddjdjgt ddjdjgt ddjdjgt ddjdjg|s~t ddjdjgt ddjdjgt ddjdjgtdkr|dt ddjdgt ddjdgt ddjd g|sx$tjd!D]}tj|q Wtjtd"g}x6|D].} x&| jd#jd$D]} | jqPWq6Wd%jd&jd'jg} x| D]} tj| d(d)qWtttg} tjj| dr4d}t | d}j|j!krd(}WdQRX|r4x8t"dt#| D]&}tjj| |r tj| |q WdS)*Nz>must select the cluster to delete by passing --fsid to proceed)rr,csFtdd|gtjdtdd|gtjdtdd|gtjddS)Nrrm)rz reset-failedr)r(rr)r)rr1r2disable_systemd_service=s      z,_rm_cluster..disable_systemd_servicezDeleting cluster with fsid: F)ryrrz cephadm:v1rzceph-%s.targetzsystem-ceph\x2d{}.slicer"z\x2drrm)rrz-fz/ceph-%s@.servicez/ceph-%s.targetz-rfz/ceph-%s.target.wantsrz/*.wants/ceph-%s@*z/ceph-%srz ceph.targetz$/multi-user.target.wants/ceph.targetz /ceph.targetz/cephadmz /cephadm.log*z/usr/lib/sysctl.dz90-ceph-z-*.confz /run/ceph/z /tmp/cephadm-z/var/run/ceph/T)r)$rrrDr-rr|rrr'r(rrrrrbrZr\r^rrrremover r`r>rrrVrrrrrrrrmr)rryrzrrrrarCZ sysctl_dirsr`rFZ ceph_dirsddrLZ valid_fsidrrr1)rr2r|8sl        "  r|cCs4dddddddg}t|||s0tjd|d Sd S) Nzchrony.servicezchronyd.servicezsystemd-timesyncd.servicez ntpd.servicez ntp.servicezntpsec.servicezopenntpd.servicez/No time sync service is running; checked for %sFT)rr-r)rrrr1r1r2check_time_syncs rcCs$g}ddg}yt|}tj|dWn0tk rX}z|jt|WYdd}~XnXxJ|D]B}yt|tjd|Wq`tk 
r|jd|Yq`Xq`Wt|s|jdd|ko|j rt j |j j kr|jdt |j fntjd |j |rtd j |tjd dS) Nrlvcreatez is presentz %s is presentz)%s binary does not appear to be installedz!No time synchronization is activeexpect_hostnamez3hostname "%s" does not match expected hostname "%s"z'Hostname "%s" matches what is expected.z ERROR: z Host looks OK) rr-rrr5rDrrrrrrr)rr commandsrrr(r1r1r2command_check_hosts0   r)rPr,c CsVytj|}Wn tk r.td|YnX|j}|j}tjj|j d}|||fS)Nz$Cannot find uid/gid for ssh-user: %sz.ssh) pwdgetpwnamrrpw_uidpw_gidrrrpw_dir)rPZs_pwdssh_uidssh_gidssh_dirr1r1r2 get_ssh_varssr ) ssh_pub_keyrPr,c CsTtttddd}tjd|d|dks4|jr.key_in_filezAdding key to z@localhost authorized_keys...Nz$Trying to authorize an empty ssh keyiz%s/authorized_keyszkey already in Frrr%rTrV)rDrPr-rrrrr rrrrerseekSEEK_ENDrSEEK_SETrr;r:r<rr) r rPrr r r auth_keys_fileZ add_newlinerr1r1r2rTs6         rT)rrPr,c Cst|\}}}d|}d}tjj|rt|d}|j}WdQRXtj\} } t| dV}tj|j ||tj |j t x,|D]$} | j |j krd}q|j | qWWdQRX|rtj| |n tjddS)z4Revoke the public key authorization for the ssh userz%s/authorized_keysFrNr6Tz%Cannot find the ssh key to be deleted)r rrrrrrmkstempr;r:r<rrrrrr-r) rrPr r r rZdeletedrr~rrr9r1r1r2revoke_ssh_key s"     rcCsttddd}|d s$|d r2tjddSd}d}d}|jrd|jrdt|jj}t|jj}n||jr|jrt|jj}t|jj}nVdt j }|d }dd d d d dddd|g }t ||\}}}|dkrtjddS|rtj dn|rtj dt |d}|jj} WdQRXt| |j} |jrFdt|jjgng} t |d!| d|d|jdtdf\}}}| rt| |j|jrdnd} |jrdnd} |jrdnd}d|jd| | |d }|dkrt|dS)"N)r+r,cSs&tj|dkr"tjd|dSdS)NzCommand not found: FT)rrr-r)r+r1r1r2cmd_is_available$ sz0check_ssh_connectivity..cmd_is_availablesshz ssh-keygenz*Cannot check ssh connectivity. Skipping...rVz /tmp/ssh_key_z.pubz-qz-tZrsaz-Nz-Cz-frz/Cannot generate keys to check ssh connectivity.zLVerification for CA signed keys authentication not implemented. 
Skipping ...zCVerifying ssh connectivity using standard pubkey authentication ...rz-F-o StrictHostKeyChecking=noz-iz-o PasswordAuthentication=norz sudo echoz>- The public key file configured by --ssh-public-key is valid z@- The private key file configured by --ssh-private-key is valid zA- The ssh configuration file configured by --ssh-config is valid zF ** Please verify your user's ssh configuration and make sure: - User z$ must have passwordless sudo access r)rr)rDrPr-rrRrSrGrrUrsrtr(rrrrrTrPrQrrr)rrZssh_priv_key_pathZssh_pub_key_pathZssh_signed_cert_pathZssh_key_gen_cmdrr)rrZnew_keyZssh_cfg_file_argZ pub_key_msgZ prv_key_msgZ ssh_cfg_msgr%r1r1r2r" sR             rcCs>tjdd}y t|WnDtk r^}z(tjt||sFt|}|jWYdd}~XnXtjdtds|s~t|}|j dgtjdt |s|st|}|j dgt ||dd|ko|j o|j t kr(tjd t |j ft |d |j gtd d }|j|j d WdQRXtjdt|dS)Nz%Verifying podman|docker is present...zVerifying lvm2 is present...rZlvm2z-Verifying time synchronization is in place...Zchrony)rrz#Adjusting hostname from %s -> %s...r^z /etc/hostnamer6rz!Repeating the final host check...)r-rrrrrDcreate_packagerinstall_podmanrrrrrrrrr)rpkgrrr1r1r2rb s6         rc@sJeZdZeddddZdejejeee e dfe eddddZ dS) CustomValidationN)rr,c Cs`y|jdd\}}Wn tk r4tj|dYnXt}||kr\tj|djdj|dS)Nrr%zNmust be of the format .. For example, osd.1 or prometheus.myhost.comz,name must declare the type of daemon e.g. 
{}z, )rrrw ArgumentErrorrrr)r<rr rrr1r1r2 _check_name s zCustomValidation._check_name)parserrIr option_stringr,cCs4t|tst|jdkr0|j|t||j|dS)Nr)rMrDrdestrrs)r<rrIrrr1r1r2__call__ s  zCustomValidation.__call__)N) r/rArBrDrrwArgumentParserrxrr r rr!r1r1r1r2r src Csd}d}d}tdd}x|jD]}|j}d|ks"|jdrBq"|jdd\}}|ddkrv|d dkrv|dd }|dkr|j}q"|d kr|j}q"|d kr"|j}q"WWdQRX|||fS) Nz/etc/os-releaserrrr%rr#ZIDZ VERSION_IDZVERSION_CODENAMEr&r&)rrrrrr)distrodistro_versiondistro_codenamerr9varrVr1r1r2 get_distro s$    r'c@seZdZdeeeeeeeeedddZddddZdddd Zddd d Z e edd d dZ ddddZ ee eeeeedddZeeefdddZeddddZdS)PackagerN)rstablerKbranchcommitcCsz|r| r| r| sX| r,|r,| r,| sX| r<| r<|sX| rT| rT| rT| sXt||_||_||_||_||_dS)N)rrr)rKr*r+)r<rr)rKr*r+r1r1r2r= szPackager.__init__)r,cCsdS)z5Validate parameters before writing any state to disk.Nr1)r<r1r1r2r szPackager.validatecCstdS)N)r)r<r1r1r2add_repo szPackager.add_repocCstdS)N)r)r<r1r1r2rm_repo szPackager.rm_repo)rr,cCstdS)N)r)r<rr1r1r2r szPackager.installcCstdS)N)r)r<r1r1r2r szPackager.install_podman)r#r$r*r+r,c Cstjddj||||pdtd}y t|}Wn<tk rl}z tjdtd||fWYdd}~XnXd}y|j}t|} Wn<tk r}z tjdtd||fWYdd}~XnX| j j d S) Nz0Fetching repo metadata from shaman and chacra...zbhttps://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}Zlatest)r#r$r*sha1archz;repository not found in shaman (might not be available yet)z%s, failed to fetch %srVz;repository not found in chacra (might not be available yet)zutf-8) r-rrrgrrrrgeturlrr) r<r#r$r*r+Z shaman_urlZshaman_responser*Z chacra_urlZchacra_responser1r1r2 query_shaman s(    "  "zPackager.query_shamancCs,|jjr|jjdfS|js |jr$dSdSdS)Nmanual*https://download.ceph.com/keys/release.gpgr,https://download.ceph.com/keys/autobuild.gpg autobuild)r3r)r4r5)rZgpg_urlr)rK)r<r1r1r2 repo_gpgkey s   zPackager.repo_gpgkey)servicer,cCst|jddd|gdS)zI Start and enable the service (typically using systemd). 
rrcz--nowN)rr)r<r7r1r1r2r szPackager.enable_service)NNNN)r/rArBryrrDr=rr,r-rrrr r1rr6rr1r1r1r2r( s( r(c seZdZdddZeeeeeeeeeeeeeeedd fdd Zedd d Zddd d Z ddd dZ e eddddZ ddddZ ddddZedddZedddZedddZedddZddddZddd d!ZZS)"Aptubuntudebian)r9r:N) rr)rKr*r+r#r$r%r,c sDtt|j|||||d|s"t||_|j||_||_||_dS)N)r)rKr*r+) rr8r=rr DISTRO_NAMESr#r%r$) r<rr)rKr*r+r#r$r%)rr1r2r=!s  z Apt.__init__)r,cCsdS)Nz!/etc/apt/sources.list.d/ceph.listr1)r<r1r1r2 repo_path!sz Apt.repo_pathcCs&|j\}}tjd|y t|}Wn<tk rb}z tjd||ftdWYdd}~XnX|j}td|d}|j |WdQRX|j rd|j j |j |j f}n6|jrd|j j |j|j f}n|j|j|j |j|j}tjd|jt|jd}|j |WdQRX|jdS) Nz"Installing repo GPG key from %s...z(failed to fetch GPG repo key from %s: %szfailed to fetch GPG keyz"/etc/apt/trusted.gpg.d/ceph.%s.gpgwbzdeb %s/debian-%s/ %s main zInstalling repo file at %s...r6)r6r-rrrrrrrrrKrrepo_urlr%r)r1r#r*r+r<r6)r<rrr0r*rrrr1r1r2r,!s.   z Apt.add_repocCsx8dD]0}d|}tjj|rtjd|tj|qWtjj|jrjtjd|jtj|j|jdkr||jdS) Nr5rr2z"/etc/apt/trusted.gpg.d/ceph.%s.gpgzRemoving repo GPG key %s...zRemoving repo at %s...r9)r5rr2) rrrr-rr>r<r# rm_kubic_repo)r<rrFr1r1r2r-/!s   z Apt.rm_repo)rr,cCs(tjd|t|jdddg|dS)NzInstalling packages %s...zapt-getrz-y)r-rrr)r<rr1r1r2rrB)r<rNrFr1r1r2r?q!s   zApt.rm_kubic_repo)r/rArBr;ryrrDr=r<r,r-rrr6rrArBrCrDr@r?rr1r1)rr2r8 s"$   r8c seZdZdd d!d"d#d$d%d&dZeeeeeeeeeeeeedd fd d Zeed d dZ edddZ edddZ ddddZ ddddZ ddddZeeddddZddddZZS)'YumDnfcentoselfedorafcmarinercm)rFZrhelZ scientificZrockyZ almalinuxZolrHrJN)rr)rKr*r+r#r$r,cstt|j|||||d|s"t|s*t||_t|jdd|_|j|d|_ |j|dt |j|_ |j dkr|jdks|j dkr|jdkrd |_ n|j d krd |_ nd |_ dS) N)r)rKr*r+rrr%rIrlrGrZdnfrKZtdnfZyum) rrEr=rrrErmajorr;distro_normalizedrD distro_codetool)r<rr)rKr*r+r#r$)rr1r2r=!s  zYumDnf.__init__)kwr,c KsRg}d}x>|D]6}|\}}||kr|j|d kr|j||j|qWdj|S)!an Repo files need special care in that a whole line should not be present if there is no value for it. Because we were using `format()` we could not conditionally add a line for a repo file. 
So the end result would contain a key with a missing value (say if we were passing `None`). For example, it could look like:: [ceph repo] name= ceph repo proxy= gpgcheck= Which breaks. This function allows us to conditionally add lines, preserving an order and be more careful. Previously, and for historical purposes, this is how the template used to look:: custom_repo = [{repo_name}] name={name} baseurl={baseurl} enabled={enabled} gpgcheck={gpgcheck} type={_type} gpgkey={gpgkey} proxy={proxy} reponame[%s]rname=%sbaseurl baseurl=%sr enabled=%sgpgcheck gpgcheck=%s_typetype=%sgpgkey gpgkey=%sproxyproxy=%spriority priority=%sNrVrrQrRrrSrTrUrrVrWrXrYrZr[r\r]r^r_r`) rarbrcrdrerfrgrhri)NrV)rr5r)r<rPr~tmplr9tmpl_key tmpl_valuer1r1r2 custom_repo!s zYumDnf.custom_repo)r,cCsdS)Nz/etc/yum.repos.d/ceph.repor1)r<r1r1r2r<!szYumDnf.repo_pathcCsF|js|jst|jr,d|jj|j|jfSd|jj|j|jfSdS)Nz %s/rpm-%s/%s)r)rKrrr>rN)r<r1r1r2 repo_baseurl!s    zYumDnf.repo_baseurlcCs|jjdrtd|jdkrX|jr6|jdkr6td|jrX|jjdddkrXtd |jsd|jr|jd }y t|Wn6tk r}zt j d |td WYdd}~XnXdS) NrIz`Ceph team does not build Fedora specific packages and therefore cannot add repos for this distroZel7ZpacificzsCeph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for itrrZ16zrCeph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for itz/noarch/repodata/repomd.xmlz!unable to fetch repo metadata: %rzcfailed to fetch repository metadata. please check the provided parameters are correct and try again) rNrrr)rKrrnrrr-r)r<Ztest_urlr*r1r1r2r!s      zYumDnf.validatec Cs|js |jrtd}xzddddjD]L\}}|d|7}||jd||jd|d d |jd d 7}|d 7}q"Wn|j|j|j|j |j }t j d |j t|j d}|j|WdQRX|jjdrt j dt|j|jdddgdS)NrVz $basearchnoarchSRPMS)rz Ceph-noarchz Ceph-sourcez[%s] zCeph %srr%r)rrTrrWr[z zWriting repo to %s...r6rGzEnabling EPEL...rz-yz epel-release)r)rKrqrmrnr6r1rMrLr*r+r-rr<rrrNrrrrO)r<rrrrr1r1r2r,!s.     
zYumDnf.add_repocCs"tjj|jrtj|jdS)N)rrrr<r>)r<r1r1r2r-"szYumDnf.rm_repo)rr,cCs*tjd|t|j|jddg|dS)NzInstalling packages %s...rz-y)r-rrrrO)r<rr1r1r2r"szYumDnf.installcCs|jdgdS)Nr)r)r<r1r1r2r"szYumDnf.install_podman)rFrG)rFrG)rFrG)rFrG)rFrG)rFrG)rHrI)rJrK)r/rArBr;ryrrDr=r rmr<rnrr,r-rrrrr1r1)rr2rE}!s&8 rEc seZdZdddgZeeeeeeeeeeeeeddfdd Zeedd d Z ed d d Z ed ddZ dd ddZ dd ddZ eeddddZdd ddZZS)ZypperZsleszopensuse-tumbleweedz opensuse-leapN)rr)rKr*r+r#r$r,csXtt|j|||||d|dk s&t||_d|_d|_d|_d|krT|dk rT||_dS)N)r)rKr*r+ZzypperZopensusez15.1Z tumbleweed)rrqr=rrrOr#r$)r<rr)rKr*r+r#r$)rr1r2r=%"s  zZypper.__init__)rPr,c KsRg}d}x>|D]6}|\}}||kr|j|d kr|j||j|qWdj|S)!z4 See YumDnf for format explanation. rQ[%s]rname=%srT baseurl=%sr enabled=%srW gpgcheck=%srYtype=%sr[ gpgkey=%sr]proxy=%sr_ priority=%sNrVrrQrrrrsrTrtrrurWrvrYrwr[rxr]ryr_rz) r{r|r}r~rrrrr)NrV)rr5r)r<rPr~rjr9rkrlr1r1r2rm2"s zZypper.custom_repo)r,cCsdS)Nz/etc/zypp/repos.d/ceph.repor1)r<r1r1r2r<O"szZypper.repo_pathcCsF|js|jst|jr,d|jj|j|jfSd|jj|j|jfSdS)Nz %s/rpm-%s/%s)r)rKrrr>r#)r<r1r1r2rnR"s zZypper.repo_baseurlc Cs|js |jrtd}xzddddjD]L\}}|d|7}||jd||jd|d d |jd d 7}|d 7}q"Wn|j|j|j|j |j }t j d |j t|j d}|j|WdQRXdS)NrVz $basearchrorp)rz Ceph-noarchz Ceph-sourcez[%s] zCeph %srr%r)rrTrrWr[z zWriting repo to %s...r6)r)rKrqrmrnr6r1r#r$r*r+r-rr<rr)r<rrrrr1r1r2r,["s(   zZypper.add_repocCs"tjj|jrtj|jdS)N)rrrr<r>)r<r1r1r2r-t"szZypper.rm_repo)rr,cCs*tjd|t|j|jddg|dS)NzInstalling packages %s...inz-y)r-rrrrO)r<rr1r1r2rx"szZypper.installcCs|jdgdS)Nr)r)r<r1r1r2r|"szZypper.install_podman)r/rArBr;ryrrDr=r rmr<rnr,r-rrrrr1r1)rr2rq"s  rq)rr)rKr*r+r,c Cst\}}}|tjkr,t|||||||dS|tjkrNt||||||||dS|tjkrnt|||||||dStd||fdS)N)r)rKr*r+r#r$)r)rKr*r+r#r$r%z"Distro %s version %s not supported)r'rEr;r8rqr)rr)rKr*r+r#r$r%r1r1r2r"s      rcCs|jr|jrtd|j r<|j r<|j r<|j r.probe_hbaZcommand_rVz : startingz/sys/class/scsi_host/*/scan proc_namer usb-storagez,: rescan skipping incompatible host adapter z : z: no compatible HBAs foundzOk. 
No compatible HBAs foundcSsg|] }|r|qSr1r1)rrr1r1r2r"sz(command_rescan_disks..z : Complete. z adapters rescanned, z failures, z skippedr%rz Failed. All z rescan requests failedz Partial. z successful, z failurez against: z, zOk. z adapters detected: z rescanned, z skipped, z failed (z.2fzs))rr)rDr*r/r'r-rrrrrrr read_filerr5r'r8r)rrr+rJZall_scan_filesZ scan_filesZskippedrZ adapter_namerZ responsesZfailureselapsedpluralr1r1r2command_rescan_disks"s8 . 2rc sztjttdfdd }tjtjtj}y||d}||d}Wntk rTdSXtdd|jd D}d j||S) N)sockoffsetr,c s:tjtjtj|j|tjdtdddddS)NZ256sr(zutf-8) r inet_ntoprrZioctlr:structpackbytes)rr)ifnamer1r2_extract"sz"get_ipv4_address.._extractiirVcSsg|]}tt|jdqS)r)binrEr)rrar1r1r2r"sz$get_ipv4_address..rz{}/{}) rrErDr SOCK_DGRAMrrrr)rrrrZdq_maskZdec_maskr1)rr2get_ipv4_address"s  rcstjjdsdStdg}|j}xz|D]r}|j}|d |kr(|ddjfddtdt|ddD}t j |}d j t |t d j |d d Sq(WdS)Nz/proc/net/if_inet6rVr%rr>csg|]}||dqS)r'r1)rZ_p)ipv6_rawr1r2r#sz$get_ipv6_address..r'z{}/{}z0x{}r$r&)rrrrr8rrrmrrrrrDrE)rrawr7Z iface_settingroZ ipv6_fmtdr,r1)rr2get_ipv6_address"s    ( $rdecimalcCszddddddddg}d }d }|d kr@dd d dddddg}d}d}x,|D]$}t||krbd||fS||}qFWd||fS)zConvert a bytes value into it's human-readable form. 
:param num: number, in bytes, to convert :param mode: Either decimal (default) or binary to determine divisor :returns: string representing the bytes value in a more readable format rVZKBZMBZGBZTBZPBZEBZZBg@@ZYBbinaryZKiBZMiBZGiBZTiBZPiBZEiBZZiBg@ZYiBz%3.1f%sz%.1f%s)abs)r@rZ unit_listZdivisorZyottartr1r1r2bytes_to_human#s    rcCs~xx|D]p}|rtjj||}n|}tjj|rt|d8}y|jjddj}Wntk rfdSX|SWdQRXqWdS)zReturns the content of the first file found within the `path_list` :param path_list: list of file paths to search :param file_name: optional file_name to be applied to a file path :returns: content of the file or 'Unknown' rzutf-8ignoreUnknownN) rrrrrrrrr) path_list file_namerrrrr1r1r2r'#s   rc@seZdZeeedddZddddZeddd d Zeeefdd d Z edd dZ edddZ eeefdddZ dS) Enclosure)enc_idenc_pathdev_pathcCs^||_tjj|d|_||_g|_d|_d|_d|_ ||_ d|_ i|_ d|_ i|_|jdS)aExternal disk enclosure metadata Args: :param enc_id: enclosure id (normally a WWN) :param enc_path: sysfs path to HBA attached to the enclosure e.g. /sys/class/scsi_generic/sg11/device/enclosure/0:0:9:0 :param dev_path: sysfs path to the generic scsi device for the enclosure HBA e.g. /sys/class/scsi_generic/sg2 rrrVN)_pathrrr _dev_path _enc_path ses_paths path_countvendormodelrr2 device_lookup device_countslot_map_probe)r<rrrr1r1r2r=D#s zEnclosure.__init__N)r,cCs>ttjj|jdg|_ttjj|jdg|_ttjj|jdg|_t tjj|jdd}x|D]}t|g}tjjtjj |dd}d}tjj |rt|g}djd d |Dj }||j |<tjj |}ttjj|d gttjj|d gttjj|d g|d|j|<qdWt|j |_|jtjj|jdS)z?Analyse the dev paths to identify enclosure related informationrrr2rslotrZvpd_pg80rVcss|]}|tjkr|VqdS)N)rj printable)rcharr1r1r2rj#sz#Enclosure._probe..r!faultlocate)r!rrserialN)rrrrrrrrr2rrrrrrrrr6rr)r<Z slot_pathsZ slot_pathrZ serial_pathr serial_rawZslot_dirr1r1r2r]#s&       zEnclosure._probe)dev_idr,cCs|jj|t|j|_dS)ztUpdate an enclosure object with a related sg device name :param dev_id (str): device name e.g. 
sg2 N)rr5rr)r<rr1r1r2r6w#s zEnclosure.updatecCsdd|jjDS)z*Return a dict representation of the objectcSs i|]\}}|jds||qS)r)r)rrtrur1r1r2 #sz#Enclosure._dump..)r|rq)r<r1r1r2_dump#szEnclosure._dumpcCstj|jddS)z@Return a formatted json representation of the object as a stringr$)rk)rrnr)r<r1r1r2r?#szEnclosure.__str__cCstj|jS)z6Return a json representation of the object as a string)rrnr)r<r1r1r2r@#szEnclosure.__repr__cCs|jS)z%Return a dict representing the object)r)r<r1r1r2as_json#szEnclosure.as_json) r/rArBrDr=rr6rr rr?r@rr1r1r1r2rC#src@seZdZdgZdgZdgZddiZdrZd Ze d d dZ e e e fdddZ e e efdddZee e e e effdddZeedddZddZddZeddZeddZee dd d!Zee dd"d#Zed$d%Zed&d'Zd(d)Zdsd+d,Zd-d.Zed/d0Z ed1d2Z!ed3d4Z"ed5d6Z#ed7d8Z$ed9d:Z%ed;d<Z&ed=d>Z'd?d@Z(edAdBZ)dCdDZ*edEdFZ+edGdHZ,edIdJZ-edKdLZ.edMdNZ/edOdPZ0edQdRZ1edSdTZ2edUdVZ3edWdXZ4edYdZZ5ed[d\Z6ed]d^Z7ee8dd_d`Z9edadbZ:e;dte e eee.)rrq)r<r1r1r2r#szHostFacts.enclosurescCst|jjS)z(Return the number of enclosures detected)rrr)r<r1r1r2enclosure_count#szHostFacts.enclosure_countcCstdg}|j}t}x||D]t}dd|jdD}d|krH|d|_d|kr^|j|dd|krxt|dj|_d |krt|dj|_ qWt ||_ d S) z+Determine cpu information via /proc/cpuinfoz /proc/cpuinfocSsg|] }|jqSr1)r)rrr1r1r2r#sz*HostFacts._get_cpuinfo..r>z model namer%z physical idZsiblingsz cpu coresN) rr8rrrr{rErrrrr)r<routputZcpu_setr9ror1r1r2r#s   zHostFacts._get_cpuinfocCsddtjdDS)z.z /sys/block)rr)r<r1r1r2r#szHostFacts._get_block_devscs|tdg}|j}d}tx0|D](}d|kr"|jd\}}|jd|<q"Wtfddd Drxd jdd}|S) zDetermine OS versionz/etc/os-releaserrr#c3s|]}|kVqdS)Nr1)rZ_v)rel_dictr1r2r $sz-HostFacts.operating_system..NAMEVERSIONz{} {})rr)rr8rSrrrr)r<Zraw_infoZ os_releaseZrel_strr9Zvar_nameZ var_valuer1)rr2operating_system#s  zHostFacts.operating_systemcCstjS)zReturn the hostname)rdnode)r<r1r1r2r^$szHostFacts.hostnamecCstjjdddS)Nrr%r)rdrr)r<r1r1r2 shortname$szHostFacts.shortnamecCstS)N)r])r<r1r1r2fqdn$szHostFacts.fqdncCs&dd}|j}|jjdr"|SdS)zKHighlevel check to see if the host is subscribed to receive 
updates/supportcSs2d}tjj|r.tdj|}t|dkr.dSdS)Nz/etc/pki/entitlementz{}/*.pemr$ZYesZNo)rrrrrr)Zentitlements_dirZpemsr1r1r2_red_hat $s   z&HostFacts.subscribed.._red_hatzRED HATr)rrr)r<rZos_namer1r1r2 subscribed$s  zHostFacts.subscribedcCs t|jS)z!Return a count of HDDs (spinners))rhdd_list)r<r1r1r2 hdd_count1$szHostFacts.hdd_countcCs&tjjd|d}tt|g}|dS)aDetermine the size of a given device The kernel always bases device size calculations based on a 512 byte sector. For more information see https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/include/linux/types.h?h=v5.15.63#n120 z /sys/blocksizei)rrrrEr)r<rZ size_pathZ size_blocksr1r1r2 _get_capacity7$szHostFacts._get_capacityhddcCs>d}x4|jD]*}|d|kr tt|jdd}||7}q W|S)z@Return the total capacity of a category of device (flash or hdd)r disk_typedisk_size_bytes)rrrEr)r<rZcapacityrZ disk_capacityr1r1r2_get_capacity_by_typeC$s    zHostFacts._get_capacity_by_typecCst}i}i}x4tdD](}t|j}tjjt|}|||<qWi}x4tdD](}t|j}tjjt|}|||<qRWx|jD]} d} d} d} d}t dj | gj } t dj | gj }t dj | gj }t dj | gj }t dj | g}td j | }t |d kr6tjj|d }|j |d}|d krDd nd}tdj | }t |d krttjj|d } t dj | g}djdd|Dj }|jdkrd}n\||kr||j| n | g||<x6|jjD](\}}||jjkr|} |j|} qWtjj ||}|j| }|jdj || t||| ||| |||d| | | |j | d|dqWg}g}xp|D]h} t| d}|r||krqvn6|j|||j}|jt| ddj|| d<|j| qvW|S)zDReturn a 'pretty' name list for each unique device in the `dev_list`z/dev/disk/by-path/*z/dev/mapper/mpath*rVz/sys/block/{}/device/modelz/sys/block/{}/device/revz/sys/block/{}/device/wwidz/sys/block/{}/device/vendorz/sys/block/{}/queue/rotationalz/sys/block/{}/holders/*r%rrrflashz/sys/block/{}/device/bsg/*z/sys/block/{}/device/vpd_pg80css|]}|tjkr|VqdS)N)rjr)rrar1r1r2r}$sz-HostFacts._get_device_info..rz {} {} ({})) descriptionrrZrevZwwiddev_namerrr alt_dev_name scsi_addr enclosure_idenclosure_slotZpath_idmpathrrrr)rrr resolverrrrDrrrrrrrrr5rrqrrr4_disk_vendor_workaroundsrrrr)r<Z disk_listZserial_num_lookupZ disk_path_maprZtgt_rawZtgtZ dm_device_maprrrrrZ disk_modelZdisk_revZ disk_wwidrZ 
rotationalZ holders_rawZholderrZscsi_addr_pathrrrrZ disk_vendorrZdisk_list_uniqueZ serials_seenZdevsr1r1r2rM$s              zHostFacts._get_device_infocCsdd|jDS)z1Return a list of devices that are HDDs (spinners)cSsg|]}|ddkr|qS)rrr1)rrr1r1r2r$sz&HostFacts.hdd_list..)r)r<r1r1r2r$szHostFacts.hdd_listcCsdd|jDS)z9Return a list of devices that are flash based (SSD, NVMe)cSsg|]}|ddkr|qS)rrr1)rrr1r1r2r$sz(HostFacts.flash_list..)r)r<r1r1r2 flash_list$szHostFacts.flash_listcCs |jddS)z5Return the total capacity for all HDD devices (bytes)r)r)r)r<r1r1r2hdd_capacity_bytes$szHostFacts.hdd_capacity_bytescCs t|jS)zEReturn the total capacity for all HDD devices (human readable format))rr)r<r1r1r2 hdd_capacity$szHostFacts.hdd_capacitycCs:tdgj}|j}t|dt|dt|ddS)z-Return the cpu load average data for the hostz /proc/loadavgrr%r$)Z1minZ5minZ15min)rrrr)r<rr7r1r1r2cpu_load$s   zHostFacts.cpu_loadcCs t|jS)z.zlower_*cSs g|]}tjj|jddqS)Zupper_rV)rrrr')rrr1r1r2r%szupper_*mtur operstatespeedr%rphysicaldriverZlogicalrV) rupper_devs_listlower_devs_listriftypenic_typerrZ ipv4_addressZ ipv6_addressNr&)r4_nic_path_listrrrrrrrrrErrrrrrr)r<Z hw_lookupZnic_pathrr r r rrrZdev_linkr Z driver_pathrr1r1r2r$sX   zHostFacts._process_nicscCs8g}x*|jD] }|j|ddkr |j|q Wt|S)z>Return a total count of all physical NICs detected in the hostr r)rr5r)r<Z phys_devsrr1r1r2 nic_count+%s  zHostFacts.nic_countcCs2x,|jD]"}|j|r|j}t|dSqWdS)Nr%r)rrrrE)r< field_namer9Z_dr1r1r2 _get_mem_data5%s   zHostFacts._get_mem_datacCs |jdS)z#Determine the memory installed (kb)ZMemTotal)r)r<r1r1r2memory_total_kb=%szHostFacts.memory_total_kbcCs |jdS)z9Determine the memory free (not cache, immediately usable)ZMemFree)r)r<r1r1r2memory_free_kbC%szHostFacts.memory_free_kbcCs |jdS)zCDetermine the memory available to new applications without swappingZ MemAvailable)r)r<r1r1r2memory_available_kbI%szHostFacts.memory_available_kbcCs ttjdS)z.Determine server vendor from DMI data in sysfsZ 
sys_vendor)rr4_dmi_path_list)r<r1r1r2rO%szHostFacts.vendorcCs:ttjd}ttjd}|dkr.|r.dj|Sdj||S)z9Determine server model information from DMI data in sysfsZproduct_familyZ product_namerz{}z{} ({}))rr4rr)r<familyproductr1r1r2rU%s     zHostFacts.modelcCs ttjdS)z5Determine server BIOS version from DMI data in sysfs bios_version)rr4r)r<r1r1r2r`%szHostFacts.bios_versioncCs ttjdS)z2Determine server BIOS date from DMI data in sysfs bios_date)rr4r)r<r1r1r2rf%szHostFacts.bios_datecCs ttjdS)z6Determine chassis serial number from DMI data in sysfschassis_serial)rr4r)r<r1r1r2rl%szHostFacts.chassis_serialcCs ttjdS)z8Determine mainboard serial number from DMI data in sysfs board_serial)rr4r)r<r1r1r2rr%szHostFacts.board_serialcCs ttjdS)z7Determine server's serial number from DMI data in sysfsproduct_serial)rr4r)r<r1r1r2rx%szHostFacts.product_serialcCstjS)z(Return the current time as Epoch seconds)r)r<r1r1r2 timestamp~%szHostFacts.timestampcCstdg}|j\}}t|S)z"Return the system uptime (in secs)z /proc/uptime)rrr)r<Zraw_timeZup_secsrr1r1r2 system_uptime%s  zHostFacts.system_uptimecstttfdfdd }tttfddd}i}tjjdrxtdgj}d|kr^|}nd|krn|}n d d d S|r|Sd d d S)zIDetermine the security features enabled in the kernel - SELinux, AppArmor)r,c si}ytjdgtjd\}}}d|d<d\}}}xv|jdD]h}|jdrd|jd\}} | j}q>|jd r|jd\}} | j}q>|jd r>|jd\}} | j}q>W|d krd |d <ndj|||d <Wn2tk r} zt j d| WYdd} ~ XnX|S)zGet the selinux statusZsestatus)rSELinuxr`rVrzSELinux status:r>z Current mode:zLoaded policy name:rzSELinux: DisabledrzSELinux: Enabled({}, {})z unable to get selinux status: %sN)rVrVrV) r(rrrrrrrr=r-r) securityrr*r)r!rZpolicyr9rtrur)r<r1r2_fetch_selinux%s,          z1HostFacts.kernel_security.._fetch_selinuxcSsi}xtjD]}tjj|r d|d<d|d<ytdg}t|dkrHiSWntk r^YnXi}xL|jdD]>}|jd\}}|j d }||kr||d 7<qpd||<qpWd j d d |j D}||}|ddj |7<|Sq WiS)zMRead the apparmor profiles directly, returning an overview of AppArmor statusZAppArmorr`zAppArmor: Enabledrz&/sys/kernel/security/apparmor/profilesrrr%z()r%rcSsg|]\}}dj||qS)z{} 
{})r)rrtrur1r1r2r%szFHostFacts.kernel_security.._fetch_apparmor..z({})) r4_apparmor_path_listrrrrrrrrrrqr)rZ apparmor_pathZprofilesZsummaryr9itemrZ summary_strr1r1r2_fetch_apparmor%s.      z2HostFacts.kernel_security.._fetch_apparmorz/sys/kernel/security/lsmZselinuxZapparmorrzOLinux Security Module framework is active, but is not using SELinux or AppArmor)r`rNonez0Linux Security Module framework is not available)rrDrrrrr)r<r r#ryZlsmr1)r<r2kernel_security%s zHostFacts.kernel_securitycCs|jddko|jddkS)Nr`rrzSELinux: Disabled)r%)r<r1r1r2r5%szHostFacts.selinux_enabledcCsTi}t|jddgtjd\}}}|rP|jd}dd|D}d|krP|d|d<|S)z4Get kernel parameters required/used in Ceph clustersrz-a)rrcSs&i|]}|jdd|jddqS)z = r%rr&)r)rZparamr1r1r2r%sz/HostFacts.kernel_parameters..znet.ipv4.ip_nonlocal_bind)rrrrr)r<Zk_paramrrZ param_listZ param_dictr1r1r2kernel_parameters%s  zHostFacts.kernel_parameterstcp)tcp_fileprotocolr,c Csg}ddd}||jkrgStjj|rt|}|jdd}WdQRXxJ|D]B}|jj}|d||krTt|djddd}|j |qTW|S)NZ0AZ07)r'udpr%r&r>r) rrrrrrrrrEr5) r(r)Zlistening_portsZlistening_staterZtcp_dataZconZcon_infoZ local_portr1r1r2_process_net_data%s     zHostFacts._process_net_datacCs tjdS)Nz /proc/net/tcp)r4r+)r<r1r1r2tcp_ports_used &szHostFacts.tcp_ports_usedcCs tjdS)Nz/proc/net/tcp6)r4r+)r<r1r1r2tcp6_ports_used&szHostFacts.tcp6_ports_usedcCs tjddS)Nz /proc/net/udpr*)r4r+)r<r1r1r2udp_ports_used&szHostFacts.udp_ports_usedcCs tjddS)Nz/proc/net/udp6r*)r4r+)r<r1r1r2udp6_ports_used&szHostFacts.udp6_ports_usedcs&fddtD}tj|dddS)z6Return the attributes of this HostFacts object as jsonc s@i|]8}|jd rtt|ttttttfrt||qS)r) rrMrrrErDrrSrY)rrt)r<r1r2r&s z"HostFacts.dump..r$T)rkr)rrrn)r<r7r1)r<r2r&s  zHostFacts.dumpN)rrrrr)r)r')Cr/rArBrr r!rrrryr=rrDrrrrr rrErrrrr^rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr%rPr5r&rrr+r,r-r.r/rr1r1r1r2r4#s !      
c       E             P r4cCst|}t|jdS)zGgather_facts is intended to provide host related metadata to the callerN)r4rr)rrr1r1r2command_gather_facts)&sr0rP)r target_name subsystemr,cCs tjjtjj|j|d|S)Nz .target.wants)rrrrrb)rr1r2r1r1r2systemd_target_state2&s r3cCstjj|jdS)Nz /ceph.target)rrrrb)rr1r1r2 target_exists=&sr4cCsl|jstdd|jd}|jjdkrtjdt||rt|dd|gtj d\}}}|rttj d |d d St|dd |gtj d\}}}|rtj d |d d Sd|dSndSntjdt |sdSt||s\t|dd|gtj d\}}}|rtj d|d dSt|dd|gtj d\}}}|rPtj d|d dSd|dSd|dSdS)Nz,failed - must pass --fsid to specify clusterzceph-z.targetenterz(Requested to place host into maintenancerr)rzFailed to disable the z targetzfailed - to disable the targetrmzFailed to stop the zsuccess - systemd target z disabledz!skipped - target already disabledz#Requested to exit maintenance statezVskipped - systemd target not present on this host. Host removed from maintenance mode.rczFailed to enable the z$failed - unable to enable the targetrJzFailed to start the z#failed - unable to start the targetz enabled and started) rrmaintenance_actionrr-rr3r(rrrr4)rrZ_outZ_errr)r1r1r2command_maintenanceA&sL     r7c@s<eZdZddddZeeddddZeddd d ZdS) rN)r,cCs i|_dS)N)r)r<r1r1r2r=&szArgumentFacade.__init__)ror)r,cOsZ|s td|d}|jds,td||ddjdd}|jdd}||j|<dS) Nzexpected at least one argumentrz--zexpected long option, got: r$r"rr)rrr'r r)r<ror)rrr1r1r2 add_argument&s  zArgumentFacade.add_argument)rr,cCs(x"|jjD]\}}t|||q WdS)N)rrqrs)r<rrrr1r1r2rM&szArgumentFacade.apply)r/rArBr=r r8ryrMr1r1r1r2r~&s r) parser_deployr,cCs|jdddd|jddd|jddd|jd d d|jd d d|jd ddd|jddd|jddd|jdddd|jdddd|jddttjd|jddd|jddd|jdd d|jd!d"gd#d|jd$d"gd%ddS)&Nz--configz-czconfig file for new daemon)helpz --config-jsonz3Additional configuration information in JSON formatz --keyringzkeyring for new daemonz--keyzkey for new daemonz --osd-fsidz&OSD uuid, if creating an OSD containerz--skip-firewalld store_truezDo not configure firewalld)actionr:z --tcp-portsz.List of tcp ports to open in the host firewallz --port-ipsz7JSON dict mapping ports to 
IPs they need to be bound onz --reconfigz(Reconfigure a previously deployed daemonz--allow-ptracez$Allow SYS_PTRACE on daemon containerz--container-init)r<rr:z--memory-requestzContainer memory request/targetz--memory-limitzContainer memory hard limitz --meta-jsonz JSON dict of additional metadataz--extra-container-argsr5z1Additional container arguments to apply to daemonz--extra-entrypoint-argsz2Additional entrypoint arguments to apply to deamon)r8rlrwSUPPRESS)r9r1r1r2r&sr rc!Cs tjdtjd}|jddd|jdddd |jd td d |jd tdd |jdtdd |jdtdd |jdtdd |jddddd |jdt t dd|jdt t dd|jdddgd d!|jd"dt d#d!|jd$dd%d&d!|j d'd}|jd(d)d}|jtd*|jd+d,d}|jtd*|jd-dtjd |jd.d/d}|jtd*|jd0d1d}|jtd*|jd2dd3d |jd4d5d6d |jd7d8d}|jtd*|jd9d:d}|jtd*|jd;dd?|jd@d=dAd?|jdBdCdDd |jd4d5d6d |jdEdFd|jdGddHd |jdIddJd |jdKddLd |jdMdt tjd!|jdNdOd}|jtd*|jd;ddP|jdQdRd|jdSd=dTd?|jdUddVd |jdWddXd |jdYdZd} | jtd*| jdSd=dTd?| jdUddVd | jd[dd\d | jd]dd^d |jd_d`d} | jtd*| jd;dd?| jdSd=dTd?|jdadbd} | jtd*| jdcdddedf| jdSdTd| jd;dd| jdgdhdid| jdjdkdld| jdmdndodpdq| jdddgd d!| jdrddgd d!| jdstjdtdu| jdvddwd | jdxddyd |jdzd{d} | jtd*| jdSdTd| jd;dd?| jdstjdsdu|jd|d}d} | jtd*| jdcdddedf| jdSdTd| jdEd~d| jdgdhdd| jdjdkdld| jdstjdsdu|jddd}|jtd*|jdSd=dTd?|jdUddVd |jddd}|jt d*|jdsdd|jdSdTd|jd;dd?|jddd}|jt!d*|jdSdTd|jd;dd?|jdsdddu|jddd}|jt"d*|jdgdhdd|jdd%dd?|j#}|jddd|jddd|jdd%dd?|jdSdTd|jdddd |jddd|jddd|jddd|jdddd |jdddd |jdddd |jddd|jdt ddd|jdtj$ddd|jdtj$ddd|jdtj$ddd|jdtj$ddd|jdtj$ddd|jdtj$ddd|jdddd |jdddd |jdddd |jdddd |jdddd |jdddd |jdIddd |jdGddHd |jdddd |jddd%dd!|jdddd |jdddd |jdddd |jdddd |jdddd |jdddd |jddd|jdcdddedf|jddd|jddd|jddd|jddd|jdMdt tjd!|jddd|jdddd |jdddd |jddd}|jt%d*|jd;d=td>dP|jdSd=dTd?t&||jd}|j ddd}|jddd}|jt'd*|jdSdTd|jddddd|jddd}|jt(d*|jddd|jddd}|jt)d*|jddd|jddd}|jt*d*|jddj+t,d|jddd|jddd|jddd|jddd|jdddd |jddd}|jt-d*|jdd d}|jt.d*|jd dd gd d |jd dd}|jt/d*|jddd|jddd|jddd|jddd|jdSdTd|jddd}|jt0d*|jddd}|jdSdTd|jdt1dzdgdd|jt2d*|jddd}|jt3d*|jdSd=dTd?|jddd|jddd} | jt4d*|S(Nz3Bootstrap Ceph daemons with systemd and 
containers.)rformatter_classz--imagez@container image. Can also be set via the "CEPHADM_IMAGE" env var)r:z--dockerr;zuse docker instead of podman)r<r:z --data-dirzbase directory for daemon data)rr:z --log-dirzbase directory for daemon logsz--logrotate-dirz)location of logrotate configuration filesz --sysctl-dirz&location of sysctl configuration filesz --unit-dirz base directory for systemd unitsz --verbosez-vzShow debug-level log messagesz --timeoutztimeout in seconds)r`rr:z--retryzmax number of retriesz--envz-er5zset environment variable)r<rr:z--no-container-initz&Do not run podman/docker with `--init`z--no-cgroups-splitFzVDo not run containers with --cgroups=split (currently only relevant when using podman)z sub-commandrKzget cephadm version)r*rz pull the default container imagez --insecurez inspect-imagezinspect local container imagerz"list daemon instances on this hostz --no-detailzDo not include daemon statusz --legacy-dirrz%base directory for legacy daemon dataz list-networkszlist IP networksZadoptz+adopt daemon deployed with a different toolz--namez-nTzdaemon name (type.id))requiredr:z--stylezdeployment style (legacy, ...)z --clusterrPz cluster namez --config-jsonz3Additional configuration information in JSON formatz--skip-firewalldzDo not configure firewalldz --skip-pullz-do not pull the default image before adoptingz --force-startzAstart newly adopted daemon, even if it was not running previouslyz--container-initz rm-daemonzremove daemon instance)r?r<r:z --tcp-portsz/List of tcp ports to close in the host firewallz--fsidz cluster FSIDz--forcez3proceed, even though this may destroy valuable dataz--force-delete-dataz6delete valuable daemon data instead of making a backupz rm-clusterz remove all daemons for a clusterz --keep-logszdo not remove log filesz --zap-osdsz zap OSD devices for this clusterr%z4run a ceph daemon, in a container, in the foregroundshellz2run an interactive shell inside a daemon 
containerz--shared_ceph_folderZCEPH_SOURCE_FOLDERzuDevelopment mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder)metavarr:z--configz-cz*ceph.conf to pass through to the containerz --keyringz-kz-ceph.keyring to pass through to the containerz--mountz-mzmount a file or directory in the container. Support multiple mounts. ie: `--mount /foo /bar:/bar`. When no destination is passed, default is /mnt+)r:nargsz--volumer(zcommand (optional))rCr:z --no-hostsz-dont pass /etc/hosts through to the containerz --dry-runzCprint, but do not execute, the container command to start the shellr5z:run an interactive shell inside a running daemon containerz ceph-volumez"run ceph-volume inside a containerz4JSON file with config and (client.bootstrap-osd) keyzceph conf filezzap-osdsz.zap all OSDs associated with a particular fsidrtz$operate on the daemon's systemd unitz.z/cephadmr6z# created by cephadm /var/log/ceph/cephadm.log { rotate 7 daily compress missingok notifempty su root root } Nrrrz %s cephadm %sr"P)rrr)r addLevelNamerrrrr[rerrinteractive_logging_configlogging_config getLoggerr-setLevelr^rrrcrrr.)rroZ operationsrhandlerr1)ror2cephadm_init_loggings)s$        r`)r,cCs&tjdkr"tjjdtjddS)z+Exit if the process is not running as root.rz%ERROR: cephadm should be run as root r%N)rgeteuidrr+rrGr1r1r1r2cephadm_require_root)s  rbcCsNg}tjdd}t|}|js8tjjdtjd|jrt|jddrytj|j|WnBt k r}z&|j r|t j d|tjdWYdd}~XnXt t||y4t||_|jtttttgkrt||j|}WnJt tfk r4}z(|j r t j d|tjdWYdd}~XnX|s@d}tj|dS)Nr%z2No command specified; pass -h or --help for usage rFz ERROR: %sr)rrrWr~r+rrGrr*rrcr-rrbr`rrnrrrrrrr)rQrrrr1r1r2main)sD    rc__main__)r$rr$)r$r%r)r&r'r%)NN)NF)r&r)NN)NN)NN)NN)NN)N)N)N)N)N)NN)F)FFN)rVr")NF)TTNN)F)FFN)TN)FN)N)NNNN)r)rV)rP(r6Zasyncio.subprocessrwrJrrrrrZlogging.configrrrdrrhrTrrrjr-rrrrrrenumrZtypingrrrrrr r r r r rrrrrrrZrs configparserr contextlibrr functoolsrrrrrrZ urllib.errorrrZurllib.requestrrpathlibr 
r!rhrrr,r-r.r/r1r0rrrzrrrrrrrrYr[rr]r_rarrrwrvrVrrrrrrrlrrr:rrdrfrNrrr-Loggerr%rDCompletedProcessr3r8r9rFrQrUryrrrrr\Filterrr*r+r[rr=rrrrobjectrrrrrErrErFrirwr{rrrrrrPrMrArrrrrr2rrrr version_inforr#rZAbstractChildWatcherrZset_child_watcherr%r'rrr(rr;r>rArDrGrPrUrrr[r]rrgrnrqrrrurrwrrrrrrrrrrrrrerrrrrrrrrrrrrrrrrrrrrrrr rgrr"r r rrrr/rmrUr8r;rArrRrSr\rbrIrKrJrorrrrprqrrrrrrrrrrrrrr rrrrr!r(r+r.r1r8r<r?rArDrHrYr[r]rdrgrmrhrsrxr~rrcrr>rtrrrrrrrrrrrrrrrrrrrrrr|rrrrrrrrrrrrrrrrrrr|rrr rTrrrActionrr'r(r8rErqrrrrrrrrrrr4r0r3r4r7rr"rrPrxrVrWr`rbrcr/r1r1r1r2s  H            !   v*  9}<X`$r   . ^ @   )     +7     +      $$     '  po     H8!Th!0v""")R8Cr2  &&  &"$$4 U 3:1(2$ $$*:    e.*,* .( ]** *,kz";#7   b%" .@&F"c6.M <B %,PKWK// _version.pyc3 $we@sdZdZdZdZdZdS)Z(7fe91d5d5842e04be3b4f514d6dd990c54b29c76z18.2.1Z18ZreefZstableN) CEPH_GIT_VERCEPH_GIT_NICE_VER CEPH_RELEASECEPH_RELEASE_NAMECEPH_RELEASE_TYPErr*/tmp/tmpyrtrv2e9.cephadm.build/_version.pysPKWk subprocess.CompletedProcess: logger.debug(f'running function {func.__name__}, with parms: {cmd}') response = func(cmd) return response async def concurrent_tasks(func: Callable, cmd_list: List[str]) -> List[Any]: tasks = [] for cmd in cmd_list: tasks.append(run_func(func, cmd)) data = await asyncio.gather(*tasks) return data class EndPoint: """EndPoint representing an ip:port format""" def __init__(self, ip: str, port: int) -> None: self.ip = ip self.port = port def __str__(self) -> str: return f'{self.ip}:{self.port}' def __repr__(self) -> str: return f'{self.ip}:{self.port}' class ContainerInfo: def __init__(self, container_id: str, image_name: str, image_id: str, start: str, version: str) -> None: self.container_id = container_id self.image_name = image_name self.image_id = image_id self.start = start self.version = version def __eq__(self, other: Any) -> bool: if not isinstance(other, ContainerInfo): return NotImplemented return (self.container_id == other.container_id and self.image_name == other.image_name and 
self.image_id == other.image_id and self.start == other.start and self.version == other.version) class DeploymentType(Enum): # Fresh deployment of a daemon. DEFAULT = 'Deploy' # Redeploying a daemon. Works the same as fresh # deployment minus port checking. REDEPLOY = 'Redeploy' # Reconfiguring a daemon. Rewrites config # files and potentially restarts daemon. RECONFIG = 'Reconfig' class BaseConfig: def __init__(self) -> None: self.image: str = '' self.docker: bool = False self.data_dir: str = DATA_DIR self.log_dir: str = LOG_DIR self.logrotate_dir: str = LOGROTATE_DIR self.sysctl_dir: str = SYSCTL_DIR self.unit_dir: str = UNIT_DIR self.verbose: bool = False self.timeout: Optional[int] = DEFAULT_TIMEOUT self.retry: int = DEFAULT_RETRY self.env: List[str] = [] self.memory_request: Optional[int] = None self.memory_limit: Optional[int] = None self.log_to_journald: Optional[bool] = None self.container_init: bool = CONTAINER_INIT self.container_engine: Optional[ContainerEngine] = None def set_from_args(self, args: argparse.Namespace) -> None: argdict: Dict[str, Any] = vars(args) for k, v in argdict.items(): if hasattr(self, k): setattr(self, k, v) class CephadmContext: def __init__(self) -> None: self.__dict__['_args'] = None self.__dict__['_conf'] = BaseConfig() def set_args(self, args: argparse.Namespace) -> None: self._conf.set_from_args(args) self._args = args def has_function(self) -> bool: return 'func' in self._args def __contains__(self, name: str) -> bool: return hasattr(self, name) def __getattr__(self, name: str) -> Any: if '_conf' in self.__dict__ and hasattr(self._conf, name): return getattr(self._conf, name) elif '_args' in self.__dict__ and hasattr(self._args, name): return getattr(self._args, name) else: return super().__getattribute__(name) def __setattr__(self, name: str, value: Any) -> None: if hasattr(self._conf, name): setattr(self._conf, name, value) elif hasattr(self._args, name): setattr(self._args, name, value) else: super().__setattr__(name, 
value) class ContainerEngine: def __init__(self) -> None: self.path = find_program(self.EXE) @property def EXE(self) -> str: raise NotImplementedError() def __str__(self) -> str: return f'{self.EXE} ({self.path})' class Podman(ContainerEngine): EXE = 'podman' def __init__(self) -> None: super().__init__() self._version: Optional[Tuple[int, ...]] = None @property def version(self) -> Tuple[int, ...]: if self._version is None: raise RuntimeError('Please call `get_version` first') return self._version def get_version(self, ctx: CephadmContext) -> None: out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'], verbosity=CallVerbosity.QUIET) self._version = _parse_podman_version(out) def __str__(self) -> str: version = '.'.join(map(str, self.version)) return f'{self.EXE} ({self.path}) version {version}' class Docker(ContainerEngine): EXE = 'docker' CONTAINER_PREFERENCE = (Podman, Docker) # prefer podman to docker # During normal cephadm operations (cephadm ls, gather-facts, etc ) we use: # stdout: for JSON output only # stderr: for error, debug, info, etc logging_config = { 'version': 1, 'disable_existing_loggers': True, 'formatters': { 'cephadm': { 'format': '%(asctime)s %(thread)x %(levelname)s %(message)s' }, }, 'handlers': { 'console': { 'level': 'INFO', 'class': 'logging.StreamHandler', }, 'log_file': { 'level': 'DEBUG', 'class': 'logging.handlers.WatchedFileHandler', 'formatter': 'cephadm', 'filename': '%s/cephadm.log' % LOG_DIR, } }, 'loggers': { '': { 'level': 'DEBUG', 'handlers': ['console', 'log_file'], } } } class ExcludeErrorsFilter(logging.Filter): def filter(self, record: logging.LogRecord) -> bool: """Only lets through log messages with log level below WARNING .""" return record.levelno < logging.WARNING # When cephadm is used as standard binary (bootstrap, rm-cluster, etc) we use: # stdout: for debug and info # stderr: for errors and warnings interactive_logging_config = { 'version': 1, 'filters': { 'exclude_errors': { '()': 
ExcludeErrorsFilter } }, 'disable_existing_loggers': True, 'formatters': { 'cephadm': { 'format': '%(asctime)s %(thread)x %(levelname)s %(message)s' }, }, 'handlers': { 'console_stdout': { 'level': 'INFO', 'class': 'logging.StreamHandler', 'filters': ['exclude_errors'], 'stream': sys.stdout }, 'console_stderr': { 'level': 'WARNING', 'class': 'logging.StreamHandler', 'stream': sys.stderr }, 'log_file': { 'level': 'DEBUG', 'class': 'logging.handlers.WatchedFileHandler', 'formatter': 'cephadm', 'filename': '%s/cephadm.log' % LOG_DIR, } }, 'loggers': { '': { 'level': 'DEBUG', 'handlers': ['console_stdout', 'console_stderr', 'log_file'], } } } class termcolor: yellow = '\033[93m' red = '\033[31m' end = '\033[0m' class Error(Exception): pass class ClusterAlreadyExists(Exception): pass class TimeoutExpired(Error): pass class UnauthorizedRegistryError(Error): pass ################################## class Ceph(object): daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror', 'crash', 'cephfs-mirror', 'ceph-exporter') gateways = ('iscsi', 'nfs', 'nvmeof') ################################## class OSD(object): @staticmethod def get_sysctl_settings() -> List[str]: return [ '# allow a large number of OSDs', 'fs.aio-max-nr = 1048576', 'kernel.pid_max = 4194304', ] ################################## class SNMPGateway: """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks""" daemon_type = 'snmp-gateway' SUPPORTED_VERSIONS = ['V2c', 'V3'] default_image = DEFAULT_SNMP_GATEWAY_IMAGE DEFAULT_PORT = 9464 env_filename = 'snmp-gateway.conf' def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str], config_json: Dict[str, Any], image: Optional[str] = None) -> None: self.ctx = ctx self.fsid = fsid self.daemon_id = daemon_id self.image = image or SNMPGateway.default_image self.uid = config_json.get('uid', 0) self.gid = config_json.get('gid', 0) self.destination = config_json.get('destination', '') self.snmp_version = 
config_json.get('snmp_version', 'V2c') self.snmp_community = config_json.get('snmp_community', 'public') self.log_level = config_json.get('log_level', 'info') self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '') self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '') self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '') self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '') self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '') self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '') self.validate() @classmethod def init(cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]) -> 'SNMPGateway': cfgs = fetch_configs(ctx) assert cfgs # assert some config data was found return cls(ctx, fsid, daemon_id, cfgs, ctx.image) @staticmethod def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]: """Return the version of the notifier from it's http endpoint""" path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta') try: with open(path, 'r') as env: metadata = json.loads(env.read()) except (OSError, json.JSONDecodeError): return None ports = metadata.get('ports', []) if not ports: return None try: with urlopen(f'{ports[0]}/') as r: html = r.read().decode('utf-8').split('\n') except (HTTPError, URLError): return None for h in html: stripped = h.strip() if stripped.startswith(('
', '
')) and \
', '
')): #
(version=1.2.1, branch=HEAD, revision=7...
                return stripped.split(',')[0].split('version=')[1]

        return None

    def port(self) -> int:
        """Return the port number to use for this gateway.

        The port of the first endpoint returned by ``fetch_tcp_ports`` for
        this context wins; ``DEFAULT_PORT`` is used when that call returns
        nothing.
        """
        tcp_endpoints = fetch_tcp_ports(self.ctx)
        if tcp_endpoints:
            return tcp_endpoints[0].port
        return self.DEFAULT_PORT

    def get_daemon_args(self) -> List[str]:
        v3_args = []
        base_args = [

        if self.snmp_version == 'V3':
            # common auth settings
            # authPriv setting is applied if we have a privacy protocol setting
            if self.snmp_v3_priv_protocol:

        return base_args + v3_args

    @property
    def data_dir(self) -> str:
        """Directory of this daemon: <ctx.data_dir>/<ctx.fsid>/<daemon_type>.<daemon_id>.

        Exposed as a property because ``conf_file_path`` reads it as a plain
        attribute (``self.data_dir``) and hands it to ``os.path.join``.
        """
        return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')

    @property
    def conf_file_path(self) -> str:
        """Path of the environment file (``env_filename``) inside ``data_dir``.

        Exposed as a property because ``create_daemon_conf`` passes
        ``self.conf_file_path`` directly to ``write_new``.
        """
        return os.path.join(self.data_dir, self.env_filename)

    def create_daemon_conf(self) -> None:
        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
        with write_new(self.conf_file_path) as f:
            if self.snmp_version == 'V2c':
                if self.snmp_v3_priv_password:

    def validate(self) -> None:
        """Validate the settings

        Raises:
            Error: if the fsid doesn't look like an fsid
            Error: if the snmp version is not supported
            Error: destination IP and port address missing
        """
        if not is_fsid(self.fsid):
            raise Error(f'not a valid fsid: {self.fsid}')

        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
            raise Error(f'not a valid snmp version: {self.snmp_version}')

        if not self.destination:
            raise Error('config is missing destination attribute(:) of the target SNMP listener')

class Monitoring(object):
    """Define the configs for the monitoring containers"""

    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'grafana': [3000],
        'alertmanager': [9093, 9094],
        'loki': [3100],
        'promtail': [9080]

    components = {
        'prometheus': {
            'image': DEFAULT_PROMETHEUS_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [
            'config-json-files': [
        'loki': {
            'image': DEFAULT_LOKI_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
            'config-json-files': [
        'promtail': {
            'image': DEFAULT_PROMTAIL_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
            'config-json-files': [
        'node-exporter': {
            'image': DEFAULT_NODE_EXPORTER_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
        'grafana': {
            'image': DEFAULT_GRAFANA_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [],
            'config-json-files': [
        'alertmanager': {
            'image': DEFAULT_ALERT_MANAGER_IMAGE,
            'cpus': '2',
            'memory': '2GB',
            'args': [
            'config-json-files': [
            'config-json-args': [
    }  # type: ignore

    def get_version(ctx, container_id, daemon_type):
        # type: (CephadmContext, str, str) -> str
        :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
        cmd = daemon_type.replace('-', '_')
        code = -1
        err = ''
        out = ''
        version = ''
        if daemon_type == 'alertmanager':
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                out, err, code = call(ctx, [
                    ctx.container_engine.path, 'exec', container_id, cmd,
                ], verbosity=CallVerbosity.QUIET)
                if code == 0:
            cmd = 'alertmanager'  # reset cmd for version extraction
            out, err, code = call(ctx, [
                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
            ], verbosity=CallVerbosity.QUIET)
        if code == 0:
            if err.startswith('%s, version ' % cmd):
                version = err.split(' ')[2]
            elif out.startswith('%s, version ' % cmd):
                version = out.split(' ')[2]
        return version


def write_new(
    destination: Union[str, Path],
    owner: Optional[Tuple[int, int]] = None,
    perms: Optional[int] = DEFAULT_MODE,
    encoding: Optional[str] = None,
) -> Generator[IO, None, None]:
    """Write a new file in a robust manner, optionally specifying the owner,
    permissions, or encoding. This function takes care to never leave a file in
    a partially-written state due to a crash or power outage by writing to
    temporary file and then renaming that temp file over to the final
    destination once all data is written.  Note that the temporary files can be
    leaked but only for a "crash" or power outage - regular exceptions will
    clean up the temporary file.
    destination = os.path.abspath(destination)
    tempname = f'{destination}.new'
    open_kwargs: Dict[str, Any] = {}
    if encoding:
        open_kwargs['encoding'] = encoding
        with open(tempname, 'w', **open_kwargs) as fh:
            yield fh
            if owner is not None:
                os.fchown(fh.fileno(), *owner)
            if perms is not None:
                os.fchmod(fh.fileno(), perms)
    except Exception:
    os.rename(tempname, destination)

def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """Create config files for different services.

    For every name in ``config_files`` the content is resolved with
    ``dict_get_join`` and written to ``<config_dir>/<name>`` through
    ``write_new``, owned by ``uid``/``gid`` and encoded as UTF-8.
    """
    for fname in config_files:
        config_file = os.path.join(config_dir, fname)
        config_content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (config_file))
        with write_new(config_file, owner=(uid, gid), encoding='utf-8') as f:
            # the resolved content is the only thing that belongs in the file
            f.write(config_content)

class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    port_map = {
        'nfs': 2049,

    def __init__(self,
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
        """Alternate constructor.

        Builds the instance from the configuration returned by
        ``fetch_configs(ctx)`` and the image stored on ``ctx``.
        """
        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths below ``data_dir`` to their mount targets.

        The RGW keyring entry is added only when an ``rgw`` config is set;
        its target is built from the ``cluster`` and ``user`` entries, which
        default to ``ceph`` and ``admin``.
        """
        mounts = {
            os.path.join(data_dir, 'config'): '/etc/ceph/ceph.conf:z',
            os.path.join(data_dir, 'keyring'): '/etc/ceph/keyring:z',
            os.path.join(data_dir, 'etc/ganesha'): '/etc/ganesha:z',
        }
        if not self.rgw:
            return mounts

        rgw_target = '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (
            self.rgw.get('cluster', 'ceph'),
            self.rgw.get('user', 'admin'),
        )
        mounts[os.path.join(data_dir, 'keyring.rgw')] = rgw_target
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Return the environment entries for the container.

        Currently a single ``CEPH_CONF=<CEPH_DEFAULT_CONF>`` entry.
        """
        envs = [
            'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
        ]
        return envs

    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               NFSGanesha.entrypoint, '-v'],
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Check the supplied settings and raise ``Error`` on the first problem.

        Checked in order: fsid format, daemon_id, image, presence of every
        required file in ``files`` and, when an RGW config is given, its
        ``keyring`` and ``user`` entries.
        """
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files; report the first one that is absent
        for required in self.required_files or ():
            if required not in self.files:
                raise Error('required file missing from config-json: %s' % required)

        # check for an RGW config
        if not self.rgw:
            return
        rgw_checks = (
            ('keyring', 'RGW keyring is missing'),
            ('user', 'RGW user is missing'),
        )
        for key, message in rgw_checks:
            if not self.rgw.get(key):
                raise Error(message)

    def get_daemon_name(self):
        # type: () -> str
        """Return the daemon name in ``<daemon_type>.<daemon_id>`` form."""
        return '.'.join((str(self.daemon_type), str(self.daemon_id)))

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return ``ceph-<fsid>-<daemon name>``, with ``-<desc>`` appended when desc is given."""
        base_name = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        return '%s-%s' % (base_name, desc) if desc else base_name

    def get_daemon_args(self):
        # type: () -> List[str]
        """Return the fixed ``daemon_args`` followed by the configured ``extra_args``."""
        fixed_args, configured_args = self.daemon_args, self.extra_args
        return fixed_args + configured_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir.

        Creates ``etc/ganesha`` below ``data_dir``, populates it from the
        config-json ``files`` and, when an RGW config is present, writes its
        keyring to ``keyring.rgw``.

        Raises:
            OSError: if ``data_dir`` is not an existing directory.
        """
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # ganesha conf dir, then the files supplied through config-json
        ganesha_conf_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(ganesha_conf_dir, uid, gid, 0o755)
        populate_files(ganesha_conf_dir, self.files, uid, gid)

        # the RGW keyring is optional
        if not self.rgw:
            return
        rgw_keyring_path = os.path.join(data_dir, 'keyring.rgw')
        with write_new(rgw_keyring_path, owner=(uid, gid)) as keyring_file:
            keyring_file.write(self.rgw.get('keyring', ''))


class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
        """Alternate constructor.

        Builds the instance from the configuration returned by
        ``fetch_configs(ctx)`` and the image stored on ``ctx``.
        """
        return cls(ctx, fsid, daemon_id,
                   fetch_configs(ctx), ctx.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        """Map host paths to their mount targets inside the container.

        Entries below ``data_dir`` cover the ceph config, keyring, the
        iscsi-gateway config, the configfs mount point and the tcmu-runner
        entrypoint script; ``log_dir`` is mounted on ``/var/log`` and
        ``/dev`` is passed through unchanged.
        """
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')] = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
        mounts[log_dir] = '/var/log:z'
        mounts['/dev'] = '/dev'
        return mounts

    def get_container_binds():
        # type: () -> List[List[str]]
        binds = []
        lib_modules = ['type=bind',
        return binds

    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               '/usr/bin/python3', '-c',
                               "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        """Check the supplied settings and raise ``Error`` on the first problem.

        Checked in order: fsid format, daemon_id, image and the presence of
        every required file in ``files``.
        """
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files; report the first one that is absent
        for required in self.required_files or ():
            if required not in self.files:
                raise Error('required file missing from config-json: %s' % required)

    def get_daemon_name(self):
        # type: () -> str
        """Return the daemon name in ``<daemon_type>.<daemon_id>`` form."""
        name_parts = (self.daemon_type, self.daemon_id)
        return '%s.%s' % name_parts

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return ``ceph-<fsid>-<daemon name>``, with ``-<desc>`` appended when desc is given."""
        container_name = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if not desc:
            return container_name
        return '%s-%s' % (container_name, desc)

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir.

        Creates the ``configfs`` directory, registers the tcmu-runner
        entrypoint script in ``files``, writes all files into ``data_dir``
        and finally makes the entrypoint script executable.

        Raises:
            OSError: if ``data_dir`` is not an existing directory.
        """
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        makedirs(os.path.join(data_dir, 'configfs'), uid, gid, 0o755)

        # The tcmu-runner entrypoint script is mounted into the container;
        # tcmu_runner_entrypoint_script explains why it is needed.
        entrypoint_name = 'tcmu-runner-entrypoint.sh'
        self.files[entrypoint_name] = self.tcmu_runner_entrypoint_script()

        # write everything from config-json (plus the script) to disk
        populate_files(data_dir, self.files, uid, gid)

        # populate_files will give the script 0o600 by default, but it has
        # to be executable
        os.chmod(os.path.join(data_dir, entrypoint_name), 0o700)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Build the shell snippet that mounts or unmounts configfs.

        The mount point is ``<data_dir>/configfs``. With ``mount=True`` the
        snippet mounts configfs there unless /proc/mounts already lists it;
        with ``mount=False`` it unmounts it when it is listed.

        Returns the snippet split on whitespace.
        """
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
                  'mount -t configfs none {0}; fi'.format(mount_path)
        else:
            # without this branch the umount command replaced the mount one
            # and mount=False left cmd unbound
            cmd = 'if grep -qs {0} /proc/mounts; then ' \
                  'umount {0}; fi'.format(mount_path)
        return cmd.split()

    def tcmu_runner_entrypoint_script() -> str:
        # since we are having tcmu-runner be a background
        # process in its systemd unit (rbd-target-api being
        # the main process) systemd will not restart it when
        # it fails. in order to try and get around that for now
        # we can have a script mounted in the container that
        # that attempts to do the restarting for us. This script
        # can then become the entrypoint for the tcmu-runner
        # container

        # This is intended to be dropped for a better solution
        # for at least the squid release onward
        return """#!/bin/bash

if [ ! -d "${RUN_DIR}" ] ; then
    mkdir -p "${RUN_DIR}"

rm -rf "${RUN_DIR}"/*

while true
    touch "${RUN_DIR}"/start-up-$(date -Ins)

    # If we got around 3 kills/segfaults in the last minute,
    # don't start anymore
    if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then
        exit 0

    sleep 1

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        """Return the deployment container set up to run tcmu-runner.

        The container is obtained from ``get_deployment_container`` and then
        gets the tcmu-runner entrypoint script and a ``tcmu``-suffixed
        container name.
        """
        # The daemon_id is used to generate the cid and pid files used by
        # podman. tcmu-runner and rbd-target-api share the same daemon_id,
        # which conflicts and prevents the second container from starting,
        # so '.tcmu' is appended to the daemon_id here.
        tcmu_daemon_id = str(self.daemon_id) + '.tcmu'
        runner = get_deployment_container(self.ctx, self.fsid, self.daemon_type, tcmu_daemon_id)
        # TODO: Eventually we don't want to run tcmu-runner through this
        # script. This is intended to be a workaround backported to older
        # releases and should eventually be removed in at least squid onward
        runner.entrypoint = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
        runner.cname = self.get_container_name(desc='tcmu')
        return runner


class CephNvmeof(object):
    """Defines a Ceph-Nvmeof container"""

    daemon_type = 'nvmeof'
    required_files = ['ceph-nvmeof.conf']
    default_image = DEFAULT_NVMEOF_IMAGE

    def __init__(self,
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof
        """Alternate constructor.

        Builds the instance from the configuration returned by
        ``fetch_configs(ctx)`` and the image stored on ``ctx``.
        """
        return cls(ctx, fsid, daemon_id,
                   fetch_configs(ctx), ctx.image)

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Map host paths to their mount targets inside the container.

        Entries below ``data_dir`` cover the ceph config, keyring, the
        nvmeof config and the configfs mount point; ``/dev/hugepages`` and
        ``/dev/vfio/vfio`` are passed through unchanged.
        """
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'ceph-nvmeof.conf')] = '/src/ceph-nvmeof.conf:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts['/dev/hugepages'] = '/dev/hugepages'
        mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
        return mounts

    def get_container_binds():
        # type: () -> List[List[str]]
        binds = []
        lib_modules = ['type=bind',
        return binds

    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
        out, err, ret = call(ctx,
                             [ctx.container_engine.path, 'inspect',
                              '--format', '{{index .Config.Labels "io.ceph.version"}}',
        version = None
        if ret == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        """Check the supplied settings and raise ``Error`` on the first problem.

        Checked in order: fsid format, daemon_id, image and the presence of
        every required file in ``files``.
        """
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files; report the first one that is absent
        for required in self.required_files or ():
            if required not in self.files:
                raise Error('required file missing from config-json: %s' % required)

    def get_daemon_name(self):
        # type: () -> str
        """Return the daemon name in ``<daemon_type>.<daemon_id>`` form."""
        return '.'.join(map(str, (self.daemon_type, self.daemon_id)))

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return ``<fsid>-<daemon name>``, with ``-<desc>`` appended when desc is given."""
        # NOTE(review): NFSGanesha and CephIscsi prefix this name with
        # 'ceph-'; this class does not - confirm that is intentional.
        base_name = '%s-%s' % (self.fsid, self.get_daemon_name())
        return '%s-%s' % (base_name, desc) if desc else base_name

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir.

        Creates the ``configfs`` directory below ``data_dir`` and writes the
        config-json ``files`` into ``data_dir``.

        Raises:
            OSError: if ``data_dir`` is not an existing directory.
        """
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-nvmeof config...')
        makedirs(os.path.join(data_dir, 'configfs'), uid, gid, 0o755)

        # write the files supplied through config-json
        populate_files(data_dir, self.files, uid, gid)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Build the shell snippet that mounts or unmounts configfs.

        The mount point is ``<data_dir>/configfs``. With ``mount=True`` the
        snippet mounts configfs there unless /proc/mounts already lists it;
        with ``mount=False`` it unmounts it when it is listed.

        Returns the snippet split on whitespace.
        """
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
                  'mount -t configfs none {0}; fi'.format(mount_path)
        else:
            # without this branch the umount command replaced the mount one
            # and mount=False left cmd unbound
            cmd = 'if grep -qs {0} /proc/mounts; then ' \
                  'umount {0}; fi'.format(mount_path)
        return cmd.split()

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        """Return the sysctl lines to apply for this daemon type."""
        return [
            'vm.nr_hugepages = 4096',
        ]


class CephExporter(object):
    """Defines a Ceph exporter container"""

    daemon_type = 'ceph-exporter'
    entrypoint = '/usr/bin/ceph-exporter'
    DEFAULT_PORT = 9926
    port_map = {
        'ceph-exporter': DEFAULT_PORT,
    }

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict[str, Any],
                 image: str = DEFAULT_IMAGE) -> None:
        """Store the daemon identity and read the exporter options.

        Options are taken from ``config_json`` keys ``sock-dir``, ``addrs``,
        ``port``, ``prio-limit`` and ``stats-period``; each has a default.
        """
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
        ipv4_addrs, _ = get_ip_addresses(get_hostname())
        # NOTE(review): the IPv4 default is an empty string here while the
        # IPv6 default is the wildcard '::' -- confirm the intended IPv4
        # wildcard value.
        addrs = '' if ipv4_addrs else '::'
        self.addrs = config_json.get('addrs', addrs)
        self.port = config_json.get('port', self.DEFAULT_PORT)
        self.prio_limit = config_json.get('prio-limit', 5)
        self.stats_period = config_json.get('stats-period', 5)

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'CephExporter':
        """Alternate constructor: config from ``fetch_configs(ctx)``, image from ``ctx.image``."""
        return cls(ctx, fsid, daemon_id,
                   fetch_configs(ctx), ctx.image)

    @staticmethod
    def get_container_mounts() -> Dict[str, str]:
        """Return the host-path to container-path mounts for the container."""
        mounts = dict()
        mounts['/var/run/ceph'] = '/var/run/ceph:z'
        return mounts

    def get_daemon_args(self) -> List[str]:
        """Return the command-line arguments for the exporter binary."""
        # NOTE(review): the flag names mirror the config-json keys read in
        # __init__ -- confirm against the ceph-exporter command line.
        args = [
            f'--sock-dir={self.sock_dir}',
            f'--addrs={self.addrs}',
            f'--port={self.port}',
            f'--prio-limit={self.prio_limit}',
            f'--stats-period={self.stats_period}',
        ]
        return args

    def validate(self) -> None:
        """Raise ``Error`` unless ``sock_dir`` is an existing directory."""
        if not os.path.isdir(self.sock_dir):
            raise Error(f'Directory does not exist. Got: {self.sock_dir}')


class HAproxy(object):
    """Defines an HAproxy container"""
    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = DEFAULT_HAPROXY_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Store the daemon identity and the ``files`` map from config-json."""
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        """Alternate constructor: config from ``fetch_configs(ctx)``, image from ``ctx.image``."""
        return cls(ctx, fsid, daemon_id, fetch_configs(ctx),
                   ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir

        :raises OSError: if ``data_dir`` is not an existing directory.
        """
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for HAproxy to use
        haproxy_dir = os.path.join(data_dir, 'haproxy')
        if not os.path.isdir(haproxy_dir):
            makedirs(haproxy_dir, uid, gid, DATA_DIR_MODE)

        # the config-json files live inside the haproxy sub-directory
        populate_files(haproxy_dir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        """Return the command line run inside the container."""
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        # type: () -> None
        """Raise ``Error`` for the first of: bad fsid, empty daemon_id,
        empty image, or a required file missing from ``files``."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        """Return ``daemon_type`` and ``daemon_id`` joined by a dot."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return ``ceph-FSID-DAEMONNAME``, plus ``-desc`` when desc is given."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
        """Return the (uid, gid) reported by ``extract_uid_gid`` for ``/var/lib``."""
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Map the ``haproxy`` sub-directory of data_dir to ``/var/lib/haproxy``."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
        return mounts

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        """Return the sysctl configuration lines this daemon needs on the host."""
        return [
            '# IP forwarding and non-local bind',
            'net.ipv4.ip_forward = 1',
            'net.ipv4.ip_nonlocal_bind = 1',
        ]


class Keepalived(object):
    """Defines an Keepalived container"""
    daemon_type = 'keepalived'
    required_files = ['keepalived.conf']
    default_image = DEFAULT_KEEPALIVED_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Store the daemon identity and the ``files`` map from config-json."""
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'Keepalived':
        """Alternate constructor: config from ``fetch_configs(ctx)``, image from ``ctx.image``."""
        return cls(ctx, fsid, daemon_id,
                   fetch_configs(ctx), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir

        :raises OSError: if ``data_dir`` is not an existing directory.
        """
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for keepalived to use
        keepalived_dir = os.path.join(data_dir, 'keepalived')
        if not os.path.isdir(keepalived_dir):
            makedirs(keepalived_dir, uid, gid, DATA_DIR_MODE)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    def validate(self):
        # type: () -> None
        """Raise ``Error`` for the first of: bad fsid, empty daemon_id,
        empty image, or a required file missing from ``files``."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        """Return ``daemon_type`` and ``daemon_id`` joined by a dot."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return ``ceph-FSID-DAEMONNAME``, plus ``-desc`` when desc is given."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Return the environment variables passed to the container."""
        # NOTE(review): only this entry is present in the reviewed source;
        # confirm whether further KEEPALIVED_* variables belong here.
        envs = [
            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
        ]
        return envs

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        """Return the sysctl configuration lines this daemon needs on the host."""
        return [
            '# IP forwarding and non-local bind',
            'net.ipv4.ip_forward = 1',
            'net.ipv4.ip_nonlocal_bind = 1',
        ]

    def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
        """Return the (uid, gid) reported by ``extract_uid_gid`` for ``/var/lib``."""
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Map ``keepalived.conf`` in data_dir to its path inside the container."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
        return mounts


class Tracing(object):
    """Define the configs for the jaeger tracing containers"""

    # Per-daemon-type settings, keyed by daemon type name. The visible
    # entries use the keys 'image' and 'envs'; set_configuration() below
    # adds 'envs' or 'daemon_args' at runtime.
    # NOTE(review): the closing braces of the nested dicts and some entries
    # (e.g. an 'image' for 'elasticsearch' and 'jaeger-collector') are not
    # present in this text -- restore from the authoritative source.
    components: Dict[str, Dict[str, Any]] = {
        'elasticsearch': {
            'envs': ['discovery.type=single-node']
        'jaeger-agent': {
            'image': DEFAULT_JAEGER_AGENT_IMAGE,
        'jaeger-collector': {
        'jaeger-query': {
            'image': DEFAULT_JAEGER_QUERY_IMAGE,
    }  # type: ignore

    # Fills in the runtime settings of one daemon type in ``components``:
    # 'envs' for 'jaeger-collector' / 'jaeger-query' (requires the
    # 'elasticsearch_nodes' config key) and 'daemon_args' for 'jaeger-agent'
    # (requires the 'collector_nodes' config key).
    # NOTE(review): takes no self/cls and addresses the class by name, so it
    # is presumably meant to be a staticmethod; the contents of both list
    # literals are missing from this text -- restore before use.
    def set_configuration(config: Dict[str, str], daemon_type: str) -> None:
        if daemon_type in ['jaeger-collector', 'jaeger-query']:
            assert 'elasticsearch_nodes' in config
            Tracing.components[daemon_type]['envs'] = [
        if daemon_type == 'jaeger-agent':
            assert 'collector_nodes' in config
            Tracing.components[daemon_type]['daemon_args'] = [


class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Store the daemon identity and read all options from config-json."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Alternate constructor: config from ``fetch_configs(ctx)``, image from ``ctx.image``."""
        return cls(fsid, daemon_id,
                   fetch_configs(ctx), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.

        :raises OSError: if ``data_dir`` is not an existing directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            # strip slashes so the path is always created below data_dir
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        """Return the daemon arguments (always empty for a custom container)."""
        return []

    def get_container_args(self) -> List[str]:
        """Return the ``args`` option from config-json."""
        return self.args

    def get_container_envs(self) -> List[str]:
        """Return the ``envs`` option from config-json."""
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        ``data_dir``; absolute source paths are kept as they are.

        Example, with ``data_dir`` set to ``/data``::

            /foo/conf: /conf   stays   /foo/conf: /conf
            foo/conf: /conf    becomes /data/foo/conf: /conf
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        ``data_dir``; absolute ones are kept as they are.

        New lists are returned; ``self.bind_mounts`` is left untouched (the
        previous shallow copy rewrote the stored inner lists in place).
        """
        binds = []
        for bind in self.bind_mounts:
            resolved = []
            for value in bind:
                match = re.match(r'^source=(.+)$', value)
                if match:
                    value = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
                resolved.append(value)
            binds.append(resolved)
        return binds


def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """Create ``file_path`` if needed, update its mtime, optionally chown it.

    :param file_path: path of the file to create or touch.
    :param uid: owner to set; ownership is only changed when both ``uid``
        and ``gid`` are truthy (so a 0 id leaves ownership unchanged).
    :param gid: group to set, see ``uid``.
    """
    from pathlib import Path

    # create the file first; chown on a missing path would raise
    Path(file_path).touch()
    if uid and gid:
        os.chown(file_path, uid, gid)


def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore


def dict_get_join(d: Dict[str, Any], key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joined with a line break. `None` if the key is missing.
    """
    value = d.get(key)
    if isinstance(value, list):
        value = '\n'.join(map(str, value))
    return value


def get_supported_daemons():
    # type: () -> List[str]
    """Return the daemon type names from ``Ceph.daemons`` as a new list.

    Asserts that the list contains no duplicates.
    """
    daemon_names = [name for name in Ceph.daemons]
    assert len(set(daemon_names)) == len(daemon_names)
    return daemon_names


class PortOccupiedError(Error):
    """Raised when an address/port cannot be bound because it is already in use."""

def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    """Try to bind socket ``s`` to ``address``/``port``, then close it.

    The socket is only used as a probe: it is always closed before
    returning or raising, so it is not leaked.

    :raises PortOccupiedError: if the bind fails with EADDRINUSE.
    :raises OSError: for any other OS-level bind failure (re-raised as is).
    :raises Error: for any other exception.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
            raise PortOccupiedError(msg)
        else:
            raise e
    except Exception as e:
        raise Error(e)
    finally:
        s.close()

def port_in_use(ctx: CephadmContext, endpoint: EndPoint) -> bool:
    """Detect whether a port is in use on the local machine - IPv4 and IPv6

    When ``endpoint.ip`` is a specific address only that address is probed;
    when it is ``''`` or ``'::'`` both address families are probed on their
    wildcard address and the port counts as used if either probe says so.
    """
    logger.info('Verifying port %s ...' % str(endpoint))

    def _port_in_use(af: socket.AddressFamily, address: str) -> bool:
        try:
            s = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(ctx, s, address, endpoint.port)
        except PortOccupiedError:
            return True
        except OSError as e:
            if e.errno in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
                # being tested here and one might be intentionally be disabled.
                # In that case no error should be raised.
                return False
            else:
                raise e
        return False

    if endpoint.ip != '' and endpoint.ip != '::':
        if is_ipv6(endpoint.ip):
            return _port_in_use(socket.AF_INET6, endpoint.ip)
        else:
            return _port_in_use(socket.AF_INET, endpoint.ip)

    return any(_port_in_use(af, address) for af, address in (
        (socket.AF_INET, ''),
        (socket.AF_INET6, '::')
    ))

def check_ip_port(ctx, ep):
    # type: (CephadmContext, EndPoint) -> None
    """Verify that ``ep.ip``/``ep.port`` can be bound, unless the check is skipped.

    Does nothing when ``ctx.skip_ping_check`` is set. Otherwise a socket of
    the address family matching ``ep.ip`` is created and handed to
    ``attempt_bind``, whose exceptions propagate to the caller.
    """
    if not ctx.skip_ping_check:
        logger.info(f'Verifying IP {ep.ip} port {ep.port} ...')
        if is_ipv6(ep.ip):
            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
            ip = unwrap_ipv6(ep.ip)
        else:
            # without this branch the IPv6 socket was replaced (and leaked)
            # and the unwrapped address discarded
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            ip = ep.ip
        attempt_bind(ctx, s, ip, ep.port)


# this is an abbreviated version of
# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
# that drops all of the compatibility (this is Unix/Linux only).

class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file: str) -> None:
        """Remember the path of the lock file that could not be acquired."""
        #: The path of the file lock.
        self.lock_file = lock_file
        return None

    def __str__(self) -> str:
        """Return a message naming the lock file."""
        temp = "The file lock '{}' could not be acquired."\
               .format(self.lock_file)
        return temp

class _Acquire_ReturnProxy(object):
    def __init__(self, lock: 'FileLock') -> None:
        self.lock = lock
        return None

    def __enter__(self) -> 'FileLock':
        return self.lock

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        return None

class FileLock(object):
    """Re-entrant, flock-based file lock stored as ``LOCK_DIR/NAME.lock``."""

    def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
        """Prepare the lock; nothing is acquired yet.

        :param ctx: the cephadm context (stored, not otherwise used here).
        :param name: lock name; the lock file is ``name + '.lock'`` in LOCK_DIR.
        :param timeout: default timeout in seconds for :meth:`acquire`;
            a negative value means wait forever.
        """
        try:
            os.mkdir(LOCK_DIR, 0o700)
        except FileExistsError:
            # already there (possibly created concurrently) - fine
            pass
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
        self.ctx = ctx

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self) -> bool:
        """True while this object holds the lock (a file descriptor is stored)."""
        return self._lock_file_fd is not None

    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.

        .. code-block:: python

            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass

            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()

        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Acquiring lock %s on %s', lock_id,
                               lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Lock %s acquired on %s', lock_id,
                               lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.log(
                        QUIET_LOG_LEVEL,
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except Exception:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)
            # never hand out a proxy for a lock that was not acquired
            raise

        return _Acquire_ReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:
        """
        Releases the file lock.
        Please note, that the lock is only completely released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                # lock_id = id(self)
                # lock_filename = self._lock_file

                # Can't log in shutdown:
                #  File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
                #    NameError: name 'open' is not defined
                # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                # logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self) -> 'FileLock':
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.release()
        return None

    def __del__(self) -> None:
        # drop the lock regardless of the nesting counter when collected
        self.release(force=True)
        return None

    def _acquire(self) -> None:
        """Try once, without blocking, to take the exclusive flock."""
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            # somebody else holds the lock; do not leak the descriptor
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self) -> None:
        """Unlock and close the stored descriptor."""
        # Do not remove the lockfile:
        #   https://github.com/benediktschmitt/py-filelock/issues/31
        #   https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None

# Popen wrappers, lifted from ceph-volume

class CallVerbosity(Enum):
    """How much of a called command's output is logged, and at which level."""
    # Format:
    # Normal Operation: LEVEL-WITHOUT-ERRORS, Errors: LEVEL-ON-ERROR
    # NOTE: QUIET log level is custom level only used when --verbose is passed

    # Normal Operation: None, Errors: None
    SILENT = 0
    # Normal Operation: QUIET, Error: QUIET
    QUIET = 1
    # Normal Operation: DEBUG, Error: DEBUG
    DEBUG = 2
    # Normal Operation: QUIET, Error: INFO
    QUIET_UNLESS_ERROR = 3
    # Normal Operation: DEBUG, Error: INFO
    VERBOSE_ON_FAILURE = 4
    # Normal Operation: INFO, Error: INFO
    VERBOSE = 5

    def success_log_level(self) -> int:
        """Return the logging level used when the command succeeded."""
        _verbosity_level_to_log_level = {
            CallVerbosity.SILENT: 0,
            CallVerbosity.QUIET: QUIET_LOG_LEVEL,
            CallVerbosity.DEBUG: logging.DEBUG,
            CallVerbosity.QUIET_UNLESS_ERROR: QUIET_LOG_LEVEL,
            CallVerbosity.VERBOSE_ON_FAILURE: logging.DEBUG,
            CallVerbosity.VERBOSE: logging.INFO
        }
        return _verbosity_level_to_log_level[self]  # type: ignore

    def error_log_level(self) -> int:
        """Return the logging level used when the command failed."""
        _verbosity_level_to_log_level = {
            CallVerbosity.SILENT: 0,
            CallVerbosity.QUIET: QUIET_LOG_LEVEL,
            CallVerbosity.DEBUG: logging.DEBUG,
            CallVerbosity.QUIET_UNLESS_ERROR: logging.INFO,
            CallVerbosity.VERBOSE_ON_FAILURE: logging.INFO,
            CallVerbosity.VERBOSE: logging.INFO
        }
        return _verbosity_level_to_log_level[self]  # type: ignore

# disable coverage for the next block. this is copy-n-paste
# from other code for compatibility on older python versions
if sys.version_info < (3, 8):  # pragma: no cover
    import itertools
    import threading
    import warnings
    from asyncio import events

    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
        """Threaded child watcher implementation.
        The watcher uses a thread per process
        for waiting for the process finish.
        It doesn't require subscription on POSIX signal
        but a thread creation is not free.
        The watcher has O(1) complexity, its performance doesn't depend
        on amount of spawn processes.
        """

        def __init__(self) -> None:
            self._pid_counter = itertools.count(0)
            self._threads: Dict[Any, Any] = {}

        def is_active(self) -> bool:
            return True

        def close(self) -> None:
            self._join_threads()

        def _join_threads(self) -> None:
            """Internal: Join all non-daemon threads"""
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive() and not thread.daemon]
            for thread in threads:
                thread.join()

        def __enter__(self) -> Any:
            return self

        def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
            pass

        def __del__(self, _warn: Any = warnings.warn) -> None:
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive()]
            if threads:
                _warn(f'{self.__class__} has registered but not finished child processes',
                      ResourceWarning,
                      source=self)

        def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None:
            loop = events.get_event_loop()
            thread = threading.Thread(target=self._do_waitpid,
                                      name=f'waitpid-{next(self._pid_counter)}',
                                      args=(loop, pid, callback, args),
                                      daemon=True)
            self._threads[pid] = thread
            thread.start()

        def remove_child_handler(self, pid: Any) -> bool:
            # asyncio never calls remove_child_handler() !!!
            # The method is no-op but is implemented because
            # abstract base class requires it
            return True

        def attach_loop(self, loop: Any) -> None:
            pass

        def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None:
            assert expected_pid > 0

            try:
                pid, status = os.waitpid(expected_pid, 0)
            except ChildProcessError:
                # The child process is already reaped
                # (may happen if waitpid() is called elsewhere).
                pid = expected_pid
                returncode = 255
                logger.warning(
                    'Unknown child process pid %d, will report returncode 255',
                    pid)
            else:
                if os.WIFEXITED(status):
                    returncode = os.WEXITSTATUS(status)
                elif os.WIFSIGNALED(status):
                    returncode = -os.WTERMSIG(status)
                else:
                    raise ValueError(f'unknown wait status {status}')
                if loop.get_debug():
                    logger.debug('process %s exited with returncode %s',
                                 expected_pid, returncode)

            if loop.is_closed():
                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
            else:
                loop.call_soon_threadsafe(callback, pid, returncode, *args)

            # the waiter thread is done; forget it
            self._threads.pop(expected_pid)

    # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
    # ThreadedChildWatcher runs in a separated thread, hence allows us to
    # run create_subprocess_exec() in non-main thread, see
    # https://bugs.python.org/issue35621
    asyncio.set_child_watcher(ThreadedChildWatcher())

try:
    from asyncio import run as async_run   # type: ignore[attr-defined]
except ImportError:  # pragma: no cover
    # disable coverage for this block. it should be a copy-n-paste
    # from newer libs for compatibility on older python versions
    def async_run(coro):  # type: ignore
        """Run ``coro`` to completion on a fresh event loop and close the loop."""
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            try:
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                asyncio.set_event_loop(None)
                loop.close()

def call(ctx: CephadmContext,
         command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs: Any) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    :param ctx: context; ``ctx.timeout`` is used when ``timeout`` is falsy
    :param command: the program and its arguments
    :param desc: log line prefix; defaults to ``command[0]``
    :param verbosity: selects the log level for success and for failure
    :param timeout: timeout in seconds
    :param kwargs: accepted for call compatibility; not used by this
        function (NOTE(review): confirm no caller relies on them)
    :return: ``(stdout, stderr, returncode)``; on timeout ``('', '', 124)``
    """
    prefix = command[0] if desc is None else desc
    if prefix:
        prefix += ': '
    timeout = timeout or ctx.timeout

    async def run_with_timeout() -> Tuple[str, str, int]:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=os.environ.copy())
        assert process.stdout
        assert process.stderr
        try:
            stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout,
            )
        except asyncio.TimeoutError:
            # try to terminate the process assuming it is still running.  It's
            # possible that even after killing the process it will not
            # complete, particularly if it is D-state.  If that happens the
            # process.wait call will block, but we're no worse off than before
            # when the timeout did not work.  Additionally, there are other
            # corner-cases we could try and handle here but we decided to start
            # simple.
            process.kill()
            await process.wait()
            logger.info(prefix + f'timeout after {timeout} seconds')
            return '', '', 124
        else:
            assert process.returncode is not None
            return (
                stdout.decode('utf-8'),
                stderr.decode('utf-8'),
                process.returncode,
            )

    stdout, stderr, returncode = async_run(run_with_timeout())
    log_level = verbosity.success_log_level()
    if returncode != 0:
        log_level = verbosity.error_log_level()
        logger.log(log_level, f'Non-zero exit code {returncode} from {" ".join(command)}')
    for line in stdout.splitlines():
        logger.log(log_level, prefix + 'stdout ' + line)
    for line in stderr.splitlines():
        logger.log(log_level, prefix + 'stderr ' + line)
    return stdout, stderr, returncode

def call_throws(
        ctx: CephadmContext,
        command: List[str],
        desc: Optional[str] = None,
        verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
        timeout: Optional[int] = DEFAULT_TIMEOUT,
        **kwargs: Any) -> Tuple[str, str, int]:
    """Run ``call`` and raise ``RuntimeError`` on a non-zero return code.

    The error message includes stdout (checked first) or stderr when that
    stream is non-blank and at most two lines long; otherwise only the
    command line is reported.

    :return: ``(stdout, stderr, returncode)`` of the successful command.
    """
    out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
    if not ret:
        return out, err, ret

    for stream in (out, err):
        is_short = len(stream.splitlines()) <= 2  # readable message?
        if stream.strip() and is_short:
            raise RuntimeError(f'Failed command: {" ".join(command)}: {stream}')
    raise RuntimeError('Failed command: %s' % ' '.join(command))

def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    """Run ``command`` with a time limit and return its exit code.

    :param ctx: context (not used by this function).
    :param command: the program and its arguments.
    :param timeout: limit in seconds.
    :raises TimeoutExpired: if the command does not finish in time.
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def raise_timeout(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        raise TimeoutExpired(msg)

    try:
        return subprocess.call(command, timeout=timeout, env=os.environ.copy())
    except subprocess.TimeoutExpired:
        # translate the stdlib exception into this file's own TimeoutExpired
        raise_timeout(command, timeout)


def json_loads_retry(cli_func: Callable[[], str]) -> Any:
    """Parse the JSON text returned by ``cli_func``, retrying on bad JSON.

    ``cli_func`` is called again after waiting 1, 4 and 4 seconds; a final
    fourth failure lets ``json.JSONDecodeError`` propagate.

    :param cli_func: callable returning the JSON text.
    :return: the decoded JSON value.
    """
    for sleep_secs in [1, 4, 4]:
        try:
            return json.loads(cli_func())
        except json.JSONDecodeError:
            logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
            time.sleep(sleep_secs)
    return json.loads(cli_func())

def is_available(ctx, what, func):
    # type: (CephadmContext, str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param ctx: context; ``ctx.retry`` is the number of retries allowed
    :param what: the name of the service
    :param func: the callable object that determines availability
    :raises Error: if ``func`` is still false after ``ctx.retry`` retries
    """
    retry = ctx.retry
    logger.info('Waiting for %s...' % what)
    num = 1
    while True:
        if func():
            logger.info('%s is available'
                        % what)
            # done; without this the loop never terminated on success
            break
        elif num > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))

        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, num, retry))

        num += 1
        # NOTE(review): pause between polls so the service is not hammered;
        # confirm the intended interval.
        time.sleep(2)

def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Return a ``ConfigParser`` loaded from ``fn``.

    The parser is empty when ``fn`` is falsy; ``ConfigParser.read`` also
    silently skips a file that cannot be opened.
    """
    cp = ConfigParser()
    if fn:
        cp.read(fn)
    return cp

def pathify(p):
    # type: (str) -> str
    """Expand a leading ``~`` in ``p`` and return the absolute path."""
    return os.path.abspath(os.path.expanduser(p))

def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return the modification time of ``fn`` in UTC, formatted with DATEFMT.

    Best effort: returns ``None`` when the timestamp cannot be obtained
    (for example when the file does not exist).
    """
    try:
        mt = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mt, tz=datetime.timezone.utc
        ).strftime(DATEFMT)
    except Exception:
        return None

def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """Normalize a docker/podman timestamp to UTC, rendered as DATEFMT.

    :return: the converted timestamp, or ``None`` if no known format matches.
    """
    # This is super irritating because
    #  1) podman and docker use different formats
    #  2) python's strptime can't parse either one
    # I've seen:
    #  docker 18.09.7:  2020-03-03T09:21:43.636153304Z
    #  podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
    #                   2020-03-03 15:52:30.136257504 -0600 CST
    # (In the podman case, there is a different string format for
    # 'inspect' and 'inspect --format {{.Created}}'!!)

    # In *all* cases, the 9 digit second precision is too much for
    # python's strptime.  Shorten it to 6 digits.
    p = re.compile(r'(\.[\d]{6})[\d]*')
    s = p.sub(r'\1', s)

    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
    if s and s[-1] == 'Z':
        s = s[:-1] + '-0000'

    # cut off the redundant 'CST' part that strptime can't parse, if
    # present.
    v = s.split(' ')
    s = ' '.join(v[0:3])

    # try parsing with several format strings
    # NOTE(review): the 'T'-separated format is derived from the sample
    # timestamps listed above -- confirm the complete list.
    fmts = [
        '%Y-%m-%dT%H:%M:%S.%f%z',
        '%Y-%m-%d %H:%M:%S.%f %z',
    ]
    for f in fmts:
        try:
            # return timestamp normalized to UTC, rendered as DATEFMT.
            return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            # not this format; try the next one
            pass
    return None

def _parse_podman_version(version_str):
    # type: (str) -> Tuple[int, ...]
    def to_int(val: str, org_e: Optional[Exception] = None) -> int:
        if not val and org_e:
            raise org_e
            return int(val)
        except ValueError as e:
            return to_int(val[0:-1], org_e or e)

    return tuple(map(to_int, version_str.split('.')))

def get_hostname():
    # type: () -> str
    """Return the host name reported by ``socket.gethostname()``."""
    hostname = socket.gethostname()
    return hostname

def get_short_hostname():
    # type: () -> str
    """Return the host name up to (not including) its first dot."""
    first_label = get_hostname().split('.', 1)[0]
    return first_label

def get_fqdn():
    # type: () -> str
    """Return ``socket.getfqdn()``, or the plain host name if that is empty."""
    fqdn = socket.getfqdn()
    if fqdn:
        return fqdn
    return socket.gethostname()

def get_ip_addresses(hostname: str) -> Tuple[List[str], List[str]]:
    """Resolve ``hostname`` and return its addresses grouped by family.

    :param hostname: host name (or literal address) to resolve
    :return: ``(ipv4_addresses, ipv6_addresses)``
    :raises socket.gaierror: propagated from ``socket.getaddrinfo``
    """
    # Limit the lookup to a single socket type; otherwise getaddrinfo may
    # report each address once per socket type.
    items = socket.getaddrinfo(hostname, None,
                               flags=socket.AI_CANONNAME,
                               type=socket.SOCK_STREAM)
    # each item is (family, type, proto, canonname, sockaddr); sockaddr[0]
    # is the address string
    ipv4_addresses = [i[4][0] for i in items if i[0] == socket.AF_INET]
    ipv6_addresses = [i[4][0] for i in items if i[0] == socket.AF_INET6]
    return ipv4_addresses, ipv6_addresses

def get_arch():
    # type: () -> str
    """Return the machine type reported by ``platform.uname()``."""
    uname_info = platform.uname()
    return uname_info.machine

def generate_service_id():
    # type: () -> str
    """Return ``<short hostname>.<six random lowercase letters>``."""
    host = get_short_hostname()
    letters = [random.choice(string.ascii_lowercase) for _ in range(6)]
    return host + '.' + ''.join(letters)

def generate_password():
    # type: () -> str
    """Return a random 10-character password of lowercase letters and digits.

    The characters are drawn with the ``secrets`` module because the
    ``random`` module's generator is not meant for security purposes.
    """
    # Imported locally so this block does not depend on a file-level import.
    import secrets

    alphabet = string.ascii_lowercase + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(10))

def normalize_container_id(i):
    # type: (str) -> str
    """Return the container/image id without a leading ``sha256:``."""
    # docker prints the 'sha256:' prefix while podman does not; both always
    # use sha256, so the prefix is dropped to get one consistent form.
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i

def make_fsid():
    # type: () -> str
    """Return a freshly generated version-1 UUID as a string."""
    new_id = uuid.uuid1()
    return str(new_id)

def is_fsid(s):
    # type: (str) -> bool
    """Return True when ``s`` parses as a UUID, False otherwise."""
    try:
        uuid.UUID(s)
    except ValueError:
        return False
    return True

def validate_fsid(func: FuncT) -> FuncT:
    """Decorator that rejects a malformed ``ctx.fsid`` before calling ``func``.

    A context without an fsid (or with an empty one) is passed through
    unchanged.

    :raises Error: when ``ctx.fsid`` is set but is not a valid fsid
    """
    @wraps(func)  # keep the wrapped command's name and docstring
    def _validate_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            if not is_fsid(ctx.fsid):
                raise Error('not an fsid: %s' % ctx.fsid)
        return func(ctx)
    return cast(FuncT, _validate_fsid)

def infer_fsid(func: FuncT) -> FuncT:
    """
    If we only find a single fsid in /var/lib/ceph/*, use that

    Decorator. An fsid already present on the context is kept. Otherwise
    candidates are gathered from the ``[global] fsid`` option of the file
    named by ``ctx.config`` and from the daemons returned by
    ``list_daemons``; when exactly one candidate remains it is stored in
    ``ctx.fsid``.

    :raises Error: when more than one candidate fsid is found
    """
    @wraps(func)  # keep the wrapped command's name and docstring
    def _infer_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            logger.debug('Using specified fsid: %s' % ctx.fsid)
            return func(ctx)

        fsids = set()

        # NOTE(review): this reads ctx.config unguarded; presumably
        # infer_config is expected to have run first -- confirm.
        cp = read_config(ctx.config)
        if cp.has_option('global', 'fsid'):
            fsids.add(cp.get('global', 'fsid'))

        daemon_list = list_daemons(ctx, detail=False)
        for daemon in daemon_list:
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            elif 'name' not in ctx or not ctx.name:
                # ctx.name not specified
                fsids.add(daemon['fsid'])
            elif daemon['name'] == ctx.name:
                # ctx.name is a match
                fsids.add(daemon['fsid'])
        fsids = sorted(fsids)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            ctx.fsid = fsids[0]
        else:
            raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids)
        return func(ctx)

    return cast(FuncT, _infer_fsid)

def infer_config(func: FuncT) -> FuncT:
    """
    Infer the cluster configuration using the following priority order:
     1- if the user has provided custom conf file (-c option) use it
     2- otherwise if daemon --name has been provided use daemon conf
     3- otherwise find the mon daemon conf file and use it (if v1)
     4- otherwise if {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it
     5- finally: fallback to the default file /etc/ceph/ceph.conf

    Decorator; the selected path is stored in ``ctx.config`` (which stays
    None when no candidate is found) before ``func`` is called.
    """
    @wraps(func)  # keep the wrapped command's name and docstring
    def _infer_config(ctx: CephadmContext) -> Any:

        def config_path(daemon_type: str, daemon_name: str) -> str:
            # path of the 'config' file inside a daemon's data directory
            data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name)
            return os.path.join(data_dir, 'config')

        def get_mon_daemon_name(fsid: str) -> Optional[str]:
            # first 'cephadm:v1' mon of this cluster whose config file exists
            daemon_list = list_daemons(ctx, detail=False)
            for daemon in daemon_list:
                if (
                    daemon.get('name', '').startswith('mon.')
                    and daemon.get('fsid', '') == fsid
                    and daemon.get('style', '') == 'cephadm:v1'
                    and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1]))
                ):
                    return daemon['name']
            return None

        # make sure the attribute exists so the checks below cannot fail
        ctx.config = ctx.config if 'config' in ctx else None
        #  check if user has provided conf by using -c option
        if ctx.config and (ctx.config != CEPH_DEFAULT_CONF):
            logger.debug(f'Using specified config: {ctx.config}')
            return func(ctx)

        if 'fsid' in ctx and ctx.fsid:
            name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid)
            if name is not None:
                # daemon name has been specified (or inferred from mon), let's use its conf
                ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1])
            else:
                # no daemon, in case the cluster has a config dir then use it
                ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
                if os.path.exists(ceph_conf):
                    ctx.config = ceph_conf

        if ctx.config:
            logger.info(f'Inferring config {ctx.config}')
        elif os.path.exists(CEPH_DEFAULT_CONF):
            logger.debug(f'Using default config {CEPH_DEFAULT_CONF}')
            ctx.config = CEPH_DEFAULT_CONF
        return func(ctx)

    return cast(FuncT, _infer_config)

def _get_default_image(ctx: CephadmContext) -> str:
    # Returns DEFAULT_IMAGE (see the final return). The lines before it log a
    # "development version" notice line by line at warning level, wrapped in
    # termcolor.yellow / termcolor.end.
    # NOTE(review): this block does not parse as it stands. The next line is
    # indented as if a guarding `if` preceded it, and the triple-quoted string
    # it opens is never terminated. The guard condition and the remainder of
    # the message are not visible here -- restore them before relying on
    # this function.
        warn = """This is a development version of cephadm.
For information regarding the latest stable release:
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE

def infer_image(func: FuncT) -> FuncT:
    """
    Use the most recent ceph image

    Decorator. When ``ctx.image`` is not set it is filled from, in order:
    the ``CEPHADM_IMAGE`` environment variable, ``infer_local_ceph_image``
    and finally ``_get_default_image``.
    """
    @wraps(func)  # keep the wrapped command's name and docstring
    def _infer_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            ctx.image = os.environ.get('CEPHADM_IMAGE')
        if not ctx.image:
            ctx.image = infer_local_ceph_image(ctx, ctx.container_engine.path)
        if not ctx.image:
            ctx.image = _get_default_image(ctx)
        return func(ctx)

    return cast(FuncT, _infer_image)

def require_image(func: FuncT) -> FuncT:
    """
    Require the global --image flag to be set

    :raises Error: when ``ctx.image`` is empty at call time
    """
    @wraps(func)  # keep the wrapped command's name and docstring
    def _require_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            raise Error('This command requires the global --image option to be set')
        return func(ctx)

    return cast(FuncT, _require_image)

def default_image(func: FuncT) -> FuncT:
    """Decorator that fills in ``ctx.image`` before calling ``func``.

    The image is chosen by ``update_default_image``, which leaves an image
    that is already set untouched.
    """
    @wraps(func)  # keep the wrapped command's name and docstring
    def _default_image(ctx: CephadmContext) -> Any:
        update_default_image(ctx)
        return func(ctx)

    return cast(FuncT, _default_image)

def update_default_image(ctx: CephadmContext) -> None:
    """Set ``ctx.image`` when the caller did not choose an image.

    An image already present on the context wins. Otherwise the daemon type
    taken from ``ctx.name`` (the part before the first dot) selects the
    default image of the matching component; failing that the
    ``CEPHADM_IMAGE`` environment variable and finally
    ``_get_default_image`` are used.
    """
    if getattr(ctx, 'image', None):
        return
    ctx.image = None  # ensure ctx.image exists to avoid repeated `getattr`s
    name = getattr(ctx, 'name', None)
    if name:
        type_ = name.split('.', 1)[0]
        if type_ in Monitoring.components:
            ctx.image = Monitoring.components[type_]['image']
        if type_ == 'haproxy':
            ctx.image = HAproxy.default_image
        if type_ == 'keepalived':
            ctx.image = Keepalived.default_image
        if type_ == SNMPGateway.daemon_type:
            ctx.image = SNMPGateway.default_image
        if type_ == CephNvmeof.daemon_type:
            ctx.image = CephNvmeof.default_image
        if type_ in Tracing.components:
            ctx.image = Tracing.components[type_]['image']
    if not ctx.image:
        ctx.image = os.environ.get('CEPHADM_IMAGE')
    if not ctx.image:
        ctx.image = _get_default_image(ctx)

def executes_early(func: FuncT) -> FuncT:
    """Decorator that indicates the command function is meant to have no
    dependencies and no environmental requirements and can therefore be
    executed as non-root and with no logging, etc. Commands that have this
    decorator applied must be simple and self-contained.

    The function itself is returned; it is only tagged with the attribute
    ``_execute_early = True``.
    """
    cast(Any, func)._execute_early = True
    return func

def deprecated_command(func: FuncT) -> FuncT:
    """Decorator marking a command as deprecated.

    Every call logs a warning. When the module-level ``NO_DEPRECATED`` flag
    is truthy the command is refused instead of being run.

    :raises Error: when ``NO_DEPRECATED`` is truthy
    """
    @wraps(func)  # keep the wrapped command's name and docstring
    def _deprecated_command(ctx: CephadmContext) -> Any:
        logger.warning(f'Deprecated command used: {func}')
        if NO_DEPRECATED:
            raise Error('running deprecated commands disabled')
        return func(ctx)

    return cast(FuncT, _deprecated_command)

def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]:
    """
    Look up the container of the first daemon matching ``daemon_filter``.

    Only daemons whose fsid equals ``ctx.fsid`` are considered.

    :param ctx: Cephadm context
    :param daemon_filter: daemon name or type
    :param by_name: must be set to True if daemon name is provided
    :return: Container information or None
    """
    def daemon_name_or_type(daemon: Dict[str, str]) -> str:
        # full name ('mon.a') when matching by name, otherwise the type ('mon')
        return daemon['name'] if by_name else daemon['name'].split('.', 1)[0]

    if by_name and '.' not in daemon_filter:
        logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
        return None
    daemons = list_daemons(ctx, detail=False)
    matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
    if matching_daemons:
        d_type, d_id = matching_daemons[0]['name'].split('.', 1)
        out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
        if not code:
            # the stats output is one comma-separated record of five fields
            (container_id, image_name, image_id, start, version) = out.strip().split(',')
            return ContainerInfo(container_id, image_name, image_id, start, version)
    return None

def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
    """
     Infer the local ceph image based on the following priority criteria:
       1- the image specified by --image arg (if provided).
       2- the same image as the daemon container specified by --name arg (if provided).
       3- image used by any ceph container running on the host. In this case we use daemon types.
       4- if no container is found then we use the most ceph recent image on the host.

     Note: any selected container must have the same fsid inferred previously.

    :param ctx: Cephadm context
    :param container_path: path of the container engine executable
    :return: The most recent local ceph image (already pulled)
    """
    # '|' special character is used to separate the output fields into:
    #  - Repository@digest
    #  - Image Id
    #  - Image Tag
    #  - Image creation date
    out, _, _ = call_throws(ctx,
                            [container_path, 'images',
                             '--filter', 'label=ceph=True',
                             '--filter', 'dangling=false',
                             '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])

    container_info = None
    daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
    daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons  # daemon types: 'mon', 'mgr', etc
    for daemon in daemons_ls:
        container_info = get_container_info(ctx, daemon, daemon_name is not None)
        if container_info is not None:
            logger.debug(f"Using container info for daemon '{daemon}'")
            # keep the first match; later candidates must not overwrite it
            break

    for image in out.splitlines():
        if image and not image.isspace():
            (digest, image_id, tag, created_date) = image.lstrip().split('|')
            if container_info is not None and image_id not in container_info.image_id:
                # a container was found: only its image is acceptable
                continue
            if digest and not digest.endswith('@'):
                logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
                return digest
    return None

def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    """Write ``s`` to a new named temporary file owned by ``uid``/``gid``.

    :return: the open file object; ``NamedTemporaryFile`` removes the file
        when it is closed, so the caller must keep the object alive for as
        long as the file is needed
    """
    tmp_f = tempfile.NamedTemporaryFile(mode='w',
                                        prefix='ceph-tmp')
    os.fchown(tmp_f.fileno(), uid, gid)
    tmp_f.write(s)
    # flush so that other readers of tmp_f.name see the content
    tmp_f.flush()

    return tmp_f

def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Make sure ``dir`` exists with the given owner and mode.

    Missing parent directories are created as well. Owner and mode are
    applied to ``dir`` whether or not it existed before.
    """
    missing = not os.path.exists(dir)
    if missing:
        os.makedirs(dir, mode=mode)
        os.chmod(dir, mode)
    os.chown(dir, uid, gid)
    # the mode passed to os.makedirs is masked by umask, so set it explicitly
    os.chmod(dir, mode)

def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    """Return ``<data_dir>/<fsid>/<t>.<n>``, a daemon's data directory path."""
    daemon_dir_name = '%s.%s' % (t, n)
    return os.path.join(data_dir, fsid, daemon_dir_name)

def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    """Return ``<log_dir>/<fsid>``, the log directory path of a cluster."""
    cluster_log_dir = os.path.join(log_dir, fsid)
    return cluster_log_dir

def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    """Create the per-cluster data directory and its crash sub-directories.

    Creates ``<data_dir>/<fsid>``, ``.../crash`` and ``.../crash/posted``,
    each owned by ``uid``/``gid`` with mode ``DATA_DIR_MODE``.

    :return: the path ``<data_dir>/<fsid>``
    """
    data_dir_base = os.path.join(data_dir, fsid)
    makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
             DATA_DIR_MODE)
    return data_dir_base

def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create the data directory of one daemon and return its path.

    The cluster base directory is created first. When either ``uid`` or
    ``gid`` is None both are taken from ``extract_uid_gid(ctx)``.
    """
    owner_given = uid is not None and gid is not None
    if not owner_given:
        uid, gid = extract_uid_gid(ctx)
    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
    daemon_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    makedirs(daemon_dir, uid, gid, DATA_DIR_MODE)
    return daemon_dir

def make_log_dir(ctx, fsid, uid=None, gid=None):
    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
    """Create the log directory of a cluster and return its path.

    When either ``uid`` or ``gid`` is None both are taken from
    ``extract_uid_gid(ctx)``.
    """
    owner_given = uid is not None and gid is not None
    if not owner_given:
        uid, gid = extract_uid_gid(ctx)
    cluster_log_dir = get_log_dir(fsid, ctx.log_dir)
    makedirs(cluster_log_dir, uid, gid, LOG_DIR_MODE)
    return cluster_log_dir

def make_var_run(ctx, fsid, uid, gid):
    # type: (CephadmContext, str, int, int) -> None
    """Create ``/var/run/ceph/<fsid>`` (mode 0770) by running ``install -d``."""
    run_dir = '/var/run/ceph/%s' % fsid
    cmd = ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid), run_dir]
    call_throws(ctx, cmd)

def copy_tree(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst

    :param src: source directories
    :param dst: destination; when it is an existing directory each source is
        copied to ``dst/<basename of source>``, otherwise to ``dst`` itself
    :param uid: owner for everything copied; when either ``uid`` or ``gid``
        is None both come from ``extract_uid_gid(ctx)``
    :param gid: group for everything copied

    Any existing destination tree is removed before copying.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                # log the path that is actually chowned, not the bare name
                logger.debug('chown %s:%s `%s`' % (uid, gid, file_path))
                os.chown(file_path, uid, gid)

def copy_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy files from src to dst

    :param src: source files
    :param dst: destination; when it is an existing directory each file is
        copied to ``dst/<basename of source>``, otherwise to ``dst`` itself
    :param uid: owner for the copies; when either ``uid`` or ``gid`` is None
        both come from ``extract_uid_gid(ctx)``
    :param gid: group for the copies
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)

def move_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst

    :param src: source files
    :param dst: destination; when it is an existing directory each file is
        moved to ``dst/<basename of source>``, otherwise to ``dst`` itself
    :param uid: owner for moved regular files; when either ``uid`` or
        ``gid`` is None both come from ``extract_uid_gid(ctx)``
    :param gid: group for moved regular files

    A symlink is re-created at the destination with the same target and the
    source link is removed; its ownership is not changed.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            # a move must not leave the source link behind
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)

def recursive_chown(path: str, uid: int, gid: int) -> None:
    """Set owner and group of every directory and file found below ``path``.

    The tree is traversed with ``os.walk``; each directory is changed before
    the files it contains.
    """
    for root, _subdirs, files in os.walk(path):
        targets = [root] + [os.path.join(root, name) for name in files]
        for target in targets:
            os.chown(target, uid, gid)

# copied from distutils
def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
    """Tries to find 'executable' in the directories listed in 'path'.

    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH'].  Returns the complete filename or None if not found.

    ``executable`` is returned unchanged when it already names an existing
    file. Only existence is tested, not the executable permission bit.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'  # pragma: no cover

    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr('CS_PATH')
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    paths = path.split(os.pathsep)
    for p in paths:
        f = os.path.join(p, executable)
        if os.path.isfile(f):
            # the file exists, we have a shot at spawn working
            return f
    return None

def find_program(filename):
    # type: (str) -> str
    """Return the path of ``filename`` as located by ``find_executable``.

    :raises ValueError: when the program cannot be found
    """
    located = find_executable(filename)
    if located is not None:
        return located
    raise ValueError('%s not found' % filename)

def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
    """Pick the container engine to use.

    ``Docker`` is used when ``ctx.docker`` is set. Otherwise the classes in
    ``CONTAINER_PREFERENCE`` are tried in order and the first one that can
    be instantiated is returned.

    :return: an engine instance, or None when no candidate could be created
    """
    if ctx.docker:
        return Docker()
    else:
        for i in CONTAINER_PREFERENCE:
            try:
                return i()
            except Exception:
                # this engine is not usable here; probe the next candidate
                pass
    return None

def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
    """Validate ``ctx.container_engine`` and return it.

    :raises Error: when the engine is not an instance of one of the
        ``CONTAINER_PREFERENCE`` classes, or when it is podman older than
        ``MIN_PODMAN_VERSION``
    """
    engine = ctx.container_engine
    if not isinstance(engine, CONTAINER_PREFERENCE):
        # See https://github.com/python/mypy/issues/8993
        exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE]  # type: ignore
        raise Error('No container engine binary found ({}). Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
    elif isinstance(engine, Podman):
        if engine.version < MIN_PODMAN_VERSION:
            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
    return engine

def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name of a daemon.

    :param fsid: cluster fsid
    :param daemon_type: the daemon type, or the full daemon name when
        ``daemon_id`` is omitted
    :param daemon_id: daemon id, or None if ``daemon_type`` is a full name
    :return: ``ceph-<fsid>@<type>.<id>`` or ``ceph-<fsid>@<name>``
    """
    # accept either name or type + id
    if daemon_id is not None:
        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
    else:
        return 'ceph-%s@%s' % (fsid, daemon_type)

def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
    """Return the ``systemd_unit`` entry of a daemon's description.

    :raises Error: when the description has no ``systemd_unit`` key
    """
    daemon = get_daemon_description(ctx, fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))

def check_unit(ctx, unit_name):
    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    """Query systemd about a unit.

    :return: ``(enabled, state, installed)`` where ``state`` is one of
        ``'running'``, ``'stopped'``, ``'error'`` or ``'unknown'``. A failure
        to run systemctl is logged and reported as not enabled / not
        installed / ``'unknown'``.
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.QUIET)
        if code == 0:
            enabled = True
            installed = True
        elif 'disabled' in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.QUIET)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)

def check_units(ctx, units, enabler=None):
    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    """Report whether any of ``units`` is enabled and running.

    :param units: systemd unit names, checked in order
    :param enabler: when given, installed units that are not both enabled
        and running are enabled through it
    :return: True as soon as one unit is enabled and running, otherwise
        False (also when units were merely enabled by this call)
    """
    for u in units:
        (enabled, state, installed) = check_unit(ctx, u)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % u)
            return True
        if enabler is not None:
            if installed:
                logger.info('Enabling unit %s' % u)
                enabler.enable_service(u)
    return False

def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
    """Return True when a container of ``c`` is in state 'running'.

    Daemon types that are not containerized always give False.
    """
    daemon_type = ctx.name.split('.', 1)[0]
    # these are non-containerized daemon types
    non_containerized = ['agent', 'cephadm-exporter']
    if daemon_type in non_containerized:
        return False
    running_name = get_running_container_name(ctx, c)
    return bool(running_name)

def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
    """Return the name under which the container of ``c`` is running.

    Both ``c.cname`` and ``c.old_cname`` are inspected, in that order.

    :return: the first name whose reported status is 'running', else None
    """
    for name in [c.cname, c.old_cname]:
        out, err, ret = call(ctx, [
            ctx.container_engine.path, 'container', 'inspect',
            '--format', '{{.State.Status}}', name
        ])
        if out.strip() == 'running':
            return name
    return None

def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from ``/etc/ceph/<cluster>.conf``.

    :param cluster: cluster name used to build the config file name
    :param legacy_dir: optional directory prefixed to the config path
    :return: the ``[global] fsid`` value, or None when the file or the
        option does not exist
    """
    conf_path = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        conf_path = os.path.abspath(legacy_dir + conf_path)

    if not os.path.exists(conf_path):
        return None

    parser = read_config(conf_path)
    has_fsid = parser.has_section('global') and parser.has_option('global', 'fsid')
    if has_fsid:
        return parser.get('global', 'fsid')
    return None

def get_legacy_daemon_fsid(ctx, cluster,
                           daemon_type, daemon_id, legacy_dir=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the cluster fsid a legacy daemon belongs to.

    For OSDs the fsid file inside the daemon's data directory is tried
    first; for everything else, or when that file cannot be read, the value
    from the legacy config file is used.

    :param legacy_dir: optional directory prefixed to the legacy paths
    :return: the fsid, or None when it cannot be determined
    """
    fsid = None
    if daemon_type == 'osd':
        try:
            # NOTE(review): path components assume the legacy layout
            # <data_dir>/osd/ceph-<id>/ceph_fsid -- confirm.
            fsid_file = os.path.join(ctx.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            # unreadable or missing file: fall back to the config file below
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid

def should_log_to_journald(ctx: CephadmContext) -> bool:
    """Decide whether daemons should log to journald.

    An explicit ``ctx.log_to_journald`` setting wins. Without one, journald
    is used only with podman at ``CGROUPS_SPLIT_PODMAN_VERSION`` or newer.
    """
    if ctx.log_to_journald is not None:
        return ctx.log_to_journald
    if not isinstance(ctx.container_engine, Podman):
        return False
    return ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION

def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
    # Build and return the list of command-line arguments for a daemon of the
    # given type. The list `r` is extended branch by branch depending on
    # `daemon_type`.
    # NOTE(review): this block does not parse as it stands -- several list
    # literals are never closed and a number of `else:` / `try:` / `pass`
    # lines and whole statements appear to be missing (marked below). Restore
    # them from the upstream file before relying on this function.
    r = list()  # type: List[str]

    # Ceph daemons (except crash and ceph-exporter): run as ceph:ceph and pick
    # the log destination.
    if daemon_type in Ceph.daemons and daemon_type not in ['crash', 'ceph-exporter']:
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
        # NOTE(review): list above is not closed; entries may be missing
        log_to_journald = should_log_to_journald(ctx)
        if log_to_journald:
            r += [
            # NOTE(review): entries, closing bracket and `else:` missing here
            r += [
                '--default-log-stderr-prefix=debug ',
        if daemon_type == 'mon':
            r += [
            if log_to_journald:
                r += [
                # NOTE(review): entries, closing bracket and `else:` missing here
                r += ['--default-mon-cluster-log-to-stderr=true']
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
        if daemon_type not in ['grafana', 'loki', 'promtail']:
            ip = ''
            port = Monitoring.port_map[daemon_type][0]
            # values from the daemon metadata override the defaults above
            meta = fetch_meta(ctx)
            if meta:
                if 'ip' in meta and meta['ip']:
                    ip = meta['ip']
                if 'ports' in meta and meta['ports']:
                    port = meta['ports'][0]
            r += [f'--web.listen-address={ip}:{port}']
            if daemon_type == 'prometheus':
                config = fetch_configs(ctx)
                retention_time = config.get('retention_time', '15d')
                retention_size = config.get('retention_size', '0')  # default to disabled
                r += [f'--storage.tsdb.retention.time={retention_time}']
                r += [f'--storage.tsdb.retention.size={retention_size}']
                scheme = 'http'
                host = get_fqdn()
                # in case host is not an fqdn then we use the IP to
                # avoid producing a broken web.external-url link
                if '.' not in host:
                    ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname())
                    # use the first ipv4 (if any) otherwise use the first ipv6
                    addr = next(iter(ipv4_addrs or ipv6_addrs), None)
                    host = wrap_ipv6(addr) if addr else host
                r += [f'--web.external-url={scheme}://{host}:{port}']
        if daemon_type == 'alertmanager':
            config = fetch_configs(ctx)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ['--cluster.peer={}'.format(peer)]
                # NOTE(review): a `try:` (at the `except` level below) appears
                # to be missing before the next line
                r += [f'--web.config.file={config["web_config"]}']
            except KeyError:
            # NOTE(review): the `except` body above is missing
            # some alertmanager, by default, look elsewhere for a config
            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
        if daemon_type == 'promtail':
            r += ['--config.expand-env']
        if daemon_type == 'prometheus':
            config = fetch_configs(ctx)
                # NOTE(review): `try:` missing before the next line
                r += [f'--web.config.file={config["web_config"]}']
            except KeyError:
            # NOTE(review): the `except` body above is missing
        if daemon_type == 'node-exporter':
            config = fetch_configs(ctx)
                # NOTE(review): `try:` missing before the next line
                r += [f'--web.config.file={config["web_config"]}']
            except KeyError:
            # NOTE(review): the `except` body above is missing
            r += ['--path.procfs=/host/proc',
            # NOTE(review): list above is not closed; entries may be missing
    elif daemon_type == 'jaeger-agent':
    # NOTE(review): the body of the jaeger-agent branch is missing
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == CephExporter.daemon_type:
        ceph_exporter = CephExporter.init(ctx, fsid, daemon_id)
        # NOTE(review): ceph_exporter is created but never used here
    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        r += haproxy.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        # NOTE(review): cc is created but never used here
    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, fsid, daemon_id)
        # NOTE(review): sc is created but never used here

    return r

def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) ->  None
    """Create the on-disk directories and files a daemon needs.

    The daemon's data directory is always created; Ceph daemons also get the
    cluster log directory. ``config`` and ``keyring``, when given, are
    written to the files ``config`` and ``keyring`` in the data directory.
    Monitoring components get their own directory layout plus the files
    listed under ``files`` in the fetched configuration; the other daemon
    types delegate to their class. Custom config files are written last.

    :param uid: owner of everything created
    :param gid: group of everything created
    :param config: content for the daemon's ``config`` file, if any
    :param keyring: content for the daemon's ``keyring`` file, if any
    """
    data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)

    if daemon_type in Ceph.daemons:
        make_log_dir(ctx, fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with write_new(config_path, owner=(uid, gid)) as f:
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with write_new(keyring_path, owner=(uid, gid)) as f:
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json = fetch_configs(ctx)

        # Set up directories specific to the monitoring component
        config_dir = ''
        data_dir_root = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
            recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
        elif daemon_type == 'promtail':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/promtail'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'loki':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/loki'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'node-exporter':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/node-exporter'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)

        # populate the config directory for the component from the config-json
        if 'files' in config_json:
            for fname in config_json['files']:
                # work around mypy wierdness where it thinks `str`s aren't Anys
                # when used for dictionary values! feels like possibly a mypy bug?!
                cfg = cast(Dict[str, Any], config_json['files'])
                content = dict_get_join(cfg, fname)
                if os.path.isabs(fname):
                    # absolute names are placed relative to the data dir root
                    fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
                else:
                    fpath = os.path.join(data_dir_root, config_dir, fname)
                with write_new(fpath, owner=(uid, gid), encoding='utf-8') as f:
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephNvmeof.daemon_type:
        ceph_nvmeof = CephNvmeof.init(ctx, fsid, daemon_id)
        ceph_nvmeof.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        haproxy.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, fsid, daemon_id)
        keepalived.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == SNMPGateway.daemon_type:
        sg = SNMPGateway.init(ctx, fsid, daemon_id)
        sg.create_daemon_conf()

    _write_custom_conf_files(ctx, daemon_type, str(daemon_id), fsid, uid, gid)

def _write_custom_conf_files(ctx: CephadmContext, daemon_type: str, daemon_id: str, fsid: str, uid: int, gid: int) -> None:
    """Write a daemon's custom config files below the cluster data dir.

    The entries come from `fetch_custom_config_files(ctx)`. Each entry that
    has both a 'mount_path' and a 'content' key is written to
    `<ctx.data_dir>/<fsid>/custom_config_files/<daemon_type>.<daemon_id>/`
    under the basename of its 'mount_path'; other entries are skipped.
    Directories and files are created with owner `(uid, gid)`.

    For the 'iscsi' daemon type every file is additionally written to a
    sibling directory with a '.tcmu' suffix.
    """
    # mostly making this its own function to make unit testing easier
    ccfiles = fetch_custom_config_files(ctx)
    if not ccfiles:
        # nothing to write, so do not create an empty directory either
        return
    custom_config_dir = os.path.join(ctx.data_dir, fsid, 'custom_config_files', f'{daemon_type}.{daemon_id}')
    if not os.path.exists(custom_config_dir):
        makedirs(custom_config_dir, uid, gid, 0o755)
    mandatory_keys = ['mount_path', 'content']
    for ccf in ccfiles:
        if all(k in ccf for k in mandatory_keys):
            file_path = os.path.join(custom_config_dir, os.path.basename(ccf['mount_path']))
            with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
                f.write(ccf['content'])
            # temporary workaround to make custom config files work for tcmu-runner
            # container we deploy with iscsi until iscsi is refactored
            if daemon_type == 'iscsi':
                tcmu_config_dir = custom_config_dir + '.tcmu'
                if not os.path.exists(tcmu_config_dir):
                    makedirs(tcmu_config_dir, uid, gid, 0o755)
                tcmu_file_path = os.path.join(tcmu_config_dir, os.path.basename(ccf['mount_path']))
                with write_new(tcmu_file_path, owner=(uid, gid), encoding='utf-8') as f:
                    f.write(ccf['content'])

def get_parm(option: str) -> Dict[str, str]:
    """Return the config parameters described by `option`.

    The value is loaded through `_get_config_json`. The special
    'custom_config_files' entry, which is used for mounting custom config
    files into daemon containers and is meant to be read through
    `fetch_custom_config_files`, is removed before returning.
    """
    params = _get_config_json(option)
    if 'custom_config_files' in params:
        # not a regular parameter; discard it for get_parm callers
        del params['custom_config_files']
    return params

def _get_config_json(option: str) -> Dict[str, Any]:
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            j = cached_stdin
            j = sys.stdin.read()
            cached_stdin = j
        # inline json string
        if option[0] == '{' and option[-1] == '}':
            j = option
        # json file
        elif os.path.exists(option):
            with open(option, 'r') as f:
                j = f.read()
            raise Error('Config file {} not found'.format(option))

        js = json.loads(j)
    except ValueError as e:
        raise Error('Invalid JSON in {}: {}'.format(option, e))
        return js

def fetch_meta(ctx: CephadmContext) -> Dict[str, Any]:
    """Return a dict containing metadata about a deployment.

    `ctx.meta_properties` is returned when it is set. Otherwise
    `ctx.meta_json` (a JSON string) is parsed, the result is stored on
    `ctx.meta_properties` and returned. If neither attribute is set an
    empty dict is returned.
    """
    meta = getattr(ctx, 'meta_properties', None)
    if meta is not None:
        return meta
    mjson = getattr(ctx, 'meta_json', None)
    if mjson is not None:
        # a JSON `null` (or any other falsy document) is treated as "no metadata"
        meta = json.loads(mjson) or {}
        ctx.meta_properties = meta
        return meta
    return {}

def fetch_configs(ctx: CephadmContext) -> Dict[str, str]:
    """Return a dict containing arbitrary configuration parameters.

    This function filters out the key 'custom_config_files' which
    must not be part of a deployment's configuration key-value pairs.
    To access custom configuration file data, use `fetch_custom_config_files`.

    `ctx.config_blobs` is preferred; a shallow copy is filtered so the
    object stored on `ctx` is left untouched. Otherwise `ctx.config_json`
    is loaded through `_get_config_json`. An empty dict is returned when
    neither is set.
    """
    # ctx.config_blobs is *always* a dict. it is created once when
    # a command is parsed/processed and stored "forever"
    cfg_blobs = getattr(ctx, 'config_blobs', None)
    if cfg_blobs:
        cfg_blobs = dict(cfg_blobs)
        cfg_blobs.pop('custom_config_files', None)
        return cfg_blobs
    # ctx.config_json is the legacy equivalent of config_blobs. it is a
    # string that either contains json or refers to a file name where
    # the file contains json.
    cfg_json = getattr(ctx, 'config_json', None)
    if cfg_json:
        jdata = _get_config_json(cfg_json) or {}
        jdata.pop('custom_config_files', None)
        return jdata
    return {}

def fetch_custom_config_files(ctx: CephadmContext) -> List[Dict[str, Any]]:
    """Return a list containing dicts that can be used to populate
    custom configuration files for containers.

    The list is the value of the 'custom_config_files' key, taken from
    `ctx.config_blobs` when that is set and otherwise from the document
    `ctx.config_json` refers to. An empty list is returned when the key or
    both sources are missing.
    """
    # NOTE: this function works like the opposite of fetch_configs.
    # instead of filtering out custom_config_files, it returns only
    # the content in that key.
    cfg_blobs = getattr(ctx, 'config_blobs', None)
    if cfg_blobs:
        return cfg_blobs.get('custom_config_files', [])
    cfg_json = getattr(ctx, 'config_json', None)
    if cfg_json:
        jdata = _get_config_json(cfg_json)
        return jdata.get('custom_config_files', [])
    return []

def fetch_tcp_ports(ctx: CephadmContext) -> List[EndPoint]:
    """Return a list of Endpoints, which have a port and ip attribute

    Ports are read from `ctx.tcp_ports`, which may be a whitespace-separated
    string of port numbers or an iterable of ports. `ctx.port_ips`
    (a JSON string or a dict keyed by the port as a string) optionally maps
    a port to the IP to use for it; ports without a mapping get the
    default address.
    """
    ports = getattr(ctx, 'tcp_ports', None)
    if ports is None:
        ports = []
    if isinstance(ports, str):
        ports = list(map(int, ports.split()))
    port_ips: Dict[str, str] = {}
    port_ips_attr: Union[str, Dict[str, str], None] = getattr(ctx, 'port_ips', None)
    if isinstance(port_ips_attr, str):
        port_ips = json.loads(port_ips_attr)
    elif port_ips_attr is not None:
        # if it's not None or a str, assume it's already the dict we want
        port_ips = port_ips_attr

    endpoints: List[EndPoint] = []
    for port in ports:
        if str(port) in port_ips:
            endpoints.append(EndPoint(port_ips[str(port)], port))
        else:
            # no explicit IP for this port: emit exactly one default endpoint
            endpoints.append(EndPoint('', port))

    return endpoints

def get_config_and_keyring(ctx):
    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
    """Return the `(config, keyring)` text for a deployment.

    Values from `fetch_configs(ctx)` are used when both 'config' and
    'keyring' are present there. Otherwise each value may be replaced from
    the context: the config from the file `ctx.config`, and the keyring
    either built from `ctx.name`/`ctx.key` or read from the file
    `ctx.keyring`. Either element may be None.

    Raises `Error` when a referenced file does not exist.
    """
    config = None
    keyring = None

    d = fetch_configs(ctx)
    if d:
        config = d.get('config')
        keyring = d.get('keyring')
        if config and keyring:
            return config, keyring

    if 'config' in ctx and ctx.config:
        try:
            with open(ctx.config, 'r') as f:
                config = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    if 'key' in ctx and ctx.key:
        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
    elif 'keyring' in ctx and ctx.keyring:
        try:
            with open(ctx.keyring, 'r') as f:
                keyring = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    return config, keyring

def get_container_binds(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
    """Collect the bind mounts for a daemon's container and return them.

    NOTE(review): none of the branches below adds anything to `binds`, and
    two of them have no body at all, so lines appear to be missing from
    this block -- confirm against the upstream file before relying on it.
    """
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
    if daemon_type == CephNvmeof.daemon_type:
    elif daemon_type == CustomContainer.daemon_type:
        # a custom container is always addressed by id
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)

    return binds

# Build the host-path -> container-path[:options] volume mount mapping for a
# daemon's container, based on its daemon type.
# NOTE(review): the signature below is cut off (the type comment mentions a
# fifth, Optional[bool] parameter and the body reads `no_config`), and several
# `else:`/`try:` lines and branch bodies appear to be missing further down --
# confirm against the upstream file.
def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
    # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    mounts = dict()

    # mounts shared by all Ceph daemons of a cluster: run dir, log dir,
    # crash dir and (when logging to journald) the journal socket dir
    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid)
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid, ctx.log_dir)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'
            if daemon_type != 'crash' and should_log_to_journald(ctx):
                journald_sock_dir = '/run/systemd/journal'
                mounts[journald_sock_dir] = journald_sock_dir

    # per-daemon data dir, config and keyring
    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
            # NOTE(review): as written the next line overrides the rgw path;
            # an `else:` looks to be missing here.
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash', 'ceph-exporter']:
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    # device and LVM access for daemons that handle block devices
    if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type in ['osd', 'clusterless-ceph-volume']:
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'
    if daemon_type == 'osd':
        # selinux-policy in the container may not match the host.
        if HostFacts(ctx).selinux_enabled:
            cluster_dir = f'{ctx.data_dir}/{fsid}'
            selinux_folder = f'{cluster_dir}/selinux'
            if os.path.exists(cluster_dir):
                if not os.path.exists(selinux_folder):
                    os.makedirs(selinux_folder, mode=0o755)
                mounts[selinux_folder] = '/sys/fs/selinux:ro'
                # NOTE(review): the error below presumably belongs to an
                # `else:` of the `os.path.exists(cluster_dir)` check.
                logger.error(f'Cluster direcotry {cluster_dir} does not exist.')
        mounts['/'] = '/rootfs'

        # NOTE(review): the `except AttributeError:` further down has no
        # visible `try:`; one presumably wrapped this `shared_ceph_folder`
        # lookup at a shallower indent.
        if ctx.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(ctx.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/cephadm/cephadm.py'] = '/usr/sbin/cephadm'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
                                             'Ceph shared source folder does not exist.',
    except AttributeError:

    # monitoring stack: per-component config and data directories
    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'loki':
            mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
            mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
        elif daemon_type == 'promtail':
            mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
            mounts[log_dir] = '/var/log/ceph:z'
            mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
        elif daemon_type == 'node-exporter':
            mounts[os.path.join(data_dir, 'etc/node-exporter')] = '/etc/node-exporter:Z'
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == 'grafana':
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
            mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    # NOTE(review): several of the per-daemon-type sections below compute
    # `data_dir` but never add to `mounts`; the lines that did so appear to
    # be missing.
    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)

    if daemon_type == HAproxy.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)

    if daemon_type == CephNvmeof.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        # Removes the trailing ".tcmu" from data_dir, as tcmu-runner uses the
        # same data_dir as rbd-runner-api
        if data_dir.endswith('.tcmu'):
            data_dir = re.sub(r'\.tcmu$', '', data_dir)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    if daemon_type == Keepalived.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)

    # Modifications podman makes to /etc/hosts causes issues with
    # certain daemons (specifically referencing "host.containers.internal" entry
    # being added to /etc/hosts in this case). To avoid that, but still
    # allow users to use /etc/hosts for hostname resolution, we can
    # mount the host's /etc/hosts file.
    # https://tracker.ceph.com/issues/58532
    # https://tracker.ceph.com/issues/57018
    if isinstance(ctx.container_engine, Podman):
        if os.path.exists('/etc/hosts'):
            if '/etc/hosts' not in mounts:
                mounts['/etc/hosts'] = '/etc/hosts:ro'

    return mounts

# Build a `CephContainer` for running ceph-volume.
# NOTE(review): `volume_mounts` and `args` use mutable default values ({} and
# []), which are shared between calls; safe only as long as nothing mutates
# them -- verify against the callee.
# NOTE(review): the `CephContainer(` call at the end has no visible arguments
# or closing parenthesis; lines appear to be missing from this block.
def get_ceph_volume_container(ctx: CephadmContext,
                              privileged: bool = True,
                              cname: str = '',
                              volume_mounts: Dict[str, str] = {},
                              bind_mounts: Optional[List[List[str]]] = None,
                              args: List[str] = [],
                              envs: Optional[List[str]] = None) -> 'CephContainer':
    # use a fresh list per call instead of a shared default
    if envs is None:
        envs = []

    return CephContainer(

def set_pids_limit_unlimited(ctx: CephadmContext, container_args: List[str]) -> None:
    # set container's pids-limit to unlimited rather than default (Docker 4096 / Podman 2048)
    # Useful for daemons like iscsi where the default pids-limit limits the number of luns
    # per iscsi target or rgw where increasing the rgw_thread_pool_size to a value near
    # the default pids-limit may cause the container to crash.
    # NOTE(review): the condition below is never closed and no branch body is
    # visible; presumably the flag appended to `container_args` depends on
    # whether the engine is a new enough Podman -- confirm against upstream.
    if (
        isinstance(ctx.container_engine, Podman)
        and ctx.container_engine.version >= PIDS_LIMIT_UNLIMITED_PODMAN_VERSION

def get_container(ctx: CephadmContext,
                  fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Assemble the `CephContainer` used to run the given daemon.

    Picks the entrypoint, the daemon name, extra container arguments and
    environment variables per daemon type, then delegates to
    `CephContainer.for_daemon`.

    NOTE(review): a number of lines appear to be missing from this block
    (empty `if` bodies, unclosed list literals, truncated call arguments);
    the comments below flag the visible gaps -- confirm against upstream.
    """
    entrypoint: str = ''
    name: str = ''
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    # NOTE(review): this `if` has no visible body.
    if daemon_type in Ceph.daemons:
    if container_args is None:
        container_args = []
    if daemon_type in Ceph.daemons or daemon_type in Ceph.gateways:
        set_pids_limit_unlimited(ctx, container_args)
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    # entrypoint / name / extra args per daemon type
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'cephfs-mirror':
        entrypoint = '/usr/bin/cephfs-mirror'
        name = 'client.cephfs-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        entrypoint = ''
    elif daemon_type in Tracing.components:
        entrypoint = ''
        name = '%s.%s' % (daemon_type, daemon_id)
        config = fetch_configs(ctx)
        Tracing.set_configuration(config, daemon_type)
        envs.extend(Tracing.components[daemon_type].get('envs', []))
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type == CephExporter.daemon_type:
        entrypoint = CephExporter.entrypoint
        name = 'client.ceph-exporter.%s' % daemon_id
    elif daemon_type == HAproxy.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        container_args.extend(['--user=root'])  # haproxy 2.4 defaults to a different user
    elif daemon_type == Keepalived.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
    elif daemon_type == CephNvmeof.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        container_args.extend(['--ulimit', 'memlock=-1:-1'])
        container_args.extend(['--ulimit', 'nofile=10240'])
        container_args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE'])
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False

    if daemon_type in Monitoring.components:
        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        # NOTE(review): the list below is never closed and has no visible
        # elements.
        monitoring_args = [
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        if daemon_type == 'node-exporter':
            # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
            # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
            # between the node-exporter container and the host to avoid selinux denials
            container_args.extend(['--security-opt', 'label=disable'])
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']
    elif daemon_type == SNMPGateway.daemon_type:
        sg = SNMPGateway.init(ctx, fsid, daemon_id)

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if isinstance(ctx.container_engine, Podman):
        runtime_dir = '/run'
        # NOTE(review): the lines below look like the elements of a
        # `container_args.extend([...])` call whose opener, flag names and
        # closer are missing.
            '-d', '--log-driver', 'journald',
            runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
            runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION and not ctx.no_cgroups_split:
        # if /etc/hosts doesn't exist, we can be confident
        # users aren't using it for host name resolution
        # and adding --no-hosts avoids bugs created in certain daemons
        # by modifications podman makes to /etc/hosts
        # https://tracker.ceph.com/issues/58532
        # https://tracker.ceph.com/issues/57018
        if not os.path.exists('/etc/hosts'):

    # NOTE(review): only three of the call's arguments are visible and the
    # call is never closed.
    return CephContainer.for_daemon(
        args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
        volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),

def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    """Return the `(uid, gid)` reported for a path inside a container image.

    `img` defaults to `ctx.image`. `file_path` may be a single path or a
    list of candidate paths that are tried in order; the first one that
    yields a result wins. If a candidate failed with RuntimeError, `Error`
    is raised for the last such failure; with no candidates at all a
    RuntimeError is raised.

    NOTE(review): the `else:` of the isinstance check, the `try:` inside the
    loop and most of the `CephContainer(...)` call appear to be missing from
    this block -- confirm against upstream.
    """

    if not img:
        img = ctx.image

    # normalise `file_path` to a list of candidates
    if isinstance(file_path, str):
        paths = [file_path]
        paths = file_path

    # remember the last failure so it can be reported if nothing succeeds
    ex: Optional[Tuple[str, RuntimeError]] = None

    for fp in paths:
            out = CephContainer(
                args=['-c', '%u %g', fp]
            # output is expected to be "<uid> <gid>"
            uid, gid = out.split(' ')
            return int(uid), int(gid)
        except RuntimeError as e:
            ex = (fp, e)
    if ex:
        raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')

    raise RuntimeError('uid/gid not found')

def deploy_daemon(ctx: CephadmContext, fsid: str, daemon_type: str,
                  daemon_id: Union[int, str], c: Optional['CephContainer'],
                  uid: int, gid: int, config: Optional[str] = None,
                  keyring: Optional[str] = None, osd_fsid: Optional[str] = None,
                  deployment_type: DeploymentType = DeploymentType.DEFAULT,
                  endpoints: Optional[List[EndPoint]] = None) -> None:
    """Deploy (or redeploy/reconfigure) a single daemon on this host.

    Steps visible in this block: check the requested endpoints for ports
    already in use (fresh deployments only), prepare the daemon's data
    directory (with a dedicated path for a mon whose data dir does not yet
    exist), write unit files unless this is a reconfig, stamp
    `unit.created`/`unit.configured`, update the firewall, and restart
    non-Ceph daemons after a reconfig.

    Raises `Error` when a required port is in use (non-mgr daemons) or when
    a reconfig is requested for a daemon without a data directory, and
    RuntimeError when no container is given for a daemon that needs one.

    NOTE(review): several lines appear to be missing from this block (call
    openers, `else:` lines, `with` bodies); the comments below flag the
    visible gaps -- confirm against upstream.
    """

    endpoints = endpoints or []
    # only check port in use if fresh deployment since service
    # we are redeploying/reconfiguring will already be using the port
    if deployment_type == DeploymentType.DEFAULT:
        if any([port_in_use(ctx, e) for e in endpoints]):
            if daemon_type == 'mgr':
                # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
                # tell whether that is the case here.
                # NOTE(review): the f-string below is presumably the argument
                # of a logging call, and the `raise` presumably sits in an
                # `else:` branch.
                    f"ceph-mgr TCP port(s) {','.join(map(str, endpoints))} already in use"
                raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, endpoints)), daemon_type))

    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    if deployment_type == DeploymentType.RECONFIG and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        # NOTE(review): the lines below look like the `args` list and the
        # volume mount mapping of a container run whose opener is missing.
                '-i', str(daemon_id),
                '--fsid', fsid,
                '-c', '/tmp/config',
                '--keyring', '/tmp/keyring',
            ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',

        # write conf
        # NOTE(review): this `with` has no visible body, and the argument
        # lines after the next comment presumably belong to a call in the
        # `else:` branch of the mon check.
        with write_new(mon_dir + '/config', owner=(uid, gid)) as f:
        # dirs, conf, keyring
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    # only write out unit files and start daemon
    # with systemd if this is not a reconfig
    if deployment_type != DeploymentType.RECONFIG:
        if daemon_type == CephadmAgent.daemon_type:
            config_js = fetch_configs(ctx)
            assert isinstance(config_js, dict)

            cephadm_agent = CephadmAgent(ctx, fsid, daemon_id)
            # NOTE(review): `else:` lines appear to be missing around the
            # container check and the `raise` below.
            if c:
                deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
                                    c, osd_fsid=osd_fsid, endpoints=endpoints)
                raise RuntimeError('attempting to deploy a daemon without a container image')

    # marker files: the file mtimes record creation / last configuration time
    if not os.path.exists(data_dir + '/unit.created'):
        with write_new(data_dir + '/unit.created', owner=(uid, gid)) as f:
            f.write('mtime is time the daemon deployment was created\n')

    with write_new(data_dir + '/unit.configured', owner=(uid, gid)) as f:
        f.write('mtime is time we were last configured\n')

    update_firewalld(ctx, daemon_type)

    # Open ports explicitly required for the daemon
    if endpoints:
        fw = Firewalld(ctx)
        fw.open_ports([e.port for e in endpoints] + fw.external_ports.get(daemon_type, []))

    # If this was a reconfig and the daemon is not a Ceph daemon, restart it
    # so it can pick up potential changes to its configuration files
    if deployment_type == DeploymentType.RECONFIG and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(ctx, ['systemctl', 'reset-failed',
                          get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(ctx, ['systemctl', 'restart',
                          get_unit_name(fsid, daemon_type, daemon_id)])

def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Append the shell lines that (re)start `container` to `file_obj`.

    Written in order: an optional `# comment` line, best-effort removal of
    any left-over container under both the old and the current container
    name (each prefixed with `!` so a failure does not abort a `set -e`
    script), the same removals with `storage=True` when the engine is
    Podman, and finally the container run command, suffixed with ` &` when
    `background` is true.
    """
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
    file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if isinstance(ctx.container_engine, Podman):
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
            + ' 2> /dev/null\n')
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)])
            + ' 2> /dev/null\n')

    # container run command
    file_obj.write(
        ' '.join([shlex.quote(a) for a in container.run_cmd()])
        + (' &' if background else '') + '\n')

def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
    """Remove a stale cgroup directory tree left behind by a stopped unit.

    Does nothing unless `/sys/fs/cgroup/system.slice` exists and the unit's
    cgroup directory below the cluster's slice is present. Failures while
    removing directories are logged as a warning and otherwise ignored.
    """
    # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail.
    # see https://tracker.ceph.com/issues/50998

    CGROUPV2_PATH = Path('/sys/fs/cgroup')
    if not (CGROUPV2_PATH / 'system.slice').exists():
        # Only unified cgroup is affected, skip if not the case
        return

    # systemd escapes '-' as '\x2d' in slice names
    slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
    cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
    if not cg_path.exists():
        return

    def cg_trim(path: Path) -> None:
        # depth-first: child cgroups must be removed before their parent
        for p in path.iterdir():
            if p.is_dir():
                cg_trim(p)
        path.rmdir()
    try:
        cg_trim(cg_path)
    except OSError:
        logger.warning(f'Failed to trim old cgroups {cg_path}')

def deploy_daemon_units(
    ctx: CephadmContext,
    fsid: str,
    uid: int,
    gid: int,
    daemon_type: str,
    daemon_id: Union[int, str],
    c: 'CephContainer',
    enable: bool = True,
    start: bool = True,
    osd_fsid: Optional[str] = None,
    endpoints: Optional[List[EndPoint]] = None,
) -> None:
    """Write the per-daemon unit helper files and (re)start its systemd unit.

    Files written into the daemon's data dir: `unit.run`, `unit.meta`,
    `unit.poststop`, `unit.stop` and `unit.image`. Afterwards sysctl
    settings and the base units are installed, the cluster's
    `ceph-<fsid>@.service` template is written, systemd is reloaded, and
    the unit is stopped, optionally enabled (`enable`) and started
    (`start`).

    NOTE(review): a number of lines appear to be missing from this block
    (call openers, `else:` lines, list/dict closers, `with` bodies); the
    comments below flag the visible gaps -- confirm against upstream.
    """
    # cmd

    def add_stop_actions(f: TextIO, timeout: Optional[int]) -> None:
        # following generated script basically checks if the container exists
        # before stopping it. Exit code will be success either if it doesn't
        # exist or if it exists and is stopped successfully.
        container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
        f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True, timeout=timeout))} \n')
        f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd(timeout=timeout))} \n')

    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    run_file_path = data_dir + '/unit.run'
    meta_file_path = data_dir + '/unit.meta'
    with write_new(run_file_path) as f, write_new(meta_file_path) as metaf:

        # abort the generated script on the first failing command
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
                # NOTE(review): the ceph-volume activation below presumably
                # belongs to an `else:` of the `simple_fn` check; the calls
                # and list literals that follow are also not closed.
                # if ceph-volume does not support 'ceph-volume activate', we must
                # do 'ceph-volume lvm activate'.
                test_cv = get_ceph_volume_container(
                    args=['activate', '--bad-option'],
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
                out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
                #  bad: ceph-volume: error: unrecognized arguments: activate --bad-option
                # good: ceph-volume: error: unrecognized arguments: --bad-option
                if 'unrecognized arguments: activate' in err:
                    # older ceph-volume without top-level activate or --no-tmpfs
                    cmd = [
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                    cmd = [
                        '--osd-id', str(daemon_id),
                        '--osd-uuid', osd_fsid,

                prestart = get_ceph_volume_container(
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == CephIscsi.daemon_type:
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)

        # the daemon's own container comes last in unit.run
        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))

        # some metadata about the deploy
        meta: Dict[str, Any] = fetch_meta(ctx)
        # NOTE(review): the two entries below look like the contents of a
        # dict merged into `meta`; the surrounding call is missing.
            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
        if not meta.get('ports'):
            # NOTE(review): an `else:` appears to be missing before the
            # empty-list assignment.
            if endpoints:
                meta['ports'] = [e.port for e in endpoints]
                meta['ports'] = []
        metaf.write(json.dumps(meta, indent=4) + '\n')

    timeout = 30 if daemon_type == 'osd' else None
    # post-stop command(s)
    with write_new(data_dir + '/unit.poststop') as f:
        # this is a fallback to eventually stop any underlying container that was not stopped properly by unit.stop,
        # this could happen in very slow setups as described in the issue https://tracker.ceph.com/issues/58242.
        add_stop_actions(cast(TextIO, f), timeout)
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = get_ceph_volume_container(
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            runtime_dir = '/run'
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, str(daemon_id) + '.tcmu') + '\n')
            f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, str(daemon_id) + '.tcmu') + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')

    # stop command(s)
    with write_new(data_dir + '/unit.stop') as f:
        add_stop_actions(cast(TextIO, f), timeout)

    # record which image the daemon was deployed with
    if c:
        with write_new(data_dir + '/unit.image') as f:
            f.write(c.image + '\n')

    # sysctl
    install_sysctl(ctx, fsid, daemon_type)

    # systemd
    install_base_units(ctx, fsid)
    unit = get_unit_file(ctx, fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    # NOTE(review): this `with` has no visible body (presumably it writes
    # `unit`), and the two `call(` invocations below are not closed.
    with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f:
    call_throws(ctx, ['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(ctx, ['systemctl', 'stop', unit_name],
    call(ctx, ['systemctl', 'reset-failed', unit_name],
    if enable:
        call_throws(ctx, ['systemctl', 'enable', unit_name])
    if start:
        # remove stale cgroups first so that the start below does not fail
        clean_cgroup(ctx, fsid, unit_name)
        call_throws(ctx, ['systemctl', 'start', unit_name])

class Firewalld(object):
    """Helper around ``firewall-cmd`` for opening what Ceph daemons need.

    ``available`` is computed once at construction time; when firewalld is
    missing, disabled or not running every mutating method logs at debug
    level and returns without touching the firewall.
    """

    # for specifying ports we should always open when opening
    # ports for a daemon of that type. Main use case is for ports
    # that we should open when deploying the daemon type but that
    # the daemon itself may not necessarily need to bind to the port.
    # This needs to be handed differently as we don't want to fail
    # deployment if the port cannot be bound to but we still want to
    # open the port in the firewall.
    external_ports: Dict[str, List[int]] = {
        'iscsi': [3260]  # 3260 is the well known iSCSI port
    }

    def __init__(self, ctx):
        # type: (CephadmContext) -> None
        self.ctx = ctx
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True only if firewall-cmd exists and firewalld.service is enabled and running."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != 'running':
            logger.debug('firewalld.service is not running')
            return False

        logger.info('firewalld ready')
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently add the firewalld service matching ``daemon_type``.

        Daemon types without a matching firewalld service are ignored.
        Raises RuntimeError if firewall-cmd fails to add the service.
        """
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            # no firewalld service definition for this daemon type
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        # a non-zero return from --query-service means "not enabled yet"
        out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open each TCP port in ``fw_ports`` that is not open yet.

        Raises RuntimeError if firewall-cmd fails to add a port.
        """
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)

    def close_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently close each TCP port in ``fw_ports`` that is currently open.

        Raises RuntimeError if firewall-cmd fails to remove a port.
        """
        if not self.available:
            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if not ret:
                logger.info('Disabling port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to remove port %s from current zone: %s' %
                                       (tcp_port, err))
                else:
                    logger.info(f'Port {tcp_port} disabled')
            else:
                logger.info(f'firewalld port {tcp_port} already closed')

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so the permanent changes take effect."""
        if not self.available:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        call_throws(self.ctx, [self.cmd, '--reload'])

def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    """Enable the firewalld service for ``daemon_type`` unless ``ctx.skip_firewalld`` is set."""
    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
        firewall = Firewalld(ctx)
        # previously the object was created and dropped without being used
        firewall.enable_service_for(daemon_type)
        firewall.apply_rules()

def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
    """
    Set up sysctl settings

    Writes ``90-ceph-<fsid>-<daemon_type>.conf`` into ``ctx.sysctl_dir`` with
    the settings the daemon type asks for that are not already in effect, then
    reloads them with ``sysctl --system``. Does nothing when no setting needs
    to change.
    """
    def _write(conf: Path, lines: List[str]) -> None:
        # header line, blank line, the settings, trailing blank line
        lines = [
            '# created by cephadm',
            '',
            *lines,
            '',
        ]
        with write_new(conf, owner=None, perms=None) as f:
            f.write('\n'.join(lines))

    conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
    lines: List = []

    if daemon_type == 'osd':
        lines = OSD.get_sysctl_settings()
    elif daemon_type == 'haproxy':
        lines = HAproxy.get_sysctl_settings()
    elif daemon_type == 'keepalived':
        lines = Keepalived.get_sysctl_settings()
    elif daemon_type == CephNvmeof.daemon_type:
        lines = CephNvmeof.get_sysctl_settings()
    # keep only the settings whose current value differs from the desired one
    lines = filter_sysctl_settings(ctx, lines)

    # apply the sysctl settings
    if lines:
        Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
        _write(conf, lines)
        call_throws(ctx, ['sysctl', '--system'])

def sysctl_get(ctx: CephadmContext, variable: str) -> Union[str, None]:
    """
    Read a sysctl setting by executing 'sysctl -b {variable}'

    Returns the command's stdout, or None when the output is empty.
    """
    out, err, code = call(ctx, ['sysctl', '-b', variable])
    return out or None

def filter_sysctl_settings(ctx: CephadmContext, lines: List[str]) -> List[str]:
    """
    Given a list of sysctl settings, examine the system's current configuration
    and return those which are not currently set as described.

    Comment-only, blank and malformed lines (no key or no value) are dropped.
    """
    def test_setting(desired_line: str) -> bool:
        # Remove any comments
        comment_start = desired_line.find('#')
        if comment_start != -1:
            desired_line = desired_line[:comment_start]
        desired_line = desired_line.strip()
        if not desired_line:
            return False
        # partition() tolerates lines with no '=' or with '=' inside the
        # value, where a strict two-way unpack of split('=') would raise
        setting, _, desired_value = desired_line.partition('=')
        setting = setting.strip()
        desired_value = desired_value.strip()
        if not setting or not desired_value:
            return False
        actual_value = sysctl_get(ctx, setting)
        return desired_value != actual_value
    return list(filter(test_setting, lines))

def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
    """
    Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
    This moves it to '/etc/sysctl.d'.

    Files that already exist at the destination are deleted from the old
    location instead. The outcome is logged; nothing is raised for files that
    could not be moved or removed.
    """
    deprecated_location: str = '/usr/lib/sysctl.d'
    deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
    if not deprecated_confs:
        return

    # counts the files still left to migrate; reaches 0 on full success
    file_count: int = len(deprecated_confs)
    logger.info(f'Found sysctl {file_count} files in deprecated location {deprecated_location}. Starting Migration.')
    for conf in deprecated_confs:
        try:
            shutil.move(conf, ctx.sysctl_dir)
            file_count -= 1
        except shutil.Error as err:
            if str(err).endswith('already exists'):
                logger.warning(f'Destination file already exists. Deleting {conf}.')
                try:
                    os.unlink(conf)
                    file_count -= 1
                except OSError as del_err:
                    logger.warning(f'Could not remove {conf}: {del_err}.')
            else:
                logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')

    # Log successful migration
    if file_count == 0:
        logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
        return

    # Log partially successful / unsuccessful migration
    files_processed: int = len(deprecated_confs)
    if file_count < files_processed:
        status: str = f'partially successful (failed {file_count}/{files_processed})'
    else:
        status = 'unsuccessful'
    logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')

def install_base_units(ctx, fsid):
    # type: (CephadmContext, str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.

    Also installs a logrotate configuration for the cluster, unless one is
    already present (so that local modifications survive).
    """
    # global unit
    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f:
        f.write(
            '[Unit]\n'
            'Description=Ceph cluster {fsid}\n'
            'PartOf=ceph.target\n'
            'Before=ceph.target\n'
            '\n'
            '[Install]\n'
            'WantedBy=multi-user.target ceph.target\n'.format(
                fsid=fsid)
        )
    if not existed:
        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])

    # don't overwrite file in order to allow users to manipulate it
    if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'):
        return

    # logrotate for the cluster
    with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f:
        # This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        # in all containers, but I don't see an elegant way to send SIGHUP *just* to
        # the daemons for this cluster.  (1) systemd kill -s will get the signal to
        # podman, but podman will exit.  (2) podman kill will get the signal to the
        # first child (bash), but that isn't the ceph daemon.  This is simpler and
        # should be harmless.
        targets: List[str] = [
            'ceph-mon',
            'ceph-mgr',
            'ceph-mds',
            'ceph-osd',
            'ceph-fuse',
            'radosgw',
            'rbd-mirror',
            'cephfs-mirror',
            'tcmu-runner'
        ]

        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 %s || pkill -1 -x '%s' || true
    endscript
    missingok
    notifempty
    su root root
}
""" % (fsid, ' '.join(targets), '|'.join(targets)))

def get_unit_file(ctx, fsid):
    # type: (CephadmContext, str) -> str
    # Build and return the text of the systemd unit template that the caller
    # installs as 'ceph-<fsid>@.service'.
    #
    # NOTE(review): this block looks truncated in this view -- the tuple
    # assigned to extra_args is never closed, the template string below has no
    # visible terminator / .format( call, and the {docker_requires} and
    # extra_args values are never visibly substituted. Confirm against the
    # complete file before changing anything here.
    extra_args = ''
    # Podman only: remove stale pid/cid files before start and after stop.
    if isinstance(ctx.container_engine, Podman):
        extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            extra_args += 'Delegate=yes\n'

    # With docker the unit is ordered after (and requires) docker.service.
    docker = isinstance(ctx.container_engine, Docker)
    u = """# generated by cephadm
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target


ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
           # if docker, we depend on docker.service
           docker_after=' docker.service' if docker else '',
           docker_requires='Requires=docker.service\n' if docker else '')

    return u


class CephContainer:
    """Description of one container invocation and builder of the engine command lines.

    The ``*_cmd`` methods only assemble argument lists for the configured
    container engine (``ctx.container_engine.path``); ``run`` is the only
    method that actually executes anything.
    """

    def __init__(self,
                 ctx: CephadmContext,
                 image: str,
                 entrypoint: str,
                 args: List[str] = [],
                 volume_mounts: Dict[str, str] = {},
                 cname: str = '',
                 container_args: List[str] = [],
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: Optional[bool] = None,
                 host_network: bool = True,
                 memory_request: Optional[str] = None,
                 memory_limit: Optional[str] = None,
                 ) -> None:
        self.ctx = ctx
        self.image = image
        self.entrypoint = entrypoint
        self.args = args
        self.volume_mounts = volume_mounts
        self._cname = cname
        self.container_args = container_args
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        # a falsy ``init`` (None or False) defers to the context-wide setting
        self.init = init if init else ctx.container_init
        self.host_network = host_network
        self.memory_request = memory_request
        self.memory_limit = memory_limit

    @classmethod
    def for_daemon(cls,
                   ctx: CephadmContext,
                   fsid: str,
                   daemon_type: str,
                   daemon_id: str,
                   entrypoint: str,
                   args: List[str] = [],
                   volume_mounts: Dict[str, str] = {},
                   container_args: List[str] = [],
                   envs: Optional[List[str]] = None,
                   privileged: bool = False,
                   ptrace: bool = False,
                   bind_mounts: Optional[List[List[str]]] = None,
                   init: Optional[bool] = None,
                   host_network: bool = True,
                   memory_request: Optional[str] = None,
                   memory_limit: Optional[str] = None,
                   ) -> 'CephContainer':
        """Alternate constructor: use ``ctx.image`` and derive the container name from the daemon identity."""
        return cls(
            ctx,
            image=ctx.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
            container_args=container_args,
            envs=envs,
            privileged=privileged,
            ptrace=ptrace,
            bind_mounts=bind_mounts,
            init=init,
            host_network=host_network,
            memory_request=memory_request,
            memory_limit=memory_limit,
        )

    @property
    def cname(self) -> str:
        """
        Container name with every '.' replaced by '-'.

        podman adds the current container name to the /etc/hosts
        file. Turns out, python's `socket.getfqdn()` differs from
        `hostname -f`, when we have the container names containing
        dots in it: for a container started with
        `--name foo.bar.baz.com`, `hostname -f` still reports the host
        name while `socket.getfqdn()` reports the container name.

        Fascinatingly, this doesn't happen when using dashes.
        """
        return self._cname.replace('.', '-')

    @cname.setter
    def cname(self, val: str) -> None:
        self._cname = val

    @property
    def old_cname(self) -> str:
        """Container name exactly as given, i.e. possibly containing dots."""
        return self._cname

    def run_cmd(self) -> List[str]:
        """Return the engine ``run`` command line for this container."""
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
            # some containers (ahem, haproxy) override this, but we want a fast
            # shutdown always (and, more importantly, a successful exit even if we
            # fall back to SIGKILL).
            '--stop-signal=SIGTERM',
        ]

        if isinstance(self.ctx.container_engine, Podman):
            if os.path.exists('/etc/ceph/podman-auth.json'):
                cmd_args.append('--authfile=/etc/ceph/podman-auth.json')

        if isinstance(self.ctx.container_engine, Docker):
            cmd_args.extend(['--ulimit', 'nofile=1048576'])

        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.memory_request:
            cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)])
        if self.memory_limit:
            cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)])
            cmd_args.extend(['--memory', str(self.memory_limit)])

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return \
            cmd_args + self.container_args + \
            envs + vols + binds + \
            [self.image] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Return an engine ``run`` command line that executes ``cmd`` in this image.

        ``cmd[0]`` becomes the entrypoint and ``cmd[1:]`` its arguments.
        """
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.ctx.no_hosts:
            cmd_args.append('--no-hosts')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Return an engine ``exec`` command line running ``cmd`` in the live container.

        Raises Error when no running container can be found for this object.
        """
        cname = get_running_container_name(self.ctx, self)
        if not cname:
            raise Error('unable to find container "{}"'.format(self.cname))
        return [
            str(self.ctx.container_engine.path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
        """Return the engine command that force-removes the container (by old or new name)."""
        ret = [
            str(self.ctx.container_engine.path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        if old_cname:
            ret.append(self.old_cname)
        else:
            ret.append(self.cname)
        return ret

    def stop_cmd(self, old_cname: bool = False, timeout: Optional[int] = None) -> List[str]:
        """Return the engine command that stops the container, optionally with ``-t <timeout>``."""
        if timeout is None:
            ret = [
                str(self.ctx.container_engine.path),
                'stop', self.old_cname if old_cname else self.cname,
            ]
        else:
            ret = [
                str(self.ctx.container_engine.path),
                'stop', '-t', f'{timeout}',
                self.old_cname if old_cname else self.cname,
            ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (Optional[int], CallVerbosity) -> str
        """Execute ``run_cmd()`` via call_throws and return its stdout."""
        out, _, _ = call_throws(self.ctx, self.run_cmd(),
                                desc=self.entrypoint, timeout=timeout, verbosity=verbosity)
        return out


class MgrListener(Thread):
    """Background thread accepting TLS connections carrying JSON messages for the agent.

    Each message is expected to start with a length field (read with
    ``recv(10)``) followed by a JSON payload of that length.
    """

    def __init__(self, agent: 'CephadmAgent') -> None:
        self.agent = agent
        self.stop = False
        super(MgrListener, self).__init__(target=self.run)

    def run(self) -> None:
        """Serve connections until ``shutdown()`` is called."""
        listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listenSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listenSocket.bind(('', int(self.agent.listener_port)))
        # accept() must time out periodically so that ``self.stop`` is re-checked
        listenSocket.settimeout(60)
        listenSocket.listen(1)
        ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
        ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
        # CERT_REQUIRED needs a CA to validate the peer certificate against
        ssl_ctx.load_verify_locations(self.agent.ca_path)
        secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
        while not self.stop:
            try:
                try:
                    conn, _ = secureListenSocket.accept()
                except socket.timeout:
                    continue
                try:
                    length: int = int(conn.recv(10).decode())
                except Exception as e:
                    err_str = f'Failed to extract length of payload from message: {e}'
                    conn.send(err_str.encode())
                    logger.error(err_str)
                    continue
                while True:
                    payload = conn.recv(length).decode()
                    if not payload:
                        break
                    try:
                        data: Dict[Any, Any] = json.loads(payload)
                        self.handle_json_payload(data)
                    except Exception as e:
                        err_str = f'Failed to extract json payload from message: {e}'
                        conn.send(err_str.encode())
                        logger.error(err_str)
                    else:
                        conn.send(b'ACK')
                        if 'config' in data:
                            self.agent.wakeup()
                        self.agent.ls_gatherer.wakeup()
                        self.agent.volume_gatherer.wakeup()
                        logger.debug(f'Got mgr message {data}')
            except Exception as e:
                logger.error(f'Mgr Listener encountered exception: {e}')

    def shutdown(self) -> None:
        """Ask the listener loop to exit after its current iteration."""
        self.stop = True

    def handle_json_payload(self, data: Dict[Any, Any]) -> None:
        """Record the message counter and persist any config files sent along with it."""
        self.agent.ack = int(data['counter'])
        if 'config' in data:
            logger.info('Received new config from mgr')
            config = data['config']
            for filename in config:
                # only files the agent knows about are written to disk
                if filename in self.agent.required_files:
                    file_path = os.path.join(self.agent.daemon_dir, filename)
                    with write_new(file_path) as f:
                        f.write(config[filename])
            self.agent.pull_conf_settings()
            self.agent.wakeup()

class CephadmAgent():

    daemon_type = 'agent'
    default_port = 8498
    # seconds between two reports to the mgr; overridden from the agent config
    loop_interval = 30
    stop = False

    # files that must be supplied in the agent's config; the names match the
    # paths built in __init__
    required_files = [
        'agent.json',
        'keyring',
        'root_cert.pem',
        'listener.crt',
        'listener.key',
    ]

    def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''):
        """Set up paths, defaults and helper threads for the agent of cluster ``fsid``.

        No thread is started and no file is read here.
        """
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id

        # connection settings; the empty values are filled in by pull_conf_settings()
        self.starting_port = 14873
        self.target_ip = ''
        self.target_port = ''
        self.host = ''

        # every file the agent uses lives in its own daemon directory
        agent_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}')
        self.daemon_dir = agent_dir
        self.config_path = os.path.join(agent_dir, 'agent.json')
        self.keyring_path = os.path.join(agent_dir, 'keyring')
        self.ca_path = os.path.join(agent_dir, 'root_cert.pem')
        self.listener_cert_path = os.path.join(agent_dir, 'listener.crt')
        self.listener_key_path = os.path.join(agent_dir, 'listener.key')
        self.listener_port = ''

        # synchronisation with the mgr and the worker threads
        self.ack = 1
        self.event = Event()
        self.mgr_listener = MgrListener(self)
        self.ls_gatherer = AgentGatherer(self, lambda: self._get_ls(), 'Ls')
        self.volume_gatherer = AgentGatherer(self, lambda: self._ceph_volume(enhanced=False), 'Volume')
        self.device_enhanced_scan = False

        # rolling window of the last three main-loop durations
        self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
        self.recent_iteration_index: int = 0
        self.cached_ls_values: Dict[str, Dict[str, str]] = {}

    def validate(self, config: Dict[str, str] = {}) -> None:
        """Raise Error naming the first entry of ``required_files`` absent from ``config``."""
        # check for the required files
        absent = [name for name in self.required_files if name not in config]
        if absent:
            raise Error('required file missing from config: %s' % absent[0])

    def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None:
        """Write the agent's files and systemd unit, then (re)start the unit.

        Raises Error when ``config`` is empty.
        """
        if not config:
            raise Error('Agent needs a config')
        assert isinstance(config, dict)

        # Create the required config files in the daemons dir, with restricted permissions
        for filename in config:
            if filename in self.required_files:
                file_path = os.path.join(self.daemon_dir, filename)
                with write_new(file_path) as f:
                    f.write(config[filename])

        unit_run_path = os.path.join(self.daemon_dir, 'unit.run')
        with write_new(unit_run_path) as f:
            f.write(self.unit_run())

        meta: Dict[str, Any] = fetch_meta(self.ctx)
        meta_file_path = os.path.join(self.daemon_dir, 'unit.meta')
        with write_new(meta_file_path) as f:
            f.write(json.dumps(meta, indent=4) + '\n')

        unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name())
        with write_new(unit_file_path) as f:
            f.write(self.unit_file())

        call_throws(self.ctx, ['systemctl', 'daemon-reload'])
        # best effort: the unit may not exist or may not be in a failed state
        call(self.ctx, ['systemctl', 'stop', self.unit_name()],
             verbosity=CallVerbosity.DEBUG)
        call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()],
             verbosity=CallVerbosity.DEBUG)
        call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()])

    def unit_name(self) -> str:
        """Return the systemd service name of this agent (unit name plus '.service')."""
        base_name = get_unit_name(self.fsid, self.daemon_type, self.daemon_id)
        return f'{base_name}.service'

    def unit_run(self) -> str:
        py3 = shutil.which('python3')
        binary_path = os.path.realpath(sys.argv[0])
        return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')

    def unit_file(self) -> str:
        # Returns the text of the agent's systemd unit; deploy_daemon_unit()
        # opens the unit file path for writing right before this is needed.
        # NOTE(review): the template looks truncated in this view -- there are
        # no section headers, no closing triple quote and no visible
        # substitution of {fsid} / {data_dir}. Confirm against the complete
        # file before editing.
        return """#generated by cephadm
Description=cephadm agent for cluster {fsid}


ExecStart=/bin/bash {data_dir}/unit.run


    def shutdown(self) -> None:
        """Stop the main loop and every helper thread that is still running."""
        self.stop = True
        if self.mgr_listener.is_alive():
            self.mgr_listener.shutdown()
        if self.ls_gatherer.is_alive():
            self.ls_gatherer.shutdown()
        if self.volume_gatherer.is_alive():
            self.volume_gatherer.shutdown()

    def wakeup(self) -> None:
        """Interrupt the main loop's wait so the next report is sent immediately."""
        self.event.set()

    def pull_conf_settings(self) -> None:
        """Load target, timing and keyring settings from the agent's files.

        Raises Error (after shutting the agent down) when the config file or
        the keyring cannot be read.
        """
        try:
            with open(self.config_path, 'r') as f:
                config = json.load(f)
                self.target_ip = config['target_ip']
                self.target_port = config['target_port']
                self.loop_interval = int(config['refresh_period'])
                self.starting_port = int(config['listener_port'])
                self.host = config['host']
                use_lsm = config['device_enhanced_scan']
        except Exception as e:
            self.shutdown()
            raise Error(f'Failed to get agent target ip and port from config: {e}')

        try:
            with open(self.keyring_path, 'r') as f:
                self.keyring = f.read()
        except Exception as e:
            self.shutdown()
            raise Error(f'Failed to get agent keyring: {e}')

        assert self.target_ip and self.target_port

        # the flag arrives as a string in the config
        self.device_enhanced_scan = False
        if use_lsm.lower() == 'true':
            self.device_enhanced_scan = True
        self.volume_gatherer.update_func(lambda: self._ceph_volume(enhanced=self.device_enhanced_scan))

    def run(self) -> None:
        """Main loop: pick a listener port, start the helper threads and report to the mgr.

        Raises Error when the configuration cannot be loaded or no free
        listener port is found. Failures to reach the mgr are only logged.
        """
        self.pull_conf_settings()

        try:
            # probe up to 1001 consecutive ports for a free one
            for _ in range(1001):
                if not port_in_use(self.ctx, EndPoint('', self.starting_port)):
                    self.listener_port = str(self.starting_port)
                    break
                self.starting_port += 1
            if not self.listener_port:
                raise Error(f'All 1000 ports starting at {str(self.starting_port - 1001)} taken.')
        except Exception as e:
            raise Error(f'Failed to pick port for agent to listen on: {e}')

        if not self.mgr_listener.is_alive():
            self.mgr_listener.start()

        if not self.ls_gatherer.is_alive():
            self.ls_gatherer.start()

        if not self.volume_gatherer.is_alive():
            self.volume_gatherer.start()

        ssl_ctx = ssl.create_default_context()
        ssl_ctx.check_hostname = True
        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
        ssl_ctx.load_verify_locations(self.ca_path)

        while not self.stop:
            start_time = time.monotonic()
            ack = self.ack

            # part of the networks info is returned as a set which is not JSON
            # serializable. The set must be converted to a list
            networks = list_networks(self.ctx)
            networks_list: Dict[str, Dict[str, List[str]]] = {}
            for key in networks.keys():
                networks_list[key] = {}
                for k, v in networks[key].items():
                    networks_list[key][k] = list(v)

            # gatherer data is only sent when it was collected for the current ack
            data = json.dumps({'host': self.host,
                               'ls': (self.ls_gatherer.data if self.ack == self.ls_gatherer.ack
                                      and self.ls_gatherer.data is not None else []),
                               'networks': networks_list,
                               'facts': HostFacts(self.ctx).dump(),
                               'volume': (self.volume_gatherer.data if self.ack == self.volume_gatherer.ack
                                          and self.volume_gatherer.data is not None else ''),
                               'ack': str(ack),
                               'keyring': self.keyring,
                               'port': self.listener_port})
            data = data.encode('ascii')

            url = f'https://{self.target_ip}:{self.target_port}/data/'
            try:
                req = Request(url, data, {'Content-Type': 'application/json'})
                send_time = time.monotonic()
                with urlopen(req, context=ssl_ctx) as response:
                    response_str = response.read()
                    response_json = json.loads(response_str)
                    total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
                    logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
            except Exception as e:
                logger.error(f'Failed to send metadata to mgr: {e}')

            end_time = time.monotonic()
            run_time = datetime.timedelta(seconds=(end_time - start_time))
            self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
            self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
            # average over the slots of the window that have been filled so far
            run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])

            # sleep for the rest of the interval, unless wakeup() cuts it short
            self.event.wait(max(self.loop_interval - int(run_time_average), 0))
            self.event.clear()

    def _ceph_volume(self, enhanced: bool = False) -> Tuple[str, bool]:
        """Run the ceph-volume inventory and return ``(json_output, False)``.

        Raises Exception when the command printed nothing.
        """
        self.ctx.command = 'inventory --format=json'.split()
        if enhanced:
            self.ctx.command.append('--with-lsm')
        self.ctx.fsid = self.fsid

        # the command prints its result, so capture stdout
        stream = io.StringIO()
        with redirect_stdout(stream):
            command_ceph_volume(self.ctx)

        stdout = stream.getvalue()

        if stdout:
            return (stdout, False)
        else:
            raise Exception('ceph-volume returned empty value')

    def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]:
        """Return a cheap per-daemon summary, keyed by daemon name."""
        # gets a subset of ls info quickly. The results of this will tell us if our
        # cached info is still good or if we need to run the full ls again.
        # for legacy containers, we just grab the full info. For cephadmv1 containers,
        # we only grab enabled, state, mem_usage and container id. If container id has
        # not changed for any daemon, we assume our cached info is good.
        daemons: Dict[str, Dict[str, Any]] = {}
        data_dir = self.ctx.data_dir
        seen_memusage = {}  # type: Dict[str, int]
        out, err, code = call(
            self.ctx,
            [self.ctx.container_engine.path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
            verbosity=CallVerbosity.DEBUG
        )
        seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
        # we need a mapping from container names to ids. Later we will convert daemon
        # names to container names to get daemons container id to see if it has changed
        out, err, code = call(
            self.ctx,
            [self.ctx.container_engine.path, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
            verbosity=CallVerbosity.DEBUG
        )
        name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    (enabled, state, _) = check_unit(self.ctx, legacy_unit_name)
                    daemons[f'{daemon_type}.{daemon_id}'] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': self.ctx.fsid if self.ctx.fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                        'enabled': 'true' if enabled else 'false',
                        'state': state,
                    }
            elif is_fsid(i):
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid, daemon_type, daemon_id)
                        (enabled, state, _) = check_unit(self.ctx, unit_name)
                        daemons[j] = {
                            'style': 'cephadm:v1',
                            'systemd_unit': unit_name,
                            'enabled': 'true' if enabled else 'false',
                            'state': state,
                        }
                        c = CephContainer.for_daemon(self.ctx, self.ctx.fsid, daemon_type, daemon_id, 'bash')
                        container_id: Optional[str] = None
                        # prefer the dash-separated name, fall back to the dotted one
                        for name in (c.cname, c.old_cname):
                            if name in name_id_mapping:
                                container_id = name_id_mapping[name]
                                break
                        daemons[j]['container_id'] = container_id
                        if container_id:
                            # stats may print truncated ids, so compare on that prefix length
                            daemons[j]['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
        return daemons

    def _parse_container_id_name(self, code: int, out: str) -> Dict[str, str]:
        # map container names to ids from ps output
        name_id_mapping = {}  # type: Dict[str, str]
        if not code:
            for line in out.splitlines():
                id, name = line.split(',')
                name_id_mapping[name] = id
        return name_id_mapping

    def _get_ls(self) -> Tuple[List[Dict[str, str]], bool]:
        """Return the daemon listing, refreshing as little as possible.

        A full `list_daemons` run is done when nothing is cached, when the set
        of daemons changed, or when a container id changed. Otherwise only the
        cheap values from `_daemon_ls_subset` are merged into the cache.

        :returns: (list of daemon descriptions, whether a change worth
                  reporting quickly was detected)
        """
        if not self.cached_ls_values:
            logger.info('No cached ls output. Running full daemon ls')
            ls = list_daemons(self.ctx)
            for d in ls:
                self.cached_ls_values[d['name']] = d
            return (ls, True)
        else:
            ls_subset = self._daemon_ls_subset()
            need_full_ls = False
            state_change = False
            if set(self.cached_ls_values.keys()) != set(ls_subset.keys()):
                # case for a new daemon in ls or an old daemon no longer appearing.
                # If that happens we need a full ls
                logger.info('Change detected in state of daemons. Running full daemon ls')
                self.cached_ls_values = {}
                ls = list_daemons(self.ctx)
                for d in ls:
                    self.cached_ls_values[d['name']] = d
                return (ls, True)
            for daemon, info in self.cached_ls_values.items():
                if info['style'] == 'legacy':
                    # for legacy containers, ls_subset just grabs all the info
                    self.cached_ls_values[daemon] = ls_subset[daemon]
                else:
                    if info['container_id'] != ls_subset[daemon]['container_id']:
                        # case for container id having changed. We need full ls as
                        # info we didn't grab like version and start time could have changed
                        need_full_ls = True
                        break

                    # want to know if a daemons state change because in those cases we want
                    # to report back quicker
                    if (
                        self.cached_ls_values[daemon]['enabled'] != ls_subset[daemon]['enabled']
                        or self.cached_ls_values[daemon]['state'] != ls_subset[daemon]['state']
                    ):
                        state_change = True
                    # if we reach here, container id matched. Update the few values we do track
                    # from ls subset: state, enabled, memory_usage.
                    self.cached_ls_values[daemon]['enabled'] = ls_subset[daemon]['enabled']
                    self.cached_ls_values[daemon]['state'] = ls_subset[daemon]['state']
                    if 'memory_usage' in ls_subset[daemon]:
                        self.cached_ls_values[daemon]['memory_usage'] = ls_subset[daemon]['memory_usage']
            if need_full_ls:
                logger.info('Change detected in state of daemons. Running full daemon ls')
                ls = list_daemons(self.ctx)
                self.cached_ls_values = {}
                for d in ls:
                    self.cached_ls_values[d['name']] = d
                return (ls, True)
            else:
                ls = [info for daemon, info in self.cached_ls_values.items()]
                return (ls, state_change)

class AgentGatherer(Thread):
    """Background thread that periodically calls a data-gathering function.

    Each iteration calls `func`, which must return `(data, changed)`. The
    result is stored in `self.data`; when the agent's ack differs from the
    one recorded here, or `func` reports a change, the new ack is recorded
    and the agent is woken up.
    """

    def __init__(self, agent: 'CephadmAgent', func: Callable, gatherer_type: str = 'Unnamed', initial_ack: int = 0) -> None:
        self.agent = agent
        self.func = func
        self.gatherer_type = gatherer_type
        self.ack = initial_ack
        self.event = Event()
        self.data: Any = None
        self.stop = False
        # ring buffer holding the duration of the last three iterations
        self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
        self.recent_iteration_index: int = 0
        super(AgentGatherer, self).__init__(target=self.run)

    def run(self) -> None:
        """Gather in a loop until `shutdown` is called."""
        while not self.stop:
            try:
                start_time = time.monotonic()

                ack = self.agent.ack
                change = False
                try:
                    self.data, change = self.func()
                except Exception as e:
                    logger.error(f'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
                    self.data = None
                if ack != self.ack or change:
                    self.ack = ack
                    # NOTE(review): assumes the agent exposes wakeup() -- confirm
                    self.agent.wakeup()

                end_time = time.monotonic()
                run_time = datetime.timedelta(seconds=(end_time - start_time))
                self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
                self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
                # average over populated slots only; guard against an all-zero
                # buffer so a ZeroDivisionError cannot skip the wait below
                samples = [t for t in self.recent_iteration_run_times if t]
                run_time_average = sum(samples, 0.0) / len(samples) if samples else 0.0

                # shorten the sleep by the typical run time so iterations
                # start roughly every loop_interval seconds
                self.event.wait(max(self.agent.loop_interval - int(run_time_average), 0))
                # reset, otherwise one wakeup() would disable all later waits
                self.event.clear()
            except Exception as e:
                logger.error(f'{self.gatherer_type} Gatherer encountered exception: {e}')

    def shutdown(self) -> None:
        """Ask the loop in `run` to exit after the current iteration."""
        self.stop = True

    def wakeup(self) -> None:
        """Interrupt the current wait so the next iteration starts now."""
        self.event.set()

    def update_func(self, func: Callable) -> None:
        """Replace the gathering function used by later iterations."""
        self.func = func

def command_agent(ctx: CephadmContext) -> None:
    """Start the cephadm agent for the daemon described by `ctx`.

    :raises Error: if the agent's daemon directory does not exist
    """
    agent = CephadmAgent(ctx, ctx.fsid, ctx.daemon_id)

    if not os.path.isdir(agent.daemon_dir):
        raise Error(f'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?')

    # Without this the command only validates the directory and exits.
    # NOTE(review): assumes CephadmAgent exposes run() -- confirm
    agent.run()



def command_version(ctx):
    # type: (CephadmContext) -> int
    """Print the cephadm version taken from the optional `_version` module.

    :returns: 0 on success, 1 when `_version` cannot be imported
    """
    import importlib

    try:
        vmod = importlib.import_module('_version')
    except ImportError:
        print('cephadm version UNKNOWN')
        return 1
    # placeholder for any attribute the module does not define
    _unset = ''
    print('cephadm version {0} ({1}) {2} ({3})'.format(
        getattr(vmod, 'CEPH_GIT_NICE_VER', _unset),
        getattr(vmod, 'CEPH_GIT_VER', _unset),
        getattr(vmod, 'CEPH_RELEASE_NAME', _unset),
        getattr(vmod, 'CEPH_RELEASE_TYPE', _unset),
    ))
    return 0


def command_pull(ctx):
    # type: (CephadmContext) -> int
    """Pull `ctx.image` and then print its inspect information.

    :returns: the result of `command_inspect_image`
    :raises Error: if the registry rejects the pull as unauthorized
    """
    try:
        _pull_image(ctx, ctx.image, ctx.insecure)
    except UnauthorizedRegistryError:
        err_str = 'Failed to pull container image. Check that host(s) are logged into the registry'
        logger.debug(f'Pulling image for `command_pull` failed: {err_str}')
        raise Error(err_str)
    return command_inspect_image(ctx)

def _pull_image(ctx, image, insecure=False):
    # type: (CephadmContext, str, bool) -> None
    """Pull `image` with the configured container engine, retrying transient failures.

    :param insecure: for Podman, disable TLS verification of the registry
    :raises UnauthorizedRegistryError: if stderr mentions 'unauthorized'
    :raises Error: on a non-transient failure or when all retries are used up
    """
    logger.info('Pulling container image %s...' % image)

    # stderr fragments of failures that are worth retrying
    ignorelist = [
        'error creating read-write layer with ID',
        'net/http: TLS handshake timeout',
        'Digest did not match, expected',
    ]

    cmd = [ctx.container_engine.path, 'pull', image]
    if isinstance(ctx.container_engine, Podman):
        if insecure:
            cmd.append('--tls-verify=false')

        if os.path.exists('/etc/ceph/podman-auth.json'):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
    cmd_str = ' '.join(cmd)

    for sleep_secs in [1, 4, 25]:
        out, err, ret = call(ctx, cmd, verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        if not ret:
            return

        if 'unauthorized' in err:
            raise UnauthorizedRegistryError()

        if not any(pattern in err for pattern in ignorelist):
            raise Error('Failed command: %s' % cmd_str)

        logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise Error('Failed command: %s: maximum retries reached' % cmd_str)


def command_inspect_image(ctx):
    # type: (CephadmContext) -> int
    """Print image id, repo digests and ceph version of `ctx.image` as JSON.

    :returns: 0 on success, errno.ENOENT if the inspect call reports failure
    """
    out, err, ret = call_throws(ctx, [
        ctx.container_engine.path, 'inspect',
        '--format', '{{.ID}},{{.RepoDigests}}',
        ctx.image])
    if ret:
        return errno.ENOENT
    info_from = get_image_info_from_inspect(out.strip(), ctx.image)

    # ask the image itself which ceph version it ships
    ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    info_from['ceph_version'] = ver

    print(json.dumps(info_from, indent=4, sort_keys=True))
    return 0

def normalize_image_digest(digest: str) -> str:
    """Prefix well-known short image names with DEFAULT_REGISTRY.

    Examples, assuming DEFAULT_REGISTRY is 'docker.io':

    Normal case:
        normalize_image_digest('ceph/ceph')  ->  'docker.io/ceph/ceph'

    No change:
        normalize_image_digest('quay.ceph.io/ceph/ceph')  ->  'quay.ceph.io/ceph/ceph'
        normalize_image_digest('docker.io/ubuntu')  ->  'docker.io/ubuntu'
        normalize_image_digest('localhost/ceph')  ->  'localhost/ceph'
    """
    # NOTE(review): list contents reconstructed -- confirm against the project
    known_shortnames = [
        'ceph/ceph',
        'ceph/daemon',
        'ceph/daemon-base',
    ]
    for image in known_shortnames:
        if digest.startswith(image):
            return f'{DEFAULT_REGISTRY}/{digest}'
    return digest

def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, Union[str,List[str]]]
    """Parse `<id>,<[digest digest ...]>` inspect output into a dict.

    :param out: stripped output of the engine's inspect command
    :param image: image name, only used in the error message
    :returns: dict with 'image_id' and, when digests are present, 'repo_digests'
    :raises Error: if `out` is empty
    """
    # Check before splitting: unpacking ''.split(',', 1) raises ValueError,
    # which used to hide the intended error below.
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }  # type: Dict[str, Union[str,List[str]]]
    if digests:
        # digests look like '[a b c]': drop the brackets, split on spaces
        r['repo_digests'] = list(map(normalize_image_digest, digests[1: -1].split(' ')))
    return r


def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
    """Determine whether the given string is a valid subnet

    :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
    :returns: return code, IP version list of the subnets and msg describing any validation errors
    """

    rc = 0
    versions = set()
    errors = []
    subnet_list = subnets.split(',')
    for subnet in subnet_list:
        # ensure the format of the string is as expected address/netmask
        subnet = subnet.strip()
        if not re.search(r'\/\d+$', subnet):
            rc = 1
            errors.append(f'{subnet} is not in CIDR format (address/netmask)')
            continue
        try:
            v = ipaddress.ip_network(subnet).version
            versions.add(v)
        except ValueError as e:
            rc = 1
            errors.append(f'{subnet} invalid: {str(e)}')

    # sorted so the version list is deterministic
    return rc, sorted(versions), ', '.join(errors)

def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip one pair of enclosing square brackets from `address`, if present."""
    bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if bracketed else address

def wrap_ipv6(address):
    # type: (str) -> str
    """Return `address` in square brackets if it is a bare IPv6 address.

    Anything else (IPv4, hostnames, already-bracketed IPv6) is returned
    unchanged.
    """

    # We cannot assume it's already wrapped or even an IPv6 address if
    # it's already wrapped it'll not pass (like if it's a hostname) and trigger
    # the ValueError
    try:
        if ipaddress.ip_address(address).version == 6:
            return f'[{address}]'
    except ValueError:
        pass

    return address

def is_ipv6(address):
    # type: (str) -> bool
    """Return True if `address` (optionally in square brackets) is an IPv6 address.

    Strings that are not valid IP addresses log a warning and return False.
    """
    address = unwrap_ipv6(address)
    try:
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(address))
        return False

def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
    """Determine if the ip_addr belongs to any of the subnets list.

    :param ip_addr: IPv4 or IPv6 address; IPv6 may be wrapped in square brackets
    :param subnets: comma separated list of CIDR subnets
    :raises ValueError: if the address or one of the subnets cannot be parsed
    """
    # The address does not depend on the subnet: normalise and parse it once.
    bare_addr = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
    addr = ipaddress.ip_address(bare_addr)
    return any(
        addr in ipaddress.ip_network(subnet.strip())
        for subnet in subnets.split(',')
    )

def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
    """Parse mon-addrv param into a list of mon end points.

    :param addrv_arg: bracketed, comma separated address list; each entry may
                      carry a `v1:`/`v2:` prefix and must end in `:<port>`
    :raises Error: if the brackets or a port number are missing
    """
    r = re.compile(r':(\d+)$')
    addrv_args = []
    addr_arg = addrv_arg
    # startswith/endswith also reject the empty string, which indexing
    # would turn into an IndexError
    if not (addr_arg.startswith('[') and addr_arg.endswith(']')):
        raise Error(f'--mon-addrv value {addr_arg} must use square brackets')

    for addr in addr_arg[1: -1].split(','):
        hasport = r.findall(addr)
        if not hasport:
            raise Error(f'--mon-addrv value {addr_arg} must include port number')
        port_str = hasport[0]
        addr = re.sub(r'^v\d+:', '', addr)  # strip off v1: or v2: prefix
        # drop the trailing ':<port>'
        base_ip = addr[0:-(len(port_str)) - 1]
        addrv_args.append(EndPoint(base_ip, int(port_str)))

    return addrv_args

def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
    """Parse mon-ip param into a list of mon end points.

    With an explicit `:<port>` suffix a single end point is returned;
    otherwise one end point per fixed monitor port (3300 and 6789).
    """
    r = re.compile(r':(\d+)$')
    addrv_args = []
    hasport = r.findall(mon_ip)
    if hasport:
        port_str = hasport[0]
        # drop the trailing ':<port>'
        base_ip = mon_ip[0:-(len(port_str)) - 1]
        addrv_args.append(EndPoint(base_ip, int(port_str)))
    else:
        # No port provided: use fixed ports for ceph monitor
        addrv_args.append(EndPoint(mon_ip, 3300))
        addrv_args.append(EndPoint(mon_ip, 6789))

    return addrv_args

def build_addrv_params(addrv: List[EndPoint]) -> str:
    """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]

    :raises Error: if more than two end points are given
    """
    if len(addrv) > 2:
        raise Error('Detected a local mon-addrv list with more than 2 entries.')
    port_to_ver: Dict[int, str] = {6789: 'v1', 3300: 'v2'}
    addr_arg_list: List[str] = []
    for ep in addrv:
        if ep.port in port_to_ver:
            ver = port_to_ver[ep.port]
        else:
            ver = 'v2'  # default mon protocol version if port is not provided
            logger.warning(f'Using msgr2 protocol for unrecognized port {ep}')
        addr_arg_list.append(f'{ver}:{ep.ip}:{ep.port}')

    addr_arg = '[{0}]'.format(','.join(addr_arg_list))
    return addr_arg

def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]:
    """Get mon public network from configuration file.

    :returns: the `public_network` value of the [global] section, or None
              when the option is absent
    :raises Error: if the value is not valid CIDR, if none of the networks is
                   configured locally, or if the given mon address is outside
                   all of them
    """
    cp = read_config(ctx.config)
    if not cp.has_option('global', 'public_network'):
        return None

    # Ensure all public CIDR networks are valid
    public_network = cp.get('global', 'public_network').strip('"').strip("'")
    rc, _, err_msg = check_subnet(public_network)
    if rc:
        raise Error(f'Invalid public_network {public_network} parameter: {err_msg}')

    # Ensure all public CIDR networks are configured locally
    configured_subnets = set([x.strip() for x in public_network.split(',')])
    local_subnets = set([x[0] for x in list_networks(ctx).items()])
    valid_public_net = False
    for net in configured_subnets:
        if net in local_subnets:
            valid_public_net = True
        else:
            logger.warning(f'The public CIDR network {net} (from -c conf file) is not configured locally.')
    if not valid_public_net:
        raise Error(f'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')

    # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
    if ctx.mon_ip:
        if not ip_in_subnets(ctx.mon_ip, public_network):
            raise Error(f'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
    elif ctx.mon_addrv:
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        for addrv in addrv_args:
            if not ip_in_subnets(addrv.ip, public_network):
                raise Error(f'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')

    logger.debug(f'Using mon public network from configuration file {public_network}')
    return public_network

def infer_mon_network(ctx: CephadmContext, mon_eps: List[EndPoint]) -> Optional[str]:
    """Infer mon public network from local network.

    :param mon_eps: mon end points whose IPs are looked up among local addresses
    :returns: comma separated list of the CIDR networks holding a mon IP
    :raises Error: if no mon IP is configured on any local network
    """
    # Make sure IP is configured locally, and then figure out the CIDR network
    mon_networks = []
    for net, ifaces in list_networks(ctx).items():
        # build local_ips list for the specified network
        local_ips: List[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]] = []
        for _, ls in ifaces.items():
            local_ips.extend([ipaddress.ip_address(ip) for ip in ls])

        # check if any of mon ips belong to this net
        for mon_ep in mon_eps:
            try:
                if ipaddress.ip_address(unwrap_ipv6(mon_ep.ip)) in local_ips:
                    mon_networks.append(net)
                    logger.info(f'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
            except ValueError as e:
                logger.warning(f'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')

    if not mon_networks:
        raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
    else:
        logger.debug(f'Inferred mon public CIDR from local network configuration {mon_networks}')

    # remove duplicates; dict.fromkeys keeps first-seen order, so the result
    # is deterministic (a set would not be)
    mon_networks = list(dict.fromkeys(mon_networks))
    return ','.join(mon_networks)

def prepare_mon_addresses(ctx: CephadmContext) -> Tuple[str, bool, Optional[str]]:
    """Get mon public network configuration.

    :returns: (mon addrv string, whether IPv6 is in use, mon public network
              or None when `ctx.skip_mon_network` is set)
    :raises Error: if neither --mon-ip nor --mon-addrv was given
    """
    ipv6 = False
    addrv_args: List[EndPoint] = []
    mon_addrv: str = ''  # i.e: [v2:,v1:]

    if ctx.mon_ip:
        ipv6 = is_ipv6(ctx.mon_ip)
        if ipv6:
            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
        addrv_args = parse_mon_ip(ctx.mon_ip)
        mon_addrv = build_addrv_params(addrv_args)
    elif ctx.mon_addrv:
        # besides the outer list bracket, more '[' means bracketed IPv6 entries
        ipv6 = ctx.mon_addrv.count('[') > 1
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        mon_addrv = ctx.mon_addrv
    else:
        raise Error('must specify --mon-ip or --mon-addrv')

    if addrv_args:
        for end_point in addrv_args:
            check_ip_port(ctx, end_point)

    logger.debug(f'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
    mon_network = None
    if not ctx.skip_mon_network:
        # the config file wins; fall back to inferring from local interfaces
        mon_network = get_public_net_from_cfg(ctx) or infer_mon_network(ctx, addrv_args)

    return (mon_addrv, ipv6, mon_network)

def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    """Validate the cluster network from the CLI or the config file.

    :returns: (cluster network string, whether any subnet is IPv6); the
              network is whatever `ctx.cluster_network` held when none is set
    :raises Error: if the cluster network is not valid CIDR
    """
    # the cluster network may not exist on this node, so all we can do is
    # validate that the address given is valid ipv4 or ipv6 subnet
    ipv6_cluster_network = False
    cp = read_config(ctx.config)
    cluster_network = ctx.cluster_network
    if cluster_network is None and cp.has_option('global', 'cluster_network'):
        cluster_network = cp.get('global', 'cluster_network').strip('"').strip("'")

    if cluster_network:
        cluster_nets = set([x.strip() for x in cluster_network.split(',')])
        local_subnets = set([x[0] for x in list_networks(ctx).items()])
        for net in cluster_nets:
            if net not in local_subnets:
                logger.warning(f'The cluster CIDR network {net} is not configured locally.')

        rc, versions, err_msg = check_subnet(cluster_network)
        if rc:
            raise Error(f'Invalid --cluster-network parameter: {err_msg}')
        ipv6_cluster_network = True if 6 in versions else False
    else:
        logger.info('Internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')

    return cluster_network, ipv6_cluster_network

def create_initial_keys(
    ctx: CephadmContext,
    uid: int, gid: int,
    mgr_id: str
) -> Tuple[str, str, str, Any, Any]:  # type: ignore
    """Generate the mon, admin and mgr keys and write the bootstrap keyrings.

    :returns: (mon key, mgr key, admin key, temporary bootstrap keyring
              file, temporary admin keyring file)
    """

    _image = ctx.image

    def _gen_key() -> str:
        # NOTE(review): entrypoint and args reconstructed -- confirm
        return CephContainer(
            ctx,
            image=_image,
            entrypoint='/usr/bin/ceph-authtool',
            args=['--gen-print-key'],
        ).run().strip()

    # create some initial keys
    logger.info('Creating initial keys...')
    mon_key = _gen_key()
    admin_key = _gen_key()
    mgr_key = _gen_key()

    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    admin_keyring = write_tmp('[client.admin]\n'
                              '\tkey = ' + admin_key + '\n',
                              uid, gid)

    # tmp keyring file
    bootstrap_keyring = write_tmp(keyring, uid, gid)
    return (mon_key, mgr_key, admin_key,
            bootstrap_keyring, admin_keyring)

def create_initial_monmap(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str,
    mon_id: str, mon_addr: str
) -> Any:
    """Create the initial monmap in a temporary file using monmaptool.

    :returns: the temporary monmap file, chowned to uid/gid
    """
    logger.info('Creating initial monmap...')
    monmap = write_tmp('', 0, 0)
    # NOTE(review): entrypoint and leading args reconstructed -- confirm
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/monmaptool',
        args=[
            '--create',
            '--clobber',
            '--fsid', fsid,
            '--addv', mon_id, mon_addr,
            '/tmp/monmap',
        ],
        volume_mounts={
            monmap.name: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(monmap.fileno(), uid, gid)
    return monmap

def prepare_create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str,
    bootstrap_keyring_path: str,
    monmap_path: str
) -> Tuple[str, str]:
    """Create the mon's directories and initialise its data store.

    :returns: (mon data directory, log directory)
    """
    logger.info('Creating mon...')
    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    # NOTE(review): entrypoint and '--mkfs' reconstructed -- confirm
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-mon',
        args=[
            '--mkfs',
            '-i', mon_id,
            '--fsid', fsid,
            '-c', '/dev/null',
            '--monmap', '/tmp/monmap',
            '--keyring', '/tmp/keyring',
        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            bootstrap_keyring_path: '/tmp/keyring:z',
            monmap_path: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'create mon.{mon_id} on {out}')
    return (mon_dir, log_dir)

def create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str
) -> None:
    """Deploy the mon daemon `mon_id` of cluster `fsid`, without config or keyring."""
    daemon_type = 'mon'
    container = get_container(ctx, fsid, daemon_type, mon_id)
    ctx.meta_properties = {'service_name': 'mon'}
    deploy_daemon(
        ctx, fsid, daemon_type, mon_id, container, uid, gid,
        config=None,
        keyring=None,
    )

def wait_for_mon(
    ctx: CephadmContext,
    mon_id: str, mon_dir: str,
    admin_keyring_path: str, config_path: str
) -> None:
    """Block until a status query against the new mon succeeds."""
    logger.info('Waiting for mon to start...')
    # NOTE(review): entrypoint and args reconstructed -- confirm
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph',
        args=['status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
            config_path: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        # NOTE(review): keyword arguments reconstructed -- confirm
        out, err, ret = call(ctx, c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout,
                             verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        return ret == 0

    is_available(ctx, 'mon', is_mon_available)

def create_mgr(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mgr_id: str, mgr_key: str,
    config: str, clifunc: Callable
) -> None:
    """Deploy the initial mgr daemon and wait until the mgrmap reports it available.

    :param clifunc: callable that runs a ceph CLI command and returns its output
    """
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
    # Note:the default port used by the Prometheus node exporter is opened in fw
    ctx.meta_properties = {'service_name': 'mgr'}
    endpoints = [EndPoint('', 9283), EndPoint('', 8765)]
    if not ctx.skip_monitoring_stack:
        endpoints.append(EndPoint('', 8443))
    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, endpoints=endpoints)

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')

    def is_mgr_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        try:
            # NOTE(review): keyword arguments reconstructed -- confirm
            out = clifunc(['status', '-f', 'json-pretty'],
                          timeout=timeout,
                          verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            # any failure here just means "not available yet"
            logger.debug('status failed: %s' % e)
            return False

    is_available(ctx, 'mgr', is_mgr_available)

def prepare_ssh(
    ctx: CephadmContext,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Configure cephadm's SSH access, add this host and apply the default services.

    :param cli: callable that runs a ceph CLI command and returns its output
    :param wait_for_mgr_restart: not used in this function
    :raises Error: if adding the local host to the orchestrator fails
    """

    cli(['cephadm', 'set-user', ctx.ssh_user])

    if ctx.ssh_config:
        logger.info('Using provided ssh config...')
        mounts = {
            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
        }
        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

    if ctx.ssh_private_key and ctx.ssh_public_key:
        logger.info('Using provided ssh keys...')
        mounts = {
            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
            pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
        }
        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
        cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
        ssh_pub = cli(['cephadm', 'get-pub-key'])
        authorize_ssh_key(ssh_pub, ctx.ssh_user)
    elif ctx.ssh_private_key and ctx.ssh_signed_cert:
        logger.info('Using provided ssh private key and signed cert ...')
        mounts = {
            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
            pathify(ctx.ssh_signed_cert.name): '/tmp/cephadm-ssh-key-cert.pub:z'
        }
        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
        cli(['cephadm', 'set-signed-cert', '-i', '/tmp/cephadm-ssh-key-cert.pub'], extra_mounts=mounts)
    else:
        logger.info('Generating ssh key...')
        cli(['cephadm', 'generate-key'])
        ssh_pub = cli(['cephadm', 'get-pub-key'])
        with open(ctx.output_pub_ssh_key, 'w') as f:
            f.write(ssh_pub)
        logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)
        authorize_ssh_key(ssh_pub, ctx.ssh_user)

    host = get_hostname()
    logger.info('Adding host %s...' % host)
    try:
        args = ['orch', 'host', 'add', host]
        # NOTE(review): address arguments reconstructed -- confirm
        if ctx.mon_ip:
            args.append(unwrap_ipv6(ctx.mon_ip))
        elif ctx.mon_addrv:
            addrv_args = parse_mon_addrv(ctx.mon_addrv)
            args.append(unwrap_ipv6(addrv_args[0].ip))
        cli(args)
    except RuntimeError as e:
        raise Error('Failed to add host <%s>: %s' % (host, e))

    for t in ['mon', 'mgr']:
        if not ctx.orphan_initial_daemons:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
        else:
            logger.info('Deploying unmanaged %s service...' % t)
            cli(['orch', 'apply', t, '--unmanaged'])

    if not ctx.orphan_initial_daemons:
        logger.info('Deploying crash service with default placement...')
        cli(['orch', 'apply', 'crash'])

    if not ctx.skip_monitoring_stack:
        for t in ['ceph-exporter', 'prometheus', 'grafana', 'node-exporter', 'alertmanager']:
            logger.info('Deploying %s service with default placement...' % t)
            try:
                cli(['orch', 'apply', t])
            except RuntimeError:
                # best effort: record the failure and carry on
                ctx.error_code = -errno.EINVAL
                logger.error(f'Failed to apply service type {t}. '
                             'Perhaps the ceph version being bootstrapped does not support it')

    if ctx.with_centralized_logging:
        for t in ['loki', 'promtail']:
            logger.info('Deploying %s service with default placement...' % t)
            try:
                cli(['orch', 'apply', t])
            except RuntimeError:
                # best effort: record the failure and carry on
                ctx.error_code = -errno.EINVAL
                logger.error(f'Failed to apply service type {t}. '
                             'Perhaps the ceph version being bootstrapped does not support it')

def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the cephadm mgr module and make it the orchestrator backend.

    :param cli: callable that runs a ceph CLI command
    :param wait_for_mgr_restart: called with no arguments after the module
                                 is enabled, before the backend is set
    """

    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    # this parameter was accepted but never used
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])

def prepare_dashboard(
    ctx: CephadmContext,
    uid: int, gid: int,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the dashboard module, set up its certificate and create the admin user.

    :param cli: callable that runs a ceph CLI command and returns its output
    :param wait_for_mgr_restart: called with no arguments after the dashboard
                                 module is enabled
    """

    # Configure SSL port (cephadm only allows to configure dashboard SSL port)
    # if the user does not want to use SSL he can change this setting once the cluster is up
    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])

    # configuring dashboard parameters
    logger.info('Enabling the dashboard module...')
    cli(['mgr', 'module', 'enable', 'dashboard'])
    wait_for_mgr_restart()

    # dashboard crt and key
    if ctx.dashboard_key and ctx.dashboard_crt:
        logger.info('Using provided dashboard certificate...')
        mounts = {
            pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
            pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
        }
        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
    else:
        logger.info('Generating a dashboard self-signed certificate...')
        cli(['dashboard', 'create-self-signed-cert'])

    logger.info('Creating initial admin user...')
    password = ctx.initial_dashboard_password or generate_password()
    # the password is handed over via a mounted temp file, not the command line
    tmp_password_file = write_tmp(password, uid, gid)
    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
    if not ctx.dashboard_password_noupdate:
        # NOTE(review): flag reconstructed -- confirm
        cmd.append('--pwd-update-required')
    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
    logger.info('Fetching dashboard port number...')
    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
    port = int(out)

    # Open dashboard port
    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
        fw = Firewalld(ctx)
        # NOTE(review): Firewalld calls reconstructed -- confirm
        fw.open_ports([port])
        fw.apply_rules()

    logger.info('Ceph Dashboard is now available at:\n\n'
                '\t     URL: https://%s:%s/\n'
                '\t    User: %s\n'
                '\tPassword: %s\n' % (
                    get_fqdn(), port,
                    ctx.initial_dashboard_user,
                    password))

def prepare_bootstrap_config(
    ctx: CephadmContext,
    fsid: str, mon_addr: str, image: str

) -> str:
    """Build the initial ceph.conf contents for bootstrapping.

    Starts from `ctx.config`, forces fsid, mon_host and container_image, and
    fills in defaults only where the user did not set the option (in either
    underscore or space spelling).

    :returns: the rendered configuration text
    """

    cp = read_config(ctx.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid)
    cp.set('global', 'mon_host', mon_addr)
    cp.set('global', 'container_image', image)

    if not cp.has_section('mon'):
        cp.add_section('mon')
    if (
            not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
            and not cp.has_option('mon', 'auth allow insecure global id reclaim')
    ):
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')

    if ctx.single_host_defaults:
        logger.info('Adjusting default settings to suit single-host cluster...')
        # replicate across osds, not hosts
        if (
                not cp.has_option('global', 'osd_crush_chooseleaf_type')
                and not cp.has_option('global', 'osd crush chooseleaf type')
        ):
            cp.set('global', 'osd_crush_chooseleaf_type', '0')
        # replica 2x
        if (
                not cp.has_option('global', 'osd_pool_default_size')
                and not cp.has_option('global', 'osd pool default size')
        ):
            cp.set('global', 'osd_pool_default_size', '2')
        # disable mgr standby modules (so we can colocate multiple mgrs on one host)
        if not cp.has_section('mgr'):
            cp.add_section('mgr')
        if (
                not cp.has_option('mgr', 'mgr_standby_modules')
                and not cp.has_option('mgr', 'mgr standby modules')
        ):
            cp.set('mgr', 'mgr_standby_modules', 'false')
    if ctx.log_to_file:
        cp.set('global', 'log_to_file', 'true')
        cp.set('global', 'log_to_stderr', 'false')
        cp.set('global', 'log_to_journald', 'false')
        cp.set('global', 'mon_cluster_log_to_file', 'true')
        cp.set('global', 'mon_cluster_log_to_stderr', 'false')
        cp.set('global', 'mon_cluster_log_to_journald', 'false')

    cpf = StringIO()
    cp.write(cpf)
    config = cpf.getvalue()

    if ctx.registry_json or ctx.registry_url:
        # NOTE(review): login call reconstructed -- confirm
        command_registry_login(ctx)

    return config

def finish_bootstrap_config(
    ctx: CephadmContext,
    fsid: str,
    config: str,
    mon_id: str, mon_dir: str,
    mon_network: Optional[str], ipv6: bool,
    cli: Callable,
    cluster_network: Optional[str], ipv6_cluster_network: bool

) -> None:
    """Push the bootstrap configuration into the running mon and write ceph.conf.

    Unless ``ctx.no_minimize_config`` is set, the mon's config file is
    assimilated into the cluster configuration, replaced by a generated
    minimal config, and the mon is restarted.  Network related options are
    then set through ``cli`` and the resulting config text is written to
    ``ctx.output_config``.

    :param ctx: cephadm context
    :param fsid: cluster fsid, used to derive the mon's unit name
    :param config: bootstrap config text; replaced by the minimized config
        when minimizing is enabled
    :param mon_id: id of the bootstrap mon
    :param mon_dir: host path of the mon's data directory
    :param mon_network: value for ``public_network``, if any
    :param ipv6: whether the mon address is IPv6
    :param cli: callable taking a ceph command (list of str) and, optionally,
        a dict of extra mounts
    :param cluster_network: value for ``cluster_network``, if any
    :param ipv6_cluster_network: whether the cluster network is IPv6
    """
    if not ctx.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws(ctx, [
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])
    elif 'image' in ctx and ctx.image:
        # we still want to assimilate the given container image if provided
        cli(['config', 'set', 'global', 'container_image', f'{ctx.image}'])

    if mon_network:
        # keep public_network in the section the user already used for it
        cp = read_config(ctx.config)
        cfg_section = 'global' if cp.has_option('global', 'public_network') else 'mon'
        logger.info(f'Setting public_network to {mon_network} in {cfg_section} config section')
        cli(['config', 'set', cfg_section, 'public_network', mon_network])

    if cluster_network:
        logger.info(f'Setting cluster_network to {cluster_network}')
        cli(['config', 'set', 'global', 'cluster_network', cluster_network])

    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    with open(ctx.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % ctx.output_config)

def _extract_host_info_from_applied_spec(f: Iterable[str]) -> List[Dict[str, str]]:
    """Find the hostname (and addr, if given) of every host spec in an applied spec.

    Generally, we should be able to just pass the spec to the mgr module where
    proper yaml parsing can happen, but for host specs in particular we want to
    be able to distribute ssh keys, which requires finding the hostname (and addr
    if possible) for each potential host spec in the applied spec.

    :param f: lines of a (possibly multi-document) spec file
    :return: one dict per host spec whose hostname could be determined
    """
    # split the input into documents on '---' separator lines, dropping
    # blank lines and surrounding whitespace
    specs: List[List[str]] = []
    current_spec: List[str] = []
    for line in f:
        if re.search(r'^---\s+', line):
            if current_spec:
                specs.append(current_spec)
            current_spec = []
        else:
            line = line.strip()
            if line:
                current_spec.append(line)
    if current_spec:
        specs.append(current_spec)

    # keep only the documents whose service_type is 'host'
    host_specs: List[List[str]] = []
    for spec in specs:
        for line in spec:
            if 'service_type' not in line:
                continue
            try:
                _, service_type = line.split(':')
            except ValueError as e:
                spec_str = '\n'.join(spec)
                logger.error(f'Failed to pull service_type from spec:\n{spec_str}. Got error: {e}')
            else:
                if service_type.strip() == 'host':
                    host_specs.append(spec)
            # only the first service_type line of a spec is considered
            break
        else:
            # reached only when no line of the spec mentions service_type
            spec_str = '\n'.join(spec)
            logger.error(f'Failed to find service_type within spec:\n{spec_str}')

    host_dicts = []
    for s in host_specs:
        host_dict = _extract_host_info_from_spec(s)
        # if host_dict is empty here, we failed to pull the hostname
        # for the host from the spec. This should have already been logged
        # so at this point we just don't want to include it in our output
        if host_dict:
            host_dicts.append(host_dict)

    return host_dicts

def _extract_host_info_from_spec(host_spec: List[str]) -> Dict[str, str]:
    """Pull the hostname and (optional) addr out of the lines of a host spec.

    For our purposes here, we only really want the hostname and address of
    the host in order to be able to distribute ssh keys. The spec is applied
    later through the mgr module where proper yaml parsing can be done.

    :param host_spec: stripped, non-empty lines of a single host spec
    :return: a dict with a ``hostname`` entry and, if present in the spec, an
        ``addr`` entry; an empty dict if no hostname was found
    """
    wanted_fields = ('hostname', 'addr')
    host_dict = {}  # type: Dict[str, str]
    for line in host_spec:
        # match on the key itself, so that a value that merely contains
        # "addr" or "hostname" is not mistaken for that field, and split on
        # the first ':' only, so that values containing ':' (e.g. IPv6
        # addresses) are kept intact
        key, sep, value = line.partition(':')
        field = key.strip()
        if field not in wanted_fields:
            continue
        if not sep:
            spec_str = '\n'.join(host_spec)
            logger.error(f'Error trying to pull {field} from host spec:\n{spec_str}. Got error: no ":" separator found')
            continue
        host_dict[field] = value.strip()

    if 'hostname' not in host_dict:
        spec_str = '\n'.join(host_spec)
        logger.error(f'Could not find hostname in host spec:\n{spec_str}')
        return {}
    return host_dict

def _distribute_ssh_keys(ctx: CephadmContext, host_info: Dict[str, str], bootstrap_hostname: str) -> int:
    """Copy the cluster's public ssh key to a host from an applied spec.

    The bootstrap host itself is skipped.

    :param ctx: cephadm context; ``ssh_public_key``, ``output_pub_ssh_key``
        and ``ssh_user`` are read
    :param host_info: dict with a ``hostname`` entry and an optional ``addr``
    :param bootstrap_hostname: hostname of the host running bootstrap
    :return: 0 on success (or when nothing had to be done), 1 if copying the
        key failed
    """
    # copy ssh key to hosts in host spec (used for apply spec)
    if ctx.ssh_public_key:
        ssh_key = ctx.ssh_public_key.name
    else:
        # no user supplied key: fall back to the public key path that
        # bootstrap writes its output to
        ssh_key = ctx.output_pub_ssh_key

    hostname = host_info['hostname']
    if bootstrap_hostname == hostname:
        return 0

    # prefer the explicit address, fall back to the hostname
    addr = host_info.get('addr') or hostname
    _, _, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)])
    if code:
        logger.error('\nCopying ssh key to host %s at address %s failed!\n' % (hostname, addr))
        return 1
    logger.info('Added ssh key to host %s at address %s' % (hostname, addr))
    return 0

def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
    """Save cluster configuration to the per fsid directory.

    Copies the output config, keyring and (when it exists) the public ssh key
    into ``{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}``.  A warning is logged, and
    nothing is copied, if that directory could not be created.

    :param ctx: cephadm context providing ``data_dir`` and the output paths
    :param uid: owner uid passed to ``makedirs`` for the directory
    :param gid: owner gid passed to ``makedirs`` for the directory
    :param fsid: cluster fsid
    """
    def copy_file(src: str, dst: str) -> None:
        # unset (empty) source paths are silently skipped
        if src:
            shutil.copyfile(src, dst)

    conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
    makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
    if os.path.exists(conf_dir):
        logger.info(f'Saving cluster configuration to {conf_dir} directory')
        copy_file(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
        copy_file(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
        # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
        if (os.path.exists(ctx.output_pub_ssh_key)):
            copy_file(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
    else:
        logger.warning(f'Cannot create cluster configuration directory {conf_dir}')

def rollback(func: FuncT) -> FuncT:
    """Wrap ``func`` so that a failure while it runs is logged and may trigger cleanup.

    The returned wrapper calls ``func(ctx)``.  It has a dedicated handler for
    ``ClusterAlreadyExists`` and a general handler for ``KeyboardInterrupt``
    and ``Exception``; the general handler logs the error and, when
    ``ctx.cleanup_on_failure`` is set, calls ``_rm_cluster``.

    :param func: function taking a ``CephadmContext``
    :return: the wrapper, cast back to the type of ``func``
    """
    def _rollback(ctx: CephadmContext) -> Any:
            return func(ctx)
        except ClusterAlreadyExists:
            # another cluster with the provided fsid already exists: don't remove.
        except (KeyboardInterrupt, Exception) as e:
            # log the exception type together with its message
            logger.error(f'{type(e).__name__}: {e}')
            if ctx.cleanup_on_failure:
                            '\tCephadm hit an issue during cluster installation. Current cluster files will be deleted automatically,\n'
                            '\tto disable this behaviour do not pass the --cleanup-on-failure flag. In case of any previous\n'
                            '\tbroken installation user must use the following command to completely delete the broken cluster:\n\n'
                            '\t> cephadm rm-cluster --force --zap-osds --fsid \n\n'
                            '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
                # remove the cluster files; logs are not kept and OSDs are not zapped
                _rm_cluster(ctx, keep_logs=False, zap_osds=False)
                            '\tCephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically to change\n'
                            '\tthis behaviour you can pass the --cleanup-on-failure. To remove this broken cluster manually please run:\n\n'
                            f'\t   > cephadm rm-cluster --force --fsid {ctx.fsid}\n\n'
                            '\tin case of any previous broken installation user must use the rm-cluster command to delete the broken cluster:\n\n'
                            '\t   > cephadm rm-cluster --force --zap-osds --fsid \n\n'
                            '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
    # the cast only informs type checkers that the wrapper has func's type
    return cast(FuncT, _rollback)

def command_bootstrap(ctx):
    # type: (CephadmContext) -> int
    """Bootstrap a new cluster on the local host.

    Validates the ssh key, fsid and output file options, prepares the initial
    configuration, creates the first mon and mgr, and then applies the
    optional ssh, registry, dashboard and spec settings.

    :return: ``ctx.error_code``; it starts at 0 and is set to
        ``-errno.EINVAL`` when applying ``ctx.apply_spec`` fails
    :raises Error: on invalid option combinations, an invalid fsid, a fully
        qualified hostname (without --allow-fqdn-hostname), a failed image
        pull or a release mismatch
    :raises ClusterAlreadyExists: if the fsid's data dir or an output file
        already exists
    """

    ctx.error_code = 0

    # default any unset output path to a file below ctx.output_dir
    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)

    # per the error message: a private key needs a matching public key or signed cert
    if (
        (bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key))
        and (bool(ctx.ssh_private_key) is not bool(ctx.ssh_signed_cert))
        raise Error('--ssh-private-key must be passed with either --ssh-public-key in the case of standard pubkey '
                    'authentication or with --ssh-signed-cert in the case of CA signed signed keys or not provided at all.')

    if (bool(ctx.ssh_public_key) and bool(ctx.ssh_signed_cert)):
        raise Error('--ssh-public-key and --ssh-signed-cert are mututally exclusive. --ssh-public-key is intended '
                    'for standard pubkey encryption where the public key is set as an authorized key on cluster hosts. '
                    '--ssh-signed-cert is intended for the CA signed keys use case where cluster hosts are configured to trust '
                    'a CA pub key and authentication during SSH is done by authenticating the signed cert, requiring no '
                    'public key to be installed on the cluster hosts.')

    # a user supplied fsid must not collide with an existing cluster's data dir
    if ctx.fsid:
        data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
        if os.path.exists(data_dir_base):
            raise ClusterAlreadyExists(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
            logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')

    # initial vars
    ctx.fsid = ctx.fsid or make_fsid()
    fsid = ctx.fsid
    if not is_fsid(fsid):
        raise Error('not an fsid: %s' % fsid)

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring, ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise ClusterAlreadyExists('%s already exists; delete or pass --allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    # only the config part of the result is used here
    (user_conf, _) = get_config_and_keyring(ctx)

    if ctx.ssh_user != 'root':

    if not ctx.skip_prepare_host:
        logger.info('Skip prepare_host')

    logger.info('Cluster fsid: %s' % fsid)
    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or get_short_hostname()
    mgr_id = ctx.mgr_id or generate_service_id()

    # NOTE(review): `lock` is not referenced again in the visible code - confirm it gets acquired
    lock = FileLock(ctx, fsid)

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
            logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
            raise Error(err_str)

    # output of running `ceph --version` in the image
    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')

    if not ctx.allow_mismatched_release:
        # the release name is taken from the fifth whitespace separated field
        image_release = image_ver.split()[4]
        if image_release not in \
            raise Error(
                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                                            bootstrap_keyring.name, monmap.name)

    with write_new(mon_dir + '/config', owner=(uid, gid)) as f:

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    # NOTE(review): `extra_mounts={}` is a mutable default; it is only read below
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (List[str], Dict[str, str], Optional[int], CallVerbosity) -> str
        mounts = {
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        for k, v in extra_mounts.items():
            mounts[k] = v
        # a falsy timeout falls back to the context wide timeout
        timeout = timeout or ctx.timeout
        return CephContainer(
        ).run(timeout=timeout, verbosity=verbosity)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # output files
    with write_new(ctx.output_keyring) as f:
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    if user_conf:
        # user given config settings were already assimilated earlier
        # but if the given settings contained any attributes in
        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
        # they don't seem to be stored if there isn't a mgr yet.
        # Since re-assimilating the same conf settings should be
        # idempotent we can just do it again here.
        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
            cli(['config', 'assimilate-conf',
                 '-i', '/var/lib/ceph/user.conf'],
                {tmp.name: '/var/lib/ceph/user.conf:z'})

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart() -> None:
        # first get latest mgrmap epoch from the mon.  try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # necessary
            j = json_loads_retry(lambda: cli(['mgr', 'stat'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        except Exception:
            j = json_loads_retry(lambda: cli(['mgr', 'dump'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        epoch = j['epoch']

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # type: () -> bool
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # ssh
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    # store the registry credentials in the cluster only when all three are given
    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
        cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
            cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
            cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
        except Exception:
            logger.info('Unable to set up "admin" label; assuming older version of Ceph')

    if ctx.apply_spec:
        logger.info('Applying %s to cluster' % ctx.apply_spec)
        # copy ssh key to hosts in spec file
        with open(ctx.apply_spec) as f:
            host_dicts = _extract_host_info_from_applied_spec(f)
            for h in host_dicts:
                if ctx.ssh_signed_cert:
                    logger.info('Key distribution is not supported for signed CA key setups. Skipping ...')
                    _distribute_ssh_keys(ctx, h, hostname)

        # make the spec file visible inside the container (read-only)
        mounts = {}
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
        except Exception:
            # remember the failure in the return code; the remaining steps still run
            ctx.error_code = -errno.EINVAL
            logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)

    save_cluster_config(ctx, uid, gid, fsid)

    # enable autotune for osd_memory_target
    logger.info('Enabling autotune for osd_memory_target')
    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])

    # Notify the Dashboard to show the 'Expand cluster' page on first log in.
    cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])

    logger.info('You can access the Ceph CLI as following in case of multi-cluster or non-default config:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (

    logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))

    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
    logger.info('Bootstrap complete.')
    return ctx.error_code


def command_registry_login(ctx: CephadmContext) -> int:
    """Log into a custom container registry.

    The credentials are taken from the JSON referenced by
    ``ctx.registry_json`` (which also populates ``ctx.registry_url``,
    ``ctx.registry_username`` and ``ctx.registry_password``) or, failing
    that, directly from those three context attributes.

    :return: 0 on success
    :raises Error: if the JSON lacks a field or no complete set of
        credentials was provided
    """
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        d = get_parm(ctx.registry_json)
        if d.get('url') and d.get('username') and d.get('password'):
            ctx.registry_url = d.get('url')
            ctx.registry_username = d.get('username')
            ctx.registry_password = d.get('password')
            registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        else:
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please setup json file as\n'
                        '{\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
                        '}\n')
    elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
    else:
        raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                    '--registry-url, --registry-username and --registry-password '
                    'options or --registry-json option')
    return 0

def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
    """Log the container engine into a custom registry.

    With Podman the credentials are stored in /etc/ceph/podman-auth.json,
    whose mode is then set to ``DEFAULT_MODE``.

    :param ctx: cephadm context providing ``container_engine``
    :param url: registry url
    :param username: registry user
    :param password: registry password
    :raises Error: if the login (or fixing up the auth file) fails
    """
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            # have podman write the credentials where the chmod below expects them
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            os.chmod('/etc/ceph/podman-auth.json', DEFAULT_MODE)
    except Exception as e:
        # report the values this call actually used, not whatever is on ctx
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (url, username)) from e


def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring daemon's files should be owned by.

    For most daemon types the ids are read via ``extract_uid_gid`` from a
    path (or list of candidate paths) inside the container image;
    node-exporter uses the fixed ids 65534/65534.

    :raises Error: for a daemon type that is not handled here
    """
    if daemon_type == 'node-exporter':
        return 65534, 65534

    # path(s) inside the image that are passed to extract_uid_gid
    file_paths = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'loki': '/etc/loki',
        'promtail': '/etc/promtail',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    if daemon_type not in file_paths:
        raise Error('{} not implemented yet'.format(daemon_type))
    uid, gid = extract_uid_gid(ctx, file_path=file_paths[daemon_type])
    return uid, gid

def get_deployment_container(ctx: CephadmContext,
                             fsid: str, daemon_type: str, daemon_id: Union[int, str],
                             privileged: bool = False,
                             ptrace: bool = False,
                             container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Return the container for a daemon that is being deployed.

    All arguments are forwarded unchanged to ``get_container``; the result is
    then adjusted using ``ctx`` and the custom config files and returned.
    """
    # wrapper for get_container specifically for containers made during the `cephadm deploy`
    # command. Adds some extra things such as extra container args and custom config files
    c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
    # `in ctx` guards against the attribute not being set on the context at all
    if 'extra_container_args' in ctx and ctx.extra_container_args:
    if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
    ccfiles = fetch_custom_config_files(ctx)
    if ccfiles:
        mandatory_keys = ['mount_path', 'content']
        for conf in ccfiles:
            # entries lacking one of the mandatory keys are skipped
            if all(k in conf for k in mandatory_keys):
                mount_path = conf['mount_path']
                file_path = os.path.join(
                # mount the file at its requested path
                c.volume_mounts[file_path] = mount_path
    return c

def get_deployment_type(ctx: CephadmContext, daemon_type: str, daemon_id: str) -> DeploymentType:
    """Work out whether this is a fresh deployment, a redeploy or a reconfig.

    ``ctx.reconfig`` selects RECONFIG.  Otherwise the result is DEFAULT,
    unless the daemon's unit or container is already running, in which case
    it is REDEPLOY.  The chosen type is logged before it is returned.
    """
    deployment_type: DeploymentType = (
        DeploymentType.RECONFIG if ctx.reconfig else DeploymentType.DEFAULT
    )

    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    _, unit_state, _ = check_unit(ctx, unit_name)
    already_running = unit_state == 'running' or is_container_running(
        ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')
    )
    # reconfig takes priority over redeploy; a would-be fresh deployment of
    # a running daemon is marked as a redeploy to avoid port checking
    if already_running and deployment_type == DeploymentType.DEFAULT:
        deployment_type = DeploymentType.REDEPLOY

    logger.info(f'{deployment_type.value} daemon {ctx.name} ...')

    return deployment_type

def command_deploy(ctx):
    # type: (CephadmContext) -> None
    """Deploy the daemon named by ``ctx.name`` using the options bound to ``ctx``."""
    _common_deploy(ctx)

def read_configuration_source(ctx: CephadmContext) -> Dict[str, Any]:
    """Read a JSON configuration based on the `ctx.source` value.

    A source of ``-`` (also used when ``ctx.source`` is unset or empty) means
    standard input; any other value is treated as the path of a file.

    :return: the decoded JSON document
    """
    source = '-'
    if 'source' in ctx and ctx.source:
        source = ctx.source
    if source == '-':
        config_data = json.load(sys.stdin)
    else:
        with open(source, 'rb') as fh:
            config_data = json.load(fh)
    logger.debug('Loaded deploy configuration: %r', config_data)
    return config_data

def apply_deploy_config_to_ctx(
    config_data: Dict[str, Any],
    ctx: CephadmContext,
) -> None:
    """Bind properties taken from the config_data dictionary to our ctx,
    similar to how cli options on `deploy` are bound to the context.

    ``name`` is required.  ``image`` (when non-empty), ``fsid``, ``meta`` and
    ``config_blobs`` are optional, and every entry of ``params`` is set as an
    attribute on ``ctx``.
    """
    ctx.name = config_data['name']
    image = config_data.get('image', '')
    if image:
        ctx.image = image
    if 'fsid' in config_data:
        ctx.fsid = config_data['fsid']
    if 'meta' in config_data:
        ctx.meta_properties = config_data['meta']
    if 'config_blobs' in config_data:
        ctx.config_blobs = config_data['config_blobs']

    # many functions don't check that an attribute is set on the ctx
    # (with getattr or the '__contains__' func on ctx).
    # This reuses the defaults from the CLI options so we don't
    # have to repeat things and they can stay in sync.
    # NOTE(review): nothing visible here populates `facade` or applies its
    # defaults to ctx - confirm that ArgumentFacade() alone is sufficient.
    facade = ArgumentFacade()
    for key, value in config_data.get('params', {}).items():
        if key not in facade.defaults:
            # unknown parameters are reported but still applied
            logger.warning('unexpected parameter: %r=%r', key, value)
        setattr(ctx, key, value)
    logger.debug('Determined image: %r', ctx.image)

def command_deploy_from(ctx: CephadmContext) -> None:
    """The deploy-from command is similar to deploy but sources nearly all
    configuration parameters from an input JSON configuration file.
    """
    config_data = read_configuration_source(ctx)
    apply_deploy_config_to_ctx(config_data, ctx)
    # with ctx populated, continue down the same path as a regular deploy
    _common_deploy(ctx)

def _common_deploy(ctx: CephadmContext) -> None:
    """Deploy the daemon named by ``ctx.name`` (``daemon_type.daemon_id``).

    Validates the daemon type, takes the per-cluster lock, determines the
    deployment type, migrates the sysctl conf files and dispatches to the
    daemon specific deployment code.

    :raises Error: if the daemon type is not supported
    """
    daemon_type, daemon_id = ctx.name.split('.', 1)
    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # hold the cluster lock for the rest of the deployment; merely
    # constructing the FileLock does not take it
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    deployment_type = get_deployment_type(ctx, daemon_type, daemon_id)

    # Migrate sysctl conf files from /usr/lib to /etc
    migrate_sysctl_dir(ctx, ctx.fsid)

    # Get and check ports explicitly required to be opened
    endpoints = fetch_tcp_ports(ctx)
    _dispatch_deploy(ctx, daemon_type, daemon_id, endpoints, deployment_type)

def _dispatch_deploy(
    ctx: CephadmContext,
    daemon_type: str,
    daemon_id: str,
    daemon_endpoints: List[EndPoint],
    deployment_type: DeploymentType,
) -> None:
    """Deploy one daemon, picking the setup that matches ``daemon_type``.

    Each branch determines the uid/gid (and, where used, the config and
    keyring) for its daemon type, builds the container and hands over to
    ``deploy_daemon``.

    :param daemon_endpoints: endpoints for the daemon; the NFS and custom
        container branches add to them on a fresh deployment
    :raises Error: if a monitoring daemon's config-json lacks required files
        or args, or if the daemon type is not handled by any branch
    """
    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        make_var_run(ctx, ctx.fsid, uid, gid)

        config_json = fetch_configs(ctx)

        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,

        if daemon_type == 'mon' and config_json is not None:
            if 'crush_location' in config_json:
                c_loc = config_json['crush_location']
                # was originally "c.args.extend(['--set-crush-location', c_loc])"
                # but that doesn't seem to persist in the object after it's passed
                # in further function calls
                c.args = c.args + ['--set-crush-location', c_loc]

        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # make sure provided config-json is sufficient
        config = fetch_configs(ctx)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            # every required file must appear under the 'files' key
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            # required args are looked up at the top level of the config
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,

    elif daemon_type == NFSGanesha.daemon_type:
        # only check ports if this is a fresh deployment
        if deployment_type == DeploymentType.DEFAULT and not daemon_endpoints:
            nfs_ports = list(NFSGanesha.port_map.values())
            daemon_endpoints = [EndPoint('', p) for p in nfs_ports]

        config, keyring = get_config_and_keyring(ctx)
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid(ctx)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
    elif daemon_type == CephNvmeof.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = 167, 167  # TODO: need to get properly the uid/gid
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
    elif daemon_type in Tracing.components:
        uid, gid = 65534, 65534
        # note: this branch calls get_container, not get_deployment_container
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
        uid, gid = haproxy.extract_uid_gid_haproxy()
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
        uid, gid = keepalived.extract_uid_gid_keepalived()
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
        # only check ports if this is a fresh deployment
        if deployment_type == DeploymentType.DEFAULT:
            daemon_endpoints.extend([EndPoint('', p) for p in cc.ports])
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,

    elif daemon_type == CephadmAgent.daemon_type:
        # get current user gid and uid
        uid = os.getuid()
        gid = os.getgid()
        # None is passed where the other branches pass their container
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
                      uid, gid,

    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      sc.uid, sc.gid,

        raise Error('daemon type {} not implemented in command_deploy function'


def command_run(ctx):
    # type: (CephadmContext) -> int
    """Run the container command for the daemon named by ``ctx.name``.

    ``ctx.name`` must have the form ``<daemon_type>.<daemon_id>``; it is split
    on the first '.' only, so the id itself may contain dots.  The daemon's
    container is looked up via ``get_container`` and its ``run_cmd()`` is
    executed through ``call_timeout`` with ``ctx.timeout``.

    Returns the value produced by ``call_timeout``.
    """
    name_parts = ctx.name.split('.', 1)
    daemon_type, daemon_id = name_parts
    container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    return call_timeout(ctx, container.run_cmd(), ctx.timeout)


def command_shell(ctx):
    # type: (CephadmContext) -> int
    # Assemble the mounts, binds and container arguments for a shell container
    # and run either ctx.command or (when none is given) bash inside it.
    # With ctx.dry_run the resulting command line is only printed.
    #
    # NOTE(review): this copy of the function looks truncated -- several
    # `else:` lines, the closing `]` of the `container_args += [` list and the
    # arguments of `CephContainer(` appear to be missing.  Restore them from a
    # pristine copy; the comments below describe only what is visible here.
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
       cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    # Derive daemon_type / daemon_id from --name, if given.
    # NOTE(review): the assignments below read like three alternative branches
    # ('type.id' name, bare type name, no name) whose `else:` lines were lost.
    if ctx.name:
        if '.' in ctx.name:
            (daemon_type, daemon_id) = ctx.name.split('.', 1)
            daemon_type = ctx.name
            daemon_id = None
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if ctx.fsid and daemon_type in Ceph.daemons:
        make_log_dir(ctx, ctx.fsid)

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # in case a dedicated keyring for the specified fsid is found we use it.
    # Otherwise, use /etc/ceph files by default, if present.  We do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not ctx.keyring:
        keyring_file = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
        if os.path.exists(keyring_file):
            ctx.keyring = keyring_file
        elif os.path.exists(CEPH_DEFAULT_KEYRING):
            ctx.keyring = CEPH_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    # An explicit config / keyring replaces the default files inside the container.
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    # Each --mount entry is 'src[:dst[:options]]'.
    # NOTE(review): the '/mnt/<basename>' assignment looks like the `else:`
    # branch for entries without a destination; as written it always overwrites.
    if ctx.mount:
        for _mount in ctx.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1]
                if len(split_src_dst) == 3:
                    dst = '{}:{}'.format(dst, split_src_dst[2])
                mounts[mount] = dst
                mounts[mount] = '/mnt/{}'.format(filename)
    # NOTE(review): `command = ['bash']` and everything down to the '/root'
    # mount presumably belong to a missing `else:` (no explicit command).
    if ctx.command:
        command = ctx.command
        command = ['bash']
        container_args += [
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        if ctx.fsid:
            # per-cluster directory mounted as /root in the container; on
            # first creation it is seeded with the host's /etc/skel/.bash* files
            home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
            if not os.path.exists(home):
                logger.debug('Creating root home at %s' % home)
                makedirs(home, 0, 0, 0o660)
                if os.path.exists('/etc/skel'):
                    for f in os.listdir('/etc/skel'):
                        if f.startswith('.bash'):
                            shutil.copyfile(os.path.join('/etc/skel', f),
                                            os.path.join(home, f))
            mounts[home] = '/root'

    # --volume entries are 'src:dst' and are added verbatim to the mounts
    for i in ctx.volume:
        a, b = i.split(':', 1)
        mounts[a] = b

    c = CephContainer(
    command = c.shell_cmd(command)

    if ctx.dry_run:
        print(' '.join(shlex.quote(arg) for arg in command))
        return 0

    return call_timeout(ctx, command, ctx.timeout)


def command_enter(ctx):
    # type: (CephadmContext) -> int
    # Execute a command inside the container of the daemon named by ctx.name
    # ('<type>.<id>'); requires ctx.fsid.  Returns the result of call_timeout.
    #
    # NOTE(review): this copy looks truncated -- an `else:` before
    # `command = ['sh']`, the closing `]` of the env list and most arguments
    # of `CephContainer(` appear to be missing.
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    container_args = ['-i']  # type: List[str]
    if ctx.command:
        command = ctx.command
        command = ['sh']
        container_args += [
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
    c = CephContainer(
        # container name is derived from the fsid and the daemon name
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    command = c.exec_cmd(command)
    return call_timeout(ctx, command, ctx.timeout)


def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    # Run ceph-volume in a container using the 'osd' mounts, with the config
    # and keyring (when available) written to temporary files and mounted in.
    #
    # NOTE(review): this copy looks truncated -- the arguments of
    # `get_ceph_volume_container(`, the body of the final `if not code:` and
    # presumably a statement using `lock` are missing.
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
       cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

        lock = FileLock(ctx, ctx.fsid)

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    # keep references to the temp files so they stay alive until the
    # container has run -- presumably they are removed on close; verify write_tmp
    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring(ctx)

    if config:
        # tmp config file
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = get_ceph_volume_container(

    out, err, code = call_throws(ctx, c.run_cmd(), verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    if not code:


def command_unit(ctx):
    # type: (CephadmContext) -> int
    # Run `systemctl <ctx.command> <unit>` for the systemd unit that belongs
    # to daemon ctx.name in cluster ctx.fsid and return systemctl's exit code.
    # NOTE(review): the `call(` invocation is truncated in this copy (leading
    # and trailing arguments and the closing parenthesis are missing).
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    _, _, code = call(
        ['systemctl', ctx.command, unit_name],
    return code


def command_logs(ctx):
    # type: (CephadmContext) -> None
    # Show the journal of the systemd unit that belongs to daemon ctx.name in
    # cluster ctx.fsid by running `journalctl -u <unit>` directly.
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = [find_program('journalctl')]
    cmd.extend(['-u', unit_name])
    # NOTE(review): the body of this `if` is missing in this copy; presumably
    # the extra arguments in ctx.command are appended to `cmd` -- confirm.
    if ctx.command:

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug('Running command: %s' % ' '.join(cmd))
    subprocess.call(cmd, env=os.environ.copy())  # type: ignore


def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]
    """Return the host's directly attached IPv4 and IPv6 networks.

    The result maps network (CIDR string) -> interface name -> set of local
    addresses on that interface, merging the output of
    ``_list_ipv4_networks`` and ``_list_ipv6_networks``.  IPv4 and IPv6
    network keys cannot collide, so a plain ``dict.update`` is sufficient.

    Raises whatever the two helpers raise (e.g. ``FileNotFoundError`` when the
    ``ip`` command is not installed).
    """
    # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    # so we'll need to use a regex to parse 'ip' command output.
    # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
    # j = json.loads(out)
    # for x in j:
    res = _list_ipv4_networks(ctx)
    # previously only the IPv4 half was returned although the IPv6 helper
    # produces the same shape and was never called
    res.update(_list_ipv6_networks(ctx))
    return res

def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """Run ``ip route ls`` and return its parsed IPv4 networks.

    Raises ``FileNotFoundError`` when no ``ip`` executable can be located.
    The shape of the result is that of ``_parse_ipv4_route``.
    """
    ip_binary: Optional[str] = find_executable('ip')
    if ip_binary:
        route_cmd = [ip_binary, 'route', 'ls']
        stdout, _, _ = call_throws(ctx, route_cmd, verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        return _parse_ipv4_route(stdout)
    raise FileNotFoundError("unable to find 'ip' command")

def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]:
    r = {}  # type: Dict[str, Dict[str, Set[str]]]
    p = re.compile(r'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)')
    for line in out.splitlines():
        m = p.findall(line)
        if not m:
        net = m[0][0]
        if '/' not in net:  # aggregate /32 mask for single host sub-networks
            net += '/32'
        iface = m[0][1]
        ip = m[0][4]
        if net not in r:
            r[net] = {}
        if iface not in r[net]:
            r[net][iface] = set()
    return r

def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """Run ``ip -6 route ls`` and ``ip -6 addr ls`` and return the parsed networks.

    Raises ``FileNotFoundError`` when no ``ip`` executable can be located.
    The shape of the result is that of ``_parse_ipv6_route``.
    """
    ip_binary: Optional[str] = find_executable('ip')
    if ip_binary:
        quiet = CallVerbosity.QUIET_UNLESS_ERROR
        route_out, _, _ = call_throws(ctx, [ip_binary, '-6', 'route', 'ls'], verbosity=quiet)
        addr_out, _, _ = call_throws(ctx, [ip_binary, '-6', 'addr', 'ls'], verbosity=quiet)
        return _parse_ipv6_route(route_out, addr_out)
    raise FileNotFoundError("unable to find 'ip' command")

def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]:
    r = {}  # type: Dict[str, Dict[str, Set[str]]]
    route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
    ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
    iface_p = re.compile(r'^(\d+): (\S+): (.*)$')
    for line in routes.splitlines():
        m = route_p.findall(line)
        if not m or m[0][0].lower() == 'default':
        net = m[0][0]
        if '/' not in net:  # aggregate /128 mask for single host sub-networks
            net += '/128'
        iface = m[0][1]
        if iface == 'lo':  # skip loopback devices
        if net not in r:
            r[net] = {}
        if iface not in r[net]:
            r[net][iface] = set()

    iface = None
    for line in ips.splitlines():
        m = ip_p.findall(line)
        if not m:
            m = iface_p.findall(line)
            if m:
                # drop @... suffix, if present
                iface = m[0][1].split('@')[0]
        ip = m[0][0]
        # find the network it belongs to
        net = [n for n in r.keys()
               if ipaddress.ip_address(ip) in ipaddress.ip_network(n)]
        if net and iface in r[net[0]]:
            assert iface

    return r

def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    """Print the result of ``list_networks`` to stdout as indented JSON.

    JSON has no set type, so sets found in the result are emitted as lists.
    """
    def _sets_as_lists(value: Any) -> Any:
        # json.dumps fallback for values it cannot encode natively
        if isinstance(value, set):
            return list(value)
        return value

    networks = list_networks(ctx)
    rendered = json.dumps(networks, indent=4, default=_sets_as_lists)
    print(rendered)


def command_ls(ctx):
    # type: (CephadmContext) -> None
    # Print the daemons reported by list_daemons() as indented JSON; details
    # are collected unless ctx.no_detail is set.
    # NOTE(review): the list_daemons( call is truncated in this copy -- its
    # remaining argument(s) and closing parenthesis are missing.
    ls = list_daemons(ctx, detail=not ctx.no_detail,
    print(json.dumps(ls, indent=4))

def with_units_to_int(v: str) -> int:
    """Convert a human-readable size such as ``'1.5GiB'`` or ``'512kB'`` to bytes.

    A trailing ``iB`` or ``B`` is dropped first; a remaining trailing ``K``,
    ``M``, ``G`` or ``T`` (case-insensitive) scales the number by the matching
    power of 1024.  The numeric part may be fractional; the result is
    truncated towards zero.

    :raises ValueError: if no number is left after removing the unit (for
        example ``''`` or ``'B'``) or the number cannot be parsed.  Empty
        input used to escape as ``IndexError``, which callers that only
        handle ``ValueError`` did not catch.
    """
    number = v
    if number.endswith('iB'):
        number = number[:-2]
    elif number.endswith('B'):
        number = number[:-1]

    mult = 1
    if number:
        # position in 'KMGT' (1-based) is the power of 1024; 0 means no prefix
        exponent = 'KMGT'.find(number[-1].upper()) + 1
        if exponent:
            mult = 1024 ** exponent
            number = number[:-1]
    # float('') raises ValueError, which covers the empty / unit-only case
    return int(float(number) * mult)

def list_daemons(ctx, detail=True, legacy_dir=None):
    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    # Walk the data directory and describe every daemon found there: legacy
    # daemons live in <data_dir>/{mon,osd,mds,mgr}/<cluster>-<id>, cephadm
    # daemons in <data_dir>/<fsid>/<type>.<id>.  With detail=True each entry
    # additionally gets systemd unit state and, for cephadm daemons,
    # container/image ids, digests, version, resource usage and timestamps.
    #
    # NOTE(review): this copy of the function is heavily truncated -- `try:`,
    # `else:`, `continue`/`pass` lines, closing brackets, several call
    # arguments and the statement(s) that add `val` to `ls` are not visible.
    # The comments below describe only what can be read here.
    host_version: Optional[str] = None
    ls = []
    container_path = ctx.container_engine.path

    data_dir = ctx.data_dir
    if legacy_dir is not None:
        # string concatenation (not os.path.join) so the absolute data_dir is
        # re-rooted underneath legacy_dir
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # keep track of image digests
    seen_digests = {}   # type: Dict[str, List[str]]

    # keep track of memory and cpu usage we've seen
    # (one `stats` call per metric for all containers; results are keyed by
    # the container id as printed, whose length is returned alongside)
    seen_memusage = {}  # type: Dict[str, int]
    seen_cpuperc = {}  # type: Dict[str, str]
    out, err, code = call(
        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
    seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)

    out, err, code = call(
        [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
    seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out)

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: <data_dir>/<type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(ctx,
                                                  cluster, daemon_type, daemon_id,
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    val: Dict[str, Any] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    if detail:
                        (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name)
                        # the host's ceph version is looked up at most once
                        # and reused for every legacy daemon
                        if not host_version:
                                out, err, code = call(ctx,
                                                      ['ceph', '-v'],
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                        val['host_version'] = host_version
            elif is_fsid(i):
                # cephadm layout: <data_dir>/<fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                    val = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    if detail:
                        # get container id
                        (val['enabled'], val['state'], _) = check_unit(ctx, unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        image_digests = None
                        version = None
                        start_stamp = None

                        out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
                        if not code:
                            # field order matches the --format string used by
                            # get_container_stats
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)

                            # collect digests for this image id
                            image_digests = seen_digests.get(image_id)
                            if not image_digests:
                                out, err, code = call(
                                        container_path, 'image', 'inspect', image_id,
                                        '--format', '{{.RepoDigests}}',
                                if not code:
                                    # output is stripped of its first and last
                                    # character and split on spaces
                                    image_digests = list(set(map(
                                        out.strip()[1:-1].split(' '))))
                                    seen_digests[image_id] = image_digests

                            # identify software version inside the container (if we can)
                            # (a version without a '.' from the image label is
                            # not trusted; fall back to the per-image cache)
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                            if daemon_type == NFSGanesha.daemon_type:
                                version = NFSGanesha.get_version(ctx, container_id)
                            if daemon_type == CephIscsi.daemon_type:
                                version = CephIscsi.get_version(ctx, container_id)
                            if daemon_type == CephNvmeof.daemon_type:
                                version = CephNvmeof.get_version(ctx, container_id)
                            elif not version:
                                # still unknown: ask the software inside the
                                # container, per daemon type, and cache by image
                                if daemon_type in Ceph.daemons:
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'ceph', '-v'],
                                    if not code and \
                                       out.startswith('ceph version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'grafana':
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'grafana-server', '-v'],
                                    if not code and \
                                       out.startswith('Version '):
                                        version = out.split(' ')[1]
                                        seen_versions[image_id] = version
                                elif daemon_type in ['prometheus',
                                    version = Monitoring.get_version(ctx, container_id, daemon_type)
                                    seen_versions[image_id] = version
                                elif daemon_type == 'haproxy':
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'haproxy', '-v'],
                                    # NOTE(review): `and` binds tighter than `or`, so the
                                    # 'HAProxy version ' alternative is accepted even when
                                    # `code` is non-zero -- probably wants parentheses
                                    # around the two startswith() tests.
                                    if not code and \
                                       out.startswith('HA-Proxy version ') or \
                                       out.startswith('HAProxy version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'keepalived':
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'keepalived', '--version'],
                                    # the version banner is read from stderr here
                                    if not code and \
                                       err.startswith('Keepalived '):
                                        version = err.split(' ')[1]
                                        if version[0] == 'v':
                                            version = version[1:]
                                        seen_versions[image_id] = version
                                elif daemon_type == CustomContainer.daemon_type:
                                    # Because a custom container can contain
                                    # everything, we do not know which command
                                    # to execute to get the version.
                                elif daemon_type == SNMPGateway.daemon_type:
                                    version = SNMPGateway.get_version(ctx, fsid, daemon_id)
                                    seen_versions[image_id] = version
                                    logger.warning('version for unknown daemon type %s' % daemon_type)
                            # NOTE(review): presumably the fallback when the container
                            # could not be inspected: take the image name recorded in
                            # the daemon's unit.image file -- confirm against upstream.
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:

                        # unit.meta?
                        mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
                            with open(mfile, 'r') as f:
                                meta = json.loads(f.read())
                        except IOError:

                        val['container_id'] = container_id
                        val['container_image_name'] = image_name
                        val['container_image_id'] = image_id
                        val['container_image_digests'] = image_digests
                        if container_id:
                            # `stats` may print shortened ids, so look up by the
                            # same-length prefix of the full container id
                            val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
                            val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len])
                        val['version'] = version
                        val['started'] = start_stamp
                        val['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        val['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        val['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))

    return ls

def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]:
    # keep track of memory usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    seen_memusage_cid_len = 0
    if not code:
        for line in out.splitlines():
            (cid, usage) = line.split(',')
            (used, limit) = usage.split(' / ')
                seen_memusage[cid] = with_units_to_int(used)
                if not seen_memusage_cid_len:
                    seen_memusage_cid_len = len(cid)
            except ValueError:
                logger.info('unable to parse memory usage line\n>{}'.format(line))
    return seen_memusage_cid_len, seen_memusage

def _parse_cpu_perc(code: int, out: str) -> Tuple[int, Dict[str, str]]:
    seen_cpuperc = {}
    seen_cpuperc_cid_len = 0
    if not code:
        for line in out.splitlines():
            (cid, cpuperc) = line.split(',')
                seen_cpuperc[cid] = cpuperc
                if not seen_cpuperc_cid_len:
                    seen_cpuperc_cid_len = len(cid)
            except ValueError:
                logger.info('unable to parse cpu percentage line\n>{}'.format(line))
    return seen_cpuperc_cid_len, seen_cpuperc

def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the ``list_daemons`` entry for daemon ``name`` in cluster ``fsid``.

    :param detail: forwarded to ``list_daemons``
    :param legacy_dir: forwarded to ``list_daemons``
    :raises Error: if no listed daemon has both the given fsid and name
    """
    for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
        # entries of other clusters or other daemons are skipped
        if d['fsid'] == fsid and d['name'] == name:
            return d
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))

def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
    """Inspect a daemon's container and return ``(stdout, stderr, exit_code)``.

    The container is looked up first under its current name and then under
    its old-style name; the first successful ``inspect`` wins.  On success,
    stdout is a single comma-separated line:
    ``id,config image,image id,created,io.ceph.version label``.
    If neither name can be inspected, the output and non-zero exit code of
    the last attempt are returned.

    :param container_path: path of the container engine executable
    """
    c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
    out, err, code = '', '', -1
    for name in (c.cname, c.old_cname):
        cmd = [
            container_path, 'inspect',
            '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
            name,
        ]
        out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET)
        if not code:
            # found it; do not overwrite the result with the other name's failure
            break
    return out, err, code


def command_adopt(ctx):
    # type: (CephadmContext) -> None
    # Adopt the legacy daemon named by ctx.name ('<type>.<id>'): optionally
    # pull the container image, determine the cluster fsid of the legacy
    # daemon, then dispatch to the type-specific adoption routine.
    #
    # NOTE(review): this copy looks truncated -- the `try:` around
    # _pull_image, the arguments of get_legacy_daemon_fsid(, presumably a
    # statement using `lock`, and the `else:` before the final raise are
    # not visible.

    if not ctx.skip_pull:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url  --registry-username  --registry-password ` or supply login info via a json file with `cephadm registry-login --registry-json `'
            logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
            raise Error(err_str)

    (daemon_type, daemon_id) = ctx.name.split('.', 1)

    # legacy check
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # lock
    fsid = get_legacy_daemon_fsid(ctx,
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)

    # call correct adoption
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(ctx, daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(ctx, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(ctx, daemon_id, fsid)
        raise Error('daemon type %s not recognized' % daemon_type)

class AdoptOsd(object):
    # Helper used while adopting a legacy OSD: the check_* methods try
    # different sources to find the OSD's fsid and objectstore type, and
    # change_cluster_name() retags the OSD's LVM volume to cluster 'ceph'.
    #
    # NOTE(review): this copy of the class looks truncated -- `try:` / `else:`
    # lines and most arguments of the get_ceph_volume_container( calls are
    # not visible.
    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        # Read the 'fsid' and 'type' files from the OSD data directory.
        # Returns (osd_fsid, osd_type); either is None when its file could
        # not be read / does not exist.

        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        # Look the OSD up in the JSON output of `ceph-volume lvm list`.
        # Returns (osd_fsid, osd_type), both None when the OSD id is not
        # listed or the output is not valid JSON.
        osd_fsid, osd_type = None, None

        c = get_ceph_volume_container(
            args=['lvm', 'list', '--format=json'],
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if not code:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    # a 'block' device means bluestore, a 'data' device
                    # filestore; a later device overrides an earlier one
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
            except ValueError as e:
                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        # Look for exactly one /etc/ceph/osd/<osd_id>-*.json file and take
        # fsid and type from it.  For non-filestore OSDs the data path named
        # in the file is mounted on the OSD data directory as a side effect.
        osd_fsid, osd_type = None, None

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                    js = json.loads(f.read())
                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
                    osd_fsid = js['fsid']
                    osd_type = js['type']
                    if osd_type != 'filestore':
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
                except ValueError as e:
                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type

    def change_cluster_name(self) -> None:
        # Replace the LVM tag ceph.cluster_name=<ctx.cluster> with
        # ceph.cluster_name=ceph on the OSD's block device.  Failures are
        # logged, not raised (see the except clause at the end).
        logger.info('Attempting to convert osd cluster name to ceph . . .')
        c = get_ceph_volume_container(
            args=['lvm', 'list', '{}'.format(self.osd_id), '--format=json'],
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if code:
            raise Exception(f'Failed to get list of LVs: {err}\nceph-volume failed with rc {code}')
            js = json.loads(out)
            if not js:
                raise RuntimeError(f'Failed to find osd.{self.osd_id}')
            # the last entry of type 'block' wins
            device: Optional[Dict[Any, Any]] = None
            for d in js[self.osd_id]:
                if d['type'] == 'block':
                    device = d
            if not device:
                raise RuntimeError(f'Failed to find block device for osd.{self.osd_id}')
            vg = device['vg_name']
            out, err, code = call_throws(self.ctx, ['lvchange', '--deltag', f'ceph.cluster_name={self.ctx.cluster}', vg])
            if code:
                raise RuntimeError(f"Can't delete tag ceph.cluster_name={self.ctx.cluster} on osd.{self.osd_id}.\nlvchange failed with rc {code}")
            out, err, code = call_throws(self.ctx, ['lvchange', '--addtag', 'ceph.cluster_name=ceph', vg])
            if code:
                raise RuntimeError(f"Can't add tag ceph.cluster_name=ceph on osd.{self.osd_id}.\nlvchange failed with rc {code}")
            logger.info('Successfully converted osd cluster name')
        # NOTE(review): RuntimeError is a subclass of Exception, so listing
        # both is redundant.
        except (Exception, RuntimeError) as e:
            logger.info(f'Failed to convert osd cluster name: {e}')

def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    # Adopt a legacy ceph daemon: stop/disable its old systemd unit, move its
    # data directory and logs into the cephadm layout for `fsid`, copy the
    # cluster config, and create the new units.
    #
    # NOTE(review): this copy looks truncated -- destination arguments of the
    # move_files( calls, `else:` lines, the body of the
    # `elif ctx.cluster != 'ceph':` branch and trailing call arguments are
    # not visible.

    (uid, gid) = extract_uid_gid(ctx)

    # legacy data dir, re-rooted under ctx.legacy_dir
    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist.  "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try the three detection strategies in turn until one yields an fsid
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        elif ctx.cluster != 'ceph':
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        # NOTE(review): no increment of num_renamed is visible in this copy
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
    if daemon_type == 'osd':
        # os.chown is applied to these entries individually, in addition to
        # the recursive chown of the data dir above
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    # glob pattern for this daemon's log file and its rotated copies
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
    update_firewalld(ctx, daemon_type)

def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy prometheus installation as a cephadm daemon.

    The legacy 'prometheus' unit is handed to _stop_and_disable(), the
    config file and metrics data found beneath ctx.legacy_dir are copied
    into a newly created daemon data directory, and the daemon is then
    deployed with DeploymentType.REDEPLOY before firewalld is updated.
    """
    daemon_type = 'prometheus'
    uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
    # should try to set the ports we know cephadm defaults
    # to for these services in the firewall.
    endpoints = [EndPoint('', port)
                 for port in Monitoring.port_map['prometheus']]

    _stop_and_disable(ctx, 'prometheus')

    daemon_root = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                uid=uid, gid=gid)

    # config
    legacy_config = os.path.abspath(
        ctx.legacy_dir + '/etc/prometheus/prometheus.yml')
    config_dir = os.path.join(daemon_root, 'etc/prometheus')
    makedirs(config_dir, uid, gid, 0o755)
    copy_files(ctx, [legacy_config], config_dir, uid=uid, gid=gid)

    # data
    legacy_data = os.path.abspath(
        ctx.legacy_dir + '/var/lib/prometheus/metrics/')
    copy_tree(ctx, [legacy_data], os.path.join(daemon_root, 'data'),
              uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    container = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, container, uid, gid,
                  deployment_type=DeploymentType.REDEPLOY,
                  endpoints=endpoints)
    update_firewalld(ctx, daemon_type)

def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy grafana installation as a cephadm daemon.

    The legacy 'grafana-server' unit is handed to _stop_and_disable(), then
    grafana.ini, the provisioning tree, the optional TLS cert/key pair and
    the data directory found beneath ctx.legacy_dir are copied into a newly
    created daemon data directory. The daemon is finally deployed with
    DeploymentType.REDEPLOY and firewalld is updated.
    """

    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
    # should try to set the ports we know cephadm defaults
    # to for these services in the firewall.
    ports = Monitoring.port_map['grafana']
    endpoints = [EndPoint('', p) for p in ports]

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # cert
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = '/etc/grafana/grafana.crt'
        cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_src = '/etc/grafana/grafana.key'
        key_src = os.path.abspath(ctx.legacy_dir + key_src)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        # point the copied grafana.ini at the new cert/key locations
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        # only report that ssl was skipped when it really was skipped
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
                  deployment_type=DeploymentType.REDEPLOY, endpoints=endpoints)
    update_firewalld(ctx, daemon_type)

def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy alertmanager installation as a cephadm daemon.

    The legacy 'prometheus-alertmanager' unit is handed to
    _stop_and_disable(), the config file and data found beneath
    ctx.legacy_dir are copied into a newly created daemon data directory,
    and the daemon is deployed with DeploymentType.REDEPLOY before
    firewalld is updated.
    """
    daemon_type = 'alertmanager'
    uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
    # should try to set the ports we know cephadm defaults
    # to for these services in the firewall.
    endpoints = [EndPoint('', port)
                 for port in Monitoring.port_map['alertmanager']]

    _stop_and_disable(ctx, 'prometheus-alertmanager')

    daemon_root = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                uid=uid, gid=gid)

    # config
    legacy_config = os.path.abspath(
        ctx.legacy_dir + '/etc/prometheus/alertmanager.yml')
    config_dir = os.path.join(daemon_root, 'etc/alertmanager')
    makedirs(config_dir, uid, gid, 0o755)
    copy_files(ctx, [legacy_config], config_dir, uid=uid, gid=gid)

    # data
    legacy_data = os.path.abspath(
        ctx.legacy_dir + '/var/lib/prometheus/alertmanager/')
    copy_tree(ctx, [legacy_data],
              os.path.join(daemon_root, 'etc/alertmanager/data'),
              uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    container = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, container, uid, gid,
                  deployment_type=DeploymentType.REDEPLOY,
                  endpoints=endpoints)
    update_firewalld(ctx, daemon_type)

def _adjust_grafana_ini(filename):
    # type: (str) -> None
    """Rewrite cert_file/cert_key in the [server] section of a grafana.ini.

    The file is read fully and rewritten line by line; only lines inside
    the [server] section that start with cert_file or cert_key are
    replaced, every other line is written back unchanged.

    :param filename: path of the grafana.ini file to update in place
    :raises Error: if the file cannot be read or written (OSError)
    """

    # Update cert_file, cert_key pathnames in server section
    # ConfigParser does not preserve comments
    try:
        with open(filename, 'r') as grafana_ini:
            lines = grafana_ini.readlines()
        with write_new(filename, perms=None) as grafana_ini:
            server_section = False
            for line in lines:
                # any section header ends the [server] section ...
                if line.startswith('['):
                    server_section = False
                # ... unless it is the [server] header itself
                if line.startswith('[server]'):
                    server_section = True
                if server_section:
                    line = re.sub(r'^cert_file.*',
                                  'cert_file = /etc/grafana/certs/cert_file', line)
                    line = re.sub(r'^cert_key.*',
                                  'cert_key = /etc/grafana/certs/cert_key', line)
                # every line (modified or not) must be written back,
                # otherwise the new file would end up empty
                grafana_ini.write(line)
    except OSError as err:
        raise Error('Cannot update {}: {}'.format(filename, err))

def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None
    """Stop the systemd unit if it is running and disable it if enabled."""
    is_enabled, unit_state, _ = check_unit(ctx, unit_name)

    is_running = (unit_state == 'running')
    if is_running:
        logger.info('Stopping old systemd unit %s...' % unit_name)
        stop_cmd = ['systemctl', 'stop', unit_name]
        call_throws(ctx, stop_cmd)

    if is_enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        disable_cmd = ['systemctl', 'disable', unit_name]
        call_throws(ctx, disable_cmd)


def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """Remove the daemon named by ctx.name from the cluster ctx.fsid.

    The daemon's systemd unit is stopped, reset and disabled, its data
    directory is either moved into '<data_dir>/<fsid>/removed' (mon, osd and
    prometheus, unless ctx.force_delete_data is set) or deleted, and the
    TCP ports it used are closed in firewalld on a best-effort basis.

    :raises Error: when a mon or osd is removed without ctx.force
    """
    lock = FileLock(ctx, ctx.fsid)
    # the lock object is useless unless it is actually taken
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    # best effort: plain call() does not raise on failure
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'disable', unit_name],
         verbosity=CallVerbosity.DEBUG)

    # force remove rgw admin socket file if leftover
    if daemon_type in ['rgw']:
        rgw_asok_path = f'/var/run/ceph/{ctx.fsid}/ceph-client.{ctx.name}.*.asok'
        call(ctx, ['rm', '-rf', rgw_asok_path],
             verbosity=CallVerbosity.DEBUG)

    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        # the timestamp suffix keeps repeated removals from colliding
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        call_throws(ctx, ['rm', '-rf', data_dir])

    endpoints = fetch_tcp_ports(ctx)
    ports: List[int] = [e.port for e in endpoints]
    if ports:
        try:
            fw = Firewalld(ctx)
            fw.close_ports(ports)
            fw.apply_rules()
        except RuntimeError as e:
            # in case we cannot close the ports we will remove
            # the daemon but keep them open.
            logger.warning(f' Error when trying to close ports: {e}')


def _zap(ctx: CephadmContext, what: str) -> None:
    """Run 'ceph-volume lvm zap --destroy' on *what* inside a container.

    :param what: device path handed to ceph-volume as the zap target
    """
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = get_ceph_volume_container(ctx,
                                  args=['lvm', 'zap', '--destroy', what],
                                  volume_mounts=mounts,
                                  envs=ctx.env)
    logger.info(f'Zapping {what}...')
    # call_throws raises on a non-zero exit, so the result needs no check
    call_throws(ctx, c.run_cmd())

def _zap_osds(ctx: CephadmContext) -> None:
    """Zap every device whose LVs all belong to the cluster ctx.fsid.

    The device list comes from 'ceph-volume inventory --format json'.
    Devices where only some LVs belong to this cluster are not touched;
    a warning listing their LV names is logged instead.

    :raises Error: if the inventory cannot be listed or is not valid JSON
    """
    # assume fsid lock already held

    # list
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = get_ceph_volume_container(ctx,
                                  args=['inventory', '--format', 'json'],
                                  volume_mounts=mounts,
                                  envs=ctx.env)
    out, err, code = call_throws(ctx, c.run_cmd())
    if code:
        raise Error('failed to list osd inventory')
    try:
        ls = json.loads(out)
    except ValueError as e:
        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')

    for i in ls:
        matches = [lv.get('cluster_fsid') == ctx.fsid and i.get('ceph_device') for lv in i.get('lvs', [])]
        if any(matches) and all(matches):
            _zap(ctx, i.get('path'))
        elif any(matches):
            lv_names = [lv['name'] for lv in i.get('lvs', [])]
            # TODO: we need to map the lv_names back to device paths (the vg
            # id isn't part of the output here!)
            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')

def command_zap_osds(ctx: CephadmContext) -> None:
    """Zap the OSD devices of cluster ctx.fsid while holding its lock.

    :raises Error: when ctx.force is not set
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    # _zap_osds() assumes the fsid lock is already held
    lock.acquire()

    _zap_osds(ctx)



def get_ceph_cluster_count(ctx: CephadmContext) -> int:
    """Count the entries of ctx.data_dir whose name is accepted by is_fsid()."""
    return sum(1 for entry in os.listdir(ctx.data_dir) if is_fsid(entry))

def command_rm_cluster(ctx: CephadmContext) -> None:
    """Remove the cluster ctx.fsid from this host while holding its lock.

    :raises Error: when ctx.force is not set
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    # creating the lock object alone does not protect anything
    lock.acquire()
    _rm_cluster(ctx, ctx.keep_logs, ctx.zap_osds)

def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None:
    """Remove all traces of the cluster ctx.fsid from this host.

    Stops and disables the cluster's daemon units and target, optionally
    zaps its OSDs, then removes unit files, data, logs, logrotate and
    sysctl settings and leftover runtime directories. Files shared between
    clusters are only removed when no other cluster remains on the host.

    :param keep_logs: when true, log files are left in place
    :param zap_osds: when true, the cluster's OSD devices are zapped
    :raises Error: when ctx.fsid is not set
    """

    if not ctx.fsid:
        raise Error('must select the cluster to delete by passing --fsid to proceed')

    def disable_systemd_service(unit_name: str) -> None:
        # best effort: plain call() does not raise on failure
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    logger.info(f'Deleting cluster with fsid: {ctx.fsid}')

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        disable_systemd_service(get_unit_name(ctx.fsid, d['name']))

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        disable_systemd_service(unit_name)

    slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # osds?
    if zap_osds:
        _zap_osds(ctx)

    # rm units
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s@.service' % ctx.fsid])
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s.target' % ctx.fsid])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        call_throws(ctx, ['rm', '-rf', ctx.log_dir
                          + '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # if last cluster on host remove shared files
    if get_ceph_cluster_count(ctx) == 0:
        disable_systemd_service('ceph.target')

        # rm shared ceph target files
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])

        # rm cephadm logrotate config
        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])

        if not keep_logs:
            # remove all cephadm logs
            for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
                os.remove(fname)

    # rm sysctl settings
    sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]

    for sysctl_dir in sysctl_dirs:
        for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
            p.unlink()

    # cleanup remaining ceph directories
    ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/cephadm-{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
    for dd in ceph_dirs:
        shutil.rmtree(dd, ignore_errors=True)

    # clean up config, keyring, and pub key files
    files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
    if os.path.exists(files[0]):
        # only touch /etc/ceph when its config mentions this cluster's fsid
        valid_fsid = False
        with open(files[0]) as f:
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            # rm configuration files on /etc/ceph
            for fname in files:
                if os.path.exists(fname):
                    os.remove(fname)


def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    """Check whether one of the known time synchronization units is active.

    The list of candidate units and *enabler* are handed to check_units();
    a warning is logged when it reports failure.

    :param enabler: optional packager forwarded to check_units()
    :return: True if check_units() succeeded, False otherwise
    """
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
        'openntpd.service',  # ubuntu / debian
    ]
    if not check_units(ctx, units, enabler):
        logger.warning('No time sync service is running; checked for %s' % units)
        return False
    return True

def command_check_host(ctx: CephadmContext) -> None:
    """Verify that this host meets the requirements to run cephadm daemons.

    Checks for a container engine, the 'systemctl' and 'lvcreate'
    binaries, an active time synchronization service and, when
    ctx.expect_hostname is set, a matching hostname. All problems are
    collected so that they can be reported together.

    :raises Error: listing every failed check
    """
    errors = []
    commands = ['systemctl', 'lvcreate']

    try:
        engine = check_container_engine(ctx)
        logger.info(f'{engine} is present')
    except Error as e:
        errors.append(str(e))

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync(ctx):
        errors.append('No time synchronization is active')

    if 'expect_hostname' in ctx and ctx.expect_hostname:
        if get_hostname().lower() != ctx.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), ctx.expect_hostname))
        else:
            logger.info('Hostname "%s" matches what is expected.',
                        ctx.expect_hostname)

    if errors:
        raise Error('\nERROR: '.join(errors))

    logger.info('Host looks OK')


def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
    """Look up the uid, gid and '.ssh' directory of *ssh_user*.

    :param ssh_user: name of a local user known to the password database
    :return: (uid, gid, path of the '.ssh' directory in the user's home)
    :raises Error: if the user does not exist
    """
    try:
        s_pwd = pwd.getpwnam(ssh_user)
    except KeyError:
        raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))

    ssh_uid = s_pwd.pw_uid
    ssh_gid = s_pwd.pw_gid
    ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
    return ssh_uid, ssh_gid, ssh_dir

def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
    """Authorize the public key for the provided ssh user

    The key is appended to the user's authorized_keys file, which (like the
    '.ssh' directory) is created when missing.

    :param ssh_pub_key: public key line to authorize
    :param ssh_user: local user whose authorized_keys file is updated
    :return: True if the key was added, False if it was already present
    :raises Error: if the key is empty or the user cannot be found
    """

    def key_in_file(path: str, key: str) -> bool:
        if not os.path.exists(path):
            return False
        wanted = key.strip()
        with open(path) as f:
            return any(line.strip() == wanted for line in f)

    logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
    # also reject the empty string, which str.isspace() reports as False
    if not ssh_pub_key or ssh_pub_key.isspace():
        raise Error('Trying to authorize an empty ssh key')

    ssh_pub_key = ssh_pub_key.strip()
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    if not os.path.exists(ssh_dir):
        makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

    auth_keys_file = '%s/authorized_keys' % ssh_dir
    if key_in_file(auth_keys_file, ssh_pub_key):
        logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
        return False

    # find out whether the existing file ends without a newline, in which
    # case one must be inserted before the new key
    add_newline = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            f.seek(0, os.SEEK_END)
            if f.tell() > 0:
                f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
                if f.read() != '\n':
                    add_newline = True

    with open(auth_keys_file, 'a') as f:
        os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
        os.fchmod(f.fileno(), DEFAULT_MODE)  # just in case we created it
        if add_newline:
            f.write('\n')
        f.write(ssh_pub_key + '\n')

    return True

def revoke_ssh_key(key: str, ssh_user: str) -> None:
    """Revoke the public key authorization for the ssh user

    Every line of the user's authorized_keys file except the ones matching
    *key* is copied to a temporary file, which then replaces the original.
    A warning is logged (and the file left untouched) when the key is not
    found.

    :param key: public key line to remove
    :param ssh_user: local user whose authorized_keys file is updated
    """
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    auth_keys_file = '%s/authorized_keys' % ssh_dir
    deleted = False
    filename = ''
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            lines = f.readlines()
        wanted = key.strip()
        fd, filename = tempfile.mkstemp()
        # reuse the descriptor returned by mkstemp() so that it gets closed
        with os.fdopen(fd, 'w') as f:
            os.fchown(f.fileno(), ssh_uid, ssh_gid)
            os.fchmod(f.fileno(), DEFAULT_MODE)  # secure access to the keys file
            for line in lines:
                if line.strip() == wanted:
                    deleted = True
                else:
                    # keep every other key
                    f.write(line)

    if deleted:
        shutil.move(filename, auth_keys_file)
    else:
        if filename:
            # nothing changed: do not leave the temporary copy behind
            os.unlink(filename)
        logger.warning('Cannot find the ssh key to be deleted')

def check_ssh_connectivity(ctx: CephadmContext) -> None:
    """Check that ctx.ssh_user can ssh into this host and run sudo.

    Uses the key pair supplied by the user or, when none was given, a
    throw-away key pair generated with ssh-keygen. The public key is
    temporarily authorized for the user and revoked again afterwards if it
    was not authorized before. The check is skipped when ssh or ssh-keygen
    is unavailable, when no key could be generated, or when CA signed keys
    are used.

    :raises Error: if the ssh connection attempt fails
    """

    def cmd_is_available(cmd: str) -> bool:
        if shutil.which(cmd) is None:
            logger.warning(f'Command not found: {cmd}')
            return False
        return True

    if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
        logger.warning('Cannot check ssh connectivity. Skipping...')
        return

    ssh_priv_key_path = ''
    ssh_pub_key_path = ''
    ssh_signed_cert_path = ''
    if ctx.ssh_private_key and ctx.ssh_public_key:
        # let's use the keys provided by the user
        ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
        ssh_pub_key_path = pathify(ctx.ssh_public_key.name)
    elif ctx.ssh_private_key and ctx.ssh_signed_cert:
        # CA signed keys use case
        ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
        ssh_signed_cert_path = pathify(ctx.ssh_signed_cert.name)
    else:
        # no custom keys, let's generate some random keys just for this check
        ssh_priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
        ssh_pub_key_path = f'{ssh_priv_key_path}.pub'
        ssh_key_gen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', ssh_priv_key_path]
        _, _, code = call(ctx, ssh_key_gen_cmd)
        if code != 0:
            logger.warning('Cannot generate keys to check ssh connectivity.')
            return

    if ssh_signed_cert_path:
        logger.info('Verification for CA signed keys authentication not implemented. Skipping ...')
    elif ssh_pub_key_path:
        logger.info('Verifying ssh connectivity using standard pubkey authentication ...')
        with open(ssh_pub_key_path, 'r') as f:
            key = f.read().strip()
        new_key = authorize_ssh_key(key, ctx.ssh_user)
        ssh_cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
        _, _, code = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
                                *ssh_cfg_file_arg, '-i', ssh_priv_key_path,
                                '-o PasswordAuthentication=no',
                                f'{ctx.ssh_user}@{get_hostname()}',
                                'sudo echo'])

        # we only remove the key if it's a new one. In case the user has provided
        # some already existing key then we don't alter authorized_keys file
        if new_key:
            revoke_ssh_key(key, ctx.ssh_user)

        pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
        prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
        ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
        err_msg = f"""
** Please verify your user's ssh configuration and make sure:
- User {ctx.ssh_user} must have passwordless sudo access
{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
"""
        if code != 0:
            raise Error(err_msg)

def command_prepare_host(ctx: CephadmContext) -> None:
    """Install whatever this host is missing to run cephadm daemons.

    Installs a container engine, lvm2 and a time synchronization service
    when they are not present, adjusts the hostname to ctx.expect_hostname
    when requested, and finally repeats the host check.
    """
    logger.info('Verifying podman|docker is present...')
    # the packager is created lazily, only if something must be installed
    pkg = None
    try:
        check_container_engine(ctx)
    except Error as e:
        logger.warning(str(e))
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync(ctx):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(ctx, enabler=pkg)

    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
        call_throws(ctx, ['hostname', ctx.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(ctx.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host(ctx)


class CustomValidation(argparse.Action):
    """argparse action that validates a daemon name before storing it."""

    def _check_name(self, values: str) -> None:
        """Ensure *values* looks like '<type>.<id>' with a supported type.

        :raises argparse.ArgumentError: if the name has no '.' separator or
            its type is not returned by get_supported_daemons()
        """
        try:
            (daemon_type, daemon_id) = values.split('.', 1)
        except ValueError:
            raise argparse.ArgumentError(self,
                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                                         'name must declare the type of daemon e.g. '
                                         '{}'.format(', '.join(daemons)))

    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
        """Validate and store *values* when this action handles 'name'."""
        assert isinstance(values, str)
        if self.dest == 'name':
            # reject malformed names before they reach the namespace
            self._check_name(values)
            setattr(namespace, self.dest, values)


def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Read distro id, version and codename from /etc/os-release.

    :return: lower-cased (ID, VERSION_ID, VERSION_CODENAME); an entry is
        None when the file does not define the corresponding variable
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f:
            line = line.strip()
            # skip comments and anything that is not an assignment
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # slices (unlike val[0]) are safe for an empty value ('FOO=')
            if val[:1] == '"' and val[-1:] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename

class Packager(object):
    """Base class for distro specific package/repository handling.

    Subclasses implement add_repo(), rm_repo(), install() and
    install_podman(); at most one of stable/version/branch selects which
    repository to use (commit may only accompany branch).
    """

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str] = None, version: Optional[str] = None,
                 branch: Optional[str] = None, commit: Optional[str] = None):
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.ctx = ctx
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def validate(self) -> None:
        """Validate parameters before writing any state to disk."""

    def add_repo(self) -> None:
        raise NotImplementedError

    def rm_repo(self) -> None:
        raise NotImplementedError

    def install(self, ls: List[str]) -> None:
        raise NotImplementedError

    def install_podman(self) -> None:
        raise NotImplementedError

    def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str:
        """Fetch the repo file content for a development build.

        shaman is asked for the repo matching the given distro, branch and
        commit (or 'latest') on this machine's architecture; the URL it
        resolves to (chacra) is then fetched.

        :return: the decoded body of the chacra response
        :raises Error: if either request fails with an HTTPError
        """
        import platform

        # query shaman
        arch = platform.uname().machine
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=arch,
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        chacra_url = ''
        try:
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self) -> Tuple[str, str]:
        """Return (url, short name) of the GPG key matching the repo kind."""
        if self.ctx.gpg_url:
            return self.ctx.gpg_url, 'manual'
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.gpg', 'release'
        else:
            # development (branch/commit) builds are signed with this key
            return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'

    def enable_service(self, service: str) -> None:
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])

class Apt(Packager):
    """Packager implementation for apt based distros (ubuntu, debian)."""

    # maps the os-release ID to the distro name used for repositories
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
        super(Apt, self).__init__(ctx, stable=stable, version=version,
                                  branch=branch, commit=commit)
        assert distro
        self.ctx = ctx
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self) -> str:
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self) -> None:
        """Install the repo GPG key and the ceph apt source list.

        :raises Error: if the GPG key cannot be downloaded
        """

        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read()
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
            f.write(key)

        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.stable, self.distro_codename)
        else:
            # development build: the repo file content comes from shaman
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        self.update()

    def rm_repo(self) -> None:
        """Remove the repo GPG keys and source list installed by add_repo()."""
        for name in ['autobuild', 'release', 'manual']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls: List[str]) -> None:
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)

    def update(self) -> None:
        logger.info('Updating package list...')
        call_throws(self.ctx, ['apt-get', 'update'])

    def install_podman(self) -> None:
        """Install podman, falling back to docker.io if that fails."""
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            self.update()

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work.  Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self) -> str:
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self) -> str:
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    def kubic_repo_gpgkey_url(self) -> str:
        return '%s/Release.key' % self.kubic_repo_url()

    def kubic_repo_gpgkey_path(self) -> str:
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self) -> None:
        """Install the kubic repo GPG key and source list.

        :raises Error: if the GPG key cannot be downloaded
        """
        url = self.kubic_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubic_repo_gpgkey_path()
        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self) -> None:
        """Remove the kubic repo GPG key and source list if present."""
        keyring = self.kubic_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)

class YumDnf(Packager):
    """Packager for RPM based distros driven by yum, dnf or tdnf."""

    # distro id -> (normalized distro name, distro code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'rocky': ('centos', 'el'),
        'almalinux': ('centos', 'el'),
        'ol': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
        'mariner': ('mariner', 'cm'),
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        """Record the distro details and pick the package tool to drive.

        :param distro: distro id, must be a key of DISTRO_NAMES
        :param distro_version: distro version string, e.g. '8.6'
        """
        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro
        assert distro_version
        self.ctx = ctx
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        code_prefix = self.DISTRO_NAMES[distro][1]
        self.distro_code = code_prefix + str(self.major)
        # select on the bare prefix: distro_code already carries the major
        # version (e.g. 'el8'), so it can never equal 'fc', 'el' or 'cm'
        if (code_prefix == 'fc' and self.major >= 30) or \
           (code_prefix == 'el' and self.major >= 8):
            self.tool = 'dnf'
        elif code_prefix == 'cm':
            self.tool = 'tdnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw: Any) -> str:
        """Render the body of a repo file section from keyword arguments.

        A repo file must not contain a key with a missing value, so a line is
        only emitted for keywords that are supplied with a value other than
        None or the empty string. Lines are emitted in a fixed order.

        :param kw: any of reponame, name, baseurl, enabled, gpgcheck, _type,
                   gpgkey, proxy, priority
        :returns: the rendered lines joined by newlines
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        """Return the path of the ceph repo file."""
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        """Return the base URL of the rpm repo for the version or release."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
                                     self.distro_code)

    def validate(self) -> None:
        """Check that a repo can be added for this distro and release.

        :raises Error: for Fedora, for pacific/16.y.z or later on el7, or when
                       the repository metadata cannot be fetched
        """
        if self.distro_code.startswith('fc'):
            raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
        if self.distro_code == 'el7':
            if self.stable and self.stable >= 'pacific':
                raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
            if self.version:
                major = self.version.split('.')[0]
                # compare numerically: as strings '9' >= '16' is True
                if major.isdigit() and int(major) >= 16:
                    raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')

        if self.stable or self.version:
            # we know that yum & dnf require there to be a
            # $base_url/$arch/repodata/repomd.xml so we can test if this URL
            # is gettable in order to validate the inputs
            test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
            try:
                urlopen(test_url)
            except HTTPError as err:
                logger.error('unable to fetch repo metadata: %r', err)
                raise Error('failed to fetch repository metadata. please check'
                            ' the provided parameters are correct and try again')

    def add_repo(self) -> None:
        """Write the ceph repo file and, on el distros, enable EPEL."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    # repo_gpgkey() is inherited from Packager; presumably it
                    # returns (key url, key name) - verify against Packager
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            # no release/version given: use a development build repo
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self) -> None:
        """Delete the ceph repo file if it exists."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        """Install the given packages with the selected tool."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)

    def install_podman(self) -> None:
        """Install the podman package."""
        self.install(['podman'])

class Zypper(Packager):
    """Packager for SUSE based distros driven by zypper."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        """Record the distro details used to build repo URLs.

        The distro version defaults to '15.1' and is only overridden for
        non-tumbleweed distros that report a version.
        """
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro is not None
        self.ctx = ctx
        self.tool = 'zypper'
        self.distro = 'opensuse'
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw: Any) -> str:
        """Render the body of a repo file section from keyword arguments.

        A line is only emitted for keywords supplied with a value other than
        None or the empty string; lines are emitted in a fixed order.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        """Return the path of the ceph repo file."""
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        """Return the base URL of the rpm repo for the version or release."""
        assert self.stable or self.version
        if self.version:
            # an explicit version selects rpm-<version>, not rpm-<release>
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)

    def add_repo(self) -> None:
        """Write the ceph repo file for the release, version or dev build."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    # repo_gpgkey() is inherited from Packager; presumably it
                    # returns (key url, key name) - verify against Packager
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            # no release/version given: use a development build repo
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self) -> None:
        """Delete the ceph repo file if it exists."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        """Install the given packages with zypper."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self) -> None:
        """Install the podman package."""
        self.install(['podman'])

def create_packager(ctx: CephadmContext,
                    stable: Optional[str] = None, version: Optional[str] = None,
                    branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
    """Return the Packager implementation matching the local distro.

    :raises Error: if the detected distro is not handled by any packager
    """
    distro, distro_version, distro_codename = get_distro()
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    elif distro in Apt.DISTRO_NAMES:
        return Apt(ctx, stable=stable, version=version,
                   branch=branch, commit=commit,
                   distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename)
    elif distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))

def command_add_repo(ctx: CephadmContext) -> None:
    """Validate the repo selection arguments and add the package repo.

    :raises Error: if --release and --version are both given, if none of
                   --release/--version/--dev/--dev-commit is given, or if
                   the version is not of the form x.y.z
    """
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        try:
            # unpacking enforces exactly three dot separated components
            (x, y, z) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
    if ctx.release:
        # Pacific =/= pacific in this case, set to undercase to avoid confusion
        ctx.release = ctx.release.lower()

    pkg = create_packager(ctx, stable=ctx.release,
                          version=ctx.version,
                          branch=ctx.dev,
                          commit=ctx.dev_commit)
    pkg.validate()
    pkg.add_repo()
    logger.info('Completed adding repo.')

def command_rm_repo(ctx: CephadmContext) -> None:
    """Remove the package repo configured for the local distro."""
    pkg = create_packager(ctx)
    pkg.rm_repo()

def command_install(ctx: CephadmContext) -> None:
    """Install the requested packages with the local distro's packager."""
    pkg = create_packager(ctx)
    # NOTE(review): assumes the install subcommand stores its package list
    # in ctx.packages - confirm against the argument parser
    pkg.install(ctx.packages)

def command_rescan_disks(ctx: CephadmContext) -> str:
    """Ask every compatible SCSI host adapter to rescan its bus.

    Adapters whose proc_name is 'unknown' or 'usb-storage' are skipped.

    :returns: a status message starting with 'Ok.', 'Partial.' or 'Failed.'
    """

    def probe_hba(scan_path: str) -> None:
        """Tell the adapter to rescan"""
        with open(scan_path, 'w') as f:
            f.write('- - -')

    cmd = ctx.func.__name__.replace('command_', '')
    logger.info(f'{cmd}: starting')
    start = time.time()

    all_scan_files = glob('/sys/class/scsi_host/*/scan')
    scan_files = []
    skipped = []
    for scan_path in all_scan_files:
        adapter_name = os.path.basename(os.path.dirname(scan_path))
        proc_name = read_file([os.path.join(os.path.dirname(scan_path), 'proc_name')])
        if proc_name in ['unknown', 'usb-storage']:
            skipped.append(adapter_name)
            logger.info(f'{cmd}: rescan skipping incompatible host adapter {adapter_name} : {proc_name}')
            continue

        scan_files.append(scan_path)

    if not scan_files:
        logger.info(f'{cmd}: no compatible HBAs found')
        return 'Ok. No compatible HBAs found'

    responses = async_run(concurrent_tasks(probe_hba, scan_files))
    # a truthy response is treated as a failed rescan request
    failures = [r for r in responses if r]

    logger.info(f'{cmd}: Complete. {len(scan_files)} adapters rescanned, {len(failures)} failures, {len(skipped)} skipped')

    elapsed = time.time() - start
    if failures:
        plural = 's' if len(failures) > 1 else ''
        if len(failures) == len(scan_files):
            return f'Failed. All {len(scan_files)} rescan requests failed'
        else:
            return f'Partial. {len(scan_files) - len(failures)} successful, {len(failures)} failure{plural} against: {", ".join(failures)}'

    return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)'


def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the IPv4 address of an interface in CIDR notation.

    :param ifname: interface name; only the first 15 characters are used
    :returns: 'address/prefixlen', or '' when the interface has no IPv4 address
    """
    import fcntl  # local import keeps this block self-contained

    def _extract(sock: socket.socket, offset: int) -> str:
        # bytes 20-24 of the ioctl reply buffer hold the packed IPv4 value
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    # the context manager closes the probe socket on every exit path
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
            dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''

    # prefix length = number of set bits in the dotted-quad netmask
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)

def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return the first IPv6 address listed for an interface, in CIDR form.

    The address is looked up in /proc/net/if_inet6; '' is returned when that
    file does not exist or holds no entry for `ifname`.
    """
    inet6_table = '/proc/net/if_inet6'
    if not os.path.exists(inet6_table):
        return ''

    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # column 0 is the ipv6 address, column 2 is used as the prefix length
    for entry in read_file([inet6_table]).splitlines():
        columns = entry.split()
        if columns[-1] != ifname:
            continue
        hex_addr = columns[0]
        groups = [hex_addr[pos:pos + 4] for pos in range(0, len(hex_addr), 4)]
        # let the ipaddress module apply the standard compression rules
        address = ipaddress.ip_address(':'.join(groups))
        prefix_len = int('0x{}'.format(columns[2]), 16)
        return '{}/{}'.format(str(address), prefix_len)
    return ''

def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
    divisor = 1000.0
    yotta = 'YB'

    if mode == 'binary':
        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        divisor = 1024.0
        yotta = 'YiB'

    # scale down one unit at a time until the value drops below the divisor
    for unit in unit_list:
        if abs(num) < divisor:
            return '%3.1f%s' % (num, unit)
        num /= divisor
    # anything still too large after the last listed unit is shown in yotta
    return '%.1f%s' % (num, yotta)

def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for path in path_list:
        if file_name:
            file_path = os.path.join(path, file_name)
        else:
            file_path = path
        if os.path.exists(file_path):
            try:
                with open(file_path, 'rb') as f:
                    content = f.read().decode('utf-8', 'ignore').strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail; open() failures are handled the same
                return 'Unknown'
            else:
                return content
    return 'Unknown'


class Enclosure:
    def __init__(self, enc_id: str, enc_path: str, dev_path: str):
        """External disk enclosure metadata

        :param enc_id: enclosure id (normally a WWN)
        :param enc_path: sysfs path to HBA attached to the enclosure
                         e.g. /sys/class/scsi_generic/sg11/device/enclosure/0:0:9:0
        :param dev_path: sysfs path to the generic scsi device for the enclosure HBA
                         e.g. /sys/class/scsi_generic/sg2
        """
        # attributes with a leading underscore are left out of _dump()
        self._path: str = dev_path
        self._dev_path: str = os.path.join(dev_path, 'device')
        self._enc_path: str = enc_path
        self.ses_paths: List[str] = []
        self.path_count: int = 0
        self.vendor: str = ''
        self.model: str = ''
        self.enc_id: str = enc_id
        self.components: Union[int, str] = 0
        self.device_lookup: Dict[str, str] = {}
        self.device_count: int = 0
        self.slot_map: Dict[str, Dict[str, str]] = {}

        self._probe()

    def _probe(self) -> None:
        """Analyse the dev paths to identify enclosure related information"""

        self.vendor = read_file([os.path.join(self._dev_path, 'vendor')])
        self.model = read_file([os.path.join(self._dev_path, 'model')])
        self.components = read_file([os.path.join(self._enc_path, 'components')])
        slot_paths = glob(os.path.join(self._enc_path, '*', 'slot'))
        for slot_path in slot_paths:
            slot = read_file([slot_path])
            serial_path = os.path.join(os.path.dirname(slot_path), 'device', 'vpd_pg80')
            serial = ''
            if os.path.exists(serial_path):
                serial_raw = read_file([serial_path])
                # keep only printable characters of the raw page content
                serial = (''.join(char for char in serial_raw if char in string.printable)).strip()
                self.device_lookup[serial] = slot
            slot_dir = os.path.dirname(slot_path)
            self.slot_map[slot] = {
                'status': read_file([os.path.join(slot_dir, 'status')]),
                'fault': read_file([os.path.join(slot_dir, 'fault')]),
                'locate': read_file([os.path.join(slot_dir, 'locate')]),
                'serial': serial,
            }

        self.device_count = len(self.device_lookup)
        # register the sg device this enclosure was discovered through
        self.update(os.path.basename(self._path))

    def update(self, dev_id: str) -> None:
        """Update an enclosure object with a related sg device name

        :param dev_id (str): device name e.g. sg2
        """
        self.ses_paths.append(dev_id)
        self.path_count = len(self.ses_paths)

    def _dump(self) -> Dict[str, Any]:
        """Return a dict representation of the object"""
        return {k: v for k, v in self.__dict__.items() if not k.startswith('_')}

    def __str__(self) -> str:
        """Return a formatted json representation of the object as a string"""
        return json.dumps(self._dump(), indent=2)

    def __repr__(self) -> str:
        """Return a json representation of the object as a string"""
        return json.dumps(self._dump())

    def as_json(self) -> Dict[str, Any]:
        """Return a dict representing the object"""
        return self._dump()

class HostFacts():
    """Gathers hardware, storage, network and OS facts about the local host."""
    _dmi_path_list = ['/sys/class/dmi/id']
    _nic_path_list = ['/sys/class/net']
    _apparmor_path_list = ['/etc/apparmor']
    # vendor values replaced by a readable name when describing a disk
    _disk_vendor_workarounds = {
        '0x1af4': 'Virtio Block Device'
    }
    # /sys/block entries starting with these prefixes are ignored
    _excluded_block_devices = ('sr', 'zram', 'dm-', 'loop', 'md')
    _sg_generic_glob = '/sys/class/scsi_generic/*'

    def __init__(self, ctx: CephadmContext):
        """Probe the host and cache the gathered facts on the instance."""
        self.ctx: CephadmContext = ctx
        self.cpu_model: str = 'Unknown'
        self.sysctl_options: Dict[str, str] = self._populate_sysctl_options()
        self.cpu_count: int = 0
        self.cpu_cores: int = 0
        self.cpu_threads: int = 0
        self.interfaces: Dict[str, Any] = {}

        self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
        # fill in the cpu_* and interfaces defaults set above
        self._get_cpuinfo()
        self._process_nics()
        self.arch: str = platform.processor()
        self.kernel: str = platform.release()
        # enclosures must be discovered before the device list is built,
        # because _get_device_info() maps disks onto enclosure slots
        self._enclosures = self._discover_enclosures()
        self._block_devices = self._get_block_devs()
        self._device_list = self._get_device_info()

    def _populate_sysctl_options(self) -> Dict[str, str]:
        """Return the options reported by `sysctl -a` as a name -> value dict."""
        sysctl_options: Dict[str, str] = {}
        out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        if out:
            for line in out.splitlines():
                # split on the first '=' only, since a value may contain '=';
                # lines without any '=' carry no option and are skipped
                option, sep, value = line.partition('=')
                if not sep:
                    continue
                sysctl_options[option.strip()] = value.strip()
        return sysctl_options

    def _discover_enclosures(self) -> Dict[str, Enclosure]:
        """Build a dictionary of discovered scsi enclosures

        Enclosures are detected by walking the scsi generic sysfs hierarchy.
        Any device tree that holds an 'enclosure' subdirectory is interpreted as
        an enclosure. Once identified the enclosure directory is analysed to
        identify key descriptors that will help relate disks to enclosures and
        disks to enclosure slots.

        :return: Dict[str, Enclosure]: a map of enclosure id (hex) to enclosure object
        """
        sg_paths: List[str] = glob(HostFacts._sg_generic_glob)
        enclosures: Dict[str, Enclosure] = {}

        for sg_path in sg_paths:
            enc_path = os.path.join(sg_path, 'device', 'enclosure')
            if os.path.exists(enc_path):
                enc_dirs = glob(os.path.join(enc_path, '*'))
                if len(enc_dirs) != 1:
                    # incomplete enclosure spec - expecting ONE dir in the format
                    # host(adapter):bus:target:lun e.g. 16:0:0:0
                    continue
                enc_path = enc_dirs[0]
                enc_id = read_file([os.path.join(enc_path, 'id')])
                if enc_id in enclosures:
                    # enclosure already seen via another sg device: record
                    # the extra path instead of creating a second object
                    enclosures[enc_id].update(os.path.basename(sg_path))
                    continue

                enclosure = Enclosure(enc_id, enc_path, sg_path)
                enclosures[enc_id] = enclosure

        return enclosures

    def enclosures(self) -> Dict[str, Dict[str, Any]]:
        """Dump the enclosure objects as dicts"""
        return {k: v._dump() for k, v in self._enclosures.items()}

    def enclosure_count(self) -> int:
        """Return the number of enclosures detected"""
        return len(self._enclosures.keys())

    def _get_cpuinfo(self):
        # type: () -> None
        """Determine cpu information via /proc/cpuinfo"""
        raw = read_file(['/proc/cpuinfo'])
        output = raw.splitlines()
        # distinct 'physical id' values seen; its size becomes cpu_count
        cpu_set = set()

        for line in output:
            field = [f.strip() for f in line.split(':')]
            if len(field) < 2:
                # no 'key : value' pair on this line
                continue
            if 'model name' in line:
                self.cpu_model = field[1]
            if 'physical id' in line:
                cpu_set.add(field[1])
            if 'siblings' in line:
                self.cpu_threads = int(field[1].strip())
            if 'cpu cores' in line:
                self.cpu_cores = int(field[1].strip())
        self.cpu_count = len(cpu_set)

    def _get_block_devs(self):
        # type: () -> List[str]
        """Determine the list of block devices by looking at /sys/block"""
        return [dev for dev in os.listdir('/sys/block')
                if not dev.startswith(HostFacts._excluded_block_devices)]

    def operating_system(self):
        # type: () -> str
        """Determine OS version

        :returns: '<NAME> <VERSION>' from /etc/os-release, or 'Unknown' when
                  either field is missing
        """
        raw_info = read_file(['/etc/os-release'])
        os_release = raw_info.splitlines()
        rel_str = 'Unknown'
        rel_dict = dict()

        for line in os_release:
            if '=' in line:
                # split on the first '=' only: values such as URLs may
                # contain further '=' characters
                var_name, var_value = line.split('=', 1)
                rel_dict[var_name] = var_value.strip('"')

        # Would normally use PRETTY_NAME, but NAME and VERSION are more
        # consistent
        if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
            rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
        return rel_str

    def hostname(self):
        # type: () -> str
        """Return the hostname"""
        return platform.node()

    def shortname(self) -> str:
        return platform.node().split('.', 1)[0]

    def fqdn(self) -> str:
        """Return the host's fully qualified name as given by get_fqdn()"""
        qualified_name = get_fqdn()
        return qualified_name

    def subscribed(self):
        # type: () -> str
        """Highlevel check to see if the host is subscribed to receive updates/support"""
        def _red_hat():
            # type: () -> str
            # RHEL 7 and RHEL 8
            entitlements_dir = '/etc/pki/entitlement'
            if os.path.exists(entitlements_dir):
                pems = glob('{}/*.pem'.format(entitlements_dir))
                if len(pems) >= 2:
                    return 'Yes'

            return 'No'

        os_name = self.operating_system
        if os_name.upper().startswith('RED HAT'):
            return _red_hat()

        return 'Unknown'

    def hdd_count(self):
        # type: () -> int
        """Return a count of HDDs (spinners)"""
        return len(self.hdd_list)

    def _get_capacity(self, dev):
        # type: (str) -> int
        """Determine the size of a given device

        The kernel always bases device size calculations on a 512 byte
        sector, so the block count read from sysfs is multiplied by 512.

        :param dev: block device name as listed under /sys/block
        :returns: device size in bytes
        """
        size_path = os.path.join('/sys/block', dev, 'size')
        size_blocks = int(read_file([size_path]))
        return size_blocks * 512

    def _get_capacity_by_type(self, disk_type='hdd'):
        # type: (str) -> int
        """Return the total capacity of a category of device (flash or hdd)"""
        capacity: int = 0
        for dev in self._device_list:
            if dev['disk_type'] == disk_type:
                disk_capacity = cast(int, dev.get('disk_size_bytes', 0))
                capacity += disk_capacity
        return capacity

    def _get_device_info(self):
        # type: () -> List[Dict[str, object]]
        """Return a 'pretty' name list for each unique device in the `dev_list`

        Devices that share a serial number are reported once; the other
        device names are listed in the entry's 'alt_dev_name' field.
        """
        disk_list = list()

        # serial_num_lookup is a dict of serial number -> List of devices with that serial number
        serial_num_lookup: Dict[str, List[str]] = {}

        # make a map of devname -> disk path. this path name may indicate the physical slot
        # of a drive (phyXX)
        disk_path_map: Dict[str, str] = {}
        for path in glob('/dev/disk/by-path/*'):
            tgt_raw = Path(path).resolve()
            tgt = os.path.basename(str(tgt_raw))
            disk_path_map[tgt] = path

        # make a map of holder (dm-XX) -> full mpath name
        dm_device_map: Dict[str, str] = {}
        for mpath in glob('/dev/mapper/mpath*'):
            tgt_raw = Path(mpath).resolve()
            tgt = os.path.basename(str(tgt_raw))
            dm_device_map[tgt] = mpath

        # main loop to process all eligible block devices
        for dev in self._block_devices:
            enclosure_id = ''
            enclosure_slot = ''
            scsi_addr = ''
            mpath = ''

            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
            rotational = read_file(['/sys/block/{}/queue/rotational'.format(dev)])
            holders_raw = glob('/sys/block/{}/holders/*'.format(dev))
            if len(holders_raw) == 1:
                # mpath will have 1 holder entry
                holder = os.path.basename(holders_raw[0])
                mpath = dm_device_map.get(holder, '')

            disk_type = 'hdd' if rotational == '1' else 'flash'
            scsi_addr_path = glob('/sys/block/{}/device/bsg/*'.format(dev))
            if len(scsi_addr_path) == 1:
                scsi_addr = os.path.basename(scsi_addr_path[0])

            # vpd_pg80 isn't guaranteed (libvirt, vmware for example)
            serial_raw = read_file(['/sys/block/{}/device/vpd_pg80'.format(dev)])
            serial = (''.join(i for i in serial_raw if i in string.printable)).strip()
            if serial.lower() == 'unknown':
                serial = ''
            else:
                if serial in serial_num_lookup:
                    serial_num_lookup[serial].append(dev)
                else:
                    serial_num_lookup[serial] = [dev]
                # relate the disk to an enclosure slot via its serial number
                for enc_id, enclosure in self._enclosures.items():
                    if serial in enclosure.device_lookup.keys():
                        enclosure_id = enc_id
                        enclosure_slot = enclosure.device_lookup[serial]

            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
            disk_size_bytes = self._get_capacity(dev)
            disk_list.append({
                'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
                'vendor': disk_vendor,
                'model': disk_model,
                'rev': disk_rev,
                'wwid': disk_wwid,
                'dev_name': dev,
                'disk_size_bytes': disk_size_bytes,
                'disk_type': disk_type,
                'serial': serial,
                'alt_dev_name': '',
                'scsi_addr': scsi_addr,
                'enclosure_id': enclosure_id,
                'enclosure_slot': enclosure_slot,
                'path_id': disk_path_map.get(dev, ''),
                'mpath': mpath,
            })

        # process the devices to drop duplicate physical devs based on matching
        # the unique serial number
        disk_list_unique: List[Dict[str, Any]] = []
        serials_seen: List[str] = []
        for dev in disk_list:
            serial = str(dev['serial'])
            if serial:
                if serial in serials_seen:
                    continue
                serials_seen.append(serial)
                # every other device name sharing this serial is an alias
                devs = serial_num_lookup[serial].copy()
                devs.remove(str(dev['dev_name']))
                dev['alt_dev_name'] = ','.join(devs)
            disk_list_unique.append(dev)

        return disk_list_unique

    def hdd_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are HDDs (spinners)"""
        return [dev for dev in self._device_list if dev['disk_type'] == 'hdd']

    def flash_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are flash based (SSD, NVMe)"""
        return [dev for dev in self._device_list if dev['disk_type'] == 'flash']

    def hdd_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all HDD devices (bytes)"""
        return self._get_capacity_by_type(disk_type='hdd')

    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        # sum via the private helper so a number, not a bound method, is
        # handed to bytes_to_human
        return bytes_to_human(self._get_capacity_by_type(disk_type='hdd'))

    def cpu_load(self):
        # type: () -> Dict[str, float]
        """Return the cpu load average data for the host

        :returns: dict with '1min', '5min' and '15min' keys, taken from the
                  first three fields of /proc/loadavg
        """
        raw = read_file(['/proc/loadavg']).strip()
        data = raw.split()
        return {
            '1min': float(data[0]),
            '5min': float(data[1]),
            '15min': float(data[2]),
        }

    def flash_count(self):
        # type: () -> int
        """Return the number of flash devices in the system (SSD, NVMe)"""
        return len(self.flash_list)

    def flash_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all flash devices (bytes)"""
        return self._get_capacity_by_type(disk_type='flash')

    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        # sum via the private helper so a number, not a bound method, is
        # handed to bytes_to_human
        return bytes_to_human(self._get_capacity_by_type(disk_type='flash'))

    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata

        Populates self.interfaces with one entry per non-loopback interface.
        """
        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            '1': 'ethernet',
            '32': 'infiniband',
            '772': 'loopback',
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = 'bridge'
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = 'bonding'
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')

                if nic_type == 'loopback':  # skip loopback devices
                    continue

                lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
                upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                # an interface with a 'device' entry is treated as physical
                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        driver = os.path.basename(os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    'mtu': mtu,
                    'upper_devs_list': upper_devs_list,
                    'lower_devs_list': lower_devs_list,
                    'operstate': operstate,
                    'iftype': iftype,
                    'nic_type': nic_type,
                    'driver': driver,
                    'speed': speed,
                    'ipv4_address': get_ipv4_address(iface),
                    'ipv6_address': get_ipv6_address(iface),
                }

    def nic_count(self):
        # type: () -> int
        """Return a total count of all physical NICs detected in the host

        An interface counts when its self.interfaces entry has
        'iftype' == 'physical'.
        """
        return sum(
            1 for details in self.interfaces.values()
            if details['iftype'] == 'physical'
        )

    def _get_mem_data(self, field_name):
        # type: (str) -> int
        """Return the integer value of the first self._meminfo line starting with field_name.

        The value is the second whitespace-separated token of that line;
        0 is returned when no line matches.
        """
        candidates = (entry for entry in self._meminfo if entry.startswith(field_name))
        first_match = next(candidates, None)
        if first_match is None:
            return 0
        tokens = first_match.split()
        return int(tokens[1])

    def memory_total_kb(self):
        # type: () -> int
        """Installed memory (kb), taken from the 'MemTotal' meminfo field"""
        total_kb = self._get_mem_data('MemTotal')
        return total_kb

    def memory_free_kb(self):
        # type: () -> int
        """Free memory (not cache, immediately usable), from the 'MemFree' meminfo field"""
        free_kb = self._get_mem_data('MemFree')
        return free_kb

    def memory_available_kb(self):
        # type: () -> int
        """Memory available to new applications without swapping ('MemAvailable' field)"""
        available_kb = self._get_mem_data('MemAvailable')
        return available_kb

    def vendor(self):
        # type: () -> str
        """Server vendor, read from the 'sys_vendor' DMI entry in sysfs"""
        dmi_field = 'sys_vendor'
        return read_file(HostFacts._dmi_path_list, dmi_field)

    def model(self):
        # type: () -> str
        """Server model information built from the DMI product family and name

        Returns just the product name when the family reads as 'Unknown' and
        a product name is present; otherwise 'family (product)'.
        """
        dmi_paths = HostFacts._dmi_path_list
        family = read_file(dmi_paths, 'product_family')
        product = read_file(dmi_paths, 'product_name')

        family_missing = family == 'Unknown'
        if family_missing and product:
            return f'{product}'
        return f'{family} ({product})'

    def bios_version(self):
        # type: () -> str
        """Server BIOS version, read from the 'bios_version' DMI entry in sysfs"""
        dmi_field = 'bios_version'
        return read_file(HostFacts._dmi_path_list, dmi_field)

    def bios_date(self):
        # type: () -> str
        """Server BIOS date, read from the 'bios_date' DMI entry in sysfs"""
        dmi_field = 'bios_date'
        return read_file(HostFacts._dmi_path_list, dmi_field)

    def chassis_serial(self):
        # type: () -> str
        """Chassis serial number, read from the 'chassis_serial' DMI entry in sysfs"""
        dmi_field = 'chassis_serial'
        return read_file(HostFacts._dmi_path_list, dmi_field)

    def board_serial(self):
        # type: () -> str
        """Mainboard serial number, read from the 'board_serial' DMI entry in sysfs"""
        dmi_field = 'board_serial'
        return read_file(HostFacts._dmi_path_list, dmi_field)

    def product_serial(self):
        # type: () -> str
        """Server serial number, read from the 'product_serial' DMI entry in sysfs"""
        dmi_field = 'product_serial'
        return read_file(HostFacts._dmi_path_list, dmi_field)

    def timestamp(self):
        # type: () -> float
        """Current time expressed as seconds since the Epoch"""
        now = time.time()
        return now

    def system_uptime(self):
        # type: () -> float
        """System uptime in seconds, i.e. the first of the two values in /proc/uptime"""
        fields = read_file(['/proc/uptime']).split()
        # exactly two fields are expected; the second one is not used
        up_secs, _ = fields
        return float(up_secs)

    def kernel_security(self):
        # type: () -> Dict[str, str]
        """Determine the security features enabled in the kernel - SELinux, AppArmor

        /sys/kernel/security/lsm decides which probe runs. The result normally
        carries 'type' and 'description'; for AppArmor the per-mode profile
        counts are merged in as extra keys. When no LSM file exists, or the
        selected probe yields nothing, a 'None' type entry is returned.
        """
        def _fetch_selinux() -> Dict[str, str]:
            """Get the selinux status"""
            security = {}
            try:
                # NOTE(review): the continuation of this call was missing from the
                # file; verbosity=CallVerbosity.QUIET is a reconstruction - confirm.
                out, err, code = call(self.ctx, ['sestatus'],
                                      verbosity=CallVerbosity.QUIET)
                security['type'] = 'SELinux'
                status, mode, policy = '', '', ''
                for line in out.split('\n'):
                    # split on the first ':' only so values containing ':' survive
                    key, _sep, value = line.partition(':')
                    if key == 'SELinux status':
                        status = value.strip()
                    elif key == 'Current mode':
                        mode = value.strip()
                    elif key == 'Loaded policy name':
                        policy = value.strip()
                if status == 'disabled':
                    security['description'] = 'SELinux: Disabled'
                else:
                    security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy)
            except Exception as e:
                # best effort: a failing sestatus must not break fact gathering
                logger.info('unable to get selinux status: %s' % e)
            return security

        def _fetch_apparmor() -> Dict[str, str]:
            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
            security = {}
            for apparmor_path in HostFacts._apparmor_path_list:
                if not os.path.exists(apparmor_path):
                    continue
                security['type'] = 'AppArmor'
                security['description'] = 'AppArmor: Enabled'
                try:
                    profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                except OSError:
                    # profiles unreadable: report AppArmor without the summary
                    return security
                if len(profiles) == 0:
                    return {}

                summary = {}  # type: Dict[str, int]
                for line in profiles.split('\n'):
                    if not line.strip():
                        continue
                    # the mode is the last token; profile names may hold spaces
                    _item, mode = line.rsplit(' ', 1)
                    mode = mode.strip('()')
                    # the first profile seen in a mode counts as 1, not 0
                    summary[mode] = summary.get(mode, 0) + 1
                summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
                security = {**security, **summary}  # type: ignore
                security['description'] += '({})'.format(summary_str)
                return security
            return {}

        ret = {}
        if os.path.exists('/sys/kernel/security/lsm'):
            lsm = read_file(['/sys/kernel/security/lsm']).strip()
            if 'selinux' in lsm:
                ret = _fetch_selinux()
            elif 'apparmor' in lsm:
                ret = _fetch_apparmor()
            else:
                return {
                    'type': 'Unknown',
                    'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
                }

        if ret:
            return ret

        return {
            'type': 'None',
            'description': 'Linux Security Module framework is not available'
        }
    def selinux_enabled(self) -> bool:
        """True when kernel_security reports type 'SELinux' and is not described as disabled."""
        if self.kernel_security['type'] != 'SELinux':
            return False
        return self.kernel_security['description'] != 'SELinux: Disabled'

    def kernel_parameters(self):
        # type: () -> Dict[str, str]
        """Get kernel parameters required/used in Ceph clusters

        Runs `sysctl -a` and returns a dict holding only
        'net.ipv4.ip_nonlocal_bind' when that parameter is reported; the dict
        is empty otherwise.
        """
        wanted = 'net.ipv4.ip_nonlocal_bind'

        out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
        if not out:
            return {}

        # each output line has the form "<name> = <value>"
        reported = {}
        for entry in out.split('\n'):
            pieces = entry.split(' = ')
            reported[pieces[0]] = pieces[-1]

        # return only desired parameters
        if wanted in reported:
            return {wanted: reported[wanted]}
        return {}

    def _process_net_data(tcp_file: str, protocol: str = 'tcp') -> List[int]:
        """Return the local ports of sockets in the listening state.

        tcp_file is a /proc/net/{tcp,tcp6,udp,udp6} style table whose first
        line is a header. protocol selects which state code counts as
        listening. An unknown protocol or a missing file yields an empty list.
        """
        # Connections state documentation
        # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
        # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul )
        listening_state = {
            'tcp': '0A',
            'udp': '07'
        }

        if protocol not in listening_state:
            return []
        wanted_state = listening_state[protocol]

        listening_ports = []  # type: List[int]
        if os.path.exists(tcp_file):
            with open(tcp_file) as f:
                tcp_data = f.readlines()[1:]  # drop the header row

            for con in tcp_data:
                con_info = con.strip().split()
                if len(con_info) < 4:
                    # blank or truncated row: nothing to parse
                    continue
                if con_info[3] == wanted_state:
                    # local address column is "<hex addr>:<hex port>"
                    local_port = int(con_info[1].split(':')[1], 16)
                    listening_ports.append(local_port)

        return listening_ports

    def tcp_ports_used(self) -> List[int]:
        """Ports reported by HostFacts._process_net_data for /proc/net/tcp (tcp protocol)."""
        proc_table = '/proc/net/tcp'
        return HostFacts._process_net_data(proc_table)

    def tcp6_ports_used(self) -> List[int]:
        """Ports reported by HostFacts._process_net_data for /proc/net/tcp6 (tcp protocol)."""
        proc_table = '/proc/net/tcp6'
        return HostFacts._process_net_data(proc_table)

    def udp_ports_used(self) -> List[int]:
        """Ports reported by HostFacts._process_net_data for /proc/net/udp (udp protocol)."""
        proc_table = '/proc/net/udp'
        return HostFacts._process_net_data(proc_table, protocol='udp')

    def udp6_ports_used(self) -> List[int]:
        """Ports reported by HostFacts._process_net_data for /proc/net/udp6 (udp protocol)."""
        proc_table = '/proc/net/udp6'
        return HostFacts._process_net_data(proc_table, protocol='udp')

    def dump(self):
        # type: () -> str
        """Return the attributes of this HostFacts object as json

        Only names that do not start with '_' and whose value is a float,
        int, str, list, dict or tuple are included. The output is indented
        by two spaces with sorted keys.
        """
        exportable = (float, int, str, list, dict, tuple)
        data = {}
        for name in dir(self):
            if name.startswith('_'):
                continue
            # fetch once: an attribute lookup may be expensive to evaluate
            value = getattr(self, name)
            if isinstance(value, exportable):
                data[name] = value
        return json.dumps(data, indent=2, sort_keys=True)


def command_gather_facts(ctx: CephadmContext) -> None:
    """gather_facts is intended to provide host related metadata to the caller

    The HostFacts JSON dump is written to stdout.
    """
    host = HostFacts(ctx)
    # without this the facts are collected but never reach the caller
    print(host.dump())


def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool:
    """Report whether target_name is present in the subsystem's target.wants directory.

    NOTE(review): the argument of os.path.exists() was missing from the file;
    the path <unit_dir>/<subsystem>.target.wants/<target_name> is a
    reconstruction - confirm against the intended layout.
    """
    wants_dir = f'{subsystem}.target.wants'
    return os.path.exists(os.path.join(ctx.unit_dir, wants_dir, target_name))

def target_exists(ctx: CephadmContext) -> bool:
    """Tell whether a 'ceph.target' entry exists directly under ctx.unit_dir."""
    target_path = ctx.unit_dir + '/ceph.target'
    return os.path.exists(target_path)

def command_maintenance(ctx: CephadmContext) -> str:
    """Enter or exit host maintenance by toggling the cluster's systemd target.

    'enter' disables and stops ceph-<fsid>.target; any other action enables
    and starts it. Returns a status string beginning with 'success',
    'skipped' or 'failed'.

    Raises Error when ctx.fsid is not set.

    NOTE(review): the trailing arguments of the call() invocations were
    missing from the file; verbosity=CallVerbosity.DEBUG is a
    reconstruction - confirm.
    """
    if not ctx.fsid:
        raise Error('failed - must pass --fsid to specify cluster')

    target = f'ceph-{ctx.fsid}.target'

    if ctx.maintenance_action.lower() == 'enter':
        logger.info('Requested to place host into maintenance')
        if not systemd_target_state(ctx, target):
            return 'skipped - target already disabled'

        _out, _err, code = call(ctx,
                                ['systemctl', 'disable', target],
                                verbosity=CallVerbosity.DEBUG)
        if code:
            logger.error(f'Failed to disable the {target} target')
            return 'failed - to disable the target'

        # stopping a target waits by default
        _out, _err, code = call(ctx,
                                ['systemctl', 'stop', target],
                                verbosity=CallVerbosity.DEBUG)
        if code:
            logger.error(f'Failed to stop the {target} target')
            return 'failed - to disable the target'
        return f'success - systemd target {target} disabled'

    logger.info('Requested to exit maintenance state')
    # if we've never deployed a daemon on this host there will be no systemd
    # target to disable so attempting a disable will fail. We still need to
    # return success here or host will be permanently stuck in maintenance mode
    # as no daemons can be deployed so no systemd target will ever exist to disable.
    if not target_exists(ctx):
        return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'

    # exit maintenance request
    if not systemd_target_state(ctx, target):
        _out, _err, code = call(ctx,
                                ['systemctl', 'enable', target],
                                verbosity=CallVerbosity.DEBUG)
        if code:
            logger.error(f'Failed to enable the {target} target')
            return 'failed - unable to enable the target'

        # starting a target waits by default
        _out, _err, code = call(ctx,
                                ['systemctl', 'start', target],
                                verbosity=CallVerbosity.DEBUG)
        if code:
            logger.error(f'Failed to start the {target} target')
            return 'failed - unable to start the target'

    return f'success - systemd target {target} enabled and started'


class ArgumentFacade:
    """Collects option defaults through an add_argument() call shape.

    Each call records the 'default' keyword (None when absent) under the
    option's long name, which can later be copied onto a context object.
    """

    def __init__(self) -> None:
        # maps attribute name (long option, dashes -> underscores) to its default
        self.defaults: Dict[str, Any] = {}

    def add_argument(self, *args: Any, **kwargs: Any) -> None:
        """Record the default of the option named by the first positional argument.

        Raises ValueError when no positional argument is given or when the
        first one does not start with '--'.
        """
        if len(args) == 0:
            raise ValueError('expected at least one argument')
        flag = args[0]
        if not flag.startswith('--'):
            raise ValueError(f'expected long option, got: {flag!r}')
        attr = flag[2:].replace('-', '_')
        self.defaults[attr] = kwargs.get('default')

    def apply(self, ctx: CephadmContext) -> None:
        """Set every recorded default as an attribute on ctx."""
        for attr, default in self.defaults.items():
            setattr(ctx, attr, default)

def _add_deploy_parser_args(
    parser_deploy: Union[argparse.ArgumentParser, ArgumentFacade],
) -> None:
    # Declares deploy-related options on ``parser_deploy``, which is either a
    # real argparse parser or an ArgumentFacade. The help strings below show
    # what each option is for (config, keyring, key, OSD uuid, firewalld,
    # tcp ports, port/IP map, reconfig, ptrace, memory request/limit,
    # metadata, extra container and entrypoint arguments).
    # NOTE(review): the ``add_argument(`` openers, most option names and the
    # closing parentheses appear to be missing from this block; only the
    # help= fragments remain, so it is not valid Python as it stands.
    # NOTE(review): the last help string spells "deamon" - presumably "daemon".
        '--config', '-c',
        help='config file for new daemon')
        help='Additional configuration information in JSON format')
        help='keyring for new daemon')
        help='key for new daemon')
        help='OSD uuid, if creating an OSD container')
        help='Do not configure firewalld')
        help='List of tcp ports to open in the host firewall')
        help='JSON dict mapping ports to IPs they need to be bound on'
        help='Reconfigure a previously deployed daemon')
        help='Allow SYS_PTRACE on daemon container')
        help='Container memory request/target'
        help='Container memory hard limit'
        help='JSON dict of additional metadata'
        help='Additional container arguments to apply to daemon'
        help='Additional entrypoint arguments to apply to deamon'

def _get_parser():
    # type: () -> argparse.ArgumentParser
    # Builds and returns the top-level cephadm argument parser. Global options
    # come first, then one sub-parser per sub-command: version, pull,
    # inspect-image, ls, list-networks, adopt, rm-daemon, rm-cluster, run,
    # shell, enter, ceph-volume, zap-osds, unit, logs, bootstrap, deploy, a
    # nested group of orchestrator-driven commands (with its own deploy),
    # check-host, prepare-host, add-repo, rm-repo, install, registry-login,
    # gather-facts, host-maintenance, agent and disk-rescan.
    # NOTE(review): most ``add_argument(`` / ``set_defaults(`` lines, option
    # names and closing parentheses appear to be missing from this block;
    # only help= fragments and the add_parser calls remain, so it is not
    # valid Python as it stands.
    parser = argparse.ArgumentParser(
        description='Bootstrap Ceph daemons with systemd and containers.',
        help='container image. Can also be set via the "CEPHADM_IMAGE" '
        'env var')
        help='use docker instead of podman')
        help='base directory for daemon data')
        help='base directory for daemon logs')
        help='location of logrotate configuration files')
        help='location of sysctl configuration files')
        help='base directory for systemd units')
        '--verbose', '-v',
        help='Show debug-level log messages')
        help='timeout in seconds')
        help='max number of retries')
        '--env', '-e',
        help='set environment variable')
        default=not CONTAINER_INIT,
        help='Do not run podman/docker with `--init`')
        help='Do not run containers with --cgroups=split (currently only relevant when using podman)')

    subparsers = parser.add_subparsers(help='sub-command')

    parser_version = subparsers.add_parser(
        'version', help='get cephadm version')

    parser_pull = subparsers.add_parser(
        'pull', help='pull the default container image')

    parser_inspect_image = subparsers.add_parser(
        'inspect-image', help='inspect local container image')

    parser_ls = subparsers.add_parser(
        'ls', help='list daemon instances on this host')
        help='Do not include daemon status')
        help='base directory for legacy daemon data')

    parser_list_networks = subparsers.add_parser(
        'list-networks', help='list IP networks')

    parser_adopt = subparsers.add_parser(
        'adopt', help='adopt daemon deployed with a different tool')
        '--name', '-n',
        help='daemon name (type.id)')
        help='deployment style (legacy, ...)')
        help='cluster name')
        help='base directory for legacy daemon data')
        help='Additional configuration information in JSON format')
        help='Do not configure firewalld')
        help='do not pull the default image before adopting')
        help='start newly adopted daemon, even if it was not running previously')

    parser_rm_daemon = subparsers.add_parser(
        'rm-daemon', help='remove daemon instance')
        '--name', '-n',
        help='daemon name (type.id)')
        help='List of tcp ports to close in the host firewall')
        help='cluster FSID')
        help='proceed, even though this may destroy valuable data')
        help='delete valuable daemon data instead of making a backup')

    parser_rm_cluster = subparsers.add_parser(
        'rm-cluster', help='remove all daemons for a cluster')
        help='cluster FSID')
        help='proceed, even though this may destroy valuable data')
        help='do not remove log files')
        help='zap OSD devices for this cluster')

    parser_run = subparsers.add_parser(
        'run', help='run a ceph daemon, in a container, in the foreground')
        '--name', '-n',
        help='daemon name (type.id)')
        help='cluster FSID')

    parser_shell = subparsers.add_parser(
        'shell', help='run an interactive shell inside a daemon container')
        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
        help='cluster FSID')
        '--name', '-n',
        help='daemon name (type.id)')
        '--config', '-c',
        help='ceph.conf to pass through to the container')
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
        '--mount', '-m',
        help=('mount a file or directory in the container. '
              'Support multiple mounts. '
              'ie: `--mount /foo /bar:/bar`. '
              'When no destination is passed, default is /mnt'),
        '--env', '-e',
        help='set environment variable')
        # NOTE(review): the help text for --volume repeats the --env wording;
        # confirm the intended description.
        '--volume', '-v',
        help='set environment variable')
        'command', nargs=argparse.REMAINDER,
        help='command (optional)')
        help='dont pass /etc/hosts through to the container')
        help='print, but do not execute, the container command to start the shell')

    parser_enter = subparsers.add_parser(
        'enter', help='run an interactive shell inside a running daemon container')
        help='cluster FSID')
        '--name', '-n',
        help='daemon name (type.id)')
        'command', nargs=argparse.REMAINDER,

    parser_ceph_volume = subparsers.add_parser(
        'ceph-volume', help='run ceph-volume inside a container')
        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
        help='cluster FSID')
        help='JSON file with config and (client.bootstrap-osd) key')
        '--config', '-c',
        help='ceph conf file')
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
        'command', nargs=argparse.REMAINDER,

    parser_zap_osds = subparsers.add_parser(
        'zap-osds', help='zap all OSDs associated with a particular fsid')
        help='cluster FSID')
        help='proceed, even though this may destroy valuable data')

    parser_unit = subparsers.add_parser(
        'unit', help="operate on the daemon's systemd unit")
        help='systemd command (start, stop, restart, enable, disable, ...)')
        help='cluster FSID')
        '--name', '-n',
        help='daemon name (type.id)')

    parser_logs = subparsers.add_parser(
        'logs', help='print journald logs for a daemon container')
        help='cluster FSID')
        '--name', '-n',
        help='daemon name (type.id)')
        'command', nargs='*',
        help='additional journalctl args')

    parser_bootstrap = subparsers.add_parser(
        'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
        '--config', '-c',
        help='ceph conf file to incorporate')
        help='mon id (default: local hostname)')
    group = parser_bootstrap.add_mutually_exclusive_group()
        help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
        help='mon IP')
        help='mgr id (default: randomly generated)')
        help='cluster FSID')
        help='directory to write config, keyring, and pub key files')
        help='location to write keyring file with new cluster admin and mon keys')
        help='location to write conf file to connect to new cluster')
        help="location to write the cluster's public SSH key")
        help='do not create admin label for ceph.conf and client.admin keyring distribution')
        help='skip setup of ssh key on local host')
        help='Initial user for the dashboard')
        help='Initial password for the initial dashboard user')
        help='Port number used to connect with dashboard using SSL')
        help='Dashboard key')
        help='Dashboard certificate')

        help='SSH config')
        help='SSH private key')
        help='SSH public key')
        help='Signed cert for setups using CA signed SSH keys')
        help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
        help='set mon public_network based on bootstrap mon ip')
        help='do not enable the Ceph Dashboard')
        help='stop forced dashboard password change')
        help='do not assimilate and minimize the config file')
        help='do not verify that mon IP is pingable')
        help='do not pull the default image before bootstrapping')
        help='Do not configure firewalld')
        help='allow overwrite of existing --output-* config/keyring/ssh files')
        help='Delete cluster files in case of a failed installation')
        help='allow hostname that is fully-qualified (contains ".")')
        help="allow bootstrap of ceph that doesn't match this version of cephadm")
        help='Do not prepare host')
        help='Set mon and mgr service to `unmanaged`, Do not create the crash service')
        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
        help='Automatically provision centralized logging (promtail, loki)')
        help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')

        help='url for custom registry')
        help='username for custom registry')
        help='password for custom registry')
        help='json file with custom registry login info (URL, Username, Password)')
        help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
        help='adjust configuration defaults to suit a single-host cluster')
        help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')

    parser_deploy = subparsers.add_parser(
        'deploy', help='deploy a daemon')
        help='daemon name (type.id)')
        help='cluster FSID')

    parser_orch = subparsers.add_parser(
    subparsers_orch = parser_orch.add_subparsers(
        title='Orchestrator Driven Commands',
        description='Commands that are typically only run by cephadm mgr module',

    parser_deploy_from = subparsers_orch.add_parser(
        'deploy', help='deploy a daemon')
    # currently cephadm mgr module passes an fsid option on the CLI too
    # TODO: remove this and always source fsid from the JSON?
        help='cluster FSID')
        help='Configuration input source file',

    parser_check_host = subparsers.add_parser(
        'check-host', help='check host configuration')
        help='Check that hostname matches an expected value')

    parser_prepare_host = subparsers.add_parser(
        'prepare-host', help='prepare a host for cephadm use')
        help='Set hostname')

    parser_add_repo = subparsers.add_parser(
        'add-repo', help='configure package repository')
        help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
        help='use specific upstream version (x.y.z)')
        help='use specified bleeding edge build from git branch or tag')
        help='use specified bleeding edge build from git commit')
        help='specify alternative GPG key location')
        help='specify alternative repo location')
    # TODO: proxy?

    parser_rm_repo = subparsers.add_parser(
        'rm-repo', help='remove package repository configuration')

    parser_install = subparsers.add_parser(
        'install', help='install ceph package(s)')
        'packages', nargs='*',

    parser_registry_login = subparsers.add_parser(
        'registry-login', help='log host into authenticated registry')
        help='url for custom registry')
        help='username for custom registry')
        help='password for custom registry')
        help='json file with custom registry login info (URL, Username, Password)')
        help='cluster FSID')

    parser_gather_facts = subparsers.add_parser(
        'gather-facts', help='gather and return host related information (JSON format)')

    parser_maintenance = subparsers.add_parser(
        'host-maintenance', help='Manage the maintenance state of a host')
        help='cluster FSID')
        choices=['enter', 'exit'],
        help='Maintenance action - enter maintenance, or exit maintenance')

    parser_agent = subparsers.add_parser(
        'agent', help='start cephadm agent')
        help='cluster FSID')
        help='daemon id for agent')

    parser_disk_rescan = subparsers.add_parser(
        'disk-rescan', help='rescan all HBAs to detect new/removed devices')

    return parser

def _parse_args(av: List[str]) -> argparse.Namespace:
    """Parse the command line ``av`` and normalise the result.

    A leading '--' in the pass-through ``command`` list is dropped, and the
    container_init / no_container_init pair is forced to be mutually
    exclusive: whichever flag was given decides the other.
    """
    parser = _get_parser()

    args = parser.parse_args(av)
    if 'command' in args and args.command and args.command[0] == '--':
        # the separator itself is not part of the command to run
        args.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    container_init_args = ('--container-init', '--no-container-init')
    if set(container_init_args).issubset(av):
        parser.error('argument %s: not allowed with argument %s' % (container_init_args))
    elif '--container-init' in av:
        args.no_container_init = not args.container_init
    else:
        args.container_init = not args.no_container_init
    assert args.container_init is not args.no_container_init

    return args

def cephadm_init_ctx(args: List[str]) -> CephadmContext:
    """Create a CephadmContext populated from the command line ``args``.

    NOTE(review): the original body ignored ``args``; storing the parsed
    namespace through ctx.set_args() is a reconstruction - confirm that
    CephadmContext exposes set_args().
    """
    ctx = CephadmContext()
    ctx.set_args(_parse_args(args))
    return ctx

def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None:
    # Registers the QUIET level name, rebinds the module-level ``logger`` to
    # the root logger, writes a logrotate file at <ctx.logrotate_dir>/cephadm
    # when none exists, and finishes by logging the invocation at debug level.
    # NOTE(review): this block looks truncated - the docstring and the
    # logrotate string literal are never closed, and the bodies of the
    # LOG_DIR check, the bootstrap/rm-cluster check and the ctx.verbose
    # handler loop are missing. It is not valid Python as it stands.
    """Configure the logging for cephadm as well as updating the system
    to have the expected log dir and logrotate configuration.
    logging.addLevelName(QUIET_LOG_LEVEL, 'QUIET')
    global logger
    if not os.path.exists(LOG_DIR):
    operations = ['bootstrap', 'rm-cluster']
    if any(op in args for op in operations):

    logger = logging.getLogger()

    if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
        with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
            f.write("""# created by cephadm
/var/log/ceph/cephadm.log {
    rotate 7
    su root root

    if ctx.verbose:
        for handler in logger.handlers:
            if handler.name in ['console', 'log_file', 'console_stdout']:
    logger.debug('%s\ncephadm %s' % ('-' * 80, args))

def cephadm_require_root() -> None:
    """Exit if the process is not running as root.

    Writes an error to stderr and exits with status 1 when the effective
    uid is not 0; returns normally otherwise.
    """
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        # reporting alone would let an unprivileged run carry on
        sys.exit(1)

def main() -> None:
    # Command-line entry point: builds the context from sys.argv[1:], reports
    # a missing sub-command on stderr, handles functions flagged with
    # ``_execute_early`` separately, then sets up logging, selects the
    # container engine and calls the handler as ctx.func(ctx). Error and
    # ClusterAlreadyExists raised by the handler are logged as 'ERROR: ...'.
    # NOTE(review): this block looks truncated - the ``try:`` lines, the
    # bodies of several ``if`` statements (including ``if ctx.verbose:``),
    # the list compared against ctx.func and the final use of ``r`` are
    # missing. It is not valid Python as it stands.
    av: List[str] = []
    av = sys.argv[1:]

    ctx = cephadm_init_ctx(av)
    if not ctx.has_function():
        sys.stderr.write('No command specified; pass -h or --help for usage\n')

    if ctx.has_function() and getattr(ctx.func, '_execute_early', False):
        except Error as e:
            if ctx.verbose:
            logger.error('ERROR: %s' % e)

    cephadm_init_logging(ctx, av)
        # podman or docker?
        ctx.container_engine = find_container_engine(ctx)
        if ctx.func not in \
        # command handler
        r = ctx.func(ctx)
    except (Error, ClusterAlreadyExists) as e:
        if ctx.verbose:
        logger.error('ERROR: %s' % e)
    if not r:
        r = 0

if __name__ == '__main__':
    # run the CLI only when executed as a script, not when imported
    main()
