This commit is contained in:
Zack Meier
2026-04-15 15:45:50 -05:00
commit 1d304511b8
613 changed files with 140998 additions and 0 deletions
@@ -0,0 +1,109 @@
import json
import sys
import certifi
import urllib3
import time
import pymsteams
from datetime import date, timedelta
sys.path.insert(0, './classes/')
import cohesityAPI as cohesity
import serviceNowAPI as serviceNow
# Script settings.
# debug=1 only prints diagnostics (no ticket is submitted) and stops inspecting
# after `failureLimit` failing checks; debug=0 runs the ticketing workflow.
debug=0
failureLimit=1
iteration=0
errorCount = 0
# NOTE(review): this webhook URL is a credential-like value committed to source;
# consider loading it from configuration or the environment instead.
itdTeamsMessage = pymsteams.connectorcard("https://ndgov.webhook.office.com/webhookb2/aad89030-8f69-4a4d-a853-c882cbb01a10@2dea0464-da51-4a88-bae2-b3db94bc0c54/IncomingWebhook/d3c07921419c4920810d0c32558c05e3/edcc1502-1ff0-4c3e-9fe2-1ce3f9f7981a")
# Node ids that were already refreshed and evaluated in this run. A refresh
# returns every check of the host, so one evaluation (and at most one ticket)
# per host is enough even when several checks failed.
handledHosts = set()
try:
    # Create the API object for Cohesity
    mdn = cohesity.API('itdmdndpc01.nd.gov')
    mdnToken = mdn.GetAuthToken()
    mdn.UpdateHeaders(mdnToken['accessToken'])
    # Create the API object for ServiceNow
    ticketGenerator = serviceNow.API('northdakota.service-now.com')
    # Get all registered SQL servers from Cohesity
    vms = mdn.GetFilteredRequest("/public/protectionSources/registrationInfo","?environments=kSQL")
    # Loop through all servers and check for health status issues
    for vm in vms['rootNodes']:
        for check in vm['registrationInfo']['registeredAppsInfo'][0]['hostSettingsCheckResults']:
            if check['resultType'] != "kFail":
                continue
            # Every failing check is counted, even when the host was already handled.
            errorCount += 1
            # Debug branch: iteration limit prevents debugging multiple servers at once
            if (debug == 1) and (iteration < failureLimit):
                print("Hostname: " + vm['rootNode']['name'])
                print("NodeID: " + str(vm['rootNode']['id']))
                print(check['resultType'] + ": " + check['userMessage'])
                iteration += 1
                # Try to refresh the source to see if the error goes away
                print("Attempting to refresh " + vm['rootNode']['name'])
                mdn.RefreshSource(vm['rootNode']['id'])
                time.sleep(15)
                # Pull the source registration information again and output
                refreshedData = mdn.GetFilteredRequest("/public/protectionSources/registrationInfo","?ids=" + str(vm['rootNode']['id']))
                print("Refreshed Data")
                for refreshedVM in refreshedData['rootNodes']:
                    persistantError = 0
                    for refreshedCheck in refreshedVM['registrationInfo']['registeredAppsInfo'][0]['hostSettingsCheckResults']:
                        if (refreshedCheck['resultType']=="kFail"):
                            print("Hostname: " + refreshedVM['rootNode']['name'])
                            print("NodeID: " + str(refreshedVM['rootNode']['id']))
                            print(refreshedCheck['resultType'] + ": " + refreshedCheck['userMessage'])
                            persistantError += 1
                            # Debug mode only announces the ticket; nothing is submitted.
                            print("Opening Ticket")
                    if persistantError == 0:
                        print("Refreshing the source " + vm['rootNode']['name'] + " resolved the registration error.")
            # Non-debug operation: refresh once per host and ticket what still fails
            elif (debug == 0) and (vm['rootNode']['id'] not in handledHosts):
                handledHosts.add(vm['rootNode']['id'])
                # Try to refresh the source to see if the error goes away
                mdn.RefreshSource(vm['rootNode']['id'])
                time.sleep(15)
                # Pull the source registration information again
                refreshedData = mdn.GetFilteredRequest("/public/protectionSources/registrationInfo","?ids=" + str(vm['rootNode']['id']))
                # Collect the messages of all checks that are still failing after the refresh
                persistentMessages = [
                    refreshedCheck['userMessage']
                    for refreshedVM in refreshedData['rootNodes']
                    for refreshedCheck in refreshedVM['registrationInfo']['registeredAppsInfo'][0]['hostSettingsCheckResults']
                    if refreshedCheck['resultType'] == "kFail"
                ]
                if persistentMessages:
                    # Open a single ServiceNow ticket per host listing every remaining issue
                    shortDesc = 'Cohesity: SQL Registration Error for ' + vm['rootNode']['name']
                    description = 'Please assign this ticket to the SQL Admins to invesigate the following issue(s)\n' + '\n'.join(persistentMessages)
                    ticketGenerator.submitTicket(shortDesc, description)
                    itdTeamsMessage.text("SQL Registration error found. A ServiceNow ticket has been opened to investigate " + vm['rootNode']['name'])
                    itdTeamsMessage.send()
        # End for loop of all error/warning message checks
    # End for loop of all VMs
    itdTeamsMessage.text("Finished checking Cohesity SQL sources for registration errors, found: " + str(errorCount))
    itdTeamsMessage.send()
except OSError as cohesityError:
    # The exception must be converted explicitly; str + OSError raises TypeError.
    print('Cohesity Error: ' + str(cohesityError))
+30
View File
@@ -0,0 +1,30 @@
#!/usr/bin/python
import sys,argparse,json,time,yaml
import re
def GetArgs(argv=None):
    """Parse the command-line options of the exemption lookup script.

    argv: optional list of argument strings. When None (the default, and what
        existing callers use) argparse reads the process command line.

    Returns the argparse namespace with ``server`` and ``vCenter``; each is a
    string, or None when the option was not supplied.
    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('--server', '-s', type=str, action='store')
    parser.add_argument('--vCenter', '-v', type=str, action='store')
    return parser.parse_args(argv)
args = GetArgs()
# Both options are required by the comparison below; fail with a clear message
# instead of crashing on int(None) or re.search(pattern, None).
if args.server is None or args.vCenter is None:
    sys.exit("Error: Specify a server with -s and a vCenter id with -v.")
try:
    vCenterId = int(args.vCenter)
except ValueError:
    sys.exit("Error: The vCenter id given with -v must be an integer.")

with open("./exemptions.yml", "r") as exemptionFile:
    try:
        # NOTE(review): yaml.load with FullLoader is kept as in the original;
        # prefer yaml.safe_load if this file can ever come from an untrusted source.
        exemptionJobData = yaml.load(exemptionFile, Loader=yaml.FullLoader)
    except yaml.YAMLError as loadError:
        # Stop here: continuing would iterate something that is not the parsed data.
        sys.exit("Unable to load exemptions file. " + str(loadError))
# An empty YAML document parses to None; treat it as "no exemptions".
if exemptionJobData is None:
    exemptionJobData = {}

# Build [parent id, lowercase pattern] pairs from every exemption entry.
exemptServers = []
for i in exemptionJobData:
    for e in exemptionJobData[i]['vms']:
        exemptServers.append([exemptionJobData[i]['id'],e.lower()])

# Patterns are stored lowercased, so the server name is lowercased as well to
# make the match case-insensitive.
serverName = args.server.lower()
for entry in exemptServers:
    parent = entry[0]
    pattern = entry[1]
    if (parent == vCenterId and re.search(pattern, serverName)):
        print("Found match with pattern: " + pattern)
@@ -0,0 +1,62 @@
import sys
# Import NDIT storage classes
sys.path.insert(0, './classes/')
import cohesityAPI as cohesity
import serviceNowAPI as serviceNow
def main():
    """Classify the open, non-informational alerts of every Cohesity cluster.

    Replication alerts and SQL backup alerts are collected into their own
    lists; every other alert is printed as the incident that would be opened.
    The totals of the two collected categories are printed at the end.
    """
    # Instantiate the ServiceNow API
    # NOTE(review): not used below yet; kept because the constructor's side
    # effects are not visible from this file.
    svcNowInterface = serviceNow.API('northdakota.service-now.com')
    # Instantiate Cohesity APIs
    mdnCluster = cohesity.API('itdmdndpc01.nd.gov')
    clusters = [mdnCluster]
    replicationFailures = []
    sqlFailures = []
    backupFailures = []
    undetermined = []
    for cluster in clusters:
        cluster.Authenticate()
        alertID = 0
        openAlerts = cluster.GetFilteredRequest("/public/alerts","?alertStateList=kOpen&maxAlerts=1000")
        # Iterate whenever there is at least one alert; the previous
        # `len(openAlerts) > 1` guard skipped a single open alert.
        for alert in openAlerts or []:
            if alert['severity'] == "kInfo":
                continue
            if alert['alertCategory'] == "kRemoteReplication":
                replicationFailures.append(alert)
                continue
            if alert['alertCategory'] == "kBackupRestore":
                # A SQL backup alert is recorded once and skipped, the same
                # way replication alerts are handled above.
                if any(item['value'] == "kSQL" for item in alert['propertyList']):
                    sqlFailures.append(alert)
                    continue
            # Open a unique ticket for every remaining alert
            shortDescription = "Backup Failure on " + str(cluster.GetClusterName()) + ": " + alert['alertDocument']['alertName']
            longDescription = alert['alertDocument']['alertCause']
            print("Opening incident for Cohesity alert:" + str(alertID))
            print("\tShort Description: " + shortDescription)
            print("\tLong Description: " + longDescription)
            alertID += 1
        #End for alert in alerts
    #End for cluster in clusters
    print("Total replication failures: " + str(len(replicationFailures)))
    print("Total SQL Backup failures: " + str(len(sqlFailures)))
#End main()
# Run the program
main()
@@ -0,0 +1,372 @@
import sys,argparse,json,time
sys.path.insert(0, './classes/')
import cohesityAPI as cohesity
# Shared module state read and written by the functions below through globals():
#   tagid   - id of the matching tag once it has been located, otherwise None
#   tagName - name of the tag being searched for
tagid = tagName = None
# Begin Functions
def GetArgs(argv=None):
    """Parse the command-line options of the tag include/exclude script.

    argv: optional list of argument strings. When None (the default, and what
        existing callers use) argparse reads the process command line.

    Returns the argparse namespace with ``cluster``, ``vcenter``, ``tag``,
    ``group``, ``action`` and ``list``; each is a string, or None when the
    option was not supplied. Value validation is done by the caller.
    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('--cluster', '-c', type=str, action='store')
    parser.add_argument('--vcenter', '-v', type=str, action='store')
    parser.add_argument('--tag', '-t', type=str, action='store')
    parser.add_argument('--group', '-g', type=str, action='store')
    parser.add_argument('--action', '-a', type=str, action='store')
    parser.add_argument('--list', '-l', type=str, action='store')
    return parser.parse_args(argv)
def GetTagID(sources):
    """Search protection source data for the tag named by the global tagName.

    sources: a single protection-source dict (what the script passes), or an
        iterable of such dicts.

    The search is delegated to GetNode, which stores the id in the global
    ``tagid``. The id (or None when the tag was not found) is also returned so
    callers do not have to read the global.
    """
    globalList = globals()
    # Iterating a dict yields its keys, which previously re-ran the full
    # search of the same tree once per key when the tag did not exist.
    if isinstance(sources, dict):
        candidates = [sources]
    else:
        candidates = sources
    for source in candidates:
        if globalList['tagid'] is not None:
            break
        GetNode(source)
    return globalList['tagid']
def GetNode(record):
    """Recursively search a protection-source tree for the wanted VMware tag.

    record: a dict that may carry a 'protectionSource' entry and a 'nodes'
        list of child records of the same shape.

    A record matches when its 'vmWareProtectionSource' has type "kTag" and a
    name equal to the global ``tagName``. On a match the source id is stored in
    the global ``tagid`` and the search stops. Returns the value of ``tagid``
    after the search (None when nothing matched).
    """
    globalList = globals()
    source = record.get('protectionSource') or {}
    vmware = source.get('vmWareProtectionSource') or {}
    # .get() keeps tag-less or partially populated records from raising KeyError.
    if vmware.get('type') == "kTag":
        # Tag elements have been found, look for the correct tag by name
        if "name" in vmware and vmware['name'] == globalList['tagName']:
            globalList['tagid'] = source['id']
    # Recurse through all 'nodes' elements until the tag has been found
    for node in record.get('nodes') or []:
        if globalList['tagid'] is not None:
            break
        GetNode(node)
    return globalList['tagid']
def UpdateProtectionGroupExcludes(group, action):
    """Return the exclusion tag ids of a protection group after add/remove.

    group: protection job dict; 'excludeVmTagIds' (a list of lists of tag ids)
        is optional, 'name' is used for the printed messages.
    action: 'add' or 'remove'; any other value leaves the ids unchanged.

    The tag acted on is the global ``tagid`` (printed as the global
    ``tagName``). The group itself is not modified; a new flat list is
    returned and a message describing the outcome is printed.
    """
    globalList = globals()
    tagId = globalList['tagid']
    # NOTE(review): every sub-list is flattened into one list, as before;
    # confirm that merging the sub-lists is what the API caller wants.
    tagArray = [tag for index in group.get('excludeVmTagIds') or [] for tag in index]
    isExcluded = tagId in tagArray
    # Add the tag to the exclusion
    if action == 'add':
        if isExcluded:
            print(globalList['tagName'] + " tag is already excluded for " + group['name'])
        else:
            print("Adding exclusion tag '" + globalList['tagName'] + "' to group " + group['name'])
            tagArray.append(tagId)
    # Remove the tag from the exclusion
    elif action == 'remove':
        if isExcluded:
            print("Removing exclusion tag '" + globalList['tagName'] + "' from group " + group['name'])
            # Drop every occurrence: flattening can repeat the tag, and
            # list.remove() would leave the later copies in place.
            tagArray = [tag for tag in tagArray if tag != tagId]
        else:
            print(globalList['tagName'] + " tag is not excluded for " + group['name'])
    return tagArray
def UpdateProtectionGroupIncludes(group, action):
    """Return the inclusion tag ids of a protection group after add/remove.

    group: protection job dict; 'vmTagIds' (a list of lists of tag ids) is
        optional, 'name' is used for the printed messages.
    action: 'add' or 'remove'; any other value leaves the ids unchanged.

    The tag acted on is the global ``tagid`` (printed as the global
    ``tagName``). The group itself is not modified; a new flat list is
    returned and a message describing the outcome is printed.
    """
    globalList = globals()
    tagId = globalList['tagid']
    # NOTE(review): every sub-list is flattened into one list, as before;
    # confirm that merging the sub-lists is what the API caller wants.
    tagArray = [tag for index in group.get('vmTagIds') or [] for tag in index]
    isIncluded = tagId in tagArray
    # Add the tag to the inclusion
    if action == 'add':
        if isIncluded:
            print(globalList['tagName'] + " tag is already included for " + group['name'])
        else:
            print("Adding inclusion tag '" + globalList['tagName'] + "' to group " + group['name'])
            tagArray.append(tagId)
    # Remove the tag from the inclusion
    elif action == 'remove':
        if isIncluded:
            print("Removing inclusion tag '" + globalList['tagName'] + "' from group " + group['name'])
            # Drop every occurrence: flattening can repeat the tag, and
            # list.remove() would leave the later copies in place.
            tagArray = [tag for tag in tagArray if tag != tagId]
        else:
            print(globalList['tagName'] + " tag is not included for " + group['name'])
    return tagArray
# Begin Main


def _ApplyTagChange(job, listName, action):
    """Write the updated include or exclude tag ids back into the job dict."""
    if listName == 'exclude':
        excludeTagIds = UpdateProtectionGroupExcludes(job, action)
        # Replace the first sub-list when one exists, otherwise create it.
        if job.get("excludeVmTagIds"):
            job['excludeVmTagIds'][0] = excludeTagIds
        else:
            job.update({"excludeVmTagIds": [ excludeTagIds ]})
    else:
        includeTagIds = UpdateProtectionGroupIncludes(job, action)
        # The include list is read from 'vmTagIds', so it is written back under
        # the same key (it was previously created as 'includeVmTagIds').
        if job.get("vmTagIds"):
            job['vmTagIds'][0] = includeTagIds
        else:
            job.update({"vmTagIds": [ includeTagIds ]})


def _ApplyTemporaryJobFixups(api, job):
    """Temp code: reset start time, indexing policy and SLA settings on the job."""
    # Temp code to grab start time from previous runs
    run = api.GetFilteredRequest("/public/protectionRuns","?numRuns=10&jobId=" + str(job['id']))
    try:
        startTime = time.localtime(round(run[7]['backupRun']['stats']['startTimeUsecs']/1000000,0))
        hour = startTime.tm_hour
        minute = startTime.tm_min
    except (IndexError, KeyError, TypeError):
        # Fewer than eight runs, or a run without start statistics: use 21:00.
        hour = 21
        minute = 0
    job['startTime']['hour'] = hour
    job['startTime']['minute'] = minute
    # Temp code to build indexing
    job.update({"indexingPolicy":{
        "disableIndexing": False,
        "allowPrefixes": [
            "/"
        ],
        "denyPrefixes": [
            "/$Recycle.Bin",
            "/Windows",
            "/ProgramData",
            "/System Volume Information",
            "/Users/*/AppData",
            "/Recovery",
            "/usr",
            "/sys",
            "/proc",
            "/lib",
            "/grub",
            "/grub2",
            "/opt/splunk",
            "/splunk",
        ]
    }})
    # Temp code to set SLA
    job.update({"incrementalProtectionSlaTimeMins": 480})
    job.update({"fullProtectionSlaTimeMins": 480})
    job.update({"abortInBlackoutPeriod": False})
    job.update({"quiesce": False})
    job.update({"qosType": "kBackupHDD"})
    job.update({"environmentParameters":{
        "vmwareParameters": {
            "fallbackToCrashConsistent": False,
            "skipPhysicalRdmDisks": False
        }
    }})
    job.update({"cloudParameters":{"failoverToCloud": False}})
    job.update({"leverageStorageSnapshots": False})
    job.update({"leverageStorageSnapshotsForHyperFlex": False})
    job.update({"description": ""})


globalList = globals()
sourceFound = False
vCenter = None
vCenterID = None
tagIds = []
args = GetArgs()

# Validate arguments & assign variables if necessary
if not args.cluster:
    sys.exit("Error: Specify a Cohesity cluster fqdn with -c parameter.")
if not args.vcenter:
    sys.exit("Error: Specify a vCenter with -v parameter")
if not args.tag:
    sys.exit("Error: Specify a tag with the -t parameter.")
if not args.action:
    sys.exit("Error: Specify an action [add|remove] with -a parameter.")
elif args.action != 'add' and args.action != 'remove':
    sys.exit("Error: Use only 'add' or 'remove' with -a parameter.")
if not args.list:
    sys.exit("Error: Specify a list [include|exclude] with -l parameter.")
elif args.list != 'exclude' and args.list != 'include':
    sys.exit("Error: Use only 'include' or 'exclude' with -l parameter.")
# The tag helpers read the tag name from this global.
globalList['tagName'] = args.tag

# Connect to the Cohesity cluster
cluster = cohesity.API(args.cluster)
authToken = cluster.GetAuthToken()
cluster.UpdateHeaders(authToken['accessToken'])

# Locate the correct vCenter and capture the id, this will be useful to validate child objects
vmSources = cluster.GetFilteredRequest("/public/protectionSources", "?environments=kVMware")
for source in vmSources:
    if source['protectionSource']['name'] == args.vcenter:
        sourceFound = True
        vCenter = source
        vCenterID = source['protectionSource']['id']
        break

# The vCenter we are looking for was not in the list of kVMware environments, stop processing.
if sourceFound == False:
    sys.exit("Error: " + args.vcenter + " is not registered to " + args.cluster)

# Look up the tag id based on the tagName and set a global variable when found
# IMPROVEMENT TO GET AWAY FROM GLOBAL VAR
GetTagID(vCenter)
if globalList['tagid'] is None:
    sys.exit("Error: The tag '" + args.tag + "' does not exist in " + vCenter['protectionSource']['name'])

if args.group:
    # Replace '@' symbol with '%40' for URL encoding in the REST API
    protectionGroup = args.group.replace("@","%40")
    job = cluster.GetFilteredRequest("/public/protectionJobs", "?names=" + protectionGroup)
    # An unknown group name returns no job; report it instead of failing on job[0].
    if not job:
        sys.exit("Error: Protection group '" + args.group + "' was not found on " + args.cluster)
    print(json.dumps(job[0], indent=4))
    if job[0]['parentSourceId'] != vCenterID:
        sys.exit(job[0]['name'] + " does not belong to source " + vCenter['protectionSource']['name'])
    _ApplyTagChange(job[0], args.list, args.action)
    # FIX the screw up
    _ApplyTemporaryJobFixups(cluster, job[0])
    # End Fix
    # Take action on group(s)
    resp = cluster.UpdateVMProtectionJob(job[0])
    print(resp.content)
else:
    vmJobs = cluster.GetFilteredRequest("/public/protectionJobs", "?environments=kVMware")
    # Keep one entry per job id
    uniqueJobs = {job['id'] : job for job in vmJobs}.values()
    for job in uniqueJobs:
        # If the job is paused, skip
        if job.get('isPaused'):
            continue
        # If the job is marked for deletion, skip
        if 'isDeleted' in job:
            continue
        # If the job does not belong to this vCenter, skip
        if job['parentSourceId'] != vCenterID:
            continue
        _ApplyTagChange(job, args.list, args.action)
        # FIX the screw up
        _ApplyTemporaryJobFixups(cluster, job)
        # Take action on group(s)
        print("Updating: " + job['name'])
        resp = cluster.UpdateVMProtectionJob(job)
        print(resp.content)
+89
View File
@@ -0,0 +1,89 @@
import sys,argparse,json,time
sys.path.insert(0, './classes/')
import cohesityAPI as cohesity
def GetArgs(argv=None):
    """Parse the command-line options of the job pause/resume script.

    argv: optional list of argument strings. When None (the default, and what
        existing callers use) argparse reads the process command line.

    Returns the argparse namespace with ``cluster``, ``vcenter`` and ``job``
    (strings, or None when not supplied) and the boolean flags ``help``,
    ``pause`` and ``resume``.
    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('--cluster', '-c', type=str, action='store')
    parser.add_argument('--vcenter', '-v', type=str, action='store')
    parser.add_argument('--job', '-j', type=str, action='store')
    # The script reads args.pause and args.resume, so both flags must exist.
    parser.add_argument('--pause', '-p', action='store_true')
    parser.add_argument('--resume', '-r', action='store_true')
    parser.add_argument('--help', '-h', action='store_true')
    return parser.parse_args(argv)
def PrintHelp():
    """Print the usage text of this script to standard output."""
    print("\nBasic Usage:")
    # NOTE(review): this line names ITD_SHAREPOINT_* while the example below
    # exports COHESITY_*; confirm which variables the cohesityAPI class reads.
    print("Set OS environment variables ITD_SHAREPOINT_PASS and ITD_SHAREPOINT_USER")
    print("\nExample:")
    # Plain double quotes, matching the COHESITY_PASS line; the raw string used
    # before printed literal backslashes in front of the quotes.
    print('$ export COHESITY_USER="NDGOV\\jDoe"')
    print("$ export COHESITY_PASS=\"1Lik3Jane\"")
    print("\n python3 <SCRIPT_NAME> -c cluster1.domain.tld [ -v vCenter.domain.tld ] [ -j protectionJobName ]")
    print("\t -c FQDN of Cohesity cluster address")
    # The previous text referred to a -t option that this script does not define.
    print("\t -v FQDN of vCenter Server")
    print("\t -j Cohesity job name")
    print("\t -h Prints this help message")
args = GetArgs()
if args.help:
    PrintHelp()
    # Nothing else to do once the help text has been shown.
    sys.exit(0)
if not args.cluster:
    sys.exit("Error: Specify Cohesity cluster fqdn with -c parameter.")

# getattr keeps the script working when the parser does not define these flags.
pauseRequested = getattr(args, 'pause', False)
resumeRequested = getattr(args, 'resume', False)

cluster = cohesity.API(args.cluster)
authToken = cluster.GetAuthToken()
cluster.UpdateHeaders(authToken['accessToken'])

# Resolve the vCenter name to its protection source id
vCenter = None
vCenterID = None
if args.vcenter:
    vmSources = cluster.GetFilteredRequest("/public/protectionSources", "?environments=kVMware")
    for source in vmSources:
        if source['protectionSource']['name'] == args.vcenter:
            vCenter = source
            vCenterID = source['protectionSource']['id']
            break
    if vCenterID is None:
        sys.exit("Error: " + args.vcenter + " is not registered to " + args.cluster)

if args.job:
    # Get the details of a single job
    # Replace '@' symbol with '%40' for URL encoding in the REST API
    vmJobs = cluster.GetFilteredRequest("/public/protectionJobs", "?names=" + args.job.replace("@","%40"))
else:
    # Get the details of all jobs under a vCenter
    vmJobs = cluster.GetFilteredRequest("/public/protectionJobs", "?environments=kVMware")
# Keep one entry per job id
uniqueJobs = {job['id'] : job for job in vmJobs}.values()

# Pausing and resuming are limited to the jobs of one vCenter, so its id is
# required; without it the comparisons below previously raised NameError.
if (pauseRequested or resumeRequested) and vCenterID is None:
    sys.exit("Error: Specify a vCenter with -v parameter when pausing or resuming jobs.")

# Do stuff with the JSON data
if pauseRequested:
    pausedJobs = []
    for job in uniqueJobs:
        if job['parentSourceId'] != vCenterID:
            print("not vcenter")
            continue
        elif "isDeleted" in job:
            continue
        elif "isPaused" in job:
            if job['isPaused']:
                # Remember jobs that were already paused so they can be reported
                pausedJobs.append(job)
                continue
        print("Pausing: " + job['name'])
        resp = cluster.PauseJob(job['id'])
    print("The following jobs were previously paused before this operation.")
    for pJob in pausedJobs:
        print(pJob['name'])

if resumeRequested:
    for job in uniqueJobs:
        if job['parentSourceId'] != vCenterID:
            continue
        elif "isDeleted" in job:
            continue
        print("Resuming: " + job['name'])
        resp = cluster.ResumeJob(job['id'])
@@ -0,0 +1,21 @@
import sys,argparse,json,time
sys.path.insert(0, './classes/')
import cohesityAPI as cohesity
mdn = cohesity.API('itdmdndpc01.nd.gov')
mdnToken = mdn.GetAuthToken()
mdn.UpdateHeaders(mdnToken['accessToken'])

# Block Update all SQL jobs with new SQL policy
sqlPolicyName = mdn.GetFilteredRequest("/public/protectionPolicies", "?names=ITD-SQL")
# Stop with a clear message when the policy lookup returns nothing, instead of
# failing on sqlPolicyName[0] inside the loop.
if not sqlPolicyName:
    sys.exit("Error: Protection policy 'ITD-SQL' was not found.")
sqlProtectionJobs = mdn.GetFilteredRequest("/public/protectionJobs", "?environments=kSQL")
# Keep one entry per job id
uniqueJobs = {job['id'] : job for job in sqlProtectionJobs}.values()
# bool('false') is True because any non-empty string is truthy; the literal
# False is what was meant.
# NOTE(review): this submits every job as not paused, including jobs that are
# currently paused; confirm that is intended.
isPaused = False

for job in uniqueJobs:
    resp = mdn.UpdateProtectionJob(job['sourceIds'],job['parentSourceId'], job['name'], sqlPolicyName[0]['id'], job['viewBoxId'], job['id'], isPaused)
    print(resp.content)
#End SQL Policy Update Block