Main commands to check
Cluster status
$CRS_HOME/bin/crs_stat -t // $CRS_HOME/bin/crs_stat
$CRS_HOME/bin/crsctl check crs (checks Oracle High Availability Services, Cluster Ready Services, Cluster Synchronization Services and Event Manager)
$CRS_HOME/bin/crsctl check cluster // 11gR2: crsctl check cluster -all // crsctl check cluster -n nodename
Check cluster name
$CRS_HOME/bin/cemutlo -n
Check crs version
$CRS_HOME/bin/crsctl query crs softwareversion
$CRS_HOME/bin/crsctl query crs activeversion
No. of nodes configured
$CRS_HOME/bin/olsnodes -n -p -i
Nodeapps services on each node (VIP, ONS, GSD, Listener)
srvctl status nodeapps -n nodename // srvctl config nodeapps -n nodename
ASM running on node
srvctl status asm -n nodename // srvctl config asm -n nodename
Database running on ASM
srvctl status database -d dbname // srvctl config database -d dbname (shows oracle home)
Services running on database
srvctl status service -d dbname // srvctl config service -d dbname
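To sweep every database registered in the cluster in one go, the srvctl commands above can be looped; a minimal sketch, assuming srvctl is in the PATH and that srvctl config database (with no arguments) lists the configured database names one per line:
#!/bin/sh
# Loop over every database registered with the cluster and show its
# instance and service status (sketch only)
for db in $(srvctl config database); do
  echo "=== Database: $db ==="
  srvctl status database -d "$db"
  srvctl status service -d "$db"
done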
root> crsctl check crs
root> crsctl stat res -t
root> ocrcheck
root> ocrcheck -config
root> crsctl query css votedisk
crsctl stat res -t -w "((TARGET != ONLINE) or (STATE != ONLINE)) and (TYPE != ora.gsd)"
crsctl status res | grep -v "^$" | awk -F "=" 'BEGIN {print " "} {printf("%s", NR%4 ? $2"|" : $2"\n")}' | sed -e 's/ *, /,/g' -e 's/, /,/g' | \
awk -F "|" 'BEGIN {printf "%-40s%-35s%-20s%-50s\n","Resource Name","Resource Type","Target","State"} {split($3,trg,","); split($4,st,",")} {for (i in trg) {printf "%-40s%-35s%-20s%-50s\n",$1,$2,trg[i],st[i]}}'
Clusterware not starting
Check in /etc/inittab that the following lines are not commented out:
h1:3:respawn:/sbin/init.d/init.evmd run >/dev/null 2>&1 </dev/null
h2:3:respawn:/sbin/init.d/init.cssd fatal >/dev/null 2>&1 </dev/null
h3:3:respawn:/sbin/init.d/init.crsd run >/dev/null 2>&1 </dev/null
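A quick way to confirm the entries are active is to grep for them while filtering out commented lines (the h1/h2/h3 tags shown above may differ on your system):
# Each of the three init.* entries should appear in the output; an empty
# result means the entry is missing or commented out
grep -E "init\.(evmd|cssd|crsd)" /etc/inittab | grep -v "^#"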
Run the following commands:
/etc/init.d/init.crs enable
/etc/init.d/init.crs start
$ORA_CRS_HOME/bin/crsctl start crs
Check the following log files:
$ORA_CRS_HOME/crs/log
$ORA_CRS_HOME/evm/log
$ORA_CRS_HOME/css/log
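To get a quick look at recent activity in those directories, a loop such as the one below can help; it is only a sketch and assumes the log files end in .log (log locations and names vary by release):
for d in $ORA_CRS_HOME/crs/log $ORA_CRS_HOME/evm/log $ORA_CRS_HOME/css/log; do
  echo "=== $d ==="
  ls -ltr "$d" | tail -5            # most recently modified files
  tail -20 "$d"/*.log 2>/dev/null   # last lines of each log, if present
done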
Troubleshooting Clusterware (cluvfy components: nodeapp, nodereach, nodecon, admprv, clu, ocr)
1. Connectivity from one particular node or all
cluvfy comp nodereach -n node_list [ -srcnode node ] [-verbose]
cluvfy comp nodecon -n all [-verbose]
2. Connectivity through a specific network interface
cluvfy comp nodecon -n node_list [ -i interface_list ] [-verbose]
cluvfy comp nodecon -n node1,node2,node3 -i eth0 -verbose
3. Verify user equivalence for all of the nodes
cluvfy comp admprv -n all -o user_equiv -verbose
4. Verify node apps (VIP, ONS and GSD) on all of the nodes
cluvfy comp nodeapp [ -n node_list ] [-verbose]
5. Installation Verifications
cluvfy stage -pre crsinst -n node_list [ -c ocr_location ] [ -q voting_disk ] [ -osdba osdba_group ] [ -orainv orainventory_group ] [-verbose]
cluvfy stage -pre dbinst -n node_list [ -osdba osdba_group ] [ -orainv orainventory_group ] [-verbose]
cluvfy stage -post crsinst -n node_list [-verbose]
6. Cluster Integrity Verifications : cluvfy comp clu
7. OCR Integrity Verifications : cluvfy comp ocr [ -n node_list ] [-verbose]
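The individual cluvfy checks above can be run as one batch and logged; a minimal sketch, assuming the comma-separated node list is passed as the first argument (e.g. node1,node2) and the script name is illustrative:
#!/bin/sh
# Usage: ./run_cluvfy_checks.sh node1,node2
NODES=$1
LOG=/tmp/cluvfy_$(date +%Y%m%d%H%M%S).log
{
  cluvfy comp nodereach -n "$NODES" -verbose
  cluvfy comp nodecon -n "$NODES" -verbose
  cluvfy comp admprv -n "$NODES" -o user_equiv -verbose
  cluvfy comp nodeapp -n "$NODES" -verbose
  cluvfy comp clu -n "$NODES" -verbose
  cluvfy comp ocr -n "$NODES" -verbose
} > "$LOG" 2>&1
echo "Output saved to $LOG"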
Data Collection CRS Diagnostics
TFA Collector is installed under the GI HOME and ships with GI 11.2.0.4 and later
$GI_HOME/tfa/bin/tfactl diagcollect -all
$GI_HOME/tfa/bin/tfactl diagcollect -from "MMM/dd/yyyy hh:mm:ss" -to "MMM/dd/yyyy hh:mm:ss"
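To collect the last few hours without typing timestamps by hand, the -from/-to values can be generated with date; a sketch, assuming GNU date (for the -d option) and the MMM/dd/yyyy hh:mm:ss format shown above:
# Collect roughly the last 4 hours of diagnostics (GNU date assumed)
FROM=$(date -d '4 hours ago' +"%b/%d/%Y %H:%M:%S")
TO=$(date +"%b/%d/%Y %H:%M:%S")
$GI_HOME/tfa/bin/tfactl diagcollect -from "$FROM" -to "$TO"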
Note: TFA should be used wherever possible instead of diagcollection.sh
Pre Grid Infrastructure (i.e. 10g)
$OCH/bin/diagcollection.pl -crshome=$OCH --collect
Or
Where Grid Infrastructure is installed
$GRID_HOME/bin/diagcollection.sh
$GRID_HOME/bin/diagcollection.sh --adr /tmp/collect -beforetime 20120218100000 -aftertime 20120218050000
$GRID_HOME/bin/diagcollection.sh --chmos --incidenttime 02/18/201205:00:00 --incidentduration 05:00 (From 11.2.0.2 onwards )
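Since TFA is preferred where it exists (see the note above), a collection step can fall back to diagcollection.sh only when tfactl is not installed; a sketch, assuming $GI_HOME and $GRID_HOME point at the Grid home:
# Prefer TFA when present, otherwise fall back to diagcollection.sh
if [ -x "$GI_HOME/tfa/bin/tfactl" ]; then
  "$GI_HOME/tfa/bin/tfactl" diagcollect -all
else
  "$GRID_HOME/bin/diagcollection.sh"
fi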
Cluster health
$ script /tmp/cluvfy.log
$ $GRID_HOME/bin/cluvfy stage -pre crsinst -n <node1, node2...> -verbose
$ $GRID_HOME/bin/cluvfy stage -post crsinst -n all -verbose
$ exit
Before Clusterware upgrade (10g)
Check in /etc/inittab that the following lines are commented out:
#h1:3:respawn:/sbin/init.d/init.evmd run >/dev/null 2>&1 </dev/null
#h2:3:respawn:/sbin/init.d/init.cssd fatal >/dev/null 2>&1 </dev/null
#h3:3:respawn:/sbin/init.d/init.crsd run >/dev/null 2>&1 </dev/null
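A quick sanity check that all three entries are really commented out before continuing (should print 3; the h1/h2/h3 tags may differ on your system):
grep -cE "^#.*init\.(evmd|cssd|crsd)" /etc/inittab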
Run the following commands:
$ /etc/init.d/init.crs stop
$ ps -ef|egrep -i "crs|css"
root> preupdate.sh -crshome /apps/oracle/product/CRS -crsuser oracle
$ ./runInstaller