Use Cognos 8.3 + Oracle 11g for business reporting

Part I: Configuring Cognos to use OpenLDAP Authentication

Environment: Cognos 8.3 on Windows 2003 server
Objective: use external OpenLDAP server on Linux for Cognos web authentication.


1    OpenLDAP Installation
Download the latest stable release of OpenLDAP.
make test
make install

2. configuring /usr/local/etc/openldap/slapd.conf

[root@ldap1 scripts]# cat ../slapd.conf| grep -v ^#  | grep -v ^$
include         /usr/local/etc/openldap/schema/core.schema
pidfile         /usr/local/var/run/slapd.pid
argsfile        /usr/local/var/run/slapd.args
include /usr/local/etc/openldap/schema/corba.schema
include /usr/local/etc/openldap/schema/cosine.schema
include /usr/local/etc/openldap/schema/inetorgperson.schema
include /usr/local/etc/openldap/schema/java.schema
include /usr/local/etc/openldap/schema/misc.schema
include /usr/local/etc/openldap/schema/nis.schema
include /usr/local/etc/openldap/schema/openldap.schema

# assume your company name is 'dev'. Firstly create database for your own company
database        bdb
suffix          "dc=dev,dc=com"
rootdn          "cn=root,dc=dev,dc=com"
rootpw          secret
directory       /usr/local/var/openldap-data-dev
index   objectClass     eq

# access control part
access to attr=userPassword
        by self write
        by anonymous auth
        by dn="cn=Manager,dc=dev,dc=com" write
        by * none
access to *
        by dn="cn=Manager,dc=dev,dc=com" write
        by users read
database        bdb
suffix          "dc=client1,dc=com"
rootdn          "cn=Manager,dc=client1,dc=com"
rootpw          {SSHA}gFuMY3m3Cb0P4px3TNuf4o7sG30jHcwgi3urEA==
directory       /usr/local/var/openldap-data-client1
index   objectClass     eq

# access control part
access to attr=userPassword
        by self write
        by anonymous auth
        by dn="cn=Manager,dc=client1,dc=com" write
        by * none
access to *
        by dn="cn=Manager,dc=client1,dc=com" write
        by users read

a. use slappasswd to generate rootpw, the default is to use SSHA encryption.
b. create directories /usr/local/var/openldap-data-dev and /usr/local/var/openldap-data-client1 first before restarting slapd.
c. for configuring slapd.conf, please refer to the OpenLDAP Administrator's Guide.

3. source file to import

[root@ldap1 scripts]# more batchuser.ldif
# entry-id: 1
dn: dc=dev,dc=com
dc: dev
objectClass: top
objectClass: domain

# entry-id: 2
dn: ou=Special Users,dc=dev,dc=com
objectClass: top
objectClass: organizationalUnit
ou: Special Users
description: Special Administrative Accounts

# entry-id: 3
dn: ou=People,dc=dev,dc=com
objectClass: top
objectClass: organizationalunit
ou: People

# entry-id: 4
dn: ou=Groups,dc=dev,dc=com
objectClass: top
objectClass: organizationalunit
ou: Groups

dn: cn=admin,ou=Groups,dc=dev,dc=com
cn: admin
objectClass: top
objectClass: groupofuniquenames
ou: Groups
uniqueMember: uid=jephe,ou=People,dc=dev,dc=com
uniqueMember: uid=zhitan,ou=People,dc=dev,dc=com

# entry-id: 5
dn: uid=zhitan,ou=People,dc=dev,dc=com
uid: zhitan
objectClass: inetorgperson
givenName: Zhitan
sn: Wu 
cn: Zhitan Wu
userPassword: {SSHA}h7HBuirlNhYJl1TwVEtKqJlJVCb53cqm

# entry-id: 6
dn: uid=jephe,ou=People,dc=dev,dc=com
uid: jephe
objectClass: inetorgperson
givenName: Wu
sn: Jephe
cn: Jephe Wu
userPassword: {SSHA}EWo+m4UrXE0yjgefxa4yJ54hz845B3xz

4. commands used

4.1 ldap search everything
ldapsearch -x -b 'dc=dev,dc=com' '(objectclass=*)'

4.2  ldap add user

ldapadd -x -D 'cn=Manager,dc=dev,dc=com' -W -f jephe.ldif
Enter LDAP Password:
adding new entry "uid=jephe,ou=People,dc=dev,dc=com"

[root@app1 scripts]# more jephe.ldif
# entry-id: 7
dn: uid=jephe,ou=People,dc=dev,dc=com
uid: jephe
givenName: Wu
objectClass: inetorgperson
sn: Jephe
cn: Jephe Wu
userPassword: {SSHA}hfJXE/3c8zK42rD6FL7mZB6SxG1DA2o+

4.3  ldap delete user
ldapdelete -x -D 'cn=Manager,dc=dev,dc=com' -W uid=jephe,ou=people,dc=dev,dc=com
Enter LDAP Password:

4.4 ldap change password for user
a. ldappasswd -D "cn=manager,dc=dev,dc=com" -x -W -s abcd1235 "uid=jephe,ou=people,dc=dev,dc=com"

b. ldappasswd -D "cn=manager,dc=dev,dc=com" -x -W  -S "uid=jephe,ou=people,dc=dev,dc=com"
New password: (new password for user)
Re-enter new password: (new password for user again)
Enter LDAP Password: manager password
Result: Success (0)

4.5  ldap add group
ldapadd -x -D 'cn=Manager,dc=dev,dc=com' -W -f group1.ldif
Enter LDAP Password:
adding new entry "cn=testgroup,ou=Groups,dc=dev,dc=com"

[root@app1 scripts]# more group1.ldif
dn: cn=testgroup,ou=Groups,dc=dev,dc=com
cn: testgroup
objectClass: top
objectClass: groupofuniquenames
ou: Groups
uniqueMember: uid=jephe,ou=People,dc=dev,dc=com
uniqueMember: uid=zhitan,ou=People,dc=dev,dc=com

4.6 ldap delete group
ldapdelete -x -D 'cn=Manager,dc=dev,dc=com' -W cn=testgroup,ou=Groups,dc=dev,dc=com
Enter LDAP Password:

4.7 ldap modify/delete groupmember

ldapmodify -x -D 'cn=Manager,dc=dev,dc=com' -W -f b
Enter LDAP Password:
modifying entry "cn=admin,ou=Groups,dc=dev,dc=com"

[root@app1 scripts]# more b
dn: cn=admin,ou=Groups,dc=dev,dc=com
changetype: modify
delete: uniquemember
uniquemember: uid=jephe,ou=people,dc=dev,dc=com

4.8 ldap modify/add member. ldapmodify -x -D 'cn=Manager,dc=dev,dc=com' -W -f b
Enter LDAP Password:
modifying entry "cn=admin,ou=Groups,dc=dev,dc=com"

[root@app1 scripts]# more b
dn: cn=admin,ou=Groups,dc=dev,dc=com
changetype: modify
add: uniquemember
uniquemember: uid=jephe,ou=people,dc=dev,dc=com

4.9. how to add user through phpldapadmin
4.9.1 manual way:
a. custom
b. uid=user1
c. ou=people,dc=dev,dc=com
d. inetorgperson
after done, add a new attribute - userPassword, then it will automatically encrypt it using the md5 algorithm.
type in password, use to generate

4.9.2 use tools from phpldapadmin
After adding existing users, you can copy the existing user setup to another user, even in different client.

4.10 how to reset password for user
use mkpasswd on linux to generate a random password (4 digits and 4 characters, mkpasswd -l 8 -s 0 )

Part II: install Cognos and configure web server and database

5. configuring Apache on Windows

We use apache server 2.2.2 for Windows.
In httpd.conf configuration, put the following:

options Indexes followsymlinks
allowoverride none
order allow,deny

allowoverride none
options none
order allow,deny
allow from all

in alias module, put
scriptalias /cgi-bin "C:/program files/apache software foundation/apache 2.2/cgi-bin/"
scriptalias /cognos8/cgi-bin "C:/program files/cognos/c8/cgi-bin"
alias /cognos8 "C:/program files/cognos/c8/webcontent"

You can install cognos web gateway on Linux server also.

6. Configuring browser to use http://ipaddress/cognos8/ (in IE security, make http://ipaddress in the trusted website)

7. setup database for cognos
7.1 MS SQL database
a. go to enterprise manager,go to database, right click, create new one called 'cm'
b. go to security, logins, 'new login' put 'cmuser' as name, password is also 'cmuser', database is 'cm',
database access part, put 'cm' as public and 'db_owner'.

7.2 Oracle database
7.2.1 You have to create a separated instance only for cognos use, it requires AL32UTF8 character set.
can use dbca to create a new instance id 'cognos' with character set AL32UTF8.
After that, create a tablespace 'contentstore' and user 'contentstore' and give user contentstore as dba privileges.
Create tablespace contentstore datafile '/u01/app/oracle/oradata/cognos/contentstore.dbf' size 20m autoextend on next 5m flashback off;
Create user contentstore identified by contentstore default tablespace contentstore temporary tablespace temp;
Grant dba to contentstore;

7.2.2 Configuring Net manager
From start - All programs - Oracle oraClient11g_home1-configuration and Migration tools- net manager
Oracle Net Configuration-local -Service Naming, add a new one called 'cognos' which has the following information:
Service name: cognos
Connection type: Database default
Protocol: TCP/IP
Hostname: (replace with your oracle server IP)
Port number: 1521

7.2.3 create tablespace and users:
create tablespace cognos datafile '/u01/app/oracle/oradata/cognos.dbf' size 20m autoextend on next 10m flashback off;
create user cognos identified by password default tablespace cognos temporary tablespace temp;
grant connect,resource,create view to cognos;
revoke unlimited tablespace from cognos;
alter user cognos quota unlimited on cognos;

7.2.4 Important - Changing 'cursor sharing' mode in Oracle 11g database used for Cognos reporting
When using Cognos 8 framework manager to fetch tables from Oracle 11g database, by default, it will give you the following errors. It is okay for views.
'RQP-DEF-0177' an error occurred while performing operation sqlbulkfetch status =-9
UDA-SQL-0177 general exception has occurred during the operation fetch.

Solution: In Oracle, change the cursor sharing mode to 'Exact'
By default, Oracle 11g use 'SIMILAR' mode for cursor sharing, you need to change it to 'EXACT' mode.

Alter system set cursor_sharing='EXACT' scope = both;

To verify what's the value for cursor sharing, use:

select name, value from gv$parameter where name like '%cursor%'

8    Cognos Installation
8.1    Install Cognos for Windows and Framework manager
Install everything including web content for use for apache later

8.2    Configuring 'Cognos configuration'
8.2.1    Use Oracle as content store
You have to delete the default Microsoft sql server content store first before you can create Oracle content store
Download the latest Oracle 10g JDBC driver classes12.jar then put it under c:\Program files\cognos\c8\webapps\p2pd\WEB-INF\lib

Note: Oracle 11g JDBC driver doesn't work with Cognos 8
Now you can configure Data Access - Content Manager- contentstore part as follows:
Database server and port number: Serverip:1521
Userid and password: contentstore/contentstore/contentstore
Service name which is oracle instance sid : cognos
save and start cognos service

8.3    Configuring data source in Cognos
Use browser to access http://localhost/cognos8/, then go to 'cognos connection', go to 'tools' - 'directory' , to create a new data source to use Oracle database 11g.

8.4    Open framework manager to create a new project and data source.
For sql net connection string, use 'cognos' which is defined earlier in tnsnames.ora
Remember: you must change Oracle 11g cursor sharing mode to 'EXACT' so that Framework manager can receive table information from database, otherwise, it will give errors. For views, it is okay

9. cognos configuration for LDAP
Authentication: dev
Namespace ID: DEV
Host and port:
base distinguished Name: dc=dev,dc=com
user lookup: uid=${userID},ou=people,dc=dev,dc=com
Bind user DN and password: cn=Manager,dc=dev,dc=com
Password is the Manager password
Size Limit: 0
Time out in Seconds: 0

note: you also need to disable anonymous access from default 'Cognos' namespace.

10. restart cognos configuration and test it ( http://server/cognos8/)

Part III Cognos user permission and how it works

11. system administrators in cognos namespace itself
You can put your admin group 'admin' in 'dev' openldap database (pointing to dev openldap database for authentication) in the cognos directory 'system administrators'.
After adding to system administrator, you can view all directories under 'public folder' in cognos for reports.

steps below:

1. use IE to login https://url/cognos/
2. go to 'launch' menu, cognos administration
3. go to 'security'-'cognos', the second page, the last second one which is 'system administrators'
4. go to properties, members, add 'admin' group in 'dev' namespace
5. you only need to add 'admin' group to system administrator, no need to add to other cognos directories.

12. how to create user and assign permission for reports
If a user needs to view multiple openldap database/ cognos namespace reports, you should add this user in dev openldap database
instead of individual client namespace.

Then you can assign this user to multiple namespace to view their reports.
steps: login as a dev admin group user, go to 'home' and open the properties for each client folder, permissions, add the user.

Part IV Cognos reporting
13. create data source connection, use a user which can read all client schemas data ( select any table) or use system user for creating data source connection which can read all schema data
14. modify datasource schema before creating and publishing a new package for another client.
Before publishing a package, you need to change the 'schema' part in the data source name properties and then save, so that the published package is only for that schema. Don't enable data source view when publishing the package, choose only 'english'.

How to grant Oracle user/schema privileges properly

Environment: Oracle 11g 64bit
Objective: create and grant proper privileges to a user/schema


create tablespace jephe datafile '/u01/app/oracle/oradata/jephe.dbf' size 100m autoextend on next 10m [flashback off]
create user jephe identified by password default tablespace jephe temporary tablespace temp;
grant connect,resource,create view to jephe
revoke unlimited tablespace from jephe
alter user jephe quota unlimited on jephe;

a. If you use impdp to import schema from expdp dump, the unlimited tablespace system privileges will remain, you have to manually revoke that system privilege.

Revoke unlimited tablespace from jephe

This will revoke systemwide unlimited tablespace privileges, otherwise, the user can write to any tablespace

UNLIMITED TABLESPACE system privilege: Overrides all individual tablespace quotas and
gives the user unlimited quota on all tablespaces, including SYSTEM and SYSAUX. This
privilege must be granted with caution.
Note: Be aware that granting the RESOURCE role includes granting this privilege.

b. CREATE PROCEDURE privilege is for creating procedures, functions, packages

c. you can check your privs after login:

select * from session_privs;
select * from session_roles;

d. check role sys,tab and role privileges
sqlplus / as sysdba
select distinct object_name from dba_objects where object_name like 'ROLE_%' order by object_name;


SQL> select * from role_sys_privs where role like 'CONNECT';

------------------------------ ---------------------------------------- ---

SQL> select * from role_sys_privs where role like '%RESOURCE%';

------------------------------ ---------------------------------------- ---

8 rows selected.

SQL> select * from role_tab_privs where role like '%RESOURCE%';

no rows selected

SQL> select * from role_role_privs where role like '%RESOURCE%';

no rows selected

How to copy the whole schema from one server to another in Oracle 11g

Objective: clone one schema from one Oracle 11g database to another 11g database server, schema name might change.
Environment: RHEL5, From Oracle 11g 64bit to Oracle 11g 32bit.


1.  create server side directory first before exporting schema
. oraenv
sqlplus / as sysdba
create or replace directory dmpdp as '/u01/dmpdp';
grant read,write on directory dmpdp to system;

note: you can use 'select * from dba_directories' to check after creation.

2.  use expdp to dump schema
expdp system directory=dmpdp dumpfile=jephe.20091210.dmpdp logfile=jephe.20091210.logdp schemas=jephe

note: if you are exporting schema for lower version of Oracle database, such as 10g, you might need to use exp instead of expdp:
exp system  file=jephe.20091210.dmp log=jephe.20091210.log statistics=none owner=jephe

3. check the existing schema on destination database server
. oraenv
sqlplus / as sysdba
select name from v$datafile;
select distinct s.owner,s.tablespace_name,d.file_name from dba_segments s,dba_data_files d where s.tablespace_name = d.tablespace_name;

note: above statement lists out tablespace names for relevant schema.
drop tablespace jephe including contents and datafiles
drop user jephe cascade;

note: if above drop user command got ORA-01940 error: cannot drop a user that is currently connected. Then you need to kill all the existing user connections first.
You can use the following scripts to do it:

# more killusersession.sql
set head off feedback off pagesize 0 echo off term off linesize 32767 trimsp on tab off define off;
spool /u01/scripts/killusersessionfinal.sql;
select 'alter system kill session '''||sid||','||serial#||''' immediate;' from v$session where username='JEPHE';
select 'exit;' from dual;
spool off;

cd /u01/scripts/
sqlplus / as sysdba @killusersession.sql
sqlplus / as sysdba @killusersessionfinal.sql

4. create tablespace on destination database server and import
sqlplus / as sysdba
select name from v$datafile;
create tablespace jephe datafile '/path/to/jephe.dbf'  size 100m autoextend on next 10m flashback off;

if you are going to use imp instead of impdp to import schema later, you will have to do this:

create tablespace jephe datafile '/u01/app/oracle/oradata/uatdb/jephe.dbf' size 100m autoextend on next 10m flashback off
create user jephe identified by password default tablespace jephe temporary tablespace temp;
grant connect,resource,create view, select any table to jephe
revoke unlimited tablespace from jephe
alter user jephe quota unlimited on jephe;

5. transfer dumpfile to /home/oracle on destination server and define the /home/oracle as 'oracle'
sqlplus / as sysdba
create or replace directory oracle as '/home/oracle';
grant read,write on directory dmpdp to system;

# impdp system directory=oracle dumpfile=jephe.20091210.dmpdp logfile=jephe.20091210.logimpdp schemas=jephe

if you use imp, do this:
imp system  file=/home/oracle/jephe.dmp log=/home/oracle/jephe.implog fromuser=jephe touser=jephe;

1. if you need to import to another schema, you can do this:
firstly create tablespace zhitan, then run
# impdp system directory=oracle dumpfile=jephe.20091210.dmpdp logfile=jephe.20091210.logimpdp remap_schema=jephe:zhitan remap_tablespace=jephe:zhitan transform=oid:n
# imp system  file=/home/oracle/jephe.dmp log=/home/oracle/jephe.implog fromuser=jephe touser=zhitan;

note: if using imp to import from one user to another use, you need to 

Make sure the new user’s default tablespace is the new tablespace:
alter user zhitan default tablespace zhitan;

Revoke the unlimited tablespace and change the new user’s quota to have space only on the new tablespace:
revoke unlimited tablespace from zhitan;
alter user zhitan quota unlimited on zhitan quota 0 on jephe;

6. checking after import
see if the destination tablespace got data after import
sqlplus / as sysdba
SQL> column owner format a20
SQL> select owner,count(*) from dba_segments where tablespace_name='TEST_DATA' group by owner;

see if user jephe is using his own tablespace:
select username,default_tablespace from dba_users where username='JEPHE';

7. compile the schema
sqlplus / as sysdba
sql> exec dbms_utility.compile_schema('JEPHE');

8. check invalid objects and the number of invalid objects for schema JEPHE
# more numberofinvalidobjects.sql
select count(*) JEPHE, object_type from dba_objects where owner='JEPHE' and  status <> 'VALID' and object_name not like 'FB_%' group by object_type;

# more invalidobjects.sql
column object_type format A10;
column object_name format A30;
select object_type "Invalid JEPHE",object_name from dba_objects where owner='JEPHE' and  status <> 'VALID' and object_name not like 'FB_%' order by object_type,object_name;

Cloning an existing virtualbox VM for remote desktop connection

Objective: cloning an existing virtualbox VM for remote desktop connection
Environment: Linux host server running Fedora 9 and Virtualbox 2.2.2r46594


1. shutdown existing VM vm1
# VBoxManage controlvm vm1 acpipowerbutton

2. clone vm1 to vm2
# cd /root/.VirtualBox/HardDisks
# VBoxManage clonehd vm1.vdi vm2.vdi

3. add one more VM and use the existing filechange memory setting for vm2
use virtualbox command on Linux host to add one more VM, use the existing vdi file vm2.vdi. Change settings such as network card (NAT or Bridge), Memory usage and different vrdp port etc

4. enable mutli connection  for vrdp
# VBoxManage modifyvm vm2 --vrdpmulticon on

5. enable pam for external vrdp authentication
for Fedora 9, you need to do this:

# ln -sf /lib/ /lib/

then external authentication can use PAM to authenticate normal user accounts on Fedora 9 VirtualBox host server.

6. create username and password on Linux host for vm2 vrdp authentication
# useradd vm2user
# passwd vm2user

7. startup vm2
# VBoxManage startvm vm2 --type vrdp

8. use remote desktop connection on Windows client pc to connect to vm2
configure mstsc to save username and password for vm2 connection.

9. compress vdi file size

use dd to create empty zero file on Linux/Windows host
dd if=/dev/zero of=temp1 bs=1M
rm -f temp1 or del temp1(Windows)
VBoxManage modifyhd vdifilename --compact

Run db2 script at the specified time automatically and send log file out

Objective: run db2 script at the specified time automatically and send log file out.
Environment: RHEL5 with db2 V9.0


  1. write script as below
#more /db2/db2inst1/scripts/20091210/

export PATH=/db2/db2inst1/sqllib/bin:/usr/bin:/bin
. /db2/db2inst1/sqllib/db2profile
cd /db2/db2inst1/scripts/20091210


db2 connect to db1
db2 set schema = schema1
db2 -tvf $FILE -l ${FILE}.log
db2 terminate

sleep 5
scp ${FILE}.log backup@mon1:/tmp

sleep 5

#copy log file to email server and run email command there
ssh -t backup@mon1 "email -b -s 'script result' <  /tmp/${FILE}.log"

2. make it run as cronjob
#chmod +x  /db2/db2inst1/scripts/20091210/

then crontab -e to add the following  to run it at 9am 5 Jan

0 9 5 1 * /db2/db2inst1/scripts/20091210/ > /dev/null 2>&1

1. make sure all your command used in the script are in /usr/bin or /bin, otherwise, put the necessary paths inside the script export line.
2. put open.sql under /db2/db2inst1/scripts/20091210/
3. some tips - open and close code
update something set code=concat(code,'_DISABLE') where something;
update something set code=replace(code,'_DISABLE','') where something;

How to make Nagios configuration easier

Environment: Redhat Enterprise Linux 5 or CentOS 5
Objective: Some tips to make Nagios configuration easier


1. setting up monitor host (localhost) first

2. Follow Nagios website PDF document for NRPE to configure monitored host
Install Nagios plugin and NRPE, add monitor host ip into /etc/xinetd.d/nrpe as follows:
       only_from       =
use 'chkconfig xinetd on' command to make xinetd auto restart then 'service xinetd restart'

3. Public services monitoring
From localhost, which is the host with the Nagios core monitor installed, you can add some public services for monitoring:

it's better you can use the different configuration file name for each monitored host and public service, because you can later use vi to global replace the servername and ip address easily.

in /usr/local/nagios/etc/nagios.cfg ,you can add the following to cfg_file section:

For publicservices.cfg ,here is an example:
define service{
       use generic-service
        max_check_attempts 2
     host_name   localhost 
      service_description   tomcat on app server 1
     check_command  check_tomcat_app1

define command{
     command_name   check_tomcat_app1
    command_line  $USER1$/check_http  -I ip address -u /url/jsp/index.jsp  -p 8443 -S -s "Having trouble"

define service{
       use generic-service
     host_name   localhost 
      service_description   real user login test

     check_command  check_www

define command{
     command_name   check_www
    command_line  $USER1$ ipaddress

note: script must have return code, 0 means OK, 1 means warning, 2 means critical.

4. individual server configuration
for server1.cfg:

define host{
   use  generic-host
  max_check_attempts 2


define service {

   use generic-service
   service_description  Current Load
   check_command    check_nrpe!check_load

define service {
use generic-service
   service_description  Current Load
   check_command    check_nrpe!check_usrlocal

on server1 nrpe.cfg, you need to define check_usrlocal and check_load(default,built-in).

For server1, you can copy from server1 then use vi to global replace server1 to server2 and change ip address.

5.  Windows monitored host
install NSClient++, during installation ,in 'allowed host', type in ',' ( is your monitor host), tick 'enable common check plugins' and 'enable NRPE server'.
After finishing installation, in c:\program files\nsclient++\nsc.ini, to uncomment out checkexternalscript.dll line so that you can use alias part.

# windows1.cfg
define host{
   use generic-host

define service{
   use generic-service
  check_command  check_nrpe! -H windows1 -p 5666 -c alias_cpu

note: some other alias like alias_disk, alias_up, alias_service, alias_mem

6. FAQ

a. check_http.c:807: undefined reference to `np_net_ssl_write`. 
solution: make clean first, then make again. 

b. how to check nagios main configuration syntax error?
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

c. how to check remote server ? (disable check-host-alive)
use check_ssh command instead:

     check_command           check_ssh

d. ./configure hangs for nagios-plugin on RHEL4
If you find that the configure script appears to hang on this line:

checking for redhat spopen problem...

use './configure --enable-redhat-pthread-workaround' instead 

Linux bash scripting common usage guide

Jephe Wu -

Objective: When you need to write bash script, you need to follow certain steps, I've summarized some tips which I've been using for the past.

Parameter and print out script usage:
The first thing after #!/bin/sh line should be usage for your script, especially when your script needs the parameters:

if [ $# -ne 1 ];then echo "Usage: $0 username";exit 1;fi

note: your script requires username as command parameter, so above line will force you to give one parameter after the command.

Path definition:
You should export PATH variables first like this:
export PATH=/usr/bin:/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin

Common tips in the script:

  • Let history command show date and time
# export HISTTIMEFORMAT="%h/%d - %H:%M:%S "
# history | less
  • check command return code 
if [ $? -ne 0 ];then

  • recursive variable
 A="DATABASE/";B=${A/DATABASE/orcl};echo $B
it will return value: orcl/
  • while loop to read each line from /path/to/file and process it
while read line
do
echo $line
done < /path/to/file

or while IFS=\| read left right
do
echo $left $right
done < /path/to/filename

# another method
LINES=`wc -l /path/to/file | awk '{print $1}'`
i=1
while [ $i -le $LINES ]
do
iLINE=`sed -n $i'p' /path/to/file`
echo $iLINE
i=$((i+1))
done

  • specify return value
if [ $? -eq 0 ];then
return 0
else
return 2
fi
  •  seq command usage in for loop
 for i in `seq -w 1 10`;do echo $i;done
01 02 03 04 05 06 07 08 09 10

  • Always use "bash -n scriptname" to verify if your script got any syntax error.
  •  check if the file size is 0
if [ ! -s /path/to/filename ];then commands ;fi
note: if the /path/to/filename file is empty or does not exist, then run commands

  • use mktemp and mkdtemp 
You can use mktemp and mkdtemp to create temporary file or directorie variables
# TMPFILE1=`mktemp`
# TMPDIR1=`mktemp -d`
  • how to let ls function as cd
alias ls=cd

ls () { builtin cd ; }

note: how to let ccd function as cd;pwd;foo

alias foo='echo testing builtin'
ccd () { builtin cd "$1";pwd;foo; }

so ccd /tmp command will cd to /tmp, then print current working directory and print string 'testing builtin'

common tips:
  1. batch rename file
rename 1.txt and 2.txt to 1.bat and 2.bat

ls *.txt | sed 's#\(.*\).txt#mv \1.txt \1.bat#'  | sh 
    2.  use bc to calculate from CLI

# echo "scale=2;34359730176/1024/1024/1024" | bc
# echo "ibase=10;obase=16;10" | bc
# echo "ibase=16;obase=A;A" | bc

  3.  force root user to run script
if [ "$(id -u)" != "0" ]; then
echo "Only root can run this script"
exit 1
fi

4. vmstat with timestamp

# more
# eg: read stdin line by line, echoing each line followed by a timestamp.
# Used to timestamp vmstat output; note printf '%s' avoids treating the
# input line as a printf format string.
function eg {
  while read -r line; do
    printf '%s' "$line"
    date '+ %m-%d-%Y %H:%M:%S'
  done
}

find /var/adm/logs/vmstat.log* -type f -mtime +15 -exec rm -f {} \;
vmstat 10 | eg >> /var/adm/logs/vmstat.log.`date +%Y%m%d`

in crontab:

0 0 * * * sleep 1;kill `ps -ef | grep -v grep | grep scriptname | awk '{print $2}'`;sleep 3;nohup /home/oracle/bin/scriptname &    (replace "scriptname" with your vmstat-logging script's name; the original name was lost)

5. variables in for loop

# Iterate over an array of ssh targets; "${servers[@]}" keeps each entry one word.
servers=("root@server1" "root@server2")
for server in "${servers[@]}"; do
  ssh -n "$server" .....
done


for file in ${Files[*]}


6. create sparse file and detect it

[root@oratest ~]# dd if=/dev/zero of=filea bs=1M count=0 seek=5
0+0 records in
0+0 records out
0 bytes (0 B) copied, 3.8969e-05 seconds, 0.0 kB/s
You have new mail in /var/spool/mail/root
[root@oratest ~]# ls -l filea
-rw-r--r-- 1 root root 5242880 Jul 19 12:25 filea
[root@oratest ~]# du -sm filea
0 filea
[root@oratest ~]# ls -lh filea -s
0 -rw-r--r-- 1 root root 5.0M Jul 19 12:25 filea

How to remotely get Cisco router serial number?

Environment : Cisco router 3845
Objective: get serial number of router remotely


You can use one of the following commands to get the Cisco Processor ID/Chassis serial number:

show snmp (Chassis)
show ver  (Processor board ID)
show diag | include Chassis
show inventory (Chassis SN which should be the first one)

note:  related commands:

snmpget -v1 -c communitystring IP mib-
snmp-server chassis-id string (conf t first) 

How to create a global readonly user in Oracle

Jephe Wu -

Objective: create a Oracle user to have readonly access for all schemas in the database.


  • create user and assign tablespace first.
sqlplus / as sysdba
create user jephe identified by password default tablespace users temporary tablespace temp;
  • create readonly role  as 'readonly' and assign privileges and users to it
sqlplus / as sysdba
drop role readonly;
create role readonly;
grant create session to readonly;
grant select any table to readonly;
grant select any sequence to readonly;
grant select any dictionary to readonly;
grant execute any type to readonly;
grant readonly to jephe;
  •  You can switch current schema after login as jephe
select sys_context('USERENV','SESSION_USER') from dual;
select sys_context('USERENV','SESSION_SCHEMA') from dual;
alter session set current_schema=schemaname  
note: in DB2, use 'set schema = ABC' command, but the owner of the table created will be still under the connecting user.

note: you can check your privs after login:

select * from session_privs;
select * from session_roles;

How to clean up disk space for Oracle 11g database server

Jephe Wu -

Environment: OEL5 64bit OS and Oracle 11g 64bit database server
Objective:  temp and undo tablespace occupied too much disk space, need to clean up to save space.

 1. For shrinking undo tablespace:
$ sqlplus / as sysdba
select name from v$datafile

create undo tablespace UNDOTBS2 datafile '/path/to/undotbs2.dbf' size 100m autoextend on next 20m flashback off;
alter system set undo_tablespace=undotbs2

then go to Enterprise Manger to find out the old undo tablespace name, e.g. undotbs1
drop tablespace undotbs1 including contents and datafiles;

Most likely, you have to bounce database to get the disk space back to OS. So, do this:

sqlplus / as sysdba
alter system checkpoint;
shutdown immediate;

note: how to find out tablespace_name,username and datafile name

select distinct s.owner,s.tablespace_name,d.file_name from dba_segments s,dba_data_files d where s.tablespace_name = d.tablespace_name;

metalink How to Shrink the datafile of Undo Tablespace [ID 268870.1] 
SQL> create temporary tablespace TEMP1 tempfile 'c:\path\to\temp02.dbf' size 100M extent management 
local uniform size 128K;
SQL> alter database default temporary tablespace TEMP1;
SQL> alter user <username> temporary tablespace TEMP1; if required

2. For shrinking temp tablespace;
Oracle 11g supports to shrink temporary tablespace online, just run this:

alter tablespace temp shrink space; 
to shrink the temp tablespace to minimum possible size.
before that, you can use command below to find out the minimum size and which are the 
temp files:
select file_name,bytes,blocks from dba_temp_files;
SELECT * FROM dba_temp_free_space;

Now that you know the minimum possible size, you can use the commands below also:
alter tablespace temp shrink space keep 10m;
alter tablespace temp shrink tempfile '/path/to/file.dbf' [keep 20m];
select username,temporary_tablespace from dba_users where username = 'SCHEMA_NAME' 
For shrinking temp tablespace for database 10g, refer to 
 How to Shrink the datafile of Temporary Tablespace [ID 273276.1] 
SQL> create temporary tablespace TEMP1 tempfile 'c:\temp01.dbf' size 100M extent management
local uniform size 128K;
 SQL> alter database default temporary tablespace TEMP1;
SQL> alter user <username> temporary tablespace TEMP1; - optional
Resizing (or Recreating) the Temporary Tablespace [ID 409183.1] 

Database altered. 

TEMPFILE '/u02/oradata/TESTDB/temp01.dbf' SIZE 512m 

Tablespace altered. 

On some platforms 
(i.e. Windows 2000), it is possible for the tempfile to be deleted from 
DBA_TEMP_FILES but not from the hard drive of the server. 
If this occurs, 
simply delete the file using regular O/S commands. 

tablespace_name, file_name, bytes 
2 FROM dba_temp_files WHERE 
tablespace_name = 'TEMP'; 


----------------- -------------------------------- -------------- 
/u02/oradata/TESTDB/temp01.dbf 536,870,912 

If users are currently 
accessing the tempfile that you are attempting to drop, you may receive the 
following error: 

'/u02/oradata/TESTDB/temp01.dbf' DROP INCLUDING DATAFILES; 

ERROR at line 1: 
ORA-25152: TEMPFILE cannot be dropped at this time 
How to use command line to generate AWR and ADDM report?
How to create temporary tablespace
create temporary tablespace temp2 tempfile '/data/tb_temp/temp2.dbf' size 100m autoextend on next 10m flashback off; 

How to send out email through Linux command line or Windows

Environment: RHEL servers
Objective: sending email directly from Linux CLI


1. use Sendmail command:
(echo "From: sender@example.com";echo "To: recipient@example.com";echo "Subject: testing";echo "";cat filename) | /usr/sbin/sendmail -v recipient@example.com    (the original addresses were lost; substitute real ones)

2. use Mutt command:
[jephe@app1 ~]$ more .muttrc
set from=""
set envelope_from=yes

note: or put above .muttrc to /etc/Muttrc

edit /home/jephe/.muttrc as above , then send like this:
$ mutt -a /etc/hosts < /dev/null

note: you can man muttrc:
$ export LANG=en-US
$ man muttrc

3. use email (
or ssmtp (install EPEL < > then yum install ssmtp)
or msmtp ( (msmtp is recommended),
or mailsend ( )

4. Windows command line email client

How to online resize LVM2 partitions size

Jephe Wu  -

Environment: Fedora 3 or RHEL servers with LVM partitions
Objective: online increase partitions size including /, /usr, /var partitions


1.  login as root, run 'vgdisplay' to find out the free PE extend and size

  Total PE              2039
  Alloc PE / Size       1745 / 54.53 GB
  Free  PE / Size       294 / 9.19 GB

2. run 'vgdisplay -v' to find out the logical volume LV name which you want to increase, let's say it's /dev/VolGroup00/LogVol06

3. increase the 5 Physical Extent(PE)
lvm lvresize -l +5 /dev/VolGroup00/LogVol06

4. use resize2fs(RHEL 4, 5) or ext2online(Fedora, starting from Fedora 6, this command is removed) to online increase partition file system size

ext2online /dev/VolGroup00/LogVol06 [newsize]
resize2fs  /dev/VolGroup00/LogVol06

Online reduce a /data/ partition and increase / partition
Objective: LVM partitions allocation is done, there's no free PE available already, need to reduce /data partition and increase / partition
Environment: CentOS 5.4, reduce /data from 100g to 50g, and increase / from 15g to 30g, still left 35g free for future
umount /data
e2fsck -f /dev/VolGroup01/LogVol02
resize2fs /dev/VolGroup01/LogVol02 50G
lvreduce -L 50G /dev/VolGroup01/LogVol02
e2fsck -f /dev/VolGroup01/LogVol02

# online increase / partition from 15G to 30G and increase file system to make use of the full 30G space
lvresize -L 30G /dev/VolGroup01/LogVol00
resize2fs /dev/VolGroup01/LogVol00

vgdisplay -v

Use RIP Linux to shrink LVM root partition for default CentOS 5.3

Environment: default installation of CentOS 5.3 with a very big root (/) LVM partition
Objective: reduce the root LVM partition to 20G file system size


1. boot up with a RIP Linux CD
2. run command 'lvm vgchange -a y VolGroup00' to activate the volume group and logical volumes
3. resize2fs /dev/VolGroup00/LogVol00 20G
4. lvm lvresize --size 20G /dev/VolGroup00/LogVol00
note: to be very safe, you might want to resize logical volume to 21G instead of 20G, but according to the article from redhat access below, it's not required.
5. reboot

How do I reduce the size of the root file system after installation Red Hat Enterprise Linux 5?

I have attached above article from redhat below:

Release Found: Red Hat Enterprise Linux 5

The default file system layout from the Red Hat Enterprise Linux 5 installation process includes a special space for /boot and swap space then gives all left space to one logical volume and used the logical volume as root / volume.

Integrating all data files and system files in one file system is not always an ideal choice for production systems. If the system cannot be reinstalled, it is possible to reduce the size of the root file system and the logical volume on which it resides.

Reducing the logical volume on the root / volume must be done in rescue mode.

First, boot the system from Red Hat Enterprise Linux 5 Disc 1, and at the prompt, type linux rescue and press enter. When prompted for language, and keyboard, provide the pertinent information for the system. When prompted to enable the network devices on the system, select "No." Finally, select "Skip" when prompted to allow the rescue environment to mount Red Hat Enterprise Linux installation under the /mnt/sysimage directory. The filesystems MUST NOT be mounted to carry out the following steps.

Next run following commands to scan all disks LVM2 volume groups:

# lvm.static vgscan

Next, activate the logical volume to reduce. In this example, /dev/VolGroup00/LogVol00 was made available with the following command:

# lvm.static lvchange -ay /dev/VolGroup00/LogVol00

Next, reduce the size of file system and logical volume on /dev/VolGroup00/LogVol00. Please make sure there is enough space left on the root / file system and that the logical volume is large enough to contain all the data that was previously present. If the file system is at close to being full, for example, this may not work. Before resizing file system, run e2fsck to check file system first.

# e2fsck -f /dev/VolGroup00/LogVol00
# resize2fs /dev/VolGroup00/LogVol00 3000M
# lvm.static lvreduce -L 3000M /dev/VolGroup00/LogVol00

Please note that this is done on /dev/VolGroup00/LogVol00. The number at the end is the final size of the file system, not the amount it is reduced by.

Finally, verify the modification then reboot the system.

# lvm.static vgdisplay VolGroup00
# exit

WARNING: Resizing an active logical volume can cause catastrophic data loss if carried out incorrectly. Plan and act accordingly. ALWAYS create backups!


 How to remove a LVM partition forcely?

# kpartx -d xxxx 

General Linux server network performance guide

Environment: Linux web server serves browser client and Database server on the local LAN serves the application server which is also on the same LAN
Objective: maximum the network performance which is one of the 4 performance bottlenecks(CPU,Memory,Storage and Network I/O)


1. net.core.wmem_default(/proc/sys/net/core/wmem_default) and net.core.rmem_default(/proc/sys/net/core/rmem_default) (the following settings are also recommended by Oracle 11gR1 installation)

net.core.rmem_default = 262144
net.core.rmem_max = 4194304
net.core.wmem_default = 262144
net.core.wmem_max = 4194304
For Oracle database, it's not recommended to configure net.ipv4.tcp_rmem and net.ipv4.tcp_wmem.
as stated on metalink.

2. net.core.netdev_max_backlog (/proc/sys/net/core/netdev_max_backlog), default is 1000 in Linux RHEL 5 kernel 2.6
set maximum number of incoming packets that will be queued for delivery to the device queue.
3. net.core.somaxconn(/proc/sys/net/core/somaxconn), default is 128
maximum accept queue backlog that can be specified via the listen() system call. or the number of pending connection requests.
4. optmem_max (/proc/sys/net/core/optmem_max)
maximum initialization size of socket buffers, expressed in bytes.
increase this  
TCP options:
5. net.ipv4.tcp_window_scaling (/proc/sys/net/ipv4/tcp_window_scaling), enable
6. disable net.ipv4.tcp_sack(/proc/sys/net/ipv4/tcp_sack)
on LAN, disabling this tcp_sack can actually improve performance.
when tcp_sack is disabled, you should also disable 7 and 8
7. net.ipv4.tcp_dsack(/proc/sys/net/ipv4/tcp_dsack)
8. net.ipv4.tcp_fack(/proc/sys/net/ipv4/tcp_fack)
9. net.ipv4.tcp_max_syn_backlog(/proc/sys/net/ipv4/tcp_max_syn_backlog)
controls the length of the tcp syn queue for each port. If client experience failures
connecting to busy servers, this value should be increased.
10. net.ipv4.tcp_synack_retries(/proc/sys/net/ipv4/tcp_synack_retries) set to 3
controls the number of times kernel tries to resend a response to an incoming syn/ack segments
11. net.ipv4.tcp_retries2 (/proc/sys/net/ipv4/tcp_retries2) set to 5
controls the number of times kernel tries to resend data to a remote host with which it has an established connection.
12. net.ipv4.tcp_max_tw_buckets (/proc/sys/net/ipv4/tcp_max_tw_buckets)
increase this to double value. 
13. net.ipv4.tcp_orphan_retries (/proc/sys/net/ipv4/tcp_orphan_retries)  set to 0
14. net.ipv4.tcp_fin_timeout set to 30
15. net.ipv4.tcp_keepalive
16. ip_local_port_range (net.ipv4.ip_local_port_range)
1024 65000
17. net.ipv4.tcp_window_scaling = 1
18. net.ipv4.tcp_timestamps = 1 
Partitions and File system performance:
1. database raid:
For oracle database server hard disk raid. Use raid1 for redo log, archivelog,
including flash recovery area archivelog, temporary tablespace.
use raid1+0 for database files

2. for partition on individual hard disk, the first partition for /boot, the second is 
for swap, the third is for /var, the fourth is for /usr, the last is for /home and /
for other partitions, the first partition is at the outer side of the hard disk which 
is much faster than the inside; outer partitions can be accessed faster than inner ones.
3. add 'noatime' for those often accessed partitions in /etc/fstab.
4. switch to another I/O scheduler 
root (hd0,0) kernel /vmlinuz-2.6.18-8.el5 ro root=/dev/sda2 elevator=deadline
5. swap partition size:
  RAM               Swap Space
  1 GB - 2 GB       1.5 times the size of RAM
  2 GB - 8 GB       Equal to the size of RAM
  more than 8GB     0.75 times the size of RAM
6. use hugepage and ramfs to improve performance.



Understanding qmail smtp relay

Environment: datacenter network, qmail server ip address:
Objective: how to configure smtp relay from other ips, including those from the same network segment  and another network segment ip

How qmail smtp relay works:

1.  check configuration files /var/qmail/control/rcpthosts and /var/qmail/control/smtproutes and /etc/tcp.smtp(actually useful file is /etc/tcp.smtp.cdb, you need to generate this cdb file after changing tcp.smtp and also make sure /etc/tcp.smtp.cdb file is world readable, very important, otherwise, qmail will ignore this file and refuse relay from any other hosts except for localhost )

Important: any changes to rcpthosts, smtproutes and tcp.smtp(thereafter, run the following tcprules command to generate tcp.smtp.cdb), no need to restart qmail to take effect, qmail will read them on the fly.

The command is :
# cd /etc/

# tcprules tcp.smtp.cdb tcp.smtp.temp < tcp.smtp
# chmod go+r tcp.smtp.cdb
2. under the following cases, qmail allows the smtp relay from other hosts:
a. the host appears in the /etc/tcp.smtp file and generated the coresponding /etc/tcp.smtp.cdb file.
for example:
note: this will allow segment and ip to be able to relay email 
through this qmail server
b. if the smtp client ip does not appear in above tcp.smtp file, then alternative way is 
to add the recipient domain in the /var/qmail/control/rcpthosts.
for example: the following is the content of the file /var/qmail/control/rcpthosts

note: when you add any domain to above file, qmail will be using the new configuration on the fly. Although the smtp client ip is not inside the database file /etc/tcp.smtp.cdb, as long as the recipient email domain part appears in above rcpthosts file, the smtp relay is allowed.

c.  only after the smtp relay is allowed, you can decide which email gateway will be forwarded to for the specified domain, this can be configured in /var/qmail/control/smtproutes.

for example:


note:  for different domain, the email will be sent to the different ip, for any other domains, will be sent to

How to stop/start qmail:

You can vi /etc/inittab to comment out the svscanboot line, then run 'init q' to take effect, after that, vi /etc/inittab to uncomment it then run 'init q' again.


How to configure a new Cisco router or switch

Environment: Cisco 2960G switch 48ports
Objective: Configuring it as a managable switch and use port 48 as monitoring port for SNORT server use.


1. Use Putty to specify COM1 to connect to switch console, then power on the switch.
2. waiting  for the switch startup, it takes some time.
3. follow the default configuration wizard
4. specify vlan1 as management port, to specify IP address etc
5. specify ssh username and password

username root secret yourpassword

6. configuring the rest
no enable password
no ip domain-lookup

ip domain-name
crypto key generate rsa (then give 1024)
note: above 2 lines are for enabling ssh server function

clock timezone SGT 8 (not in conf t prompt)
service password-encryption

no ip http server
no ip http secure-server
snmp-server community public RO
line con 0
line vty 0 4
  login local
  transport input ssh
line vty 5 15
  login local  (using local authentication, not remote authentication server)
  transport input ssh  (only accept ssh incoming connection)

7. configuring monitoring information
monitor session source vlan 1
monitor session 1 destination interface GigabitEthernet0/48

8. configuring ntp servers
ntp server

note: if you don't set the ntp server, after router's power is off and power it on again, it will lose clock information. If there's ntp server available, it will get the correct time from NTP server immediately after power on

9. setting clock
clock set 14:28:00 20 OCT 2009 (in conf t prompt)

10. save

11. disable logging message
no logging console
no logging monitor  (This command disables logging for terminal lines other than the system console)
logging buffered 16384
logging trap notifications  (This command provides notification (level 5) messaging to the specified syslog server. The default logging level for all devices (console, monitor, buffer, and traps) is debugging (level 7). Leaving the trap logging level at 7 produces many extraneous messages that are of little or no concern to the health of the network. It is recommended that the default logging level for traps be set to 5. )

You can synchronise the logging messages with your command prompt as follows:

Router(config)# line con 0
Router(config-line)# logging synchronous
Router(config)# line aux 0
Router(config-line)# logging synchronous
Router(config)# line vty 0 4
Router(config-line)# logging synchronous

12. some other useful commands
show clock
show monitor
show ssh
show line

hostname 48PortCiscoSwitch
ip name-server

13. the example of configuration
version 12.2
no service pad
service timestamps debug datetime msec
service timestamps log datetime msec

service password-encryption
hostname 48PortCiscoSwitch
enable secret 5 $1$7zZ5$91N7FPR68YbLyRO4NE3jr/
username root secret 5 $1$2IEN$wmDzCnXsatjHtaPMrss4e.
no aaa new-model
clock timezone SGT 8
system mtu routing 1500
ip subnet-zero
no ip domain-lookup
ip domain-name
spanning-tree mode pvst
spanning-tree extend system-id
vlan internal allocation policy ascending

interface GigabitEthernet0/1
interface GigabitEthernet0/2
interface GigabitEthernet0/48
interface Vlan1
 ip address
 no ip route-cache
ip default-gateway
no ip http server
no ip http secure-server
snmp-server community public RO
line con 0
 logging synchronous
line vty 0 4
 logging synchronous
 login local
 transport input ssh
line vty 5 15
 logging synchronous
 login local
 transport input ssh
monitor session 1 source vlan 1
monitor session 1 destination interface Gi0/48
ntp server


 Cisco routers/Switches have two privilege levels:

  • User EXEC mode—privilege level 1
  • Privileged EXEC mode—privilege level 15
When you log in to a Cisco router under the default configuration, you're in user EXEC mode (level 1). From this mode, you have access to some information about the router, such as the status of interfaces, and you can view routes in the routing table. However, you can't make any changes or view the running configuration file.
Because of these limitations, most Cisco router users immediately type enable to get out of user EXEC mode. By default, typing enable takes you to level 15, privileged EXEC mode. In the Cisco IOS, this level is equivalent to having root privileges in UNIX or administrator privileges in Windows. In other words, you have full access to the router.
For networks maintained by just a few people, everyone typically has the password to get to privileged mode

Cisco IOS provides for 16 different privilege levels ranging from 0 to 15.

The Cisco IOS comes with 2 predefined user levels.
User EXEC mode runs at privilege level 1 and “enabled” mode (privileged EXEC mode)runs at level 15.

Setting up 64bit Oracle 11gR1 on 64bit OEL5.4 with ramfs and hugepage configuration

Jephe Wu -

Environment:  HP Proliant Server DL360P6, 16G RAM, 8x300G SAS hard disk, Oracle Enterprise Linux(OEL) 5.4 x86-64, Oracle 11g 64bit
Objective: Setting up Oracle server with Oracle recommended RAID and using ramfs and hugepage to improve performance.

Part I:  RAID, Partition and Installation of OEL and Oracle


1. Oracle suggest to use RAID1 for control file, redo log, archive log and temp tablespace. RAID10 for database files which means you can put /u01 partition on RAID1+0 (search metalink for

Doc ID: 30286.1 -  I/O Tuning with Different RAID Configurations )
2. For installation of OEL and Oracle, you can refer to tutorial part.
Note: for the installation of OEL, you should run 'mkdir -p /u01/app/oracle /u01/app/oraInventory/' after following the tutorial and before installing Oracle 11g.

note: for 64bit OEL, when putting shmmax and shmall, don't follow the tutorial if the existing default value is already more than the one you will be putting.

3. You might need to install PAE kernel on 32bit OEL to recognize more than 4G memory from OS level.

Part II: Optimize the system from OS point of view

1. kernel boot parameter to optimize I/O
Edit the /etc/grub.conf file and add the following parameter to the kernel that's being used,
title Red Hat Enterprise Linux Server (2.6.18-8.el5)
        root (hd0,0) 
        kernel /vmlinuz-2.6.18-8.el5 ro root=/dev/sda2 elevator=deadline
        initrd /initrd-2.6.18-8.el5.img
2. add the following mount parameter to /etc/fstab for /u01 partition
3.  remove all unnecessary services, use 'ntsysv --level 2345' to remove them, only leave services like crond, sysstat, sendmail, syslog, sshd, irq-balance, portmap, nfslock, network.
4. install HP Support Pack for hardware monitoring.

Part III: Optimize Oracle with ramfs and hugepage


1.  using ramfs instead of default tmpfs which comes with OEL
  • Edit the /etc/rc.local file and add the following entries to it to configure the computer to mount ramfs over the /dev/shm directory, whenever you start the computer:

    umount /dev/shm
    mount -t ramfs ramfs /dev/shm
    chown oracle:oinstall /dev/shm
    In the preceding commands, oracle is the owner of Oracle software files and oinstall is the group for Oracle owner account. So we only have Oracle installed which will be using ramfs.

    ramfs cannot be pageable, tmpfs can be pageable/swpable. There is no page replacement mechanism overhead.HugePages are universally regarded as pinned.


    Comparison of tmpfs (previously shmfs) and ramfs:
    Behaviour when maximum space is filled:  tmpfs displays an error; ramfs continues writing
    Fixed size:        tmpfs Yes;  ramfs No
    Uses swap:         tmpfs Yes;  ramfs No
    Volatile storage:  tmpfs Yes;  ramfs Yes

  • Run the following command to check if the /dev/shm directory is mounted with the ramfs type:

    # mount | grep shm
    ramfs on /dev/shm type ramfs (rw)
  • Run the following command to check the permissions on the /dev/shm directory:
# ls -ld /dev/shm
drwxr-xr-x  3 oracle oinstall 0 Jan 13 12:12 /dev/shm
note: about tmpfs (previously called shmfs) - from
tmpfs is supported by the Linux kernel from version 2.4 and up.[3] tmpfs (previously known as shmfs) distinguishes itself from the Linux ramdisk device by allocating memory dynamically and by allowing less-used pages to be moved onto swap space. ramfs, in contrast, does not make use of swap (which can be an advantage or disadvantage). In addition, MFS and some older versions of ramfs did not grow and shrink dynamically and instead used a fixed amount of memory at all times.

Usage of tmpfs for example is "mount -t tmpfs -o size=1G,nr_inodes=10k,mode=0700 tmpfs /space" which will allow up to 1 GiB in RAM/swap with 10240 inodes and only accessible by the owner of /space. Owner can be overridden with the uid/gid mount options, and will otherwise default to root. The filesystem's maximum size can also be changed on-the-fly, like "mount -o remount,size=2G /space".
/var/run and /var/lock can be tmpfs filesystems, to alleviate having to clean them up at each reboot.
2.  configuring memlock  
  • Increase max locked memory limit. Edit /etc/security/limits.conf and add

    oracle   soft   memlock 12582912 
    oracle   hard   memlock 12582912
    Logon to oracle then check max locked mem limit:        
    $ ulimit -l
The memlock parameter specifies how much memory the oracle user can lock into its address space.  The memlock setting is specified in KB and must match the memory size of the number of Huge Pages that Oracle should be able to allocate.If memlock is too small, then no single Huge Page will be allocated when the Oracle database starts.

You can specify the maximum lockable memory for oracle user, above 12582912k (=12G*1024*1024), my total physical memory is 16G which is 3/4.

This value can be specified by calculating the number of hugepage, e.g. using the following ~/bin/ to get 1000 hugepages, then 1000*2=2000m(=2000*1024k=2048000k), which you can put 2048000 in this memlock settings, so if you need to increase hugepage size, get the number first then increase here for memlock also.

Hugepage uses 2M continuous memory space per page.

3.  Configuring hugepage
According to Doc ID: 361468.1-HugePages on 64-bit Linux

HugePages is a feature of the Linux kernel  which allows larger pages to manage memory as the alternative to the small 4K pagesize. There is a general misconception where the HugePages is a feature specific to 32-bit Linux. The major function of 64-bit architecture and O/S with 64-bit support is the ability to address Very Large Memory (VLM) natively. With 32-bit architectures, generally speaking, VLM is accessed through a VLM window (See Note 200266.1), which is a data structure in the process address space (virtually max 4GB) that provides access to whole virtual address space from a "window" of a specific size.  On 32-bit Linux, we need to set the USE_INDIRECT_DATA_BUFFERS=TRUE and mount a shmfs/tmpfs/ramfs type of in-memory filesystem over /dev/shm.

 64-bit Linux does not need these mechanisms and can use the physical memory directly as the address space is virtually 16 EB (exabyte = 2^60 bytes) at maximum.

To check whether HugePages are supported/available on a running configuration, run:

    $ grep Huge /proc/meminfo 
and check the output:

Regardless of the values printed, if you can see the lines above, the system supports HugePages.

 The AMM and HugePages are not compatible. One needs to disable AMM on 11g to be able to use HugePages

HugePages is a method to have larger pages where it is useful for working with very large memory. It is both useful in 32- and 64-bit configurations

HugePages can be used without indirect buffers on 64-bit systems (normal configuration will do).

  • startup instance by runing
sqlplus / as sysdba
sql> startup nomount pfile='initorcl.ora'
sql> show parameter shared_pool_size
sql> show sga
sql> show parameter filesystemio_options
sql> show parameter disk_asynch_io
  • estimate the number of hugepages and finalize it
put the following program in $ORACLE_HOME/bin

# Linux bash script to compute values for the
# recommended HugePages/HugeTLB configuration
# Note: This script does calculation for all shared memory
# segments available when the script is run, no matter it
# is an Oracle RDBMS shared memory segment or not.

# Check for the kernel version
KERN=`uname -r | awk -F. '{ printf("%d.%d\n",$1,$2); }'`

# Find out the HugePage size
HPG_SZ=`grep Hugepagesize /proc/meminfo | awk '{print $2}'`

# Start from 1 pages to be on the safe side and guarantee 1 free HugePage
NUM_PG=1

# Cumulative number of pages required to handle the running shared memory segments
for SEG_BYTES in `ipcs -m | awk '{print $5}' | grep "[0-9][0-9]*"`
do
   MIN_PG=`echo "$SEG_BYTES/($HPG_SZ*1024)" | bc -q`
   if [ $MIN_PG -gt 0 ]; then
      NUM_PG=`echo "$NUM_PG+$MIN_PG+1" | bc -q`
   fi
done

# Finish with results
case $KERN in
   '2.4') HUGETLB_POOL=`echo "$NUM_PG*$HPG_SZ/1024" | bc -q`;
          echo "Recommended setting: vm.hugetlb_pool = $HUGETLB_POOL" ;;
   '2.6') echo "Recommended setting: vm.nr_hugepages = $NUM_PG" ;;
    *) echo "Unrecognized kernel version $KERN. Exiting." ;;
esac

# End

  • Run the following command to change the permission of the file:

    $ chmod +x

Run the script to compute the values for hugepages configuration:
$ ./
  1. Set the following kernel parameter:

    # sysctl -w vm.nr_hugepages=value_displayed_above
  2. To make the value of the parameter available for every time you restart the computer, edit the /etc/sysctl.conf file and add the following entry:

  3. Run the following command to check the available hugepages:

    $ grep Huge /proc/meminfo
  4. Restart the instance.
  5. Run the following command to check the available hugepages (1 or 2 pages free):

    $ grep Huge /proc/meminfo
if you noticed the total hugepage number in /proc/meminfo is lower than the number you specified in /etc/sysctl.conf, you might have not enough memory or system cannot find the enough continuous 2M memory pages(in this case, hugepage cannot be used for shared memory segment although it occupied some but not enough memory area), so you might need to reboot server.

the purpose above is to startup instance only(startup nomount) then estimate how many hugepage should be specified in /etc/sysctl.conf, after that, shutdown instance, then make sure oracle user can see the new specified hugepage parameter /proc/meminfo, then startup instance and database again)

you can verify them using 'ipcs -m' and ls -l /dev/shm/ for buffer cache memory file system and shared pool.

1. hugepage and AMM
    The 11g AMM feature is enabled by the MEMORY_TARGET / MEMORY_MAX_TARGET instance initialization parameters (see Note 465048.1 for further information). That is also the case with a default database instance created using Database Configuration Assistant (DBCA).

    With AMM all SGA memory is allocated by creating files under /dev/shm. When Oracle DB does SGA allocations that way HugePages are not reserved/used. The use of AMM is absolutely incompatible with HugePages.

    Please also note that ramfs (instead of tmpfs mount over /dev/shm) is not supported for AMM at all. With AMM the Oracle database needs to grow and reduce the size of SGA dynamically. This is not possible with ramfs where it possible and supported with tmpfs (which is the default for the OS installation).

    If you want to use HugePages make sure that both MEMORY_TARGET / MEMORY_MAX_TARGET initialization parameters are disabled for the database instance.

    2.  hugepage and shared memory segment
    The Huge Pages pool is not used by the ramfs shared memory filesystem; it is only used by shared memory segments, which is what the command 'ipcs -m' shows — not 'ls -l /dev/shm/'.

     Part IV: 32bit Oracle - using VLM (no special setting needed for 64bit, which supports VLM natively)
    On 32bit Oracle with VLM configuration, Note that huge pages will only apply to a small part of your SGA (shared, java, large pool) since your data blocks are not handled by huge pages but by indirect data buffers.

    You should use 64-bit operating system and Oracle. Using indirect block buffers has a performance overhead. If you use a 64-bit operating system all your problems are gone and you can use huge pages for the whole sga including buffer cache.
    for 32bit OS and Oracle, in order to configure VLM, do this:

    1. add the following into initorcl.ora (8G (=8192*1048576 bytes) buffer cache and 2G shared pool)
    change all DB_CACHE_SIZE (remove DB_xK_CACHE_SIZE parameters ) with DB_BLOCK_BUFFERS parameter.
    *.shared_pool_size has higher priority than orcl.__shared_pool_size
    shared_io_poolsize=0 is reserved setting, don't change
    Part V: using async io and direct io (for both 32bit and 64bit Oracle) 
    • by default, RHEL5 and Oracle 11g

      disk_asynch_io=true (to confirm, use show parameter disk_asynch_io)

    • If you use filesystems

      filesystemio_options=setall  (to confirm, after starting up the database, run: show parameter filesystemio_options)

    • direct I/O
    Direct I/O is a feature of the file system whereby file reads and writes go directly from the applications to the storage device, bypassing the operating system read and write caches. Direct I/O is used by only a few applications that manage their own caches, such as databases.

    In the following example, $ORACLE_HOME/bin/oracle was relinked with async I/O:
    $ ldd $ORACLE_HOME/bin/oracle | grep libaio
        libaio.so.1 => /usr/lib/libaio.so.1 (0x0093d000)
    $ nm $ORACLE_HOME/bin/oracle | grep io_getevent
    w io_getevents@@LIBAIO_0.1
    In the following example, $ORACLE_HOME/bin/oracle has NOT been relinked with async I/O:
    $ ldd $ORACLE_HOME/bin/oracle | grep libaio
    $ nm $ORACLE_HOME/bin/oracle | grep io_getevent
    w io_getevents
    Use the first number, which is 515 here, to confirm that the system is using async I/O. If the system is not using async I/O,
    the 515 will be zero:

    [oracle@jephe scripts]$ cat /proc/slabinfo | grep kioctx
    kioctx 515 540 256 15 1 : tunables 120 60 8 : slabdata 36 36 0

    So, on 32bit Oracle with the VLM feature, we should use direct I/O and async filesystem I/O for Oracle, hugepages for the shared pool, and ramfs for the buffer cache; both hugepages and ramfs will lock the memory to prevent swapping.

    Part VI: Appendix:

     2. my 64bit Oracle initorcl.ora file:
    orcl.__oracle_base='/u01/app/oracle'#ORACLE_BASE set from environment
    *.dispatchers='(PROTOCOL=TCP) (SERVICE=orclXDB)'
    1.  using ramfs instead of tmpfs to prevent swapping whenever it's possible
    2.  using 64bit OS and Oracle whenever it's possible
    3.  on 64bit OS+Oracle, hugepage can be used for the whole SGA, while on 32bit OS +Oracle, hugepage can only be used for whole SGA except for data block buffer which is using indirect data buffer. On 32-bit Linux, we need to set the USE_INDIRECT_DATA_BUFFERS=TRUE and mount a shmfs/tmpfs/ramfs type of in-memory filesystem over /dev/shm.
    4. *.java_jit_enabled=FALSE, so that 64bit Oracle 11g can use exp to export data.

    5. To speed up Oracle processing, you might consider collecting statistics for a schema, such as:
    exec dbms_stats.gather_schema_stats('JEPHE', cascade => true);
    exec dbms_stats.delete_table_stats('SYS','CDEF$');
    exec dbms_stats.delete_table_stats('SYS','CON$');
    exec dbms_stats.delete_table_stats('SYS','USER$');
    exec dbms_stats.gather_table_stats('SYS','CDEF$');
    exec dbms_stats.gather_table_stats('SYS','CON$');
    exec dbms_stats.gather_table_stats('SYS','USER$');