The issue with non-p (0x1e 0x95 0x14) ISP programming (and thus bootloader installing) from the Arduino IDE is that the avrdude.conf shipped with Arduino (at least 1.0.5) doesn't include the non-p 328 variant, and the boards.txt doesn't include it either.
Fixing this is a simple matter of adding a couple config sections.
As mentioned by a poster above, once the bootloader is in, and you are programming via it, the chip is ALWAYS a 328p because the bootloader has it hard coded, so use the "Arduino Duemilanove w/ ATmega328 (Not P)" board (below) when using ISP to program, and use "Arduino Duemilanove w/ ATmega328" when programming via the bootloader.
NB: just because the chip has a non-p signature does not necessarily mean it's a non-p chip, mis-signatured chips have been seen. You can test it by trying to disable the brown-out-detection (BOD), if it can be disabled and you confirm with it disabled that it does not reset on brown-out, then it's "p" regardless of it's signature bytes, that's the only practical difference between the two.
Add to boards.txt
##############################################################
atmega328nop.name=Arduino Duemilanove w/ ATmega328 (Not P)
atmega328nop.upload.protocol=arduino
atmega328nop.upload.maximum_size=30720
atmega328nop.upload.speed=57600
atmega328nop.bootloader.low_fuses=0xFF
atmega328nop.bootloader.high_fuses=0xDA
atmega328nop.bootloader.extended_fuses=0x05
atmega328nop.bootloader.path=atmega
atmega328nop.bootloader.file=ATmegaBOOT_168_atmega328.hex
atmega328nop.bootloader.unlock_bits=0x3F
atmega328nop.bootloader.lock_bits=0x0F
atmega328nop.build.mcu=atmega328
atmega328nop.build.f_cpu=16000000L
atmega328nop.build.core=arduino
atmega328nop.build.variant=standard
##############################################################
Add to avrdude.conf (search for the existing ATmega328P section and add it above there)
#------------------------------------------------------------
# ATmega328
#------------------------------------------------------------
part
id = "m328";
desc = "ATMEGA328";
has_debugwire = yes;
flash_instr = 0xB6, 0x01, 0x11;
eeprom_instr = 0xBD, 0xF2, 0xBD, 0xE1, 0xBB, 0xCF, 0xB4, 0x00,
0xBE, 0x01, 0xB6, 0x01, 0xBC, 0x00, 0xBB, 0xBF,
0x99, 0xF9, 0xBB, 0xAF;
stk500_devcode = 0x86;
# avr910_devcode = 0x;
signature = 0x1e 0x95 0x14;
pagel = 0xd7;
bs2 = 0xc2;
chip_erase_delay = 9000;
pgm_enable = "1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 1",
"x x x x x x x x x x x x x x x x";
chip_erase = "1 0 1 0 1 1 0 0 1 0 0 x x x x x",
"x x x x x x x x x x x x x x x x";
timeout = 200;
stabdelay = 100;
cmdexedelay = 25;
synchloops = 32;
bytedelay = 0;
pollindex = 3;
pollvalue = 0x53;
predelay = 1;
postdelay = 1;
pollmethod = 1;
pp_controlstack =
0x0E, 0x1E, 0x0F, 0x1F, 0x2E, 0x3E, 0x2F, 0x3F,
0x4E, 0x5E, 0x4F, 0x5F, 0x6E, 0x7E, 0x6F, 0x7F,
0x66, 0x76, 0x67, 0x77, 0x6A, 0x7A, 0x6B, 0x7B,
0xBE, 0xFD, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00;
hventerstabdelay = 100;
progmodedelay = 0;
latchcycles = 5;
togglevtg = 1;
poweroffdelay = 15;
resetdelayms = 1;
resetdelayus = 0;
hvleavestabdelay = 15;
resetdelay = 15;
chiperasepulsewidth = 0;
chiperasepolltimeout = 10;
programfusepulsewidth = 0;
programfusepolltimeout = 5;
programlockpulsewidth = 0;
programlockpolltimeout = 5;
memory "eeprom"
paged = no;
page_size = 4;
size = 1024;
min_write_delay = 3600;
max_write_delay = 3600;
readback_p1 = 0xff;
readback_p2 = 0xff;
read = " 1 0 1 0 0 0 0 0",
" 0 0 0 x x x a9 a8",
" a7 a6 a5 a4 a3 a2 a1 a0",
" o o o o o o o o";
write = " 1 1 0 0 0 0 0 0",
" 0 0 0 x x x a9 a8",
" a7 a6 a5 a4 a3 a2 a1 a0",
" i i i i i i i i";
loadpage_lo = " 1 1 0 0 0 0 0 1",
" 0 0 0 0 0 0 0 0",
" 0 0 0 0 0 0 a1 a0",
" i i i i i i i i";
writepage = " 1 1 0 0 0 0 1 0",
" 0 0 x x x x a9 a8",
" a7 a6 a5 a4 a3 a2 0 0",
" x x x x x x x x";
mode = 0x41;
delay = 20;
blocksize = 4;
readsize = 256;
;
memory "flash"
paged = yes;
size = 32768;
page_size = 128;
num_pages = 256;
min_write_delay = 4500;
max_write_delay = 4500;
readback_p1 = 0xff;
readback_p2 = 0xff;
read_lo = " 0 0 1 0 0 0 0 0",
" 0 0 a13 a12 a11 a10 a9 a8",
" a7 a6 a5 a4 a3 a2 a1 a0",
" o o o o o o o o";
read_hi = " 0 0 1 0 1 0 0 0",
" 0 0 a13 a12 a11 a10 a9 a8",
" a7 a6 a5 a4 a3 a2 a1 a0",
" o o o o o o o o";
loadpage_lo = " 0 1 0 0 0 0 0 0",
" 0 0 0 x x x x x",
" x x a5 a4 a3 a2 a1 a0",
" i i i i i i i i";
loadpage_hi = " 0 1 0 0 1 0 0 0",
" 0 0 0 x x x x x",
" x x a5 a4 a3 a2 a1 a0",
" i i i i i i i i";
writepage = " 0 1 0 0 1 1 0 0",
" 0 0 a13 a12 a11 a10 a9 a8",
" a7 a6 x x x x x x",
" x x x x x x x x";
mode = 0x41;
delay = 6;
blocksize = 128;
readsize = 256;
;
memory "lfuse"
size = 1;
min_write_delay = 4500;
max_write_delay = 4500;
read = "0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0",
"x x x x x x x x o o o o o o o o";
write = "1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 0",
"x x x x x x x x i i i i i i i i";
;
memory "hfuse"
size = 1;
min_write_delay = 4500;
max_write_delay = 4500;
read = "0 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0",
"x x x x x x x x o o o o o o o o";
write = "1 0 1 0 1 1 0 0 1 0 1 0 1 0 0 0",
"x x x x x x x x i i i i i i i i";
;
memory "efuse"
size = 1;
min_write_delay = 4500;
max_write_delay = 4500;
read = "0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0",
"x x x x x x x x x x x x x o o o";
write = "1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0",
"x x x x x x x x x x x x x i i i";
;
memory "lock"
size = 1;
min_write_delay = 4500;
max_write_delay = 4500;
read = "0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0",
"x x x x x x x x x x o o o o o o";
write = "1 0 1 0 1 1 0 0 1 1 1 x x x x x",
"x x x x x x x x 1 1 i i i i i i";
;
memory "calibration"
size = 1;
read = "0 0 1 1 1 0 0 0 0 0 0 x x x x x",
"0 0 0 0 0 0 0 0 o o o o o o o o";
;
memory "signature"
size = 3;
read = "0 0 1 1 0 0 0 0 0 0 0 x x x x x",
"x x x x x x a1 a0 o o o o o o o o";
;
;