!----------------------------------------------------------------------- ! trainset.f90 - handling of the training set file !----------------------------------------------------------------------- !+ This file is part of the AENET package. !+ !+ Copyright (C) 2012-2016 Nongnuch Artrith and Alexander Urban !+ !+ This program is free software: you can redistribute it and/or modify !+ it under the terms of the GNU General Public License as published by !+ the Free Software Foundation, either version 3 of the License, or !+ (at your option) any later version. !+ !+ This program is distributed in the hope that it will be useful, but !+ WITHOUT ANY WARRANTY; without even the implied warranty of !+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU !+ General Public License for more details. !+ !+ You should have received a copy of the GNU General Public License !+ along with this program. If not, see <http://www.gnu.org/licenses/>. !----------------------------------------------------------------------- ! 2013-05-09 Alexander Urban (AU), Nongnuch Artrith (NA) !----------------------------------------------------------------------- module trainset use aeio, only: aeio_header, & TYPELEN, PATHLEN use io, only: io_adjustl, & io_unit use sfsetup, only: Setup, & save_Setup, & load_Setup, & del_Setup, & stp_init, & stp_final, & stp_normalize, & stp_assert_moduleinit, & sfval, & sfderiv_i, & sfderiv_j implicit none private save public :: new_TrnSet, & open_TrnSet, & close_TrnSet, & rewind_TrnSet, & new_TrnSet_info, & save_TrnSet_info, & save_TrnSet_info_ASCII, & load_TrnSet_info, & load_TrnSet_info_ASCII, & save_TrnSet_ASCII, & load_TrnSet_ASCII, & ts_count_atoms, & ts_load_Setups, & ts_normalize, & ts_print_info, & ts_read_atom_info, & ts_read_header, & ts_read_sf_info, & ts_read_sf_values, & ts_read_structure_info, & ts_skip_header, & ts_skip_structure, & ts_unload_Setups, & ts_write_atom_info, & ts_write_header, & ts_write_sf_info, & ts_write_structure_info, & ts_read_footer, & ts_write_footer private :: ts_assert_init, & ts_assert_notinit, & ts_assert_writemode, & ts_assert_readmode, & ts_energy_norm_simple type, public :: TrnSet !-----------------------------------------------------------------! ! init .true., if the training set has been initialized ! ! file name of the corresponding training set file ! ! unit unit number of that file ! ! mode current access mode; 'read', 'write', 'info' ! ! normalized .true., if the input and output values have been ! ! normalized to the interval [-1,1] ('read' mode only)! ! ! ! if (normalized == .true.) ! ! scale energy scaling factor used for the normalization ! ! shift atomic energy shift used for energy normalization ! ! ! ! nTypes number of atomic species in the training set ! ! nAtomsTot total number of atoms in the training set ! ! typeName(i) name of i-th atomic species ! ! nStrucs total number of structures in the training set ! ! --> when open in 'write' mode, not necessarily all ! ! files have yet been parsed ! ! iStruc current file record position (0=before first file) ! !-----------------------------------------------------------------! logical :: init = .false. character(len=PATHLEN) :: file integer :: unit character(len=5) :: mode logical :: normalized double precision :: scale, shift integer :: nTypes integer :: nAtomsTot character(len=TYPELEN), dimension(:), allocatable :: typeName double precision, dimension(:), allocatable :: E_atom integer :: nStrucs integer :: iStruc double precision :: E_min, E_max, E_av end type TrnSet !--------------------------------------------------------------------! ! Basis function values and derivatives may be read and written ! ! either using a basis function setup [type(Setup)] or directly into ! ! double precision arrays of the correct dimensions. ! !--------------------------------------------------------------------! interface ts_read_sf_values module procedure ts_read_sf_values_stp, ts_read_sf_values_direct end interface ts_read_sf_values interface ts_write_sf_info module procedure ts_write_sf_info_stp, ts_write_sf_info_direct end interface ts_write_sf_info contains function new_TrnSet(nTypes, typeName, E_atom, nStrucs, file, scale, & shift) result(ts) implicit none integer, intent(in) :: nTypes character(len=*), dimension(nTypes), intent(in) :: typeName double precision, dimension(nTypes), intent(in) :: E_atom integer, intent(in) :: nStrucs character(len=*), intent(in) :: file double precision, optional, intent(in) :: scale double precision, optional, intent(in) :: shift type(TrnSet) :: ts logical :: fexists inquire(file=trim(adjustl(file)), exist=fexists) if (fexists) then write(0,*) 'Error: file already exists: ', trim(adjustl(file)) stop end if allocate(ts%typeName(nTypes), ts%E_atom(nTypes)) ts%file = trim(adjustl(file)) ts%nTypes = nTypes ts%typeName(1:nTypes) = typeName(1:nTypes) ts%E_atom(1:nTypes) = E_atom(1:nTypes) ts%nStrucs = nStrucs ts%iStruc = 0 if (present(scale) .and. present(shift)) then ts%normalized = .true. ts%scale = scale ts%shift = shift else ts%normalized = .false. ts%scale = 1.0d0 ts%shift = 0.0d0 end if ts%unit = io_unit() open(ts%unit, file=trim(ts%file), status='new', action='write', & form='unformatted') ts%nAtomsTot = 0 ts%mode = 'write' ts%init = .true. call ts_write_header(ts) end function new_TrnSet !--------------------------------------------------------------------! function open_TrnSet(file, maxenergy, readfooter) result(ts) implicit none !------------------------------------------------------------------! ! file path to the training set file ! ! maxenergy maximum atomic energy (do not consider structures ! ! with higher energy) ! ! readfooter skip over all structures to read footer first ! !------------------------------------------------------------------! character(len=*), intent(in) :: file double precision, optional, intent(in) :: maxenergy logical, optional, intent(in) :: readfooter type(TrnSet) :: ts logical :: do_footer call ts_read_header(ts, file) ! To read the footer we have to skip over all structures. ! The flag `readfooter=.false.' can thus save some time, if ! the stats from the footer are not needed. if (present(readfooter)) then do_footer = readfooter else do_footer = .true. end if ts%iStruc = 0 ts%mode = 'read' ts%init = .true. if (.not. ts%normalized) then if (present(maxenergy)) then call ts_normalize(ts, maxenergy) else call ts_normalize(ts, huge(1.0d0)) end if end if if (do_footer) then call ts_read_footer(ts) call rewind_TrnSet(ts) else if (ts%normalized) then ts%E_av = 0.0d0 ts%E_min = -1.0d0 ts%E_max = 1.0d0 end if end function open_TrnSet !--------------------------------------------------------------------! subroutine close_TrnSet(ts, stp, status) implicit none type(TrnSet), intent(inout) :: ts type(Setup), dimension(:), optional, intent(in) :: stp character(len=*), optional, intent(in) :: status if (.not. ts%init) return if (trim(ts%mode) == 'write') then if (present(stp)) then call ts_write_footer(ts, stp=stp) else call ts_write_footer(ts) end if end if if ((trim(ts%mode)=='read') .or. (trim(ts%mode)=='write')) then if (present(status)) then close(ts%unit, status=trim(status)) else close(ts%unit) end if end if deallocate(ts%typeName, ts%E_atom) ts%init = .false. end subroutine close_TrnSet !--------------------------------------------------------------------! subroutine rewind_TrnSet(ts, rec) implicit none type(TrnSet), intent(inout) :: ts integer, optional, intent(in) :: rec integer :: irec call ts_assert_init(ts) call ts_assert_readmode(ts) ! BCM: break the if statement so to evaluate present(rec) only if (present(rec)) then if (rec > 0) then irec = rec - 1 else irec = 0 end if else irec = 0 end if ! BCM: done! if (ts%iStruc > irec) then rewind(ts%unit) ! skip header call ts_skip_header(ts) ! reset structure record pointer ts%iStruc = 0 end if do while(ts%iStruc < irec) ! fast-forward to desired structure call ts_skip_structure(ts) end do end subroutine rewind_TrnSet !--------------------------------------------------------------------! ! only training set info - no actual data ! !--------------------------------------------------------------------! subroutine save_TrnSet_info(ts, file, unit) implicit none type(TrnSet), intent(in) :: ts character(len=*), optional, intent(in) :: file integer, optional, intent(in) :: unit integer :: u call ts_assert_init(ts) call ts_assert_readmode(ts) if (present(unit)) then u = unit else if (present(file)) then u = io_unit() open(u, file=trim(file), status='replace', & form='unformatted', action='write') else write(0,*) "Error: neither unit nor file specified in `save_TrnSet_info()'." stop end if write(u) ts%file write(u) ts%normalized write(u) ts%scale write(u) ts%shift write(u) ts%nTypes write(u) ts%typeName(1:ts%nTypes) write(u) ts%E_atom(1:ts%nTypes) write(u) ts%nAtomsTot write(u) ts%nStrucs write(u) ts%E_min, ts%E_max, ts%E_av if (.not. present(unit)) close(u) end subroutine save_TrnSet_info !--------------------------------------------------------------------! subroutine save_TrnSet_info_ASCII(ts, file, unit) implicit none type(TrnSet), intent(in) :: ts character(len=*), optional, intent(in) :: file integer, optional, intent(in) :: unit integer :: i integer :: u character(len=*), parameter :: dfrmt = '(4(1x,ES24.17))' call ts_assert_init(ts) call ts_assert_readmode(ts) if (present(unit)) then u = unit else if (present(file)) then u = io_unit() open(u, file=trim(file), status='replace', action='write') else write(0,*) "Error: neither unit nor file specified in ", & "`save_TrnSet_info()'." stop end if write(u,'(A)') trim(ts%file) write(u,*) ts%normalized write(u,*) ts%scale write(u,*) ts%shift write(u,*) ts%nTypes write(u,'(A)') (ts%typeName(i), i=1,ts%nTypes) write(u,dfrmt) (ts%E_atom(i), i=1,ts%nTypes) write(u,*) ts%nAtomsTot write(u,*) ts%nStrucs write(u,*) ts%E_min, ts%E_max, ts%E_av if (.not. present(unit)) close(u) end subroutine save_TrnSet_info_ASCII !--------------------------------------------------------------------! function load_TrnSet_info(file, unit) result(ts) implicit none character(len=*), optional, intent(in) :: file integer, optional, intent(in) :: unit type(TrnSet) :: ts integer :: u if (present(unit)) then u = unit else if (present(file)) then u = io_unit() open(u, file=trim(file), status='old', & form='unformatted', action='read') else write(0,*) "Error: neither unit nor file specified in ", & "`load_TrnSet_info()'." stop end if read(u) ts%file read(u) ts%normalized read(u) ts%scale read(u) ts%shift read(u) ts%nTypes allocate(ts%typeName(ts%nTypes), ts%E_atom(ts%nTypes)) read(u) ts%typeName(1:ts%nTypes) read(u) ts%E_atom(1:ts%nTypes) read(u) ts%nAtomsTot read(u) ts%nStrucs read(u) ts%E_min, ts%E_max, ts%E_av ts%iStruc = ts%nStrucs ts%init = .true. ts%mode = 'info' if (.not. present(unit)) close(u) end function load_TrnSet_info !--------------------------------------------------------------------! function load_TrnSet_info_ASCII(file, unit) result(ts) implicit none character(len=*), optional, intent(in) :: file integer, optional, intent(in) :: unit type(TrnSet) :: ts integer :: i integer :: u character(len=*), parameter :: dfrmt = '(4(1x,ES24.17))' if (present(unit)) then u = unit else if (present(file)) then u = io_unit() open(u, file=trim(file), status='old', action='read') else write(0,*) "Error: neither unit nor file specified in `load_TrnSet_info()'." stop end if read(u,'(A)') ts%file read(u,*) ts%normalized read(u,*) ts%scale read(u,*) ts%shift read(u,*) ts%nTypes allocate(ts%typeName(ts%nTypes), ts%E_atom(ts%nTypes)) read(u,'(A)') (ts%typeName(i), i=1,ts%nTypes) read(u,dfrmt) (ts%E_atom(i), i=1,ts%nTypes) read(u,*) ts%nAtomsTot read(u,*) ts%nStrucs read(u,*) ts%E_min, ts%E_max, ts%E_av ts%iStruc = ts%nStrucs ts%init = .true. ts%mode = 'info' if (.not. present(unit)) close(u) end function load_TrnSet_info_ASCII !--------------------------------------------------------------------! function new_TrnSet_info(nTypes) result(ts) implicit none integer, intent(in) :: nTypes type(TrnSet) :: ts allocate(ts%typeName(nTypes), ts%E_atom(nTypes)) ts%nTypes = nTypes ts%normalized = .false. ts%scale = 1.0d0 ts%shift = 1.0d0 ts%file = '' ts%unit = -1 ts%nAtomsTot = 0 ts%nStrucs = 0 ts%iStruc = 0 ts%init = .true. ts%mode = 'info' end function new_TrnSet_info !--------------------------------------------------------------------! ! save entire training set to ASCII file ! !--------------------------------------------------------------------! subroutine save_TrnSet_ASCII(ts, file, unit) implicit none type(TrnSet), intent(inout) :: ts character(len=*), optional, intent(in) :: file integer, optional, intent(in) :: unit integer :: istruc integer :: iatom, nAtoms integer :: itype, nTypes character(len=PATHLEN) :: filename double precision :: E_coh double precision, dimension(3) :: forCart, cooCart integer :: nsf, nsf_max double precision, dimension(:), allocatable :: sfval integer :: i integer :: u character(len=*), parameter :: dfrmt = '(4(1x,ES24.17))' call ts_assert_init(ts) call ts_assert_readmode(ts) if (present(unit)) then u = unit else if (present(file)) then u = io_unit() open(u, file=trim(file), status='replace', action='write') else write(0,*) "Error: neither unit nor file specified in `save_TrnSet()'." stop end if call save_TrnSet_info_ASCII(ts, unit=u) call rewind_TrnSet(ts) nsf_max = 100 allocate(sfval(nsf_max)) do istruc = 1, ts%nStrucs call ts_read_structure_info(ts, filename, nAtoms, nTypes, E_coh) write(u,'(A)') trim(filename) write(u,*) nAtoms, nTypes write(u,*) E_coh do iatom = 1, nAtoms call ts_read_atom_info(ts, itype, cooCart, forCart) write(u,*) itype write(u,*) cooCart write(u,*) forCart call ts_read_sf_info(ts, nsf) write(u,*) nsf if (nsf > nsf_max) then nsf_max = nsf deallocate(sfval) allocate(sfval(nsf_max)) end if call ts_read_sf_values(ts, nsf, sfval(1:nsf)) write(u,dfrmt) (sfval(i), i=1,nsf) end do end do deallocate(sfval) if (.not. present(unit)) close(u) end subroutine save_TrnSet_ASCII !--------------------------------------------------------------------! function load_TrnSet_ASCII(trnset_file, file, unit) result(ts) implicit none !------------------------------------------------------------------! ! This will not load the training set into memory, but will ! ! transcribe the ASCII format to the binary format. A new trnset ! ! file will be created at path 'trnset_file'. ! !------------------------------------------------------------------! character(len=*), intent(in) :: trnset_file character(len=*), optional, intent(in) :: file integer, optional, intent(in) :: unit type(TrnSet) :: ts1, ts integer :: istruc integer :: iatom, nAtoms integer :: itype, nTypes character(len=PATHLEN) :: filename double precision :: energy double precision, dimension(3) :: forCart, cooCart integer :: nsf, nsf_max double precision, dimension(:), allocatable :: sfval integer :: i integer :: u logical :: fexists character(len=*), parameter :: dfrmt = '(4(1x,ES24.17))' inquire(file=trim(trnset_file), exist=fexists) if (fexists) then write(0,*) 'Error: Will not overwrite file: ', trim(trnset_file) stop end if if (present(unit)) then u = unit else if (present(file)) then u = io_unit() open(u, file=trim(file), status='old', action='read') else write(0,*) "Error: neither unit nor file specified in `load_TrnSet()'." stop end if ts1 = load_TrnSet_info(unit=u) ts = new_TrnSet(ts1%nTypes, ts1%typeName, ts1%E_atom, ts1%nStrucs, & trnset_file, ts1%scale, ts1%shift) call close_TrnSet(ts1) nsf_max = 100 allocate(sfval(nsf_max)) do istruc = 1, ts%nStrucs read(u,'(A)') filename read(u,*) nAtoms, nTypes read(u,*) energy call ts_write_structure_info(ts, filename, nAtoms, nTypes, energy) do iatom = 1, nAtoms read(u,*) itype read(u,*) cooCart read(u,*) forCart call ts_write_atom_info(ts, itype, cooCart, forCart) read(u,*) nsf if (nsf > nsf_max) then nsf_max = nsf deallocate(sfval) allocate(sfval(nsf_max)) end if read(u,dfrmt) (sfval(i), i=1,nsf) call ts_write_sf_info_direct(ts, nsf, sfval(1:nsf)) end do end do deallocate(sfval) if (.not. present(unit)) close(u) end function load_TrnSet_ASCII !--------------------------------------------------------------------! ! info about the training set ! !--------------------------------------------------------------------! subroutine ts_print_info(ts) implicit none type(TrnSet), intent(inout) :: ts integer :: itype call ts_assert_init(ts) call aeio_header("Training set info.") write(*,*) write(*,*) 'Training set file : ', trim(adjustl(ts%file)) write(*,*) 'Number of structures in the data set: ', trim(io_adjustl(ts%nStrucs)) if (ts%iStruc /= ts%nStrucs) then if (trim(ts%mode) == 'write') then write(*,*) ' Structures included so far : ', trim(io_adjustl(ts%iStruc)) else write(*,*) ' Structures read so far : ', trim(io_adjustl(ts%iStruc)) end if end if write(*,*) write(*,*) 'Atomic species in training set : ', trim(io_adjustl(ts%nTypes)) write(*,'(1x," Species :")', advance='no') do itype = 1, ts%nTypes write(*,'(1x,A)', advance='no') trim(ts%typeName(itype)) end do write(*,*) write(*,*) if (ts%normalized .or. (ts%iStruc == ts%nStrucs)) then write(*,*) 'Average energy (eV/atom) : ', trim(io_adjustl(ts%E_av,6)) write(*,*) 'Minimum energy (eV/atom) : ', trim(io_adjustl(ts%E_min,6)) write(*,*) 'Maximum energy (eV/atom) : ', trim(io_adjustl(ts%E_max,6)) write(*,*) end if if (ts%normalized) then write(*,*) 'The input and output values have been normalized to [-1.0, 1.0].' write(*,*) 'Structures outside of this interval will not be used for training.' write(*,*) ' Energy scaling factor: ', trim(io_adjustl(ts%scale,6)) write(*,*) ' Atomic energy shift : ', trim(io_adjustl(ts%shift,6)) else write(*,*) 'The input and output values have not yet been normalized.' end if write(*,*) end subroutine ts_print_info !--------------------------------------------------------------------! ! training set file header ! !--------------------------------------------------------------------! subroutine ts_write_header(ts) implicit none type(TrnSet), intent(inout) :: ts call ts_assert_init(ts) call ts_assert_writemode(ts) write(ts%unit) ts%nTypes write(ts%unit) ts%nStrucs write(ts%unit) ts%typeName(:) write(ts%unit) ts%E_atom(:) write(ts%unit) ts%normalized write(ts%unit) ts%scale write(ts%unit) ts%shift end subroutine ts_write_header !--------------------------------------------------------------------! subroutine ts_read_header(ts, file) implicit none type(TrnSet), intent(inout) :: ts character(len=*), intent(in) :: file character(len=1024) :: file2 logical :: fexists call ts_assert_notinit(ts) ! check, if normalized training set file exists: file2 = trim(adjustl(file)) // '.scaled' inquire(file=trim(file2), exist=fexists) if (fexists) then write(*,*) 'Loading scaled training set file: ' // trim(file2) write(*,*) else file2 = trim(adjustl(file)) inquire(file=trim(file2), exist=fexists) if (.not. fexists) then write(0,*) 'Error: file not found: ', trim(adjustl(file)) stop end if end if ts%file = trim(adjustl(file2)) ts%unit = io_unit() open(ts%unit, file=trim(ts%file), status='old', action='read', & form='unformatted') read(ts%unit) ts%nTypes read(ts%unit) ts%nStrucs allocate(ts%typeName(ts%nTypes), ts%E_atom(ts%nTypes)) read(ts%unit) ts%typeName(:) read(ts%unit) ts%E_atom(:) read(ts%unit) ts%normalized read(ts%unit) ts%scale read(ts%unit) ts%shift end subroutine ts_read_header !--------------------------------------------------------------------! subroutine ts_skip_header(ts) implicit none type(TrnSet), intent(inout) :: ts call ts_assert_init(ts) call ts_assert_readmode(ts) read(ts%unit) ! ts%nTypes read(ts%unit) ! ts%nStrucs read(ts%unit) ! ts%typeName(:) read(ts%unit) ! ts%E_atom(:) read(ts%unit) ! ts%normalized read(ts%unit) ! ts%scale read(ts%unit) ! ts%shift end subroutine ts_skip_header !--------------------------------------------------------------------! ! training set file footer containing statistics ! !--------------------------------------------------------------------! subroutine ts_write_footer(ts, stp) implicit none type(TrnSet), intent(inout) :: ts type(Setup), dimension(:), optional, intent(in) :: stp integer :: itype, nTypes logical :: has_setups call ts_assert_init(ts) call ts_assert_writemode(ts) if (ts%iStruc < ts%nStrucs) then write(0,*) "Warning: writing footer to incomplete training set file." end if write(ts%unit) ts%nAtomsTot write(ts%unit) ts%E_av, ts%E_min, ts%E_max if (present(stp)) then nTypes = size(stp(:)) if (nTypes /= ts%nTypes) then write(0,*) "Error: wrong size of array stp in `ts_read_footer()'." stop end if has_setups = .true. write(ts%unit) has_setups do itype = 1, ts%nTypes write(ts%unit) itype call save_Setup(stp(itype), unit=ts%unit) end do else has_setups = .false. write(ts%unit) has_setups end if end subroutine ts_write_footer !--------------------------------------------------------------------! subroutine ts_read_footer(ts, stp) implicit none type(TrnSet), intent(inout) :: ts type(Setup), dimension(:), optional, intent(out) :: stp integer :: i, itype, nTypes logical :: has_setups call ts_assert_init(ts) call ts_assert_readmode(ts) do while(ts%iStruc < ts%nStrucs) call ts_skip_structure(ts) end do read(ts%unit) ts%nAtomsTot read(ts%unit) ts%E_av, ts%E_min, ts%E_max read(ts%unit) has_setups if (present(stp) .and. has_setups) then nTypes = size(stp(:)) if (nTypes /= ts%nTypes) then write(0,*) "Error: wrong size of array stp in `ts_read_footer()'." stop end if do i = 1, nTypes read(ts%unit) itype stp(itype) = load_Setup(ts%typeName, unit=ts%unit) end do else if (present(stp) .and. (.not. has_setups)) then write(0,*) "Error: no structural fingerprint basis setups in training set file!" stop end if end subroutine ts_read_footer !--------------------------------------------------------------------! ! data from structures in the training set ! !--------------------------------------------------------------------! subroutine ts_write_structure_info(ts, filename, nAtoms, nTypes, energy) implicit none type(TrnSet), intent(inout) :: ts character(len=*), intent(in) :: filename integer, intent(in) :: nAtoms integer, intent(in) :: nTypes double precision, intent(in) :: energy double precision :: E_atom call ts_assert_init(ts) call ts_assert_writemode(ts) if (ts%iStruc >= ts%nStrucs) then write(0,*) "Error: too many files for training set." stop else ts%iStruc = ts%iStruc + 1 end if write(ts%unit) len_trim(filename) write(ts%unit) trim(filename) write(ts%unit) nAtoms, nTypes write(ts%unit) energy ! energy stats E_atom = energy/dble(nAtoms) if (ts%iStruc > 1) then ts%E_min = min(ts%E_min, E_atom) ts%E_max = max(ts%E_max, E_atom) ts%E_av = ts%E_av + E_atom/dble(ts%nStrucs) else ts%E_min = E_atom ts%E_max = E_atom ts%E_av = E_atom/dble(ts%nStrucs) end if ! keep track of the atoms in the training set ts%nAtomsTot = ts%nAtomsTot + nAtoms end subroutine ts_write_structure_info !--------------------------------------------------------------------! subroutine ts_read_structure_info(ts, filename, nAtoms, nTypes, energy) implicit none type(TrnSet), intent(inout) :: ts character(len=*), intent(out) :: filename integer, intent(out) :: nAtoms integer, intent(out) :: nTypes double precision, intent(out) :: energy integer :: l call ts_assert_init(ts) call ts_assert_readmode(ts) if (ts%iStruc >= ts%nStrucs) then write(0,*) "Error: no more file record to read." stop else ts%iStruc = ts%iStruc + 1 end if read(ts%unit) l filename = ' ' l = min(l,len(filename)) read(ts%unit) filename(1:l) read(ts%unit) nAtoms, nTypes read(ts%unit) energy end subroutine ts_read_structure_info !--------------------------------------------------------------------! subroutine ts_write_atom_info(ts, itype, cooCart, forCart) implicit none type(TrnSet), intent(inout) :: ts integer, intent(in) :: itype double precision, dimension(3), intent(in) :: cooCart double precision, dimension(3), intent(in) :: forCart call ts_assert_init(ts) call ts_assert_writemode(ts) write(ts%unit) itype write(ts%unit) cooCart(1:3) write(ts%unit) forCart(1:3) end subroutine ts_write_atom_info !--------------------------------------------------------------------! subroutine ts_read_atom_info(ts, itype, cooCart, forCart) implicit none type(TrnSet), intent(inout) :: ts integer, intent(out) :: itype double precision, dimension(3), intent(out) :: cooCart double precision, dimension(3), intent(out) :: forCart call ts_assert_init(ts) call ts_assert_readmode(ts) read(ts%unit) itype read(ts%unit) cooCart(1:3) read(ts%unit) forCart(1:3) end subroutine ts_read_atom_info !--------------------------------------------------------------------! subroutine ts_write_sf_info_stp(ts, stp) implicit none type(TrnSet), intent(inout) :: ts type(Setup), intent(in) :: stp call ts_assert_init(ts) call ts_assert_writemode(ts) write(ts%unit) stp%nsf write(ts%unit) sfval(1:stp%nsf) !$$ write(ts%unit) sfderiv_i(1:3,1:stp%nsf) end subroutine ts_write_sf_info_stp !--------------------------------------------------------------------! subroutine ts_write_sf_info_direct(ts, nsf, sfval) !$$ subroutine ts_write_sf_info_direct(ts, nsf, sfval, sfderiv_i) implicit none type(TrnSet), intent(inout) :: ts integer, intent(in) :: nsf double precision, dimension(nsf), intent(in) :: sfval !$$ double precision, dimension(3,nsf), intent(in) :: sfderiv_i call ts_assert_init(ts) call ts_assert_writemode(ts) write(ts%unit) nsf write(ts%unit) sfval(1:nsf) !$$ write(ts%unit) sfderiv_i(1:3,1:nsf) end subroutine ts_write_sf_info_direct !--------------------------------------------------------------------! subroutine ts_read_sf_info(ts, nsf) implicit none type(TrnSet), intent(inout) :: ts integer, intent(out) :: nsf call ts_assert_init(ts) call ts_assert_readmode(ts) read(ts%unit) nsf end subroutine ts_read_sf_info !--------------------------------------------------------------------! subroutine ts_read_sf_values_stp(ts, stp) implicit none type(TrnSet), intent(inout) :: ts type(Setup), intent(inout) :: stp call ts_assert_init(ts) call ts_assert_readmode(ts) call stp_assert_moduleinit() read(ts%unit) sfval(1:stp%nsf) !$$ read(ts%unit) sfderiv_i(1:3,1:stp%nsf) end subroutine ts_read_sf_values_stp !--------------------------------------------------------------------! subroutine ts_read_sf_values_direct(ts, nsf, sfval) !$$ subroutine ts_read_sf_values_direct(ts, nsf, sfval, sfderiv_i) implicit none type(TrnSet), intent(inout) :: ts integer, intent(in) :: nsf double precision, dimension(nsf), intent(out) :: sfval !$$ double precision, dimension(3,nsf), intent(out) :: sfderiv_i call ts_assert_init(ts) call ts_assert_readmode(ts) read(ts%unit) sfval(1:nsf) !$$ read(ts%unit) sfderiv_i(1:3,1:nsf) end subroutine ts_read_sf_values_direct !--------------------------------------------------------------------! subroutine ts_skip_structure(ts) implicit none type(TrnSet), intent(inout) :: ts character(len=1024) :: filename integer :: nAtoms integer :: nTypes double precision :: energy ! sanity checks & increment of iFile done by read_structure_info call ts_read_structure_info(ts, filename, nAtoms, nTypes, energy) ! skip atomic data call ts_skip_atoms(ts, nAtoms) end subroutine ts_skip_structure !--------------------------------------------------------------------! subroutine ts_skip_atoms(ts, nAtoms) implicit none type(TrnSet), intent(inout) :: ts integer, intent(in) :: nAtoms integer :: iatom do iatom = 1, nAtoms read(ts%unit) ! itype read(ts%unit) ! cooCart(1:3) read(ts%unit) ! forCart(1:3) read(ts%unit) ! stp%nsf read(ts%unit) ! stp%sfval(1:stp%nsf) !$$ read(ts%unit) ! stp%sfderiv_i(1:3,1:stp%nsf) end do end subroutine ts_skip_atoms !--------------------------------------------------------------------! ! save/restore basis function set-ups ! !--------------------------------------------------------------------! subroutine ts_load_Setups(ts, stp) implicit none type(TrnSet), intent(inout) :: ts type(Setup), dimension(:), intent(out) :: stp call ts_read_footer(ts, stp=stp) end subroutine ts_load_Setups !--------------------------------------------------------------------! subroutine ts_unload_Setups(ts, stp) implicit none type(TrnSet), intent(inout) :: ts type(Setup), dimension(:), intent(inout) :: stp integer :: itype call ts_assert_init(ts) do itype = 1, ts%nTypes if (stp(itype)%init) call del_Setup(stp(itype)) end do end subroutine ts_unload_Setups !--------------------------------------------------------------------! ! data set normalization ! !--------------------------------------------------------------------! subroutine ts_normalize(ts, maxenergy) implicit none type(TrnSet), intent(inout) :: ts double precision, intent(in) :: maxenergy type(TrnSet) :: ts2 character(len=1024) :: file2 type(Setup), dimension(:), allocatable :: stp double precision :: scale, shift character(len=1024) :: filename integer :: iatom, nAtoms integer :: itype, nTypes integer :: nStrucs2 integer :: nsf double precision :: energy double precision, dimension(3) :: cooCart, forCart call ts_assert_init(ts) call ts_assert_readmode(ts) ! file name for normalized training set file2 = trim(ts%file) // ".scaled" call aeio_header('Training set normalization') write(*,*) write(*,*) 'The training set will be normalized now. Depending on its size this' write(*,*) 'process can take a while. The normalized data set will be written to' write(*,*) 'another file. Load that file in future to avoid this step.' write(*,*) write(*,*) 'Name of the new training set file: ' // trim(file2) write(*,*) ! read energy stats and structural fingerprint basis setups allocate(stp(ts%nTypes)) call ts_read_footer(ts, stp=stp) call rewind_TrnSet(ts) ! initialize the setup module to allocate needed memory ! however, we set nnb_max=1, as more is not necessary call stp_init(ts%nTypes, stp, 1) ! Further normalization methods could be implemented here call ts_energy_norm_simple(ts, maxenergy, scale, shift) write(*,*) 'The network output energy will be normalized to the interval [-1,1].' write(*,*) ' Energy scaling factor: f = ' // trim(io_adjustl(scale,6)) write(*,*) ' Atomic energy shift : s = ' // trim(io_adjustl(shift,6)) write(*,*) ! If maxenergy is lower than some structure in the initial training ! set, these structures will not be included. Thus, we need to count ! all included structures. if (maxenergy < ts%E_max) then nStrucs2 = 0 do while(ts%iStruc < ts%nStrucs) call ts_read_structure_info(ts, filename, nAtoms, nTypes, energy) energy = scale*(energy - dble(nAtoms)*shift) if (energy/dble(nAtoms) <= 1.001d0) nStrucs2 = nStrucs2 + 1 call ts_skip_atoms(ts, nAtoms) end do call rewind_TrnSet(ts) if (nStrucs2 <= ts%nStrucs) then write(*,*) trim(io_adjustl(ts%nStrucs-nStrucs2)) // & ' high-energy structures will be removed from the scaled training set.' write(*,*) end if else nStrucs2 = ts%nStrucs end if ! new normalized (and possibly smaller) training set ts2 = new_TrnSet(ts%nTypes, ts%typeName, ts%E_atom, nStrucs2, & trim(file2), scale=scale, shift=shift) do while(ts%iStruc < ts%nStrucs) call ts_read_structure_info(ts, filename, nAtoms, nTypes, energy) energy = scale*(energy - dble(nAtoms)*shift) if (energy/dble(nAtoms) <= 1.001d0) then call ts_write_structure_info(ts2, filename, nAtoms, nTypes, energy) do iatom = 1, nAtoms call ts_read_atom_info(ts, itype, cooCart, forCart) ! scale force to match energy forCart(1:3) = scale*forCart(1:3) call ts_write_atom_info(ts2, itype, cooCart, forCart) call ts_read_sf_info(ts, nsf) call ts_read_sf_values(ts, stp(itype)) call stp_normalize(stp(itype)) call ts_write_sf_info(ts2, stp(itype)) end do else ! energy too high, do not include this structure call ts_skip_atoms(ts, nAtoms) end if end do call stp_final(ts%nTypes, stp) call close_TrnSet(ts2, stp=stp) call ts_unload_Setups(ts,stp) deallocate(stp) call close_TrnSet(ts) ts = open_TrnSet(trim(file2)) write(*,*) 'Training set normalization done.' write(*,*) end subroutine ts_normalize !--------------------------------------------------------------------! subroutine ts_energy_norm_simple(ts, maxenergy, scale, shift) implicit none type(TrnSet), intent(inout) :: ts double precision, intent(in) :: maxenergy double precision, intent(out) :: scale double precision, intent(out) :: shift double precision :: E_max call ts_assert_init(ts) call ts_assert_readmode(ts) E_max = min(ts%E_max, maxenergy) ! scale = 1.0d0/(max(abs(ts%E_max), abs(ts%E_min))) scale = 2.0d0/(E_max - ts%E_min) shift = 0.5d0*(E_max + ts%E_min) end subroutine ts_energy_norm_simple !--------------------------------------------------------------------! ! count atoms in train and test set ! !--------------------------------------------------------------------! subroutine ts_count_atoms(ts, isTest, nAtomsTrain, nAtomsTest) implicit none type(TrnSet), intent(inout) :: ts logical, dimension(:), intent(in) :: isTest integer, intent(out) :: nAtomsTrain integer, intent(out) :: nAtomsTest character(len=1024) :: filename integer :: iatom, nAtoms integer :: nTypes double precision :: energy call ts_assert_init(ts) call ts_assert_readmode(ts) if (size(isTest) < ts%nStrucs) then write(0,*) "Error: vector `isTest' too short in `ts_count_atoms()'." stop end if if (ts%iStruc > 0) call rewind_TrnSet(ts) nAtomsTrain = 0 nAtomsTest = 0 do while(ts%iStruc < ts%nStrucs) call ts_read_structure_info(ts, filename, nAtoms, nTypes, energy) do iatom = 1, nAtoms read(ts%unit) ! itype read(ts%unit) ! cooCart(1:3) read(ts%unit) ! forCart(1:3) read(ts%unit) ! stp%nsf read(ts%unit) ! stp%sfval(1:stp%nsf) !$$ read(ts%unit) ! stp%sfderiv_i(1:3,1:stp%nsf) end do if (isTest(ts%iStruc)) then nAtomsTest = nAtomsTest + nAtoms else nAtomsTrain = nAtomsTrain + nAtoms end if end do end subroutine ts_count_atoms !--------------------------------------------------------------------! ! state checks ! !--------------------------------------------------------------------! subroutine ts_assert_init(ts) implicit none type(TrnSet), intent(in) :: ts if (.not. ts%init) then write(0,*) "Error: training set not initialized." stop end if end subroutine ts_assert_init subroutine ts_assert_notinit(ts) implicit none type(TrnSet), intent(in) :: ts if (ts%init) then write(0,*) "Error: training set already initialized." stop end if end subroutine ts_assert_notinit subroutine ts_assert_writemode(ts) implicit none type(TrnSet), intent(in) :: ts if (trim(ts%mode) /= 'write') then write(0,*) "Error: training set not in 'write' mode." stop end if end subroutine ts_assert_writemode subroutine ts_assert_readmode(ts) implicit none type(TrnSet), intent(in) :: ts if (trim(ts%mode) /= 'read') then write(0,*) "Error: training set not in 'read' mode." stop end if end subroutine ts_assert_readmode end module trainset