Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion dpnegf/entrypoints/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ def run(
in_common_options = {}
if jdata.get("device", None):
in_common_options.update({"device": jdata["device"]})

# Otherwise build on the default device (CPU). The NEGF pipeline runs
# hamiltonian init and Fermi-level calc on CPU; only RGF runs on
# RGF_device, and DeviceProperty handles CPU→RGF_device transfer.

if jdata.get("dtype", None):
in_common_options.update({"dtype": jdata["dtype"]})

Expand Down
11 changes: 6 additions & 5 deletions dpnegf/negf/density.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def integrate(self, deviceprop, kpoint, eta_lead=1e-5, eta_device=0.,Vbias=None,
deviceprop.lead_L.self_energy(kpoint=kpoint, energy=e, eta_lead=eta_lead)
deviceprop.lead_R.self_energy(kpoint=kpoint, energy=e, eta_lead=eta_lead)
deviceprop.cal_green_function(energy=e, kpoint=kpoint, block_tridiagonal=block_tridiagonal, eta_device=eta_device)
gr_gamma_ga = torch.mm(torch.mm(deviceprop.grd[0], deviceprop.lead_R.gamma), deviceprop.grd[0].conj().T).real
gr_gamma_ga = torch.mm(torch.mm(deviceprop.grd[0], deviceprop.lead_R.gamma.to(deviceprop.grd[0].device)), deviceprop.grd[0].conj().T).real
gr_gamma_ga = gr_gamma_ga * (deviceprop.lead_R.fermi_dirac(e+deviceprop.mu) - deviceprop.lead_L.fermi_dirac(e+deviceprop.mu))
DM_neq = DM_neq + wlg[i] * gr_gamma_ga
else:
Expand Down Expand Up @@ -292,10 +292,11 @@ def density_integrate_Fiori(self,e_grid,kpoint,Vbias,block_tridiagonal,subblocks
x0 = min(lx, tx)
x1 = min(rx, ty)

gammaL = torch.zeros(size=(tx, tx), dtype=torch.complex128)
gammaL[:x0, :x0] += deviceprop.lead_L.gamma[:x0, :x0]
gammaR = torch.zeros(size=(ty, ty), dtype=torch.complex128)
gammaR[-x1:, -x1:] += deviceprop.lead_R.gamma[-x1:, -x1:]
gf_device = deviceprop.g_trans.device
gammaL = torch.zeros(size=(tx, tx), dtype=torch.complex128, device=gf_device)
gammaL[:x0, :x0] += deviceprop.lead_L.gamma[:x0, :x0].to(gf_device)
gammaR = torch.zeros(size=(ty, ty), dtype=torch.complex128, device=gf_device)
gammaR[-x1:, -x1:] += deviceprop.lead_R.gamma[-x1:, -x1:].to(gf_device)

if not block_tridiagonal:
A_Rd = [torch.mm(torch.mm(deviceprop.grd[i],gammaR),deviceprop.grd[i].conj().T) for i in range(len(deviceprop.grd))]
Expand Down
55 changes: 38 additions & 17 deletions dpnegf/negf/device_property.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import torch
import os
from typing import Union
from dpnegf.negf.negf_utils import update_kmap, update_temp_file,gauss_xw, leggauss
from dpnegf.negf.density import Ozaki
from dpnegf.utils.constants import Boltzmann, eV2J,pi
Expand All @@ -24,7 +25,7 @@ def _build_s_in_batched(hd, seinL, seinR, idx0, idy0, idx1, idy1):
here are the already-scaled 1j*(seL - seL.mH) * f tensors of shape [B,n,n]).
'''
B = seinL.shape[0]
s_in = [torch.zeros((B,) + tuple(blk.shape), dtype=torch.complex128) for blk in hd]
s_in = [torch.zeros((B,) + tuple(blk.shape), dtype=torch.complex128, device=blk.device) for blk in hd]
s_in[0][:, :idx0, :idy0] = s_in[0][:, :idx0, :idy0] + seinL[:, :idx0, :idy0]
s_in[-1][:, -idx1:, -idy1:] = s_in[-1][:, -idx1:, -idy1:] + seinR[:, -idx1:, -idy1:]
return s_in
Expand Down Expand Up @@ -90,14 +91,17 @@ class DeviceProperty(object):
calculate density matrix

'''
def __init__(self, hamiltonian, structure, results_path, e_T=300,
efermi: dict=None, chemiPot: dict=None, E_ref: float=None ) -> None:
def __init__(self, hamiltonian, structure, results_path, e_T=300,
efermi: dict=None, chemiPot: dict=None, E_ref: float=None,
rgf_device: Union[str, torch.device]="cpu") -> None:
self.greenfuncs = 0
self.hamiltonian = hamiltonian
self.structure = structure # ase Atoms
self.results_path = results_path
self.cdtype = torch.complex128
self.device = "cpu"
if isinstance(rgf_device, str):
rgf_device = torch.device(rgf_device)
self.rgf_device = rgf_device
self.kBT = Boltzmann * e_T / eV2J
self.e_T = e_T
# self.efermi = efermi
Expand Down Expand Up @@ -164,7 +168,7 @@ def cal_green_function(self, energy, kpoint, eta_device=0., block_tridiagonal=Tr
A boolean parameter that indicates whether the last column blocks of the retarded Green's function are needed.
'''
assert len(np.array(kpoint).reshape(-1)) == 3
energy = torch.as_tensor(energy, dtype=torch.complex128)
energy = torch.as_tensor(energy, dtype=torch.complex128, device=self.rgf_device)
if energy.ndim == 0:
energy = energy.reshape(1)
assert energy.ndim == 1, f"energy must be 0-d, scalar, or 1-D [B]; got shape {tuple(energy.shape)}"
Expand Down Expand Up @@ -197,8 +201,9 @@ def cal_green_function(self, energy, kpoint, eta_device=0., block_tridiagonal=Tr
self.V = torch.tensor(0.)
else:
self.V = Vbias

assert torch.is_tensor(self.V)
self.V = self.V.to(self.rgf_device)
if not self.oldV is None:
if torch.abs(self.V - self.oldV).sum() > 1e-5:
self.newV_flag = True
Expand All @@ -207,17 +212,25 @@ def cal_green_function(self, energy, kpoint, eta_device=0., block_tridiagonal=Tr
else:
self.newV_flag = True # for the first time to run cal_green_function in Poisson-NEGF SCF

if (not (hasattr(self, "hd") and hasattr(self, "sd"))) or (self.newK_flag or self.newV_flag):
if (not (hasattr(self, "hd") and hasattr(self, "sd"))) or (self.newK_flag or self.newV_flag):
self.hd, self.sd, self.hl, self.su, self.sl, self.hu = self.hamiltonian.get_hs_device(self.kpoint, self.V, block_tridiagonal)
# TODO: if all blocks transferred to GPU, OOM may happen for large systems.
# Optimization should be implemented here.
self.hd = [b.to(self.rgf_device) for b in self.hd]
self.sd = [b.to(self.rgf_device) for b in self.sd]
self.hl = [b.to(self.rgf_device) for b in self.hl]
self.su = [b.to(self.rgf_device) for b in self.su]
self.sl = [b.to(self.rgf_device) for b in self.sl]
self.hu = [b.to(self.rgf_device) for b in self.hu]


tags = ["g_trans","gr_lc", \
"grd", "grl", "gru", "gr_left", \
"gnd", "gnl", "gnu", "gin_left", \
"gpd", "gpl", "gpu", "gip_left"]

seL = self.lead_L.se
seR = self.lead_R.se
seL = self.lead_L.se.to(self.rgf_device)
seR = self.lead_R.se.to(self.rgf_device)
if batched_mode:
assert seL.ndim == 3 and seR.ndim == 3, f"In batched mode, the self-energy should have shape [B,n,n], but got {seL.shape} and {seR.shape}"
else:
Expand Down Expand Up @@ -254,7 +267,7 @@ def cal_green_function(self, energy, kpoint, eta_device=0., block_tridiagonal=Tr
else:
seinL = 1j*(seL-seL.conj().T) * self.lead_L.fermi_dirac(energy+self.E_ref).reshape(-1)
seinR = 1j*(seR-seR.conj().T) * self.lead_R.fermi_dirac(energy+self.E_ref).reshape(-1)
s_in = [torch.zeros(i.shape).cdouble() for i in self.hd]
s_in = [torch.zeros(i.shape, dtype=torch.complex128, device=self.rgf_device) for i in self.hd]
s_in[0][:idx0,:idy0] = s_in[0][:idx0,:idy0] + seinL[:idx0,:idy0]
s_in[-1][-idx1:,-idy1:] = s_in[-1][-idx1:,-idy1:] + seinR[-idx1:,-idy1:]
else:
Expand Down Expand Up @@ -363,17 +376,17 @@ def _cal_tc_(self):
g_trans = self.g_trans
batched = g_trans.ndim == 3
tx, ty = g_trans.shape[-2], g_trans.shape[-1]
gammaL_full = self.lead_L.gamma
gammaR_full = self.lead_R.gamma
gammaL_full = self.lead_L.gamma.to(self.rgf_device)
gammaR_full = self.lead_R.gamma.to(self.rgf_device)
lx = gammaL_full.shape[-2]
rx = gammaR_full.shape[-2]
x0 = min(lx, tx)
x1 = min(rx, ty)

gL_shape = (g_trans.shape[0], tx, tx) if batched else (tx, tx)
gR_shape = (g_trans.shape[0], ty, ty) if batched else (ty, ty)
gammaL = torch.zeros(size=gL_shape, dtype=self.cdtype, device=self.device)
gammaR = torch.zeros(size=gR_shape, dtype=self.cdtype, device=self.device)
gammaL = torch.zeros(size=gL_shape, dtype=self.cdtype, device=self.rgf_device)
gammaR = torch.zeros(size=gR_shape, dtype=self.cdtype, device=self.rgf_device)
if batched:
gammaL[:, :x0, :x0] = gammaL[:, :x0, :x0] + gammaL_full[:, :x0, :x0]
gammaR[:, -x1:, -x1:] = gammaR[:, -x1:, -x1:] + gammaR_full[:, -x1:, -x1:]
Expand All @@ -394,9 +407,16 @@ def _cal_dos_(self):
'''
dos = 0
#TODO: transfer cal_dos to static method for any k and energy
if (not(hasattr(self, "hd") and hasattr(self, "sd"))) or (self.newK_flag or self.newV_flag):
if (not(hasattr(self, "hd") and hasattr(self, "sd"))) or (self.newK_flag or self.newV_flag):
self.hd, self.sd, self.hl, self.su, self.sl, self.hu = \
self.hamiltonian.get_hs_device(self.kpoint, self.V, self.block_tridiagonal)
# defensive .to(self.device) in case the blocks came back from a cached/legacy path on CPU.
self.hd = [b.to(self.rgf_device) for b in self.hd]
self.sd = [b.to(self.rgf_device) for b in self.sd]
self.hl = [b.to(self.rgf_device) for b in self.hl]
self.su = [b.to(self.rgf_device) for b in self.su]
self.sl = [b.to(self.rgf_device) for b in self.sl]
self.hu = [b.to(self.rgf_device) for b in self.hu]

for jj in range(len(self.grd)):
if not self.block_tridiagonal or len(self.gru) == 0:
Expand Down Expand Up @@ -460,8 +480,9 @@ def _cal_local_current_(self):
# check the energy grid satisfied the requirement

na = len(self.norbs_per_atom)
local_current = torch.zeros(na, na)
local_current = torch.zeros(na, na, device=self.rgf_device)
hd = self.hamiltonian.get_hs_device(kpoint=self.kpoint, V=self.V, block_tridiagonal=self.block_tridiagonal)[0][0]
hd = hd.to(self.rgf_device) # defensive .to(self.device) in case the block came back from a cached/legacy path on CPU.

for i in range(na):
for j in range(na):
Expand Down Expand Up @@ -490,7 +511,7 @@ def _cal_density_(self, dm_options):

'''
dm = Ozaki(**dm_options)
DM_eq, DM_neq = dm.integrate(deviceprop=self.device, kpoint=self.kpoint)
DM_eq, DM_neq = dm.integrate(deviceprop=self.rgf_device, kpoint=self.kpoint)

return DM_eq, DM_neq

Expand Down
2 changes: 1 addition & 1 deletion dpnegf/negf/lead_property.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def _load_self_energy(save_path: str, kpoint, energy, save_format: str):
elif save_format == "h5":
try:
data = read_from_hdf5(save_path, kpoint, energy)
se = torch.as_tensor(data, dtype=torch.complex128) # 自能一般是复数
se = torch.as_tensor(data, dtype=torch.complex128) # self-energy should be complex
except KeyError as e:
kx, ky, kz = np.asarray(kpoint, dtype=float).reshape(3)
ev = energy.item() if hasattr(energy, "item") else float(energy)
Expand Down
29 changes: 21 additions & 8 deletions dpnegf/negf/negf_hamiltonian_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ def __init__(self,

if isinstance(torch_device, str):
torch_device = torch.device(torch_device)
self.torch_device = torch_device
self.torch_device = torch_device
self.model = model
self.model.to(self.torch_device)
self.AtomicData_options = AtomicData_options
self.model.eval()

Expand Down Expand Up @@ -417,8 +418,8 @@ def Hamiltonian_initialized(self,kpoints:List[List[float]],useBloch:bool,bloch_f
f.create_dataset(f"{key}", data=np.array("None", dtype=h5py.string_dtype()))
elif key in ["HL", "SL", "HDL", "SDL", "HLL", "SLL"]:
# TODO: HDL,SDL can be saved in reduced form by eliminating zero rows
f.create_dataset(f"{key}_real", data=items.real.numpy().astype(numpy_dtype))
f.create_dataset(f"{key}_imag", data=items.imag.numpy().astype(numpy_dtype))
f.create_dataset(f"{key}_real", data=items.real.cpu().numpy().astype(numpy_dtype))
f.create_dataset(f"{key}_imag", data=items.imag.cpu().numpy().astype(numpy_dtype))
else:
raise ValueError(f"Unsupported key {key} in HS_leads")

Expand Down Expand Up @@ -462,11 +463,11 @@ def Hamiltonian_initialized(self,kpoints:List[List[float]],useBloch:bool,bloch_f
sub_block_len = len(item)
for idx_block in range(sub_block_len):
# group.create_dataset(f"{key}_k{idx_k}_b{idx_block}", data=item[idx_block].numpy())
group.create_dataset(f"{key}_k{idx_k}_b{idx_block}_real", data=item[idx_block].real.numpy())
group.create_dataset(f"{key}_k{idx_k}_b{idx_block}_imag", data=item[idx_block].imag.numpy())
group.create_dataset(f"{key}_k{idx_k}_b{idx_block}_real", data=item[idx_block].real.cpu().numpy())
group.create_dataset(f"{key}_k{idx_k}_b{idx_block}_imag", data=item[idx_block].imag.cpu().numpy())
else:
assert key in ["HD", "SD", "Hall", "Sall"], f"Unsupported key {key} for not block_tridiagnal case"
f.create_dataset(f"{key}", data=items.numpy())
f.create_dataset(f"{key}", data=items.cpu().numpy())
else:
raise ValueError(f"Unsupported key {key} in HS_device")

Expand Down Expand Up @@ -846,6 +847,15 @@ def get_hs_device(self, kpoint=[0,0,0], V=None, block_tridiagonal=False, only_su
HS_device["hl"][ik], HS_device["su"][ik], \
HS_device["sl"][ik], HS_device["hu"][ik]

# move blocks onto torch_device so downstream RGF runs on the chosen device.
hd_k = [b.to(self.torch_device) for b in hd_k]
sd_k = [b.to(self.torch_device) for b in sd_k]
hl_k = [b.to(self.torch_device) for b in hl_k]
su_k = [b.to(self.torch_device) for b in su_k]
sl_k = [b.to(self.torch_device) for b in sl_k]
hu_k = [b.to(self.torch_device) for b in hu_k]
V = torch.as_tensor(V).to(self.torch_device)

if V.shape == torch.Size([]):
allorb = sum([hd_k[i].shape[0] for i in range(len(hd_k))])
V = V.repeat(allorb)
Expand All @@ -855,15 +865,18 @@ def get_hs_device(self, kpoint=[0,0,0], V=None, block_tridiagonal=False, only_su
l_slice = slice(counted, counted+hd_k[i].shape[0])
V_sub = V[l_slice].view(-1,1).cdouble()
hd_k[i] = hd_k[i] - V_sub * sd_k[i]
if i<len(hd_k)-1:
if i<len(hd_k)-1:
hu_k[i] = hu_k[i] - V_sub * su_k[i]
if i > 0:
hl_k[i-1] = hl_k[i-1] - V_sub * sl_k[i-1]
counted += hd_k[i].shape[0]

return hd_k , sd_k, hl_k , su_k, sl_k, hu_k
else:
HD_k, SD_k = HS_device["HD"][ik], HS_device["SD"][ik]
HD_k = HD_k.to(self.torch_device)
SD_k = SD_k.to(self.torch_device)
V = torch.as_tensor(V).to(self.torch_device)
return HD_k - V*SD_k, SD_k, [], [], [], []

def get_hs_lead(self, kpoint, tab, v):
Expand Down
Loading