Actual source code: vecseqcupm.cu
  1: #include "../vecseqcupm.hpp" /*I <petscvec.h> I*/
  2: #include "../vecseqcupm_impl.hpp"
  4: using namespace ::Petsc::vec::cupm;
  5: using ::Petsc::device::cupm::DeviceType;
  7: template class impl::VecSeq_CUPM<DeviceType::CUDA>; // explicit instantiation of the CUDA variant of VecSeq_CUPM in this translation unit
  9: static constexpr auto VecSeq_CUDA = impl::VecSeq_CUPM<DeviceType::CUDA>{}; // file-local instance; VecCreate_SeqCUDA() and VecConvert_Seq_SeqCUDA_inplace() call its members
 11: /*MC
 12:   VECSEQCUDA - VECSEQCUDA = "seqcuda" - The basic sequential vector, modified to use CUDA
 14:   Options Database Key:
 15: . -vec_type seqcuda - sets the vector type to `VECSEQCUDA` during a call to `VecSetFromOptions()`
 17:   Level: beginner
 19: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQ`,
 20: `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECCUDA`, `VECHIP`, `VECMPICUDA`, `VECMPIHIP`, `VECSEQHIP`
 21: M*/
 23: PetscErrorCode VecCreate_SeqCUDA(Vec v)
 24: {
 25:   PetscFunctionBegin;
 26:   PetscCall(VecSeq_CUDA.Create(v)); // delegate to the file-scope VecSeq_CUPM<DeviceType::CUDA> instance; presumably this is the constructor registered for VECSEQCUDA (registration is not visible here)
 27:   PetscFunctionReturn(PETSC_SUCCESS);
 28: }
 30: PetscErrorCode VecConvert_Seq_SeqCUDA_inplace(Vec v)
 31: {
 32:   PetscFunctionBegin;
 33:   PetscCall(VecSeq_CUDA.Convert_IMPL_IMPLCUPM(v)); // delegate the conversion of v to the file-scope VecSeq_CUPM<DeviceType::CUDA> instance
 34:   PetscFunctionReturn(PETSC_SUCCESS);
 35: }
 37: // PetscClangLinter pragma disable: -fdoc-internal-linkage
 38: /*@
 39:   VecCreateSeqCUDA - Creates a standard, sequential, array-style vector.
 41:   Collective, Possibly Synchronous
 43:   Input Parameters:
 44: + comm - the communicator, must be `PETSC_COMM_SELF`
 45: - n    - the vector length
 47:   Output Parameter:
 48: . v - the vector
 50:   Level: intermediate
 52:   Notes:
 53:   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
 54:   existing vector.
 56:   This function may initialize `PetscDevice`, which may incur a device synchronization.
 58: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeq()`, `VecCreateSeqCUDAWithArray()`,
 59:           `VecCreateMPI()`, `VecCreateMPICUDA()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
 60: @*/
 61: PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm, PetscInt n, Vec *v)
 62: {
 63:   PetscFunctionBegin;
 64:   PetscCall(VecCreateSeqCUPMAsync<DeviceType::CUDA>(comm, n, v)); // thin CUDA wrapper: arguments are forwarded unchanged to the DeviceType::CUDA instantiation
 65:   PetscFunctionReturn(PETSC_SUCCESS);
 66: }
 68: // PetscClangLinter pragma disable: -fdoc-internal-linkage
 69: /*@C
 70:   VecCreateSeqCUDAWithArrays - Creates a sequential, array-style vector using CUDA, where the
 71:   user provides the complete array space to store the vector values.
 73:   Collective, Possibly Synchronous
 75:   Input Parameters:
 76: + comm     - the communicator, must be `PETSC_COMM_SELF`
 77: . bs       - the block size
 78: . n        - the local vector length
 79: . cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
 80: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)
 82:   Output Parameter:
 83: . v - the vector
 85:   Level: intermediate
 87:   Notes:
 88:   If the user-provided array is `NULL`, then `VecCUDAPlaceArray()` can be used at a later stage to
 89:   SET the array for storing the vector values. Otherwise, the array must be allocated on the
 90:   device.
 92:   If both cpuarray and gpuarray are provided, the provided arrays must have identical
 93:   values.
 95:   The arrays are NOT freed when the vector is destroyed via `VecDestroy()`. The user must free
 96:   them themselves, but not until the vector is destroyed.
 98:   This function may initialize `PetscDevice`, which may incur a device synchronization.
100: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeqWithArray()`, `VecCreateSeqCUDA()`,
101:           `VecCreateSeqCUDAWithArray()`, `VecCreateMPICUDA()`, `VecCreateMPICUDAWithArray()`,
102:           `VecCreateMPICUDAWithArrays()`, `VecCUDAPlaceArray()`
103: @*/
104: PetscErrorCode VecCreateSeqCUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
105: {
106:   PetscFunctionBegin;
107:   PetscCall(VecCreateSeqCUPMWithArraysAsync<DeviceType::CUDA>(comm, bs, n, cpuarray, gpuarray, v)); // thin CUDA wrapper: host and device arrays are forwarded unchanged, in that order
108:   PetscFunctionReturn(PETSC_SUCCESS);
109: }
111: // PetscClangLinter pragma disable: -fdoc-internal-linkage
112: /*@C
113:   VecCreateSeqCUDAWithArray - Creates a sequential, array-style vector using CUDA, where the
114:   user provides the device array space to store the vector values.
116:   Collective, Possibly Synchronous
118:   Input Parameters:
119: + comm     - the communicator, must be `PETSC_COMM_SELF`
120: . bs       - the block size
121: . n        - the vector length
122: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)
124:   Output Parameter:
125: . v - the vector
127:   Level: intermediate
129:   Notes:
130:   If the user-provided array is `NULL`, then `VecCUDAPlaceArray()` can be used at a later stage to
131:   SET the array for storing the vector values. Otherwise, the array must be allocated on the
132:   device.
134:   The array is NOT freed when the vector is destroyed via `VecDestroy()`. The user must free the
135:   array themselves, but not until the vector is destroyed.
137:   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
138:   existing vector.
140:   This function may initialize `PetscDevice`, which may incur a device synchronization.
142: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeq()`, `VecCreateSeqWithArray()`,
143:           `VecCreateMPIWithArray()`, `VecCreateSeqCUDA()`, `VecCreateMPICUDAWithArray()`, `VecCUDAPlaceArray()`,
144:           `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
145: @*/
146: PetscErrorCode VecCreateSeqCUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar gpuarray[], Vec *v)
147: {
148:   PetscFunctionBegin;
149:   PetscCall(VecCreateSeqCUDAWithArrays(comm, bs, n, nullptr, gpuarray, v)); // device-only variant: calls VecCreateSeqCUDAWithArrays() with nullptr for its cpuarray argument
150:   PetscFunctionReturn(PETSC_SUCCESS);
151: }
153: // PetscClangLinter pragma disable: -fdoc-internal-linkage
154: /*@C
155:   VecCUDAGetArray - Provides access to the device buffer inside a vector
157:   Logically Collective; Asynchronous; No Fortran Support
159:   Input Parameter:
160: . v - the vector
162:   Output Parameter:
163: . a - the device buffer
165:   Level: intermediate
167:   Notes:
168:   This routine has semantics similar to `VecGetArray()`; the returned buffer points to a
169:   consistent view of the vector data. This may involve copying data from the host to the device
170:   if the data on the device is out of date. It is also assumed that the returned buffer is
171:   immediately modified, marking the host data out of date. This is similar to intent(inout) in
172:   Fortran.
174:   If the user does require strong memory guarantees, they are encouraged to use
175:   `VecCUDAGetArrayRead()` and/or `VecCUDAGetArrayWrite()` instead.
177:   The user must call `VecCUDARestoreArray()` when they are finished using the array.
179:   Developer Note:
180:   If the device memory hasn't been allocated previously it will be allocated as part of this
181:   routine.
183: .seealso: [](ch_vectors), `VecCUDARestoreArray()`, `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
184:           `VecGetArrayRead()`, `VecGetArrayWrite()`
185: @*/
186: PetscErrorCode VecCUDAGetArray(Vec v, PetscScalar **a)
187: {
188:   PetscFunctionBegin;
189:   PetscCall(VecCUPMGetArrayAsync<DeviceType::CUDA>(v, a)); // thin CUDA wrapper: forwards v and the output pointer a unchanged
190:   PetscFunctionReturn(PETSC_SUCCESS);
191: }
193: // PetscClangLinter pragma disable: -fdoc-internal-linkage
194: /*@C
195:   VecCUDARestoreArray - Restore a device buffer previously acquired with `VecCUDAGetArray()`.
197:   Not Collective; Asynchronous; No Fortran Support
199:   Input Parameters:
200: + v - the vector
201: - a - the device buffer
203:   Level: intermediate
205:   Note:
206:   The restored pointer is invalid after this function returns. This function also marks the
207:   host data as out of date. Subsequent access to the vector data on the host side via
208:   `VecGetArray()` will incur a (synchronous) data transfer.
210: .seealso: [](ch_vectors), `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
211:           `VecRestoreArray()`, `VecGetArrayRead()`
212: @*/
213: PetscErrorCode VecCUDARestoreArray(Vec v, PetscScalar **a)
214: {
215:   PetscFunctionBegin;
216:   PetscCall(VecCUPMRestoreArrayAsync<DeviceType::CUDA>(v, a)); // thin CUDA wrapper: counterpart of VecCUDAGetArray(), forwards v and a unchanged
217:   PetscFunctionReturn(PETSC_SUCCESS);
218: }
220: // PetscClangLinter pragma disable: -fdoc-internal-linkage
221: /*@C
222:   VecCUDAGetArrayRead - Provides read access to the CUDA buffer inside a vector.
224:   Not Collective; Asynchronous; No Fortran Support
226:   Input Parameter:
227: . v - the vector
229:   Output Parameter:
230: . a - the CUDA pointer.
232:   Level: intermediate
234:   Notes:
235:   See `VecCUDAGetArray()` for data movement semantics of this function.
237:   This function assumes that the user will not modify the vector data. This is analogous to
238:   intent(in) in Fortran.
240:   The device pointer must be restored by calling `VecCUDARestoreArrayRead()`. If the data on the
241:   host side was previously up to date it will remain so, i.e. data on both the device and the
242:   host is up to date. Accessing data on the host side does not incur a device to host data
243:   transfer.
245: .seealso: [](ch_vectors), `VecCUDARestoreArrayRead()`, `VecCUDAGetArray()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
246:           `VecGetArrayRead()`
247: @*/
248: PetscErrorCode VecCUDAGetArrayRead(Vec v, const PetscScalar **a)
249: {
250:   PetscFunctionBegin;
251:   PetscCall(VecCUPMGetArrayReadAsync<DeviceType::CUDA>(v, a)); // thin CUDA wrapper: forwards v and the const-qualified output pointer a unchanged
252:   PetscFunctionReturn(PETSC_SUCCESS);
253: }
255: // PetscClangLinter pragma disable: -fdoc-internal-linkage
256: /*@C
257:   VecCUDARestoreArrayRead - Restore a CUDA device pointer previously acquired with
258:   `VecCUDAGetArrayRead()`.
260:   Not Collective; Asynchronous; No Fortran Support
262:   Input Parameters:
263: + v - the vector
264: - a - the CUDA device pointer
266:   Level: intermediate
268:   Note:
269:   This routine does not modify the corresponding array on the host in any way. The pointer is
270:   invalid after this function returns.
272: .seealso: [](ch_vectors), `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecCUDAGetArray()`, `VecGetArray()`,
273:           `VecRestoreArray()`, `VecGetArrayRead()`
274: @*/
275: PetscErrorCode VecCUDARestoreArrayRead(Vec v, const PetscScalar **a)
276: {
277:   PetscFunctionBegin;
278:   PetscCall(VecCUPMRestoreArrayReadAsync<DeviceType::CUDA>(v, a)); // thin CUDA wrapper: counterpart of VecCUDAGetArrayRead(), forwards v and a unchanged
279:   PetscFunctionReturn(PETSC_SUCCESS);
280: }
282: // PetscClangLinter pragma disable: -fdoc-internal-linkage
283: /*@C
284:   VecCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a vector.
286:    Logically Collective; Asynchronous; No Fortran Support
288:   Input Parameter:
289: . v - the vector
291:   Output Parameter:
292: . a - the CUDA pointer
294:   Level: advanced
296:   Notes:
297:   The data pointed to by the device pointer is uninitialized. The user may not read from this
298:   data. Furthermore, the entire array needs to be filled by the user to obtain well-defined
299:   behaviour. The device memory will be allocated by this function if it hasn't been allocated
300:   previously. This is analogous to intent(out) in Fortran.
302:   The device pointer needs to be released with `VecCUDARestoreArrayWrite()`. When the pointer is
303:   released the host data of the vector is marked as out of date. Subsequent access of the host
304:   data with e.g. `VecGetArray()` incurs a device to host data transfer.
306: .seealso: [](ch_vectors), `VecCUDARestoreArrayWrite()`, `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`,
307:           `VecGetArray()`, `VecGetArrayRead()`, `VecGetArrayWrite()`
308: @*/
309: PetscErrorCode VecCUDAGetArrayWrite(Vec v, PetscScalar **a)
310: {
311:   PetscFunctionBegin;
312:   PetscCall(VecCUPMGetArrayWriteAsync<DeviceType::CUDA>(v, a)); // thin CUDA wrapper: forwards v and the output pointer a unchanged
313:   PetscFunctionReturn(PETSC_SUCCESS);
314: }
316: // PetscClangLinter pragma disable: -fdoc-internal-linkage
317: /*@C
318:   VecCUDARestoreArrayWrite - Restore a CUDA device pointer previously acquired with
319:   `VecCUDAGetArrayWrite()`.
321:    Logically Collective; Asynchronous; No Fortran Support
323:   Input Parameters:
324: + v - the vector
325: - a - the CUDA device pointer.  This pointer is invalid after `VecCUDARestoreArrayWrite()` returns.
327:   Level: intermediate
329:   Note:
330:   Data on the host will be marked as out of date. Subsequent access of the data on the host
331:   side e.g. with `VecGetArray()` will incur a device to host data transfer.
333: .seealso: [](ch_vectors), `VecCUDAGetArrayWrite()`, `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`,
334:           `VecGetArray()`, `VecRestoreArray()`, `VecGetArrayRead()`
335: @*/
336: PetscErrorCode VecCUDARestoreArrayWrite(Vec v, PetscScalar **a)
337: {
338:   PetscFunctionBegin;
339:   PetscCall(VecCUPMRestoreArrayWriteAsync<DeviceType::CUDA>(v, a)); // thin CUDA wrapper: counterpart of VecCUDAGetArrayWrite(), forwards v and a unchanged
340:   PetscFunctionReturn(PETSC_SUCCESS);
341: }
343: // PetscClangLinter pragma disable: -fdoc-internal-linkage
344: /*@C
345:   VecCUDAPlaceArray - Allows one to replace the GPU array in a vector with a GPU array provided
346:   by the user.
348:   Logically Collective; Asynchronous; No Fortran Support
350:   Input Parameters:
351: + vin   - the vector
352: - array - the GPU array
354:   Level: advanced
356:   Notes:
357:   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
359:   This routine is useful to avoid copying an array into a vector, though you can return to the
360:   original GPU array with a call to `VecCUDAResetArray()`.
362:   It is not possible to use `VecCUDAPlaceArray()` and `VecPlaceArray()` at the same time on the
363:   same vector.
365:   `vin` does not take ownership of `array` in any way. The user must free `array` themselves
366:   but be careful not to do so before the vector has either been destroyed, had its original
367:   array restored with `VecCUDAResetArray()` or permanently replaced with
368:   `VecCUDAReplaceArray()`.
370: .seealso: [](ch_vectors), `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`,
371:           `VecResetArray()`, `VecCUDAResetArray()`, `VecCUDAReplaceArray()`
372: @*/
373: PetscErrorCode VecCUDAPlaceArray(Vec vin, const PetscScalar array[])
374: {
375:   PetscFunctionBegin;
376:   PetscCall(VecCUPMPlaceArrayAsync<DeviceType::CUDA>(vin, array)); // thin CUDA wrapper: forwards the vector and the user-provided array unchanged
377:   PetscFunctionReturn(PETSC_SUCCESS);
378: }
380: // PetscClangLinter pragma disable: -fdoc-internal-linkage
381: /*@C
382:   VecCUDAReplaceArray - Permanently replace the GPU array in a vector with a GPU array provided
383:   by the user.
385:   Logically Collective; No Fortran Support
387:   Input Parameters:
388: + vin   - the vector
389: - array - the GPU array
391:   Level: advanced
393:   Notes:
394:   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
396:   This is useful to avoid copying a GPU array into a vector.
398:   This frees the memory associated with the old GPU array. The vector takes ownership of the
399:   passed array so it CANNOT be freed by the user. It will be freed when the vector is
400:   destroyed.
402: .seealso: [](ch_vectors), `VecGetArray()`, `VecRestoreArray()`, `VecPlaceArray()`, `VecResetArray()`,
403:           `VecCUDAResetArray()`, `VecCUDAPlaceArray()`, `VecReplaceArray()`
404: @*/
405: PetscErrorCode VecCUDAReplaceArray(Vec vin, const PetscScalar array[])
406: {
407:   PetscFunctionBegin;
408:   PetscCall(VecCUPMReplaceArrayAsync<DeviceType::CUDA>(vin, array)); // thin CUDA wrapper: forwards the vector and the user-provided array unchanged
409:   PetscFunctionReturn(PETSC_SUCCESS);
410: }
412: // PetscClangLinter pragma disable: -fdoc-internal-linkage
413: /*@C
414:   VecCUDAResetArray - Resets a vector to use its default memory.
416:   Logically Collective; No Fortran Support
418:   Input Parameter:
419: . vin - the vector
421:   Level: advanced
423:   Note:
424:   Call this after the use of `VecCUDAPlaceArray()`.
426: .seealso: [](ch_vectors), `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecPlaceArray()`,
427:           `VecResetArray()`, `VecCUDAPlaceArray()`, `VecCUDAReplaceArray()`
428: @*/
429: PetscErrorCode VecCUDAResetArray(Vec vin)
430: {
431:   PetscFunctionBegin;
432:   PetscCall(VecCUPMResetArrayAsync<DeviceType::CUDA>(vin)); // thin CUDA wrapper: forwards the vector unchanged
433:   PetscFunctionReturn(PETSC_SUCCESS);
434: }