improve virtio_disk comments; bring it closer to wording in the spec
This commit is contained in:
		
							parent
							
								
									da002a48fb
								
							
						
					
					
						commit
						271d89ae30
					
				
					 2 changed files with 85 additions and 37 deletions
				
			
		|  | @ -47,7 +47,8 @@ | ||||||
| // must be a power of two.
 | // must be a power of two.
 | ||||||
| #define NUM 8 | #define NUM 8 | ||||||
| 
 | 
 | ||||||
| struct VRingDesc { | // a single descriptor, from the spec.
 | ||||||
|  | struct virtq_desc { | ||||||
|   uint64 addr; |   uint64 addr; | ||||||
|   uint32 len; |   uint32 len; | ||||||
|   uint16 flags; |   uint16 flags; | ||||||
|  | @ -56,17 +57,38 @@ struct VRingDesc { | ||||||
| #define VRING_DESC_F_NEXT  1 // chained with another descriptor
 | #define VRING_DESC_F_NEXT  1 // chained with another descriptor
 | ||||||
| #define VRING_DESC_F_WRITE 2 // device writes (vs read)
 | #define VRING_DESC_F_WRITE 2 // device writes (vs read)
 | ||||||
| 
 | 
 | ||||||
| struct VRingUsedElem { | // the (entire) avail ring, from the spec.
 | ||||||
|  | struct virtq_avail { | ||||||
|  |   uint16 flags; // always zero
 | ||||||
|  |   uint16 idx;   // driver will write ring[idx] next
 | ||||||
|  |   uint16 ring[NUM]; // descriptor numbers of chain heads
 | ||||||
|  |   uint16 unused; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | // one entry in the "used" ring, with which the
 | ||||||
|  | // device tells the driver about completed requests.
 | ||||||
|  | struct virtq_used_elem { | ||||||
|   uint32 id;   // index of start of completed descriptor chain
 |   uint32 id;   // index of start of completed descriptor chain
 | ||||||
|   uint32 len; |   uint32 len; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| // for disk ops
 | struct virtq_used { | ||||||
|  |   uint16 flags; // always zero
 | ||||||
|  |   uint16 idx;   // device increments when it adds a ring[] entry
 | ||||||
|  |   struct virtq_used_elem ring[NUM]; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | // these are specific to virtio block devices, e.g. disks,
 | ||||||
|  | // described in Section 5.2 of the spec.
 | ||||||
|  | 
 | ||||||
| #define VIRTIO_BLK_T_IN  0 // read the disk
 | #define VIRTIO_BLK_T_IN  0 // read the disk
 | ||||||
| #define VIRTIO_BLK_T_OUT 1 // write the disk
 | #define VIRTIO_BLK_T_OUT 1 // write the disk
 | ||||||
| 
 | 
 | ||||||
| struct UsedArea { | // the format of the first descriptor in a disk request.
 | ||||||
|   uint16 flags; | // to be followed by two more descriptors containing
 | ||||||
|   uint16 id; | // the block, and a one-byte status.
 | ||||||
|   struct VRingUsedElem elems[NUM]; | struct virtio_blk_req { | ||||||
|  |   uint32 type; // VIRTIO_BLK_T_IN or ..._OUT
 | ||||||
|  |   uint32 reserved; | ||||||
|  |   uint64 sector; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -21,14 +21,37 @@ | ||||||
| #define R(r) ((volatile uint32 *)(VIRTIO0 + (r))) | #define R(r) ((volatile uint32 *)(VIRTIO0 + (r))) | ||||||
| 
 | 
 | ||||||
| static struct disk { | static struct disk { | ||||||
|  // memory for virtio descriptors &c for queue 0.
 |   // the virtio driver and device mostly communicate through a set of
 | ||||||
|  // this is a global instead of allocated because it must
 |   // structures in RAM. pages[] allocates that memory. pages[] is a
 | ||||||
|  // be multiple contiguous pages, which kalloc()
 |   // global (instead of calls to kalloc()) because it must consist of
 | ||||||
|  // doesn't support, and page aligned.
 |   // two contiguous pages of page-aligned physical memory.
 | ||||||
|   char pages[2*PGSIZE]; |   char pages[2*PGSIZE]; | ||||||
|   struct VRingDesc *desc; | 
 | ||||||
|   uint16 *avail; |   // pages[] is divided into three regions (descriptors, avail, and
 | ||||||
|   struct UsedArea *used; |   // used), as explained in Section 2.6 of the virtio specification
 | ||||||
|  |   // for the legacy interface.
 | ||||||
|  |   // https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf
 | ||||||
|  |    | ||||||
|  |   // the first region of pages[] is a set (not a ring) of DMA
 | ||||||
|  |   // descriptors, with which the driver tells the device where to read
 | ||||||
|  |   // and write individual disk operations. there are NUM descriptors.
 | ||||||
|  |   // most commands consist of a "chain" (a linked list) of a couple of
 | ||||||
|  |   // these descriptors.
 | ||||||
|  |   // points into pages[].
 | ||||||
|  |   struct virtq_desc *desc; | ||||||
|  | 
 | ||||||
|  |   // next is a ring in which the driver writes descriptor numbers
 | ||||||
|  |   // that the driver would like the device to process.  it only
 | ||||||
|  |   // includes the head descriptor of each chain. the ring has
 | ||||||
|  |   // NUM elements.
 | ||||||
|  |   // points into pages[].
 | ||||||
|  |   struct virtq_avail *avail; | ||||||
|  | 
 | ||||||
|  |   // finally a ring in which the device writes descriptor numbers that
 | ||||||
|  |   // the device has finished processing (just the head of each chain).
 | ||||||
|  |   // there are NUM used ring entries.
 | ||||||
|  |   // points into pages[].
 | ||||||
|  |   struct virtq_used *used; | ||||||
| 
 | 
 | ||||||
|   // our own book-keeping.
 |   // our own book-keeping.
 | ||||||
|   char free[NUM];  // is a descriptor free?
 |   char free[NUM];  // is a descriptor free?
 | ||||||
|  | @ -98,14 +121,15 @@ virtio_disk_init(void) | ||||||
|   memset(disk.pages, 0, sizeof(disk.pages)); |   memset(disk.pages, 0, sizeof(disk.pages)); | ||||||
|   *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT; |   *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT; | ||||||
| 
 | 
 | ||||||
|   // desc = pages -- num * VRingDesc
 |   // desc = pages -- num * virtq_desc
 | ||||||
|   // avail = pages + 0x40 -- 2 * uint16, then num * uint16
 |   // avail = pages + 0x40 -- 2 * uint16, then num * uint16
 | ||||||
|   // used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
 |   // used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
 | ||||||
| 
 | 
 | ||||||
|   disk.desc = (struct VRingDesc *) disk.pages; |   disk.desc = (struct virtq_desc *) disk.pages; | ||||||
|   disk.avail = (uint16*)(((char*)disk.desc) + NUM*sizeof(struct VRingDesc)); |   disk.avail = (struct virtq_avail *)(disk.pages + NUM*sizeof(struct virtq_desc)); | ||||||
|   disk.used = (struct UsedArea *) (disk.pages + PGSIZE); |   disk.used = (struct virtq_used *) (disk.pages + PGSIZE); | ||||||
| 
 | 
 | ||||||
|  |   // all NUM descriptors start out unused.
 | ||||||
|   for(int i = 0; i < NUM; i++) |   for(int i = 0; i < NUM; i++) | ||||||
|     disk.free[i] = 1; |     disk.free[i] = 1; | ||||||
| 
 | 
 | ||||||
|  | @ -156,6 +180,8 @@ free_chain(int i) | ||||||
|   } |   } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // allocate three descriptors (they need not be contiguous).
 | ||||||
|  | // disk transfers always use three descriptors.
 | ||||||
| static int | static int | ||||||
| alloc3_desc(int *idx) | alloc3_desc(int *idx) | ||||||
| { | { | ||||||
|  | @ -177,9 +203,9 @@ virtio_disk_rw(struct buf *b, int write) | ||||||
| 
 | 
 | ||||||
|   acquire(&disk.vdisk_lock); |   acquire(&disk.vdisk_lock); | ||||||
| 
 | 
 | ||||||
|   // the spec says that legacy block operations use three
 |   // the spec's Section 5.2 says that legacy block operations use
 | ||||||
|   // descriptors: one for type/reserved/sector, one for
 |   // three descriptors: one for type/reserved/sector, one for the
 | ||||||
|   // the data, one for a 1-byte status result.
 |   // data, one for a 1-byte status result.
 | ||||||
| 
 | 
 | ||||||
|   // allocate the three descriptors.
 |   // allocate the three descriptors.
 | ||||||
|   int idx[3]; |   int idx[3]; | ||||||
|  | @ -193,11 +219,7 @@ virtio_disk_rw(struct buf *b, int write) | ||||||
|   // format the three descriptors.
 |   // format the three descriptors.
 | ||||||
|   // qemu's virtio-blk.c reads them.
 |   // qemu's virtio-blk.c reads them.
 | ||||||
| 
 | 
 | ||||||
|   struct virtio_blk_outhdr { |   struct virtio_blk_req buf0; | ||||||
|     uint32 type; |  | ||||||
|     uint32 reserved; |  | ||||||
|     uint64 sector; |  | ||||||
|   } buf0; |  | ||||||
| 
 | 
 | ||||||
|   if(write) |   if(write) | ||||||
|     buf0.type = VIRTIO_BLK_T_OUT; // write the disk
 |     buf0.type = VIRTIO_BLK_T_OUT; // write the disk
 | ||||||
|  | @ -232,15 +254,13 @@ virtio_disk_rw(struct buf *b, int write) | ||||||
|   b->disk = 1; |   b->disk = 1; | ||||||
|   disk.info[idx[0]].b = b; |   disk.info[idx[0]].b = b; | ||||||
| 
 | 
 | ||||||
|   // avail[0] is flags (always zero)
 |   // tell the device the first index in our chain of descriptors.
 | ||||||
|   // avail[1] is an index into avail[2...] telling where we'll write next
 |   disk.avail->ring[disk.avail->idx % NUM] = idx[0]; | ||||||
|   // avail[2...] is a ring of NUM indices the device should process
 |  | ||||||
|   // we only tell device the first index in our chain of descriptors.
 |  | ||||||
|   disk.avail[2 + (disk.avail[1] % NUM)] = idx[0]; |  | ||||||
| 
 | 
 | ||||||
|   __sync_synchronize(); |   __sync_synchronize(); | ||||||
| 
 | 
 | ||||||
|   disk.avail[1] = disk.avail[1] + 1; // not % NUM ...
 |   // tell the device another avail ring entry is available.
 | ||||||
|  |   disk.avail->idx += 1; // not % NUM ...
 | ||||||
| 
 | 
 | ||||||
|   __sync_synchronize(); |   __sync_synchronize(); | ||||||
| 
 | 
 | ||||||
|  | @ -262,16 +282,22 @@ virtio_disk_intr() | ||||||
| { | { | ||||||
|   acquire(&disk.vdisk_lock); |   acquire(&disk.vdisk_lock); | ||||||
| 
 | 
 | ||||||
|   // this ack may race with the device writing new notifications to
 |   // the device won't raise another interrupt until we tell it
 | ||||||
|   // the "used" ring, in which case we may get an interrupt we don't
 |   // we've seen this interrupt, which the following line does.
 | ||||||
|   // need, which is harmless.
 |   // this may race with the device writing new entries to
 | ||||||
|  |   // the "used" ring, in which case we may process the new
 | ||||||
|  |   // completion entries in this interrupt, and have nothing to do
 | ||||||
|  |   // in the next interrupt, which is harmless.
 | ||||||
|   *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3; |   *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3; | ||||||
| 
 | 
 | ||||||
|   __sync_synchronize(); |   __sync_synchronize(); | ||||||
| 
 | 
 | ||||||
|   while(disk.used_idx != disk.used->id){ |   // the device increments disk.used->idx when it
 | ||||||
|  |   // adds an entry to the used ring.
 | ||||||
|  | 
 | ||||||
|  |   while(disk.used_idx != disk.used->idx){ | ||||||
|     __sync_synchronize(); |     __sync_synchronize(); | ||||||
|     int id = disk.used->elems[disk.used_idx % NUM].id; |     int id = disk.used->ring[disk.used_idx % NUM].id; | ||||||
| 
 | 
 | ||||||
|     if(disk.info[id].status != 0) |     if(disk.info[id].status != 0) | ||||||
|       panic("virtio_disk_intr status"); |       panic("virtio_disk_intr status"); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Robert Morris
						Robert Morris