In some idle time I came up with various communication schemes for PC to NES, with a focus on their software implementation and maximum throughput. Most are controller-port based. Of them, I've only implemented the asynchronous and synchronous serial ones, none of the multi-bit ones. The number at the start of each heading below is the CPU cycle count per byte received, followed by the resulting throughput.
sync = controller CLK line clocks USART
no framing = known series of bytes without any start/stop flags encoded
sync = controller CLK line clocks USART
no framing = known series of bytes without any start/stop flags encoded
Code:
; 310 async 57600 bps: 5.8KB/sec
; Receives one byte of asynchronous serial data on bit 0 of $4017.
; One bit period is 31 CPU cycles; 310 = 10 bit periods (presumably
; start + 8 data + stop -- confirm against the sender's configuration).
; Out:      A = received byte, first-received data bit in bit 0.
; Clobbers: Y, zero-page byte 0, flags.
; Timing:   cycle-counted throughout; do not add, remove or reorder
;           instructions, and keep the loop branch from crossing a page.
lda #1
@wait:
; A = 1 is the BIT mask: spin while bit 0 of $4017 reads 0.
bit $4017
beq @wait
; The loop takes 8 samples and the tail below takes a 9th. The first sample
; is read 12 cycles after the read that ended the wait loop, so it is
; presumably still the start bit; the final ROR A shifts it out into carry.
ldy #8
@bit:
; Delay padding (4 cycles).
nop
nop
; Sample the port, invert bit 0, and rotate it into the top of zero-page
; byte 0. (The inversion presumably undoes an inverted line level -- TODO confirm.)
lda $4017
eor #1
lsr a
ror <0
; Padding that brings one pass to 31 cycles: three NOPs (6) and BIT <0 (3).
; The flags BIT produces are overwritten by DEY before BNE tests them.
nop
nop
nop
bit <0
dey
bne @bit
; Tail. BIT <0 is again a 3-cycle pad. LDA #1 / EOR $4017 yields the same
; inverted bit 0 as LDA $4017 / EOR #1, but the port read happens two cycles
; later, which places this sample 31 cycles after the previous one.
bit <0
lda #1
eor $4017
lsr a
; Merge: the 9th sample enters bit 7 of A while the earlier samples held in
; zero-page byte 0 move down by one position.
lda <0
ror a
; 96 sync: 18.6KB/sec
; Receives one byte of synchronous serial data on bit 0 of $4017 ("sync" as
; defined at the top of this file: the controller CLK line clocks the USART).
; 96 cycles = 8 (one pass of the wait loop) + 7 * 11 + 11.
; Out:      A = received byte, first bit read in bit 0.
; Clobbers: zero-page byte 0, flags. Zero-page byte 0 needs no initial
;           value: all of its old bits are shifted out.
lda #1
@wait:
; A = 1 is the BIT mask: spin while bit 0 of $4017 reads 0.
bit $4017
beq @wait
; Seven reads: bit 0 of the port goes to carry, then into the top of
; zero-page byte 0.
.repeat 7
lda $4017
lsr a
ror <0
.endrepeat
; Eighth read: its bit 0 goes to carry, then ROR A places it in bit 7 while
; the seven earlier bits move down to bits 0-6.
lda $4017
lsr a
lda <0
ror a
; 76 sync faster: 23.5KB/sec
; Same job as the 96-cycle synchronous receiver, but shifts the port
; register directly so no zero-page temporary is needed.
; 76 cycles = 8 (one pass of the wait loop) + 8 * 8 + 4.
; Out:      A = received byte, first bit read in bit 0.
; Clobbers: flags only.
lda #1
@wait:
; A = 1 is the BIT mask: spin while bit 0 of $4017 reads 0.
bit $4017
beq @wait
; LSR $4017 puts bit 0 of the value read into carry; ROR A rotates carry into
; bit 7 of A. After eight pairs the initial A = 1 has been shifted out.
; NOTE(review): LSR is a read-modify-write instruction, so it also writes to
; $4017; on the NES that address presumably reaches the APU frame counter
; when written -- confirm the side effect is acceptable.
.repeat 8
lsr $4017
ror a
.endrepeat
; One extra read of $4017; only the flags are affected and A is kept.
; Presumably this consumes a trailing framing bit -- TODO confirm.
bit $4017
; 64 sync faster no framing: 28.0KB/sec
; Synchronous receive with no start flag ("no framing" as defined at the top
; of this file): eight back-to-back LSR/ROR pairs, 8 * 8 = 64 cycles.
; Out:      A = received byte, first bit read in bit 0; the previous
;           contents of A are shifted out.
; Clobbers: flags only.
; NOTE(review): LSR is a read-modify-write instruction, so it also writes to
; $4017 -- confirm the side effect is acceptable.
.repeat 8
lsr $4017
ror a
.endrepeat
; 50 dual (data on D4 & D0, ready on D3): 35.8KB/sec
; Receives one byte as four reads of $4017 carrying two data bits each.
; 50 cycles = 8 (one pass of the wait loop) + 3 * 14.
; Out:      A = assembled byte (layout below).
; Clobbers: zero-page byte 0, flags.
@wait:
; Keep only D4, D3 and D0 ($19 = %00011001); loop while all three read 0.
lda $4017
and #$19
beq @wait
; Three more reads: shift what has been gathered left by one, then XOR in the
; next masked read. Final layout (r1..r4 = the reads in order):
;   bit7=r1.D4  bit6=r2.D4  bit5=r3.D4  bit4=r4.D4
;   bit3=r1.D0  bit2=r2.D0  bit1=r3.D0  bit0=r4.D0
; D3 is kept by the mask, so each read's D3 is XORed into a data position too
; (r1.D3 -> bit6, r2.D3 -> bit5, r3.D3 -> bit4, r4.D3 -> bit3).
; NOTE(review): presumably the sender pre-compensates for that -- confirm.
.repeat 3
asl a
sta <0
lda $4017
and #$19
eor <0
.endrepeat
; 48 dual no framing: 37.3KB/sec
; Two-data-bits-per-read receive without waiting for a ready flag.
; 48 cycles = 6 (first read and mask) + 3 * 14.
; Out:      A = assembled byte (layout below).
; Clobbers: zero-page byte 0, flags.
; First read, masked to D4, D3 and D0 ($19 = %00011001).
lda $4017
and #$19
; Three more reads: shift what has been gathered left by one, then XOR in the
; next masked read. Final layout (r1..r4 = the reads in order):
;   bit7=r1.D4  bit6=r2.D4  bit5=r3.D4  bit4=r4.D4
;   bit3=r1.D0  bit2=r2.D0  bit1=r3.D0  bit0=r4.D0
; D3 is kept by the mask, so each read's D3 is XORed into a data position too
; (r1.D3 -> bit6, r2.D3 -> bit5, r3.D3 -> bit4, r4.D3 -> bit3).
; NOTE(review): presumably the sender pre-compensates for that -- confirm.
.repeat 3
asl a
sta <0
lda $4017
and #$19
eor <0
.endrepeat
; 24 quad (D0-D3 data, D4 ready): 74.6KB/sec
; Receives one byte as two reads of $4017.
; 24 cycles = 8 (one pass of the wait loop) + 3 + 4 + 6 + 3.
; Out:      A = assembled byte (layout below).
; Clobbers: zero-page byte 0, flags.
@wait:
; First read, masked to D0-D4 ($1f); loop while all five bits read 0.
lda $4017
and #$1f
beq @wait
sta <0
; Second read needs no mask: the three ASLs push its D5-D7 out of A and move
; its D0-D4 up to bits 3-7.
lda $4017
asl a
asl a
asl a
; Combine the two reads (r1, r2):
;   bits 0-2 = r1.D0-D2, bit 3 = r1.D3 ^ r2.D0,
;   bit 4 = r1.D4 ^ r2.D1, bits 5-7 = r2.D2-D4
; NOTE(review): bits 3 and 4 mix both reads, so the sender presumably
; pre-compensates -- confirm.
eor <0
; 22 quad no framing: 81.3KB/sec
; Two-read receive without waiting for a ready flag.
; 22 cycles = 4 + 2 + 3 + 4 + 6 + 3.
; Out:      A = assembled byte (layout below).
; Clobbers: zero-page byte 0, flags.
; First read, masked to D0-D4 ($1f) and parked in zero-page byte 0.
lda $4017
and #$1f
sta <0
; Second read needs no mask: the three ASLs push its D5-D7 out of A and move
; its D0-D4 up to bits 3-7.
lda $4017
asl a
asl a
asl a
; Combine the two reads (r1, r2):
;   bits 0-2 = r1.D0-D2, bit 3 = r1.D3 ^ r2.D0,
;   bit 4 = r1.D4 ^ r2.D1, bits 5-7 = r2.D2-D4
; NOTE(review): bits 3 and 4 mix both reads, so the sender presumably
; pre-compensates -- confirm.
eor <0
; 10 octal: 179.0KB/sec
; Byte-wide receive: poll a flag in bit 7, then load the byte.
; 10 cycles = 4 + 2 + 4 for one pass of the wait loop plus the load.
; $xxxx is a placeholder, not a real operand; substitute the actual port
; address(es) before assembling. Whether both operands are meant to be the
; same address is not specified here.
; Out: A = byte read. Clobbers: flags.
@wait:
; BIT copies bit 7 of the operand into N; loop while it reads 0.
bit $xxxx
bpl @wait
lda $xxxx
; 4 octal no framing: 447.4KB/sec
; Byte-wide receive with no ready check: a single 4-cycle absolute load.
; $xxxx is a placeholder, not a real operand; substitute the actual port
; address before assembling.
; Out: A = byte read. Clobbers: flags.
lda $xxxx
; Asynchronous serial receive, 31 CPU cycles per bit. Instruction for
; instruction this is the same routine as the one headed
; "310 async 57600 bps" earlier in this file.
; Receives one byte on bit 0 of $4017.
; Out:      A = received byte, first-received data bit in bit 0.
; Clobbers: Y, zero-page byte 0, flags.
; Timing:   cycle-counted throughout; do not add, remove or reorder
;           instructions, and keep the loop branch from crossing a page.
lda #1
@wait:
; A = 1 is the BIT mask: spin while bit 0 of $4017 reads 0.
bit $4017
beq @wait
; The loop takes 8 samples and the tail below takes a 9th. The first sample
; is read 12 cycles after the read that ended the wait loop, so it is
; presumably still the start bit; the final ROR A shifts it out into carry.
ldy #8
@bit:
; Delay padding (4 cycles).
nop
nop
; Sample the port, invert bit 0, and rotate it into the top of zero-page
; byte 0. (The inversion presumably undoes an inverted line level -- TODO confirm.)
lda $4017
eor #1
lsr a
ror <0
; Padding that brings one pass to 31 cycles: three NOPs (6) and BIT <0 (3).
; The flags BIT produces are overwritten by DEY before BNE tests them.
nop
nop
nop
bit <0
dey
bne @bit
; Tail. BIT <0 is again a 3-cycle pad. LDA #1 / EOR $4017 yields the same
; inverted bit 0 as LDA $4017 / EOR #1, but the port read happens two cycles
; later, which places this sample 31 cycles after the previous one.
bit <0
lda #1
eor $4017
lsr a
; Merge: the 9th sample enters bit 7 of A while the earlier samples held in
; zero-page byte 0 move down by one position.
lda <0
ror a
; 96 sync: 18.6KB/sec
; Receives one byte of synchronous serial data on bit 0 of $4017 ("sync" as
; defined at the top of this file: the controller CLK line clocks the USART).
; 96 cycles = 8 (one pass of the wait loop) + 7 * 11 + 11.
; Out:      A = received byte, first bit read in bit 0.
; Clobbers: zero-page byte 0, flags. Zero-page byte 0 needs no initial
;           value: all of its old bits are shifted out.
lda #1
@wait:
; A = 1 is the BIT mask: spin while bit 0 of $4017 reads 0.
bit $4017
beq @wait
; Seven reads: bit 0 of the port goes to carry, then into the top of
; zero-page byte 0.
.repeat 7
lda $4017
lsr a
ror <0
.endrepeat
; Eighth read: its bit 0 goes to carry, then ROR A places it in bit 7 while
; the seven earlier bits move down to bits 0-6.
lda $4017
lsr a
lda <0
ror a
; 76 sync faster: 23.5KB/sec
; Same job as the 96-cycle synchronous receiver, but shifts the port
; register directly so no zero-page temporary is needed.
; 76 cycles = 8 (one pass of the wait loop) + 8 * 8 + 4.
; Out:      A = received byte, first bit read in bit 0.
; Clobbers: flags only.
lda #1
@wait:
; A = 1 is the BIT mask: spin while bit 0 of $4017 reads 0.
bit $4017
beq @wait
; LSR $4017 puts bit 0 of the value read into carry; ROR A rotates carry into
; bit 7 of A. After eight pairs the initial A = 1 has been shifted out.
; NOTE(review): LSR is a read-modify-write instruction, so it also writes to
; $4017; on the NES that address presumably reaches the APU frame counter
; when written -- confirm the side effect is acceptable.
.repeat 8
lsr $4017
ror a
.endrepeat
; One extra read of $4017; only the flags are affected and A is kept.
; Presumably this consumes a trailing framing bit -- TODO confirm.
bit $4017
; 64 sync faster no framing: 28.0KB/sec
; Synchronous receive with no start flag ("no framing" as defined at the top
; of this file): eight back-to-back LSR/ROR pairs, 8 * 8 = 64 cycles.
; Out:      A = received byte, first bit read in bit 0; the previous
;           contents of A are shifted out.
; Clobbers: flags only.
; NOTE(review): LSR is a read-modify-write instruction, so it also writes to
; $4017 -- confirm the side effect is acceptable.
.repeat 8
lsr $4017
ror a
.endrepeat
; 50 dual (data on D4 & D0, ready on D3): 35.8KB/sec
; Receives one byte as four reads of $4017 carrying two data bits each.
; 50 cycles = 8 (one pass of the wait loop) + 3 * 14.
; Out:      A = assembled byte (layout below).
; Clobbers: zero-page byte 0, flags.
@wait:
; Keep only D4, D3 and D0 ($19 = %00011001); loop while all three read 0.
lda $4017
and #$19
beq @wait
; Three more reads: shift what has been gathered left by one, then XOR in the
; next masked read. Final layout (r1..r4 = the reads in order):
;   bit7=r1.D4  bit6=r2.D4  bit5=r3.D4  bit4=r4.D4
;   bit3=r1.D0  bit2=r2.D0  bit1=r3.D0  bit0=r4.D0
; D3 is kept by the mask, so each read's D3 is XORed into a data position too
; (r1.D3 -> bit6, r2.D3 -> bit5, r3.D3 -> bit4, r4.D3 -> bit3).
; NOTE(review): presumably the sender pre-compensates for that -- confirm.
.repeat 3
asl a
sta <0
lda $4017
and #$19
eor <0
.endrepeat
; 48 dual no framing: 37.3KB/sec
; Two-data-bits-per-read receive without waiting for a ready flag.
; 48 cycles = 6 (first read and mask) + 3 * 14.
; Out:      A = assembled byte (layout below).
; Clobbers: zero-page byte 0, flags.
; First read, masked to D4, D3 and D0 ($19 = %00011001).
lda $4017
and #$19
; Three more reads: shift what has been gathered left by one, then XOR in the
; next masked read. Final layout (r1..r4 = the reads in order):
;   bit7=r1.D4  bit6=r2.D4  bit5=r3.D4  bit4=r4.D4
;   bit3=r1.D0  bit2=r2.D0  bit1=r3.D0  bit0=r4.D0
; D3 is kept by the mask, so each read's D3 is XORed into a data position too
; (r1.D3 -> bit6, r2.D3 -> bit5, r3.D3 -> bit4, r4.D3 -> bit3).
; NOTE(review): presumably the sender pre-compensates for that -- confirm.
.repeat 3
asl a
sta <0
lda $4017
and #$19
eor <0
.endrepeat
; 24 quad (D0-D3 data, D4 ready): 74.6KB/sec
; Receives one byte as two reads of $4017.
; 24 cycles = 8 (one pass of the wait loop) + 3 + 4 + 6 + 3.
; Out:      A = assembled byte (layout below).
; Clobbers: zero-page byte 0, flags.
@wait:
; First read, masked to D0-D4 ($1f); loop while all five bits read 0.
lda $4017
and #$1f
beq @wait
sta <0
; Second read needs no mask: the three ASLs push its D5-D7 out of A and move
; its D0-D4 up to bits 3-7.
lda $4017
asl a
asl a
asl a
; Combine the two reads (r1, r2):
;   bits 0-2 = r1.D0-D2, bit 3 = r1.D3 ^ r2.D0,
;   bit 4 = r1.D4 ^ r2.D1, bits 5-7 = r2.D2-D4
; NOTE(review): bits 3 and 4 mix both reads, so the sender presumably
; pre-compensates -- confirm.
eor <0
; 22 quad no framing: 81.3KB/sec
; Two-read receive without waiting for a ready flag.
; 22 cycles = 4 + 2 + 3 + 4 + 6 + 3.
; Out:      A = assembled byte (layout below).
; Clobbers: zero-page byte 0, flags.
; First read, masked to D0-D4 ($1f) and parked in zero-page byte 0.
lda $4017
and #$1f
sta <0
; Second read needs no mask: the three ASLs push its D5-D7 out of A and move
; its D0-D4 up to bits 3-7.
lda $4017
asl a
asl a
asl a
; Combine the two reads (r1, r2):
;   bits 0-2 = r1.D0-D2, bit 3 = r1.D3 ^ r2.D0,
;   bit 4 = r1.D4 ^ r2.D1, bits 5-7 = r2.D2-D4
; NOTE(review): bits 3 and 4 mix both reads, so the sender presumably
; pre-compensates -- confirm.
eor <0
; 10 octal: 179.0KB/sec
; Byte-wide receive: poll a flag in bit 7, then load the byte.
; 10 cycles = 4 + 2 + 4 for one pass of the wait loop plus the load.
; $xxxx is a placeholder, not a real operand; substitute the actual port
; address(es) before assembling. Whether both operands are meant to be the
; same address is not specified here.
; Out: A = byte read. Clobbers: flags.
@wait:
; BIT copies bit 7 of the operand into N; loop while it reads 0.
bit $xxxx
bpl @wait
lda $xxxx
; 4 octal no framing: 447.4KB/sec
; Byte-wide receive with no ready check: a single 4-cycle absolute load.
; $xxxx is a placeholder, not a real operand; substitute the actual port
; address before assembling.
; Out: A = byte read. Clobbers: flags.
lda $xxxx