author	cinap_lenrek <cinap_lenrek@gmx.de>	2013-01-26 17:33:21 +0100
committer	cinap_lenrek <cinap_lenrek@gmx.de>	2013-01-26 17:33:21 +0100
commit	ea108c8ca6e726ac008f75775ab83775ec233171 (patch)
tree	982816b58d50e1b12b7eeb2c29fe22ca8d9c195b /sys
parent	43e09c468b4c6562c93c9375a316012e238d21b2 (diff)
add tegra2 soc kernel (from sources)
Diffstat (limited to 'sys')
-rw-r--r-- sys/src/9/teg2/_announce | 58
-rw-r--r-- sys/src/9/teg2/arch.c | 172
-rw-r--r-- sys/src/9/teg2/archtegra.c | 869
-rw-r--r-- sys/src/9/teg2/arm.h | 309
-rw-r--r-- sys/src/9/teg2/arm.s | 132
-rw-r--r-- sys/src/9/teg2/cache-l2-pl310.c | 456
-rw-r--r-- sys/src/9/teg2/cache.v7.s | 240
-rw-r--r-- sys/src/9/teg2/caches-v7.c | 106
-rw-r--r-- sys/src/9/teg2/caches.c | 198
-rw-r--r-- sys/src/9/teg2/clock-tegra.c | 138
-rw-r--r-- sys/src/9/teg2/clock.c | 624
-rw-r--r-- sys/src/9/teg2/coproc.c | 200
-rw-r--r-- sys/src/9/teg2/dat.h | 478
-rw-r--r-- sys/src/9/teg2/devarch.c | 192
-rw-r--r-- sys/src/9/teg2/devether.c | 530
-rw-r--r-- sys/src/9/teg2/ether8169.c | 1675
-rw-r--r-- sys/src/9/teg2/etherif.h | 42
-rw-r--r-- sys/src/9/teg2/ethermii.c | 235
-rw-r--r-- sys/src/9/teg2/ethermii.h | 116
-rw-r--r-- sys/src/9/teg2/fns.h | 231
-rw-r--r-- sys/src/9/teg2/fpi.c | 300
-rw-r--r-- sys/src/9/teg2/fpi.h | 61
-rw-r--r-- sys/src/9/teg2/fpiarm.c | 506
-rw-r--r-- sys/src/9/teg2/fpimem.c | 136
-rw-r--r-- sys/src/9/teg2/init9.s | 25
-rw-r--r-- sys/src/9/teg2/io.h | 219
-rw-r--r-- sys/src/9/teg2/l.s | 873
-rw-r--r-- sys/src/9/teg2/lexception.s | 325
-rw-r--r-- sys/src/9/teg2/lproc.s | 38
-rw-r--r-- sys/src/9/teg2/main.c | 982
-rw-r--r-- sys/src/9/teg2/mem.h | 150
-rw-r--r-- sys/src/9/teg2/mkfile | 151
-rw-r--r-- sys/src/9/teg2/mmu.c | 750
-rw-r--r-- sys/src/9/teg2/notes/assumes-hz-under-1000 | 4
-rw-r--r-- sys/src/9/teg2/notes/bug.rfe | 41
-rw-r--r-- sys/src/9/teg2/notes/byte-order | 59
-rw-r--r-- sys/src/9/teg2/notes/clks | 19
-rw-r--r-- sys/src/9/teg2/notes/movm.w | 22
-rw-r--r-- sys/src/9/teg2/notes/pci | 29
-rw-r--r-- sys/src/9/teg2/notes/pci.2.buses | 78
-rw-r--r-- sys/src/9/teg2/nvram | bin 0 -> 512 bytes
-rw-r--r-- sys/src/9/teg2/pci.c | 853
-rw-r--r-- sys/src/9/teg2/rebootcode.s | 208
-rw-r--r-- sys/src/9/teg2/softfpu.c | 129
-rw-r--r-- sys/src/9/teg2/syscall.c | 366
-rw-r--r-- sys/src/9/teg2/trap.c | 1083
-rw-r--r-- sys/src/9/teg2/ts | 89
-rw-r--r-- sys/src/9/teg2/uarti8250.c | 819
-rw-r--r-- sys/src/9/teg2/usbehci.h | 104
-rw-r--r-- sys/src/9/teg2/v7-arch.c | 51
-rw-r--r-- sys/src/9/teg2/vfp3.c | 516
-rw-r--r-- sys/src/9/teg2/words | 60
52 files changed, 16047 insertions, 0 deletions
diff --git a/sys/src/9/teg2/_announce b/sys/src/9/teg2/_announce
new file mode 100644
index 000000000..38a1370a8
--- /dev/null
+++ b/sys/src/9/teg2/_announce
@@ -0,0 +1,58 @@
+This is a preliminary Plan 9 port to the Compulab Trimslice,
+containing a Tegra 2 SoC: a dual-core, (truly) dual-issue 1GHz
+Cortex-A9 v7a-architecture ARM system, *and* it comes in a case. VFP
+3 floating-point hardware is present, but 5l doesn't yet generate
+those instructions. This is the first multiprocessor ARM port we've
+done, and much of the code should be reusable in future ports. There
+are still things to be done but it can run both processors and is
+believed to have adequate kernel support for VFP 3 floating-point.
+
+
+What's implemented.
+
+Two cpus running concurrently with level 1 and 2 caches enabled.
+
+Realtek 8168 Ethernet. A slightly dimmer 8169. Has to be jabbed with
+an electric cattle prod by software about once per day when it wedges.
+
+Profiling. Charles Forsyth fixed various bugs to make user-mode
+profiling on ARMs work for the first time ever.
+
+
+What's not (yet) implemented.
+
+USB. It probably just needs initialisation.
+
+NOR flash.
+
+Video.
+
+VFP3 floating point. The go 5l generates VFP 3 floating-point
+instructions (among other changes). Attempts to transplant just that
+code into our 5l failed to generate correct code. Eventually someone
+will get this to work, and then we'll be able to use the hardware
+floating-point. [Eventually someone did, thanks.] Even with only
+software emulation of floating-point, astro runs in under 3 seconds.
+
+In-line 64-bit arithmetic in 5[cl].
+
+And the really horrid peripherals: NAND flash and MMC.
+
+
+Known problems.
+
+kprof. kprof profiling doesn't work correctly, charging all CPU time
+to _start.
+
+Reboot. After an fshalt -r reboot (or two) with cpu1 enabled,
+accesses to pci registers (notably 0x80015000) in the newly-loaded
+kernel often hang. One of three watchdogs' reset should jolt the
+system back to life and force a reboot through u-boot when this
+happens. Sometimes the ethernet goes dead instead ("waiting for
+dhcp..." forever); this could be a different symptom of pci illness.
+
+Also following a reboot, cpu1's local (not tegra SoC shared) timers
+don't interrupt. Since the local watchdogs don't seem to actually
+interrupt nor generate resets when used in anger (as opposed to
+boot-time check-out), their loss is merely a mystery. The local timer
+not interrupting is more worrying.
diff --git a/sys/src/9/teg2/arch.c b/sys/src/9/teg2/arch.c
new file mode 100644
index 000000000..864df6522
--- /dev/null
+++ b/sys/src/9/teg2/arch.c
@@ -0,0 +1,172 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include <tos.h>
+#include "ureg.h"
+
+#include "arm.h"
+
+/*
+ * A lot of this stuff doesn't belong here
+ * but this is a convenient dumping ground for
+ * later sorting into the appropriate buckets.
+ */
+
+/* Give enough context in the ureg to produce a kernel stack for
+ * a sleeping process
+ */
+void
+setkernur(Ureg* ureg, Proc* p)
+{
+ ureg->pc = p->sched.pc;
+ ureg->sp = p->sched.sp+4;
+ ureg->r14 = PTR2UINT(sched);
+}
+
+/*
+ * called in sysfile.c
+ */
+void
+evenaddr(uintptr addr)
+{
+ if(addr & 3){
+ postnote(up, 1, "sys: odd address", NDebug);
+ error(Ebadarg);
+ }
+}
+
+/* go to user space */
+void
+kexit(Ureg*)
+{
+ uvlong t;
+ Tos *tos;
+
+ /* precise time accounting, kernel exit */
+ tos = (Tos*)(USTKTOP-sizeof(Tos));
+ cycles(&t);
+ tos->kcycles += t - up->kentry;
+ tos->pcycles = up->pcycles;
+ tos->cyclefreq = m->cpuhz;
+ tos->pid = up->pid;
+
+ /* make visible immediately to user phase */
+ l1cache->wbse(tos, sizeof *tos);
+}
+
+/*
+ * return the userpc the last exception happened at
+ */
+uintptr
+userpc(void)
+{
+ Ureg *ureg = up->dbgreg;
+ return ureg->pc;
+}
+
+/* This routine must save the values of registers the user is not permitted
+ * to write from devproc and then restore the saved values before returning.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+ USED(ureg, pureg, uva, n);
+}
+
+/*
+ * this is the body for all kproc's
+ */
+static void
+linkproc(void)
+{
+ spllo();
+ up->kpfun(up->kparg);
+ pexit("kproc exiting", 0);
+}
+
+/*
+ * setup stack and initial PC for a new kernel proc. This is architecture
+ * dependent because of the starting stack location
+ */
+void
+kprocchild(Proc *p, void (*func)(void*), void *arg)
+{
+ p->sched.pc = PTR2UINT(linkproc);
+ p->sched.sp = PTR2UINT(p->kstack+KSTACK);
+
+ p->kpfun = func;
+ p->kparg = arg;
+}
+
+/*
+ * pc output by dumpaproc
+ */
+uintptr
+dbgpc(Proc* p)
+{
+ Ureg *ureg;
+
+ ureg = p->dbgreg;
+ if(ureg == 0)
+ return 0;
+
+ return ureg->pc;
+}
+
+/*
+ * set mach dependent process state for a new process
+ */
+void
+procsetup(Proc* p)
+{
+ fpusysprocsetup(p);
+}
+
+/*
+ * Save the mach dependent part of the process state.
+ */
+void
+procsave(Proc* p)
+{
+ uvlong t;
+
+ cycles(&t);
+ p->pcycles += t;
+
+ fpuprocsave(p);
+ l1cache->wbse(p, sizeof *p); /* is this needed? */
+ l1cache->wb(); /* is this needed? */
+}
+
+void
+procfork(Proc* p)
+{
+ p->kentry = up->kentry;
+ p->pcycles = -p->kentry;
+}
+
+void
+procrestore(Proc* p)
+{
+ uvlong t;
+
+ if(p->kp)
+ return;
+ cycles(&t);
+ p->pcycles -= t;
+ wakewfi(); /* in case there's another runnable proc */
+
+ /* let it fault in at first use */
+// fpuprocrestore(p);
+ l1cache->wb(); /* system is more stable with this */
+}
+
+int
+userureg(Ureg* ureg)
+{
+ return (ureg->psr & PsrMask) == PsrMusr;
+}
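
A worked example of the cycle accounting in procsave and procrestore above: procrestore subtracts the current cycle count and procsave adds it back, so between the two calls p->pcycles grows by exactly the cycles the process spent running; a process scheduled in at cycle 1000 and out at cycle 1500 is charged 1500 - 1000 = 500 cycles, regardless of the counter's absolute value.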
diff --git a/sys/src/9/teg2/archtegra.c b/sys/src/9/teg2/archtegra.c
new file mode 100644
index 000000000..41750b747
--- /dev/null
+++ b/sys/src/9/teg2/archtegra.c
@@ -0,0 +1,869 @@
+/*
+ * nvidia tegra 2 architecture-specific stuff
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+#include "arm.h"
+
+#include "../port/netif.h"
+#include "etherif.h"
+#include "../port/flashif.h"
+#include "../port/usb.h"
+#include "../port/portusbehci.h"
+#include "usbehci.h"
+
+enum {
+ /* hardware limits imposed by register contents or layouts */
+ Maxcpus = 4,
+ Maxflowcpus = 2,
+
+ Debug = 0,
+};
+
+typedef struct Clkrst Clkrst;
+typedef struct Diag Diag;
+typedef struct Flow Flow;
+typedef struct Scu Scu;
+typedef struct Power Power;
+
+struct Clkrst {
+ ulong rstsrc;
+ ulong rstdevl;
+ ulong rstdevh;
+ ulong rstdevu;
+
+ ulong clkoutl;
+ ulong clkouth;
+ ulong clkoutu;
+
+ uchar _pad0[0x24-0x1c];
+ ulong supcclkdiv; /* super cclk divider */
+ ulong _pad1;
+ ulong supsclkdiv; /* super sclk divider */
+
+ uchar _pad4[0x4c-0x30];
+ ulong clkcpu;
+
+ uchar _pad5[0xe0-0x50];
+ ulong pllxbase; /* pllx controls CPU clock speed */
+ ulong pllxmisc;
+ ulong pllebase; /* plle is dedicated to pcie */
+ ulong pllemisc;
+
+ uchar _pad2[0x340-0xf0];
+ ulong cpuset;
+ ulong cpuclr;
+};
+
+enum {
+ /* rstsrc bits */
+ Wdcpurst = 1<<0,
+ Wdcoprst = 1<<1,
+ Wdsysrst = 1<<2,
+ Wdsel = 1<<4, /* tmr1 or tmr2? */
+ Wdena = 1<<5,
+
+ /* devl bits */
+ Sysreset = 1<<2,
+
+ /* clkcpu bits */
+ Cpu1stop = 1<<9,
+ Cpu0stop = 1<<8,
+
+ /* cpu* bits */
+ Cpu1dbgreset = 1<<13,
+ Cpu0dbgreset = 1<<12,
+ Cpu1wdreset = 1<<9,
+ Cpu0wdreset = 1<<8,
+ Cpu1dereset = 1<<5,
+ Cpu0dereset = 1<<4,
+ Cpu1reset = 1<<1,
+ Cpu0reset = 1<<0,
+};
+
+struct Power {
+ ulong ctl; /* mainly for rtc clock signals */
+ ulong secregdis;
+ ulong swrst;
+
+ ulong wakevmask;
+ ulong waklvl;
+ ulong waksts;
+ ulong swwaksts;
+
+ ulong dpdpadsovr; /* deep power down pads override */
+ ulong dpdsample;
+ ulong dpden;
+
+ ulong gatetimroff;
+ ulong gatetimron;
+ ulong toggle;
+ ulong unclamp;
+ ulong gatests; /* ro */
+
+ ulong goodtmr;
+ ulong blinktmr;
+
+ ulong noiopwr;
+ ulong detect;
+ ulong detlatch;
+
+ ulong scratch[24];
+ ulong secscratch[6];
+
+ ulong cpupwrgoodtmr;
+ ulong cpupwrofftmr;
+
+ ulong pgmask[2];
+
+ ulong autowaklvl;
+ ulong autowaklvlmask;
+ ulong wakdelay;
+
+ ulong detval;
+ ulong ddr;
+ ulong usbdebdel; /* usb de-bounce delay */
+ ulong usbao;
+ ulong cryptoop;
+ ulong pllpwb0ovr;
+ ulong scratch24[42-24+1];
+ ulong boundoutmirr[3];
+ ulong sys33ven;
+ ulong boundoutmirracc;
+ ulong gate;
+};
+
+enum {
+ /* toggle bits */
+ Start = 1<<8,
+ /* partition ids */
+ Partpcie= 3,
+ Partl2 = 4,
+};
+
+struct Scu {
+ ulong ctl;
+ ulong cfg; /* ro */
+ ulong cpupwrsts;
+ ulong inval;
+
+ uchar _pad0[0x40-0x10];
+ ulong filtstart;
+ ulong filtend;
+
+ uchar _pad1[0x50-0x48];
+ ulong accctl; /* initially 0 */
+ ulong nsaccctl;
+};
+
+enum {
+ /* ctl bits */
+ Scuenable = 1<<0,
+ Filter = 1<<1,
+ Scuparity = 1<<2,
+ Specfill = 1<<3, /* only for PL310 */
+ Allport0 = 1<<4,
+ Standby = 1<<5,
+ Icstandby = 1<<6,
+};
+
+struct Flow {
+ ulong haltcpu0;
+ ulong haltcop;
+ ulong cpu0;
+ ulong cop;
+ ulong xrq;
+ ulong haltcpu1;
+ ulong cpu1;
+};
+
+enum {
+ /* haltcpu* bits */
+ Stop = 2<<29,
+
+ /* cpu* bits */
+ Event = 1<<14, /* w1c */
+ Waitwfebitsshift = 4,
+ Waitwfebitsmask = MASK(2),
+ Eventenable = 1<<1,
+ Cpuenable = 1<<0,
+};
+
+struct Diag {
+ Cacheline c0;
+ Lock;
+ long cnt;
+ long sync;
+ Cacheline c1;
+};
+
+extern ulong testmem;
+
+/*
+ * number of cpus available. contrast with conf.nmach, which is number
+ * of running cpus.
+ */
+int navailcpus;
+Isolated l1ptstable;
+
+Soc soc = {
+ .clkrst = 0x60006000, /* clock & reset signals */
+ .power = 0x7000e400,
+ .exceptvec = PHYSEVP, /* undocumented magic */
+ .sema = 0x60001000,
+ .l2cache= PHYSL2BAG, /* pl310 bag on the side */
+ .flow = 0x60007000,
+
+ /* 4 non-gic controllers */
+// .intr = { 0x60004000, 0x60004100, 0x60004200, 0x60004300, },
+
+ /* private memory region */
+ .scu = 0x50040000,
+ /* we got this address from the `cortex-a series programmer's guide'. */
+ .intr = 0x50040100, /* per-cpu interface */
+ .glbtmr = 0x50040200,
+ .loctmr = 0x50040600,
+ .intrdist=0x50041000,
+
+ .uart = { 0x70006000, 0x70006040,
+ 0x70006200, 0x70006300, 0x70006400, },
+
+ .rtc = 0x7000e000,
+ .tmr = { 0x60005000, 0x60005008, 0x60005050, 0x60005058, },
+ .µs = 0x60005010,
+
+ .pci = 0x80000000,
+ .ether = 0xa0024000,
+
+ .nand = 0x70008000,
+ .nor = 0x70009000, /* also VIRTNOR */
+
+ .ehci = P2VAHB(0xc5000000), /* 1st of 3 */
+ .ide = P2VAHB(0xc3000000),
+
+ .gpio = { 0x6000d000, 0x6000d080, 0x6000d100, 0x6000d180,
+ 0x6000d200, 0x6000d280, 0x6000d300, },
+ .spi = { 0x7000d400, 0x7000d600, 0x7000d800, 0x7000da00, },
+ .twsi = 0x7000c000,
+ .mmc = { P2VAHB(0xc8000000), P2VAHB(0xc8000200),
+ P2VAHB(0xc8000400), P2VAHB(0xc8000600), },
+};
+
+static volatile Diag diag;
+static int missed;
+
+void
+dumpcpuclks(void) /* run CPU at full speed */
+{
+ Clkrst *clk = (Clkrst *)soc.clkrst;
+
+ iprint("pllx base %#lux misc %#lux\n", clk->pllxbase, clk->pllxmisc);
+ iprint("plle base %#lux misc %#lux\n", clk->pllebase, clk->pllemisc);
+ iprint("super cclk divider %#lux\n", clk->supcclkdiv);
+ iprint("super sclk divider %#lux\n", clk->supsclkdiv);
+}
+
+static char *
+devidstr(ulong)
+{
+ return "ARM Cortex-A9";
+}
+
+void
+archtegralink(void)
+{
+}
+
+/* convert AddrDevid register to a string in buf and return buf */
+char *
+cputype2name(char *buf, int size)
+{
+ ulong r;
+
+ r = cpidget(); /* main id register */
+ assert((r >> 24) == 'A');
+ seprint(buf, buf + size, "Cortex-A9 r%ldp%ld",
+ (r >> 20) & MASK(4), r & MASK(4));
+ return buf;
+}
+
+static void
+errata(void)
+{
+ ulong reg, r, p;
+
+ /* apply cortex-a9 errata workarounds */
+ r = cpidget(); /* main id register */
+ assert((r >> 24) == 'A');
+ p = r & MASK(4); /* minor revision */
+ r >>= 20;
+ r &= MASK(4); /* major revision */
+
+ /* this is an undocumented `diagnostic register' that linux knows */
+ reg = cprdsc(0, CpDTLB, 0, 1);
+ if (r < 2 || r == 2 && p <= 2)
+ reg |= 1<<4; /* 742230 */
+ if (r == 2 && p <= 2)
+ reg |= 1<<6 | 1<<12 | 1<<22; /* 743622, 2×742231 */
+ if (r < 3)
+ reg |= 1<<11; /* 751472 */
+ cpwrsc(0, CpDTLB, 0, 1, reg);
+}
+
+void
+archconfinit(void)
+{
+ char *p;
+ ulong hz;
+
+ assert(m != nil);
+ m->cpuhz = 1000 * Mhz; /* trimslice speed */
+ p = getconf("*cpumhz");
+ if (p) {
+ hz = atoi(p) * Mhz;
+ if (hz >= 100*Mhz && hz <= 3600UL*Mhz)
+ m->cpuhz = hz;
+ }
+ m->delayloop = m->cpuhz/2000; /* initial estimate */
+ errata();
+}
+
+int
+archether(unsigned ctlrno, Ether *ether)
+{
+ switch(ctlrno) {
+ case 0:
+ ether->type = "rtl8169"; /* pci-e ether */
+ ether->ctlrno = ctlrno;
+ ether->irq = Pcieirq; /* non-msi pci-e intr */
+ ether->nopt = 0;
+ ether->mbps = 1000;
+ return 1;
+ }
+ return -1;
+}
+
+void
+dumpscustate(void)
+{
+ Scu *scu = (Scu *)soc.scu;
+
+ print("cpu%d scu: accctl %#lux\n", m->machno, scu->accctl);
+ print("cpu%d scu: smp cpu bit map %#lo for %ld cpus; ", m->machno,
+ (scu->cfg >> 4) & MASK(4), (scu->cfg & MASK(2)) + 1);
+ print("cpus' power %#lux\n", scu->cpupwrsts);
+}
+
+void
+scuon(void)
+{
+ Scu *scu = (Scu *)soc.scu;
+
+ if (scu->ctl & Scuenable)
+ return;
+ scu->inval = MASK(16);
+ coherence();
+ scu->ctl = Scuparity | Scuenable | Specfill;
+ coherence();
+}
+
+int
+getncpus(void)
+{
+ int n;
+ char *p;
+ Scu *scu;
+
+ if (navailcpus == 0) {
+ scu = (Scu *)soc.scu;
+ navailcpus = (scu->cfg & MASK(2)) + 1;
+ if (navailcpus > MAXMACH)
+ navailcpus = MAXMACH;
+
+ p = getconf("*ncpu");
+ if (p && *p) {
+ n = atoi(p);
+ if (n > 0 && n < navailcpus)
+ navailcpus = n;
+ }
+ }
+ return navailcpus;
+}
+
+void
+cpuidprint(void)
+{
+ char name[64];
+
+ cputype2name(name, sizeof name);
+ delay(50); /* let uart catch up */
+ iprint("cpu%d: %lldMHz ARM %s %s-endian\n",
+ m->machno, m->cpuhz / Mhz, name,
+ getpsr() & PsrBigend? "big": "little");
+}
+
+static void
+clockson(void)
+{
+ Clkrst *clk = (Clkrst *)soc.clkrst;
+
+ /* enable all by clearing resets */
+ clk->rstdevl = clk->rstdevh = clk->rstdevu = 0;
+ coherence();
+ clk->clkoutl = clk->clkouth = clk->clkoutu = ~0; /* enable all clocks */
+ coherence();
+
+ clk->rstsrc = Wdcpurst | Wdcoprst | Wdsysrst | Wdena;
+ coherence();
+}
+
+/* we could be shutting down ourself (if cpu == m->machno), so take care. */
+void
+stopcpu(uint cpu)
+{
+ Flow *flow = (Flow *)soc.flow;
+ Clkrst *clk = (Clkrst *)soc.clkrst;
+
+ if (cpu == 0) {
+ iprint("stopcpu: may not stop cpu0\n");
+ return;
+ }
+
+ machoff(cpu);
+ lock(&active);
+ active.stopped |= 1 << cpu;
+ unlock(&active);
+ l1cache->wb();
+
+ /* shut down arm7 avp coproc so it can't cause mischief. */
+ /* could try watchdog without stopping avp. */
+ flow->haltcop = Stop;
+ coherence();
+ flow->cop = 0; /* no Cpuenable */
+ coherence();
+ delay(10);
+
+ assert(cpu < Maxflowcpus);
+ *(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = Stop;
+ coherence();
+ *(cpu == 0? &flow->cpu0: &flow->cpu1) = 0; /* no Cpuenable */
+ coherence();
+ delay(10);
+
+ /* cold reset */
+ assert(cpu < Maxcpus);
+ clk->cpuset = (Cpu0reset | Cpu0dbgreset | Cpu0dereset) << cpu;
+ coherence();
+ delay(1);
+
+ l1cache->wb();
+}
+
+static void
+synccpus(volatile long *cntp, int n)
+{
+ ainc(cntp);
+ while (*cntp < n)
+ ;
+ /* all cpus should now be here */
+}
+
+static void
+pass1(int pass, volatile Diag *dp)
+{
+ int i;
+
+ if(m->machno == 0)
+ iprint(" %d", pass);
+ for (i = 1000*1000; --i > 0; ) {
+ ainc(&dp->cnt);
+ adec(&dp->cnt);
+ }
+
+ synccpus(&dp->sync, navailcpus);
+ /* all cpus are now here */
+
+ ilock(dp);
+ if(dp->cnt != 0)
+ panic("cpu%d: diag: failed w count %ld", m->machno, dp->cnt);
+ iunlock(dp);
+
+ synccpus(&dp->sync, 2 * navailcpus);
+ /* all cpus are now here */
+ adec(&dp->sync);
+ adec(&dp->sync);
+}
+
+/*
+ * try to confirm coherence of l1 caches.
+ * assume that all available cpus will be started.
+ */
+void
+l1diag(void)
+{
+ int pass;
+ volatile Diag *dp;
+
+ if (!Debug)
+ return;
+
+ l1cache->wb();
+
+ /*
+ * synchronise and print
+ */
+ dp = &diag;
+ ilock(dp);
+ if (m->machno == 0)
+ iprint("l1: waiting for %d cpus... ", navailcpus);
+ iunlock(dp);
+
+ synccpus(&dp->sync, navailcpus);
+
+ ilock(dp);
+ if (m->machno == 0)
+ iprint("cache coherency pass");
+ iunlock(dp);
+
+ synccpus(&dp->sync, 2 * navailcpus);
+ adec(&dp->sync);
+ adec(&dp->sync);
+
+ /*
+ * cpus contend
+ */
+ for (pass = 0; pass < 3; pass++)
+ pass1(pass, dp);
+
+ /*
+ * synchronise and check sanity
+ */
+ synccpus(&dp->sync, navailcpus);
+
+ if(dp->sync < navailcpus || dp->sync >= 2 * navailcpus)
+ panic("cpu%d: diag: failed w dp->sync %ld", m->machno,
+ dp->sync);
+ if(dp->cnt != 0)
+ panic("cpu%d: diag: failed w dp->cnt %ld", m->machno,
+ dp->cnt);
+
+ ilock(dp);
+ iprint(" cpu%d ok", m->machno);
+ iunlock(dp);
+
+ synccpus(&dp->sync, 2 * navailcpus);
+ adec(&dp->sync);
+ adec(&dp->sync);
+ l1cache->wb();
+
+ /*
+ * all done, print
+ */
+ ilock(dp);
+ if (m->machno == 0)
+ iprint("\n");
+ iunlock(dp);
+}
+
+static void
+unfreeze(uint cpu)
+{
+ Clkrst *clk = (Clkrst *)soc.clkrst;
+ Flow *flow = (Flow *)soc.flow;
+
+ assert(cpu < Maxcpus);
+
+ clk->clkcpu &= ~(Cpu0stop << cpu);
+ coherence();
+ /* out of reset */
+ clk->cpuclr = (Cpu0reset | Cpu0wdreset | Cpu0dbgreset | Cpu0dereset) <<
+ cpu;
+ coherence();
+
+ assert(cpu < Maxflowcpus);
+ *(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;
+ coherence();
+ *(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = 0; /* normal operat'n */
+ coherence();
+}
+
+/*
+ * this is all a bit magic. the soc.exceptvec register is effectively
+ * undocumented. we had to look at linux and experiment, alas. this is the
+ * sort of thing that should be standardised as part of the cortex mpcore spec.
+ * even intel document their equivalent procedure.
+ */
+int
+startcpu(uint cpu)
+{
+ int i, r;
+ ulong oldvec, rstaddr;
+ ulong *evp = (ulong *)soc.exceptvec; /* magic */
+
+ r = 0;
+ if (getncpus() < 2 || cpu == m->machno ||
+ cpu >= MAXMACH || cpu >= navailcpus)
+ return -1;
+
+ oldvec = *evp;
+ l1cache->wb(); /* start next cpu w same view of ram */
+ *evp = rstaddr = PADDR(_vrst); /* will start cpu executing at _vrst */
+ coherence();
+ l1cache->wb();
+ unfreeze(cpu);
+
+ for (i = 2000; i > 0 && *evp == rstaddr; i--)
+ delay(1);
+ if (i <= 0 || *evp != cpu) {
+ iprint("cpu%d: didn't start!\n", cpu);
+ stopcpu(cpu); /* make sure it's stopped */
+ r = -1;
+ }
+ *evp = oldvec;
+ return r;
+}
+
+static void
+cksecure(void)
+{
+ ulong db;
+ extern ulong getdebug(void);
+
+ if (getscr() & 1)
+ panic("cpu%d: running non-secure", m->machno);
+ db = getdebug();
+ if (db)
+ iprint("cpu%d: debug enable reg %#lux\n", m->machno, db);
+}
+
+ulong
+smpon(void)
+{
+ ulong aux;
+
+ /* cortex-a9 model-specific configuration */
+ aux = getauxctl();
+ putauxctl(aux | CpACsmp | CpACmaintbcast);
+ return aux;
+}
+
+void
+cortexa9cachecfg(void)
+{
+ /* cortex-a9 model-specific configuration */
+ putauxctl(getauxctl() | CpACparity | CpAClwr0line | CpACl2pref);
+}
+
+/*
+ * called on a cpu other than 0 from cpureset in l.s,
+ * from _vrst in lexception.s.
+ * mmu and l1 (and system-wide l2) caches and coherency (smpon) are on,
+ * but interrupts are disabled.
+ * our mmu is using an exact copy of cpu0's l1 page table
+ * as it was after userinit ran.
+ */
+void
+cpustart(void)
+{
+ int ms;
+ ulong *evp;
+ Power *pwr;
+
+ up = nil;
+ if (active.machs & (1<<m->machno)) {
+ serialputc('?');
+ serialputc('r');
+ panic("cpu%d: resetting after start", m->machno);
+ }
+ assert(m->machno != 0);
+
+ errata();
+ cortexa9cachecfg();
+ memdiag(&testmem);
+
+ machinit(); /* bumps nmach, adds bit to machs */
+ machoff(m->machno); /* not ready to go yet */
+
+ /* clock signals and scu are system-wide and already on */
+ clockshutdown(); /* kill any watch-dog timer */
+
+ trapinit();
+ clockinit(); /* sets loop delay */
+ timersinit();
+ cpuidprint();
+
+ /*
+ * notify cpu0 that we're up so it can proceed to l1diag.
+ */
+ evp = (ulong *)soc.exceptvec; /* magic */
+ *evp = m->machno;
+ coherence();
+
+ l1diag(); /* contend with other cpus to verify sanity */
+
+ /*
+ * pwr->noiopwr == 0
+ * pwr->detect == 0x1ff (default, all disabled)
+ */
+ pwr = (Power *)soc.power;
+ assert(pwr->gatests == MASK(7)); /* everything has power */
+
+ /*
+ * 8169 has to initialise before we get past this, thus cpu0
+ * has to schedule processes first.
+ */
+ if (Debug)
+ iprint("cpu%d: waiting for 8169\n", m->machno);
+ for (ms = 0; !l1ptstable.word && ms < 5000; ms += 10) {
+ delay(10);
+ cachedinvse(&l1ptstable.word, sizeof l1ptstable.word);
+ }
+ if (!l1ptstable.word)
+ iprint("cpu%d: 8169 unreasonably slow; proceeding\n", m->machno);
+ /* now safe to copy cpu0's l1 pt in mmuinit */
+
+ mmuinit(); /* update our l1 pt from cpu0's */
+ fpon();
+ machon(m->machno); /* now ready to go and be scheduled */
+
+ if (Debug)
+ iprint("cpu%d: scheding\n", m->machno);
+ schedinit();
+ panic("cpu%d: schedinit returned", m->machno);
+}
+
+/* mainly used to break out of wfi */
+void
+sgintr(Ureg *ureg, void *)
+{
+ iprint("cpu%d: got sgi\n", m->machno);
+ /* try to prod cpu1 into life when it gets stuck */
+ if (m->machno != 0)
+ clockprod(ureg);
+}
+
+void
+archreset(void)
+{
+ static int beenhere;
+
+ if (beenhere)
+ return;
+ beenhere = 1;
+
+ /* conservative temporary values until archconfinit runs */
+ m->cpuhz = 1000 * Mhz; /* trimslice speed */
+ m->delayloop = m->cpuhz/2000; /* initial estimate */
+
+ prcachecfg();
+
+ clockson();
+ /* all partitions were powered up by u-boot, so needn't do anything */
+ archconfinit();
+// resetusb();
+ fpon();
+
+ if (irqtooearly)
+ panic("archreset: too early for irqenable");
+ irqenable(Cpu0irq, sgintr, nil, "cpu0");
+ irqenable(Cpu1irq, sgintr, nil, "cpu1");
+ /* ... */
+}
+
+void
+archreboot(void)
+{
+ Clkrst *clk = (Clkrst *)soc.clkrst;
+
+ assert(m->machno == 0);
+ iprint("archreboot: reset!\n");
+ delay(20);
+
+ clk->rstdevl |= Sysreset;
+ coherence();
+ delay(500);
+
+ /* shouldn't get here */
+ splhi();
+ iprint("awaiting reset");
+ for(;;) {
+ delay(1000);
+ print(".");
+ }
+}
+
+void
+kbdinit(void)
+{
+}
+
+static void
+missing(ulong addr, char *name)
+{
+ static int firstmiss = 1;
+
+ if (addr == 0) {
+ iprint("address zero for %s\n", name);
+ return;
+ }
+ if (probeaddr(addr) >= 0)
+ return;
+ missed++;
+ if (firstmiss) {
+ iprint("missing:");
+ firstmiss = 0;
+ } else
+ iprint(",\n\t");
+ iprint(" %s at %#lux", name, addr);
+}
+
+/* verify that all the necessary device registers are accessible */
+void
+chkmissing(void)
+{
+ delay(10);
+ missing(KZERO, "dram");
+ missing(soc.intr, "intr ctlr");
+ missing(soc.intrdist, "intr distrib");
+ missing(soc.tmr[0], "tegra timer1");
+ missing(soc.uart[0], "console uart");
+ missing(soc.pci, "pcie");
+ missing(soc.ether, "ether8169");
+ missing(soc.µs, "µs counter");
+ if (missed)
+ iprint("\n");
+ delay(10);
+}
+
+void
+archflashwp(Flash*, int)
+{
+}
+
+/*
+ * for ../port/devflash.c:/^flashreset
+ * retrieve flash type, virtual base and length and return 0;
+ * return -1 on error (no flash)
+ */
+int
+archflashreset(int bank, Flash *f)
+{
+ if(bank != 0)
+ return -1;
+panic("archflashreset: rewrite for nor & nand flash on ts");
+ /*
+ * this is set up for the igepv2 board.
+ */
+ f->type = "onenand";
+ f->addr = (void*)VIRTNOR; /* mapped here by archreset */
+ f->size = 0; /* done by probe */
+ f->width = 1;
+ f->interleave = 0;
+ return 0;
+}
diff --git a/sys/src/9/teg2/arm.h b/sys/src/9/teg2/arm.h
new file mode 100644
index 000000000..f2a13b5cb
--- /dev/null
+++ b/sys/src/9/teg2/arm.h
@@ -0,0 +1,309 @@
+/*
+ * arm-specific definitions for cortex-a8 and -a9
+ * these are used in C and assembler
+ *
+ * `cortex' refers to the cortex-a8 or -a9.
+ */
+
+#define NREGS 15 /* general-purpose regs, R0 through R14 */
+
+/*
+ * Program Status Registers
+ */
+#define PsrMusr 0x00000010 /* mode */
+#define PsrMfiq 0x00000011
+#define PsrMirq 0x00000012
+#define PsrMsvc 0x00000013 /* `protected mode for OS' */
+#define PsrMmon 0x00000016 /* `secure monitor' (trustzone hyper) */
+#define PsrMabt 0x00000017
+#define PsrMund 0x0000001B
+#define PsrMsys 0x0000001F /* `privileged user mode for OS' (trustzone) */
+#define PsrMask 0x0000001F
+
+#define PsrThumb 0x00000020 /* beware hammers */
+#define PsrDfiq 0x00000040 /* disable FIQ interrupts */
+#define PsrDirq 0x00000080 /* disable IRQ interrupts */
+#define PsrDasabt 0x00000100 /* disable asynch aborts */
+#define PsrBigend 0x00000200
+
+#define PsrJaz 0x01000000 /* java mode */
+
+#define PsrV 0x10000000 /* overflow */
+#define PsrC 0x20000000 /* carry/borrow/extend */
+#define PsrZ 0x40000000 /* zero */
+#define PsrN 0x80000000 /* negative/less than */
+
+#define PsrMbz (PsrJaz|PsrThumb|PsrBigend) /* these bits must be 0 */
+
+/*
+ * MCR and MRC are anti-mnemonic.
+ * MTCP coproc, opcode1, Rd, CRn, CRm[, opcode2] # arm -> coproc
+ * MFCP coproc, opcode1, Rd, CRn, CRm[, opcode2] # coproc -> arm
+ */
+
+#define MTCP MCR
+#define MFCP MRC
+
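
As a worked mapping to the standard mnemonics (an illustration, using only names defined in this file):

	MFCP	CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDid

is the 5a spelling of MRC p15, 0, R0, c0, c0, 0, i.e. read the main ID register into R0; the CPUID macro in arm.s uses the same form with CpIDmpid as the final operand to read the per-cpu multiprocessor ID register instead.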
+/* instruction decoding */
+#define ISCPOP(op) ((op) == 0xE || ((op) & ~1) == 0xC)
+#define ISFPAOP(cp, op) ((cp) == CpOFPA && ISCPOP(op))
+#define ISVFPOP(cp, op) (((cp) == CpDFP || (cp) == CpFP) && ISCPOP(op))
+
+/*
+ * Coprocessors
+ */
+#define CpOFPA 1 /* ancient 7500 FPA */
+#define CpFP 10 /* float FP, VFP cfg. */
+#define CpDFP 11 /* double FP */
+#define CpSC 15 /* System Control */
+
+/*
+ * Primary (CRn) CpSC registers.
+ */
+#define CpID 0 /* ID and cache type */
+#define CpCONTROL 1 /* miscellaneous control */
+#define CpTTB 2 /* Translation Table Base(s) */
+#define CpDAC 3 /* Domain Access Control */
+#define CpFSR 5 /* Fault Status */
+#define CpFAR 6 /* Fault Address */
+#define CpCACHE 7 /* cache/write buffer control */
+#define CpTLB 8 /* TLB control */
+#define CpCLD 9 /* L2 Cache Lockdown, op1==1 */
+#define CpTLD 10 /* TLB Lockdown, with op2 */
+#define CpVECS 12 /* vector bases, op1==0, Crm==0, op2s (cortex) */
+#define CpPID 13 /* Process ID */
+#define CpDTLB 15 /* TLB, L1 cache stuff (cortex) */
+
+/*
+ * CpTTB op1==0, Crm==0 opcode2 values.
+ */
+#define CpTTB0 0 /* secure ttb */
+#define CpTTB1 1 /* non-secure ttb (v7) */
+#define CpTTBctl 2 /* v7 */
+
+/*
+ * CpFSR op1==0, Crm==0 opcode 2 values.
+ */
+#define CpDFSR 0 /* data fault status */
+#define CpIFSR 1 /* instruction fault status */
+
+/*
+ * CpFAR op1==0, Crm==0 opcode 2 values.
+ */
+#define CpDFAR 0 /* data fault address */
+#define CpIFAR 2 /* instruction fault address */
+
+/*
+ * CpID Secondary (CRm) registers.
+ */
+#define CpIDidct 0
+
+/*
+ * CpID CpIDidct op1==0 opcode2 fields.
+ */
+#define CpIDid 0 /* main ID */
+#define CpIDct 1 /* cache type */
+#define CpIDtlb 3 /* tlb type (cortex) */
+#define CpIDmpid 5 /* multiprocessor id (cortex) */
+
+/* CpIDid op1 values */
+#define CpIDcsize 1 /* cache size (cortex) */
+#define CpIDcssel 2 /* cache size select (cortex) */
+
+/*
+ * CpID CpIDidct op1==CpIDcsize opcode2 fields.
+ */
+#define CpIDcasize 0 /* cache size */
+#define CpIDclvlid 1 /* cache-level id */
+
+/*
+ * CpCONTROL op2 codes, op1==0, Crm==0.
+ */
+#define CpMainctl 0 /* sctlr */
+#define CpAuxctl 1
+#define CpCPaccess 2
+
+/*
+ * CpCONTROL: op1==0, CRm==0, op2==CpMainctl.
+ * main control register.
+ * cortex/armv7 has more ops and CRm values.
+ */
+#define CpCmmu 0x00000001 /* M: MMU enable */
+#define CpCalign 0x00000002 /* A: alignment fault enable */
+#define CpCdcache 0x00000004 /* C: data cache on */
+#define CpBigend (1<<7)
+#define CpCsw (1<<10) /* SW: SWP(B) enable (deprecated in v7) */
+#define CpCpredict 0x00000800 /* Z: branch prediction (armv7) */
+#define CpCicache 0x00001000 /* I: instruction cache on */
+#define CpChv 0x00002000 /* V: high vectors */
+#define CpCrr (1<<14) /* RR: round robin vs random cache replacement */
+#define CpCha (1<<17) /* HA: hw access flag enable */
+#define CpCdz (1<<19) /* DZ: divide by zero fault enable (not cortex-a9) */
+#define CpCfi (1<<21) /* FI: fast intrs */
+#define CpCve (1<<24) /* VE: intr vectors enable */
+#define CpCee (1<<25) /* EE: exception endianness: big */
+#define CpCnmfi (1<<27) /* NMFI: non-maskable fast intrs. (RO) */
+#define CpCtre (1<<28) /* TRE: TEX remap enable */
+#define CpCafe (1<<29) /* AFE: access flag (ttb) enable */
+#define CpCte (1<<30) /* TE: thumb exceptions */
+
+#define CpCsbz (1<<31 | CpCte | CpCafe | CpCtre | 1<<26 | CpCee | CpCve | \
+ CpCfi | 3<<19 | CpCha | 1<<15 | 3<<8 | CpBigend) /* must be 0 (armv7) */
+#define CpCsbo (3<<22 | 1<<18 | 1<<16 | CpChv | CpCsw | 017<<3) /* must be 1 (armv7) */
+
+/*
+ * CpCONTROL: op1==0, CRm==0, op2==CpAuxctl.
+ * Auxiliary control register on cortex-a9.
+ * these differ from even the cortex-a8 bits.
+ */
+#define CpACparity (1<<9)
+#define CpACca1way (1<<8) /* cache in a single way */
+#define CpACcaexcl (1<<7) /* exclusive cache */
+#define CpACsmp (1<<6) /* SMP l1 caches coherence; needed for ldrex/strex */
+#define CpAClwr0line (1<<3) /* write full cache line of 0s; see Fullline0 */
+#define CpACl1pref (1<<2) /* l1 prefetch enable */
+#define CpACl2pref (1<<1) /* l2 prefetch enable */
+#define CpACmaintbcast (1<<0) /* broadcast cache & tlb maint. ops */
+
+/*
+ * CpCONTROL Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpCONTROLscr 1
+
+#define CpSCRscr 0 /* secure configuration */
+
+/*
+ * CpCACHE Secondary (CRm) registers and opcode2 fields. op1==0.
+ * In ARM-speak, 'flush' means invalidate and 'clean' means writeback.
+ */
+#define CpCACHEintr 0 /* interrupt (op2==4) */
+#define CpCACHEisi 1 /* inner-sharable I cache (v7) */
+#define CpCACHEpaddr 4 /* 0: phys. addr (cortex) */
+#define CpCACHEinvi 5 /* instruction, branch table */
+#define CpCACHEinvd 6 /* data or unified */
+// #define CpCACHEinvu 7 /* unified (not on cortex) */
+#define CpCACHEva2pa 8 /* va -> pa translation (cortex) */
+#define CpCACHEwb 10 /* writeback */
+#define CpCACHEinvdse 11 /* data or unified by mva */
+#define CpCACHEwbi 14 /* writeback+invalidate */
+
+#define CpCACHEall 0 /* entire (not for invd nor wb(i) on cortex) */
+#define CpCACHEse 1 /* single entry */
+#define CpCACHEsi 2 /* set/index (set/way) */
+#define CpCACHEtest 3 /* test loop */
+#define CpCACHEwait 4 /* wait (prefetch flush on cortex) */
+#define CpCACHEdmbarr 5 /* wb only (cortex) */
+#define CpCACHEflushbtc 6 /* flush branch-target cache (cortex) */
+#define CpCACHEflushbtse 7 /* ⋯ or just one entry in it (cortex) */
+
+/*
+ * CpTLB Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpTLBinvi 5 /* instruction */
+#define CpTLBinvd 6 /* data */
+#define CpTLBinvu 7 /* unified */
+
+#define CpTLBinv 0 /* invalidate all */
+#define CpTLBinvse 1 /* invalidate single entry */
+#define CpTBLasid 2 /* by ASID (cortex) */
+
+/*
+ * CpCLD Secondary (CRm) registers and opcode2 fields for op1==0. (cortex)
+ */
+#define CpCLDena 12 /* enables */
+#define CpCLDcyc 13 /* cycle counter */
+#define CpCLDuser 14 /* user enable */
+
+#define CpCLDenapmnc 0
+#define CpCLDenacyc 1
+
+/*
+ * CpCLD Secondary (CRm) registers and opcode2 fields for op1==1.
+ */
+#define CpCLDl2 0 /* l2 cache */
+
+#define CpCLDl2aux 2 /* auxiliary control */
+
+/*
+ * l2 cache aux. control
+ */
+#define CpCl2ecc (1<<28) /* use ecc, not parity */
+#define CpCl2noldforw (1<<27) /* no ld forwarding */
+#define CpCl2nowrcomb (1<<25) /* no write combining */
+#define CpCl2nowralldel (1<<24) /* no write allocate delay */
+#define CpCl2nowrallcomb (1<<23) /* no write allocate combine */
+#define CpCl2nowralloc (1<<22) /* no write allocate */
+#define CpCl2eccparity (1<<21) /* enable ecc or parity */
+#define CpCl2inner (1<<16) /* inner cacheability */
+/* other bits are tag ram & data ram latencies */
+
+/*
+ * CpTLD Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpTLDlock 0 /* TLB lockdown registers */
+#define CpTLDpreload 1 /* TLB preload */
+
+#define CpTLDi 0 /* TLB instr. lockdown reg. */
+#define CpTLDd 1 /* " data " " */
+
+/*
+ * CpVECS Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpVECSbase 0
+
+#define CpVECSnorm 0 /* (non-)secure base addr */
+#define CpVECSmon 1 /* secure monitor base addr */
+
+/*
+ * MMU page table entries.
+ * memory must be cached, buffered, sharable and wralloc to participate in
+ * automatic L1 cache coherency.
+ */
+#define Mbz (0<<4) /* L1 page tables: must be 0 */
+#define Noexecsect (1<<4) /* L1 sections: no execute */
+#define Fault 0x00000000 /* L[12] pte: unmapped */
+
+#define Coarse (Mbz|1) /* L1: page table */
+#define Section (Mbz|2) /* L1 1MB */
+/*
+ * next 2 bits (L1wralloc & L1sharable) and Buffered and Cached must be
+ * set in l1 ptes for LDREX/STREX to work.
+ */
+#define L1wralloc (1<<12) /* L1 TEX */
+#define L1sharable (1<<16)
+#define L1nonglobal (1<<17) /* tied to asid */
+#define Nonsecuresect (1<<19) /* L1 sections */
+
+#define Large 0x00000001 /* L2 64KB */
+#define Noexecsmall 1 /* L2: no execute */
+#define Small 0x00000002 /* L2 4KB */
+/*
+ * next 3 bits (Buffered, Cached, L2wralloc) & L2sharable must be set in
+ * l2 ptes for memory containing locks because LDREX/STREX require them.
+ */
+#define Buffered 0x00000004 /* L[12]: 0 write-thru, 1 -back */
+#define Cached 0x00000008 /* L[12] */
+#define L2wralloc (1<<6) /* L2 TEX (small pages) */
+#define L2apro (1<<9) /* L2 AP: read only */
+#define L2sharable (1<<10)
+#define L2nonglobal (1<<11) /* tied to asid */
+#define Dom0 0
+
+/* attributes for memory containing locks */
+#define L1ptedramattrs (Cached | Buffered | L1wralloc | L1sharable)
+#define L2ptedramattrs (Cached | Buffered | L2wralloc | L2sharable)
+
+#define Noaccess 0 /* AP, DAC */
+#define Krw 1 /* AP */
+/* armv7 deprecates AP[2] == 1 & AP[1:0] == 2 (Uro), prefers 3 (new in v7) */
+#define Uro 2 /* AP */
+#define Urw 3 /* AP */
+#define Client 1 /* DAC */
+#define Manager 3 /* DAC */
+
+#define AP(n, v) F((v), ((n)*2)+4, 2)
+#define L1AP(ap) (AP(3, (ap)))
+#define L2AP(ap) (AP(0, (ap))) /* armv7 */
+#define DAC(n, v) F((v), (n)*2, 2)
+
+#define HVECTORS 0xffff0000
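
To make the page-table attribute definitions above concrete, here is a small sketch (illustrative, not code from this commit) of how a kernel L1 section and a lock-safe L2 small page are composed; arm.s below builds its PTEDRAM constant from exactly the same pieces:

	/* 1MB section mapping kernel DRAM at MB-aligned pa: coherent, write-allocate */
	l1pte = pa | Dom0 | L1AP(Krw) | Section | L1ptedramattrs;

	/* 4KB small page (page-aligned pa) that may hold locks: LDREX/STREX need L2ptedramattrs */
	l2pte = pa | L2AP(Krw) | Small | L2ptedramattrs;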
diff --git a/sys/src/9/teg2/arm.s b/sys/src/9/teg2/arm.s
new file mode 100644
index 000000000..25e4851e4
--- /dev/null
+++ b/sys/src/9/teg2/arm.s
@@ -0,0 +1,132 @@
+/*
+ * nvidia tegra 2 machine assist, definitions
+ * dual-core cortex-a9 processor
+ *
+ * R9 and R10 are used for `extern register' variables.
+ * R11 is used by the loader as a temporary, so avoid it.
+ */
+
+#include "mem.h"
+#include "arm.h"
+
+#undef B /* B is for 'botch' */
+
+#define KADDR(pa) (KZERO | ((pa) & ~KSEGM))
+#define PADDR(va) (PHYSDRAM | ((va) & ~KSEGM))
+
+#define L1X(va) (((((va))>>20) & 0x0fff)<<2)
+
+#define MACHADDR (L1-MACHSIZE) /* only room for cpu0's */
+
+/* L1 pte values */
+#define PTEDRAM (Dom0|L1AP(Krw)|Section|L1ptedramattrs)
+#define PTEIO (Dom0|L1AP(Krw)|Section)
+
+#define DOUBLEMAPMBS 512 /* megabytes of low dram to double-map */
+
+/* steps on R0 */
+#define DELAY(label, mloops) \
+ MOVW $((mloops)*1000000), R0; \
+label: \
+ SUB.S $1, R0; \
+ BNE label
+
+/* print a byte on the serial console; clobbers R0 & R6; needs R12 (SB) set */
+#define PUTC(c) \
+ BARRIERS; \
+ MOVW $(c), R0; \
+ MOVW $PHYSCONS, R6; \
+ MOVW R0, (R6); \
+ BARRIERS
+
+/*
+ * new instructions
+ */
+
+#define SMC WORD $0xe1600070 /* low 4-bits are call # (trustzone) */
+/* flush branch-target cache */
+#define FLBTC MTCP CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtc
+/* flush one entry of the branch-target cache, va in R0 (cortex) */
+#define FLBTSE MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtse
+
+/* arm v7 arch defines these */
+#define DSB WORD $0xf57ff04f /* data synch. barrier; last f = SY */
+#define DMB WORD $0xf57ff05f /* data mem. barrier; last f = SY */
+#define ISB WORD $0xf57ff06f /* instr. sync. barrier; last f = SY */
+
+#define WFI WORD $0xe320f003 /* wait for interrupt */
+#define NOOP WORD $0xe320f000
+
+#define CLZ(s, d) WORD $(0xe16f0f10 | (d) << 12 | (s)) /* count leading 0s */
+
+#define SETEND(o) WORD $(0xf1010000 | (o) << 9) /* o==0, little-endian */
+
+#define CPSIE WORD $0xf1080080 /* intr enable: zeroes I bit */
+#define CPSID WORD $0xf10c00c0 /* intr disable: sets I,F bits */
+#define CPSAE WORD $0xf1080100 /* async abt enable: zeroes A bit */
+#define CPSMODE(m) WORD $(0xf1020000 | (m)) /* switch to mode m (PsrM*) */
+
+#define CLREX WORD $0xf57ff01f
+
+/* floating point */
+#define VMRS(fp, cpu) WORD $(0xeef00a10 | (fp)<<16 | (cpu)<<12) /* FP → arm */
+#define VMSR(cpu, fp) WORD $(0xeee00a10 | (fp)<<16 | (cpu)<<12) /* arm → FP */
+
+/*
+ * a popular code sequence used to write a pte for va is:
+ *
+ * MOVW R(n), TTB[LnX(va)]
+ * // clean the cache line
+ * DSB
+ * // invalidate tlb entry for va
+ * FLBTC
+ * DSB
+ * PFF (now ISB)
+ */
+#define BARRIERS FLBTC; DSB; ISB
+
+/*
+ * invoked with PTE bits in R2, pa in R3, PTE pointed to by R4.
+ * fill PTE pointed to by R4 and increment R4 past it.
+ * increment R3 by a MB. clobbers R1.
+ */
+#define FILLPTE() \
+ ORR R3, R2, R1; /* pte bits in R2, pa in R3 */ \
+ MOVW R1, (R4); \
+ ADD $4, R4; /* bump PTE address */ \
+ ADD $MiB, R3; /* bump pa */ \
+
+/* zero PTE pointed to by R4 and increment R4 past it. assumes R0 is 0. */
+#define ZEROPTE() \
+ MOVW R0, (R4); \
+ ADD $4, R4; /* bump PTE address */
+
+/*
+ * set kernel SB for zero segment (instead of usual KZERO segment).
+ * NB: the next line puts rubbish in R12:
+ * MOVW $setR12-KZERO(SB), R12
+ */
+#define SETZSB \
+ MOVW $setR12(SB), R12; /* load kernel's SB */ \
+ SUB $KZERO, R12; \
+ ADD $PHYSDRAM, R12
+
+/*
+ * note that 5a's RFE is not the v6/7 arch. instruction (0xf8900a00),
+ * which loads CPSR from the word after the PC at (R13), but rather
+ * the pre-v6 simulation `MOVM.IA.S.W (R13), [R15]' (0xe8fd8000 since
+ * MOVM is LDM in this case), which loads CPSR not from memory but
+ * from SPSR due to `.S'.
+ */
+#define RFEV7(r) WORD $(0xf8900a00 | (r) << 16)
+#define RFEV7W(r) WORD $(0xf8900a00 | (r) << 16 | 0x00200000) /* RFE.W */
+#define RFEV7DB(r) WORD $(0xf9100a00 | (r) << 16) /* RFE.DB */
+#define RFEV7DBW(r) WORD $(0xf9100a00 | (r) << 16 | 0x00200000) /* RFE.DB.W */
+
+#define CKPSR(psr, tmp, bad)
+#define CKCPSR(psrtmp, tmp, bad)
+
+/* return with cpu id in r and condition codes set from "r == 0" */
+#define CPUID(r) \
+ MFCP CpSC, 0, r, C(CpID), C(CpIDidct), CpIDmpid; \
+ AND.S $(MAXMACH-1), r /* mask out non-cpu-id bits */
diff --git a/sys/src/9/teg2/cache-l2-pl310.c b/sys/src/9/teg2/cache-l2-pl310.c
new file mode 100644
index 000000000..275d8699c
--- /dev/null
+++ b/sys/src/9/teg2/cache-l2-pl310.c
@@ -0,0 +1,456 @@
+/*
+ * PL310 level 2 cache (non-architectural bag on the side)
+ *
+ * guaranteed to work incorrectly with default settings; must set Sharovr.
+ *
+ * clean & invalidate (wbinv) is buggy, so we work around erratum 588369
+ * by disabling write-back and cache line-fill before, and restoring after.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "arm.h"
+
+#define NWAYS(l2p) ((l2p)->auxctl & Assoc16way? 16: 8)
+#define L2P ((L2pl310 *)soc.l2cache)
+
+enum {
+ L2size = 1024 * 1024, /* according to the tegra 2 manual */
+ Wayszgran = 16 * KiB, /* granularity of way sizes */
+};
+
+typedef struct L2pl310 L2pl310;
+typedef struct Pl310op Pl310op;
+
+struct Pl310op {
+ ulong pa;
+ ulong _pad;
+ ulong indexway;
+ ulong way;
+};
+
+struct L2pl310 {
+ ulong id;
+ ulong type;
+ uchar _pad0[0x100 - 0x8];
+ ulong ctl;
+ ulong auxctl;
+
+ uchar _pad1[0x730 - 0x108]; /* boring regs */
+ ulong sync;
+ uchar _pad2[0x740 - 0x734];
+ ulong r3p0sync; /* workaround for r3p0 bug */
+ uchar _pad3[0x770 - 0x744];
+ Pl310op inv; /* inv.indexway doesn't exist */
+ uchar _pad4[0x7b0 - 0x780];
+ Pl310op clean;
+ uchar _pad5[0x7f0 - 0x7c0];
+ Pl310op cleaninv;
+ uchar _pad6[0xc00 - 0x800];
+ ulong filtstart;
+ ulong filtend;
+ uchar _pad7[0xf40 - 0xc08];
+ ulong debug;
+ /* ... */
+};
+
+enum {
+ /* ctl bits */
+ L2enable = 1,
+
+ /* auxctl bits */
+ Ipref = 1<<29, /* prefetch enables */
+ Dpref = 1<<28,
+ Mbo = 1<<25,
+ Sharovr = 1<<22, /* shared attribute override (i.e., work right!) */
+ Parity = 1<<21,
+ Waycfgshift= 17,
+ Waycfgmask = (1<<3) - 1,
+ Assoc16way = 1<<16,
+ /*
+ * optim'n to 0 cache lines; must be enabled in a9(?!).
+ * set CpAClwr0line on all cpus 1st.
+ */
+ Fullline0= 1<<0,
+
+ /* debug bits */
+ Wt = 1<<1, /* write-through, not write-back */
+ Nolinefill= 1<<0,
+
+ Basecfg = Wt | Nolinefill,
+};
+
+static Lock l2lock;
+static int disallowed; /* by user: *l2off= in plan9.ini */
+static int l2ison;
+static int bg_op_running;
+static ulong waysmask;
+
+static Cacheimpl l2cacheimpl;
+
+static void
+awaitbgop(void)
+{
+ while (bg_op_running)
+ ;
+}
+
+static void
+getlock(void)
+{
+ awaitbgop(); /* wait at normal PL first */
+ ilock(&l2lock);
+ awaitbgop(); /* wait under lock */
+}
+
+static void
+l2pl310sync(void)
+{
+ L2P->sync = 0;
+ coherence();
+}
+
+/* call this first to set sets/ways configuration */
+void
+l2pl310init(void)
+{
+ int waysz, nways;
+ ulong new;
+ L2pl310 *l2p = L2P;
+ static int configed;
+
+ if (getconf("*l2off") != nil) {
+// iprint("l2 cache (pl310) disabled\n");
+ disallowed = 1;
+ return;
+ }
+ if (l2ison || configed)
+ return;
+ l2cache = &l2cacheimpl;
+ cachedwb();
+
+ /*
+ * default config is:
+ * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB
+ * but the tegra 2 manual says there's 1MB available.
+ * ways or way-size may be fixed by hardware; the only way to tell
+ * is to try to change the setting and read it back.
+ */
+ l2pl310sync();
+ l2cache->inv();
+
+ /* figure out number of ways */
+ l2pl310sync();
+ nways = NWAYS(l2p);
+ if (!(l2p->auxctl & Assoc16way)) {
+ l2p->auxctl |= Assoc16way;
+ coherence();
+ l2pl310sync();
+ nways = NWAYS(l2p);
+// iprint("\nl2: was set for 8 ways, asked for 16, got %d\n", nways);
+ }
+ waysmask = MASK(nways);
+
+ /* figure out way size (and thus number of sets) */
+ waysz = L2size / nways;
+ new = l2p->auxctl & ~(Waycfgmask << Waycfgshift) |
+ (log2(waysz / Wayszgran) + 1) << Waycfgshift;
+ l2p->auxctl = new;
+ coherence();
+ l2pl310sync();
+ l2cache->inv();
+
+// iprint("\nl2: configed %d ways, %d sets (way size %d)\n", nways,
+// waysz / CACHELINESZ, waysz);
+ if (l2p->auxctl != new)
+ iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n",
+ new, l2p->auxctl);
+ configed++;
+}
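
A worked example of the way-size arithmetic just above, using the numbers named in this file: with the full 1MB (L2size) configured as 16 ways, waysz = 1MB/16 = 64KB, so the value written into the Waycfg field is log2(64KB/Wayszgran) + 1 = log2(4) + 1 = 3; with the 32-byte lines mentioned in the comment, that comes to 64KB/32 = 2048 sets per way.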
+
+void
+l2pl310info(Memcache *cp)
+{
+ int pow2;
+ ulong waysz;
+ L2pl310 *l2p = L2P;
+
+ memset(cp, 0, sizeof *cp);
+ if (!l2ison)
+ return;
+
+ l2pl310init();
+ assert((l2p->id >> 24) == 'A');
+ cp->level = 2;
+ cp->type = Unified;
+ cp->external = Extcache;
+ cp->setsways = Cara | Cawa | Cawt | Cawb;
+ cp->l1ip = 3<<14; /* PIPT */
+ cp->setsh = cp->waysh = 0; /* bag on the side */
+
+ cp->linelen = CACHELINESZ;
+ cp->log2linelen = log2(CACHELINESZ);
+
+ cp->nways = NWAYS(l2p);
+ pow2 = ((l2p->auxctl >> Waycfgshift) & Waycfgmask) - 1;
+ if (pow2 < 0)
+ pow2 = 0;
+ waysz = (1 << pow2) * Wayszgran;
+ cp->nsets = waysz / CACHELINESZ;
+}
+
+void
+l2pl310on(void)
+{
+ ulong ctl;
+ L2pl310 *l2p = L2P;
+
+ if (getconf("*l2off") != nil) {
+// iprint("l2 cache (pl310) disabled\n");
+ disallowed = 1;
+ return;
+ }
+ if (l2ison)
+ return;
+
+ l2pl310init();
+ l2cache->inv();
+
+ /*
+ * drain l1. can't turn it off (which would make locks not work)
+ * because doing so makes references below to the l2 registers wedge
+ * the system.
+ */
+ cacheuwbinv();
+ cacheiinv();
+
+ /*
+ * this is only called once, on cpu0 at startup,
+ * so we don't need locks here.
+ * must do all configuration before enabling l2 cache.
+ */
+ l2p->filtend = 0;
+ coherence();
+ l2p->filtstart = 0; /* no enable bit */
+ l2p->debug = 0; /* write-back, line fills allowed */
+ coherence();
+
+ ctl = l2p->auxctl;
+ /* don't change number of sets & ways, but reset all else. */
+ ctl &= Waycfgmask << Waycfgshift | Assoc16way;
+ ctl |= Sharovr; /* actually work correctly for a change */
+ ctl |= Mbo | Ipref | Dpref | Parity | Fullline0;
+ l2p->auxctl = ctl;
+ coherence();
+
+ l2p->ctl |= L2enable;
+ coherence();
+
+ l2ison = 1;
+
+// iprint("l2 cache (pl310) now on\n");
+}
+
+void
+l2pl310off(void)
+{
+ if (!l2ison)
+ return;
+ l2cache->wbinv();
+ getlock();
+ L2P->ctl &= ~L2enable;
+ coherence();
+ l2ison = 0;
+ iunlock(&l2lock);
+}
+
+
+static void
+applyrange(ulong *reg, void *ava, int len)
+{
+ uintptr va, endva;
+
+ if (disallowed || !l2ison)
+ return;
+ if (len < 0)
+ panic("l2cache*se called with negative length");
+ endva = (uintptr)ava + len;
+ for (va = (uintptr)ava & ~(CACHELINESZ-1); va < endva;
+ va += CACHELINESZ)
+ *reg = PADDR(va);
+ l2pl310sync();
+}
+
+void
+l2pl310invse(void *va, int bytes)
+{
+ uintptr start, end;
+ L2pl310 *l2p = L2P;
+
+ /*
+ * if start & end addresses are not on cache-line boundaries,
+ * flush first & last cachelines before invalidating.
+ */
+ start = (uintptr)va;
+ end = start + bytes;
+ getlock();
+ if (start % CACHELINESZ != 0) {
+// iprint("l2pl310invse: unaligned start %#p from %#p\n", start,
+// getcallerpc(&va));
+ applyrange(&l2p->clean.pa, va, 1);
+ }
+ if (end % CACHELINESZ != 0) {
+// iprint("l2pl310invse: unaligned end %#p from %#p\n", end,
+// getcallerpc(&va));
+ applyrange(&l2p->clean.pa, (char *)va + bytes, 1);
+ }
+
+ applyrange(&l2p->inv.pa, va, bytes);
+ iunlock(&l2lock);
+}
+
+void
+l2pl310wbse(void *va, int bytes)
+{
+ getlock();
+ applyrange(&L2P->clean.pa, va, bytes);
+ iunlock(&l2lock);
+}
+
+/*
+ * assume that ldrex/strex (thus locks) won't work when Wt is in effect,
+ * so don't manipulate locks between setting and clearing Wt.
+ */
+void
+l2pl310wbinvse(void *va, int bytes)
+{
+ int odb;
+ L2pl310 *l2p = L2P;
+
+ if (!l2ison)
+ return;
+ getlock();
+ applyrange(&l2p->clean.pa, va, bytes); /* paranoia */
+
+ odb = l2p->debug;
+ l2p->debug |= Wt | Nolinefill; /* erratum workaround */
+ coherence();
+
+ applyrange(&l2p->cleaninv.pa, va, bytes);
+
+ l2p->debug = odb;
+ iunlock(&l2lock);
+}
+
+
+/*
+ * we want to wait for completion at normal PL.
+ * if waiting is interrupted, interrupt code that calls
+ * these ops could deadlock on a uniprocessor, so we only
+ * give up l2lock before waiting on multiprocessors.
+ * in this port, only cpu 0 gets interrupts other than local timer ones.
+ */
+
+void
+l2pl310inv(void)
+{
+ L2pl310 *l2p = L2P;
+
+ if (disallowed)
+ return;
+
+ getlock();
+ bg_op_running = 1;
+ l2p->inv.way = waysmask;
+ coherence();
+ if (conf.nmach > 1)
+ iunlock(&l2lock);
+
+ while (l2p->inv.way & waysmask)
+ ;
+
+ if (conf.nmach > 1)
+ ilock(&l2lock);
+ l2pl310sync();
+ bg_op_running = 0;
+ iunlock(&l2lock);
+}
+
+/*
+ * maximum time seen is 2542µs, typical is 625µs.
+ */
+void
+l2pl310wb(void)
+{
+ L2pl310 *l2p = L2P;
+
+ if (disallowed || !l2ison)
+ return;
+
+ getlock();
+ bg_op_running = 1;
+ l2p->clean.way = waysmask;
+ coherence();
+ if (conf.nmach > 1)
+ iunlock(&l2lock);
+
+ while (l2p->clean.way & waysmask)
+ ;
+
+ if (conf.nmach > 1)
+ ilock(&l2lock);
+ l2pl310sync();
+ bg_op_running = 0;
+ iunlock(&l2lock);
+}
+
+void
+l2pl310wbinv(void)
+{
+ int odb;
+ L2pl310 *l2p = L2P;
+
+ if (disallowed || !l2ison)
+ return;
+
+ l2pl310wb(); /* paranoia */
+
+ getlock();
+ bg_op_running = 1;
+ odb = l2p->debug;
+ l2p->debug |= Wt | Nolinefill; /* erratum workaround */
+ coherence();
+
+ l2p->cleaninv.way = waysmask;
+ coherence();
+ if (conf.nmach > 1)
+ iunlock(&l2lock);
+
+ while (l2p->cleaninv.way & waysmask)
+ ;
+
+ if (conf.nmach > 1)
+ ilock(&l2lock);
+ l2pl310sync();
+ l2p->debug = odb;
+ bg_op_running = 0;
+ iunlock(&l2lock);
+}
+
+static Cacheimpl l2cacheimpl = {
+ .info = l2pl310info,
+ .on = l2pl310on,
+ .off = l2pl310off,
+
+ .inv = l2pl310inv,
+ .wb = l2pl310wb,
+ .wbinv = l2pl310wbinv,
+
+ .invse = l2pl310invse,
+ .wbse = l2pl310wbse,
+ .wbinvse= l2pl310wbinvse,
+};
diff --git a/sys/src/9/teg2/cache.v7.s b/sys/src/9/teg2/cache.v7.s
new file mode 100644
index 000000000..15c94b6d4
--- /dev/null
+++ b/sys/src/9/teg2/cache.v7.s
@@ -0,0 +1,240 @@
+/*
+ * cortex arm arch v7 cache flushing and invalidation
+ * included by l.s and rebootcode.s
+ */
+
+TEXT cacheiinv(SB), $-4 /* I invalidate */
+ MOVW $0, R0
+ MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall /* ok on cortex */
+ ISB
+ RET
+
+/*
+ * set/way operators, passed a suitable set/way value in R0.
+ */
+TEXT cachedwb_sw(SB), $-4
+ MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEsi
+ RET
+
+TEXT cachedwbinv_sw(SB), $-4
+ MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEsi
+ RET
+
+TEXT cachedinv_sw(SB), $-4
+ MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEsi
+ RET
+
+ /* set cache size select */
+TEXT setcachelvl(SB), $-4
+ MTCP CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0
+ ISB
+ RET
+
+ /* return cache sizes */
+TEXT getwayssets(SB), $-4
+ MFCP CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0
+ RET
+
+/*
+ * l1 cache operations.
+ * l1 and l2 ops are intended to be called from C, thus need save no
+ * caller's regs, only those we need to preserve across calls.
+ */
+
+TEXT cachedwb(SB), $-4
+ MOVW.W R14, -8(R13)
+ MOVW $cachedwb_sw(SB), R0
+ MOVW $1, R8
+ BL wholecache(SB)
+ MOVW.P 8(R13), R15
+
+TEXT cachedwbinv(SB), $-4
+ MOVW.W R14, -8(R13)
+ MOVW $cachedwbinv_sw(SB), R0
+ MOVW $1, R8
+ BL wholecache(SB)
+ MOVW.P 8(R13), R15
+
+TEXT cachedinv(SB), $-4
+ MOVW.W R14, -8(R13)
+ MOVW $cachedinv_sw(SB), R0
+ MOVW $1, R8
+ BL wholecache(SB)
+ MOVW.P 8(R13), R15
+
+TEXT cacheuwbinv(SB), $-4
+ MOVM.DB.W [R14], (R13) /* save lr on stack */
+ MOVW CPSR, R1
+ CPSID /* splhi */
+
+ MOVM.DB.W [R1], (R13) /* save R1 on stack */
+
+ BL cachedwbinv(SB)
+ BL cacheiinv(SB)
+
+ MOVM.IA.W (R13), [R1] /* restore R1 (saved CPSR) */
+ MOVW R1, CPSR
+ MOVM.IA.W (R13), [R14] /* restore lr */
+ RET
+
+/*
+ * architectural l2 cache operations
+ */
+
+TEXT _l2cacheuwb(SB), $-4
+ MOVW.W R14, -8(R13)
+ MOVW $cachedwb_sw(SB), R0
+ MOVW $2, R8
+ BL wholecache(SB)
+ MOVW.P 8(R13), R15 /* return */
+
+TEXT _l2cacheuwbinv(SB), $-4
+ MOVW.W R14, -8(R13)
+ MOVW CPSR, R1
+ CPSID /* splhi */
+
+ MOVM.DB.W [R1], (R13) /* save R1 on stack */
+
+ MOVW $cachedwbinv_sw(SB), R0
+ MOVW $2, R8
+ BL wholecache(SB)
+
+ BL _l2cacheuinv(SB)
+
+ MOVM.IA.W (R13), [R1] /* restore R1 (saved CPSR) */
+ MOVW R1, CPSR
+ MOVW.P 8(R13), R15 /* return */
+
+TEXT _l2cacheuinv(SB), $-4
+ MOVW.W R14, -8(R13)
+ MOVW $cachedinv_sw(SB), R0
+ MOVW $2, R8
+ BL wholecache(SB)
+ MOVW.P 8(R13), R15 /* return */
+
+/*
+ * callers are assumed to be the above l1 and l2 ops.
+ * R0 is the function to call in the innermost loop.
+ * R8 is the cache level (1-origin: 1 or 2).
+ *
+ * R0 func to call at entry
+ * R1 func to call after entry
+ * R2 nsets
+ * R3 way shift (computed from R8)
+ * R4 set shift (computed from R8)
+ * R5 nways
+ * R6 set scratch
+ * R7 way scratch
+ * R8 cache level, 0-origin
+ * R9 extern reg up
+ * R10 extern reg m
+ *
+ * initial translation by 5c, then massaged by hand.
+ */
+TEXT wholecache+0(SB), $-4
+ MOVW CPSR, R2
+ MOVM.DB.W [R2,R14], (SP) /* save regs on stack */
+
+ MOVW R0, R1 /* save argument for inner loop in R1 */
+ SUB $1, R8 /* convert cache level to zero origin */
+
+ /* we might not have the MMU on yet, so map R1 (func) to R14's space */
+ MOVW R14, R0 /* get R14's segment ... */
+ AND $KSEGM, R0
+ BIC $KSEGM, R1 /* strip segment from func address */
+ ORR R0, R1 /* combine them */
+
+ /* get cache sizes */
+ SLL $1, R8, R0 /* R0 = (cache - 1) << 1 */
+ MTCP CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0 /* set cache select */
+ ISB
+ MFCP CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0 /* get cache sizes */
+
+ /* compute # of ways and sets for this cache level */
+ SRA $3, R0, R5 /* R5 (ways) = R0 >> 3 */
+ AND $((1<<10)-1), R5 /* R5 = (R0 >> 3) & MASK(10) */
+ ADD $1, R5 /* R5 (ways) = ((R0 >> 3) & MASK(10)) + 1 */
+
+ SRA $13, R0, R2 /* R2 = R0 >> 13 */
+ AND $((1<<15)-1), R2 /* R2 = (R0 >> 13) & MASK(15) */
+ ADD $1, R2 /* R2 (sets) = ((R0 >> 13) & MASK(15)) + 1 */
+
+ /* precompute set/way shifts for inner loop */
+ MOVW $(CACHECONF+0), R3 /* +0 = l1waysh */
+ MOVW $(CACHECONF+4), R4 /* +4 = l1setsh */
+ CMP $0, R8 /* cache == 1? */
+ ADD.NE $(4*2), R3 /* no, assume l2: +8 = l2waysh */
+ ADD.NE $(4*2), R3 /* +12 = l2setsh */
+
+ MOVW R14, R0 /* get R14's segment ... */
+ AND $KSEGM, R0
+
+ BIC $KSEGM, R3 /* strip segment from address */
+ ORR R0, R3 /* combine them */
+ BIC $KSEGM, R4 /* strip segment from address */
+ ORR R0, R4 /* combine them */
+ MOVW (R3), R3
+ MOVW (R4), R4
+
+ CMP $0, R3 /* sanity checks */
+ BEQ wbuggery
+ CMP $0, R4
+ BEQ sbuggery
+
+ CPSID /* splhi to make entire op atomic */
+ BARRIERS
+
+ /* iterate over ways */
+ MOVW $0, R7 /* R7: way */
+outer:
+ /* iterate over sets */
+ MOVW $0, R6 /* R6: set */
+inner:
+ /* compute set/way register contents */
+ SLL R3, R7, R0 /* R0 = way << R3 (L?WAYSH) */
+ ORR R8<<1, R0 /* R0 = way << L?WAYSH | (cache - 1) << 1 */
+ ORR R6<<R4, R0 /* R0 = way<<L?WAYSH | (cache-1)<<1 |set<<R4 */
+
+ BL (R1) /* call set/way operation with R0 arg. */
+
+ ADD $1, R6 /* set++ */
+ CMP R2, R6 /* set >= sets? */
+ BLT inner /* no, do next set */
+
+ ADD $1, R7 /* way++ */
+ CMP R5, R7 /* way >= ways? */
+ BLT outer /* no, do next way */
+
+ MOVM.IA.W (SP), [R2,R14] /* restore regs */
+ BARRIERS
+ MOVW R2, CPSR /* splx */
+
+ RET
+
+wbuggery:
+ PUTC('?')
+ PUTC('c')
+ PUTC('w')
+ B topanic
+sbuggery:
+ PUTC('?')
+ PUTC('c')
+ PUTC('s')
+topanic:
+ MOVW $.string<>+0(SB), R0
+ BIC $KSEGM, R0 /* strip segment from address */
+ MOVW R14, R1 /* get R14's segment ... */
+ AND $KSEGM, R1
+ ORR R1, R0 /* combine them */
+ SUB $12, R13 /* not that it matters, since we're panicing */
+ MOVW R14, 8(R13)
+ BL panic(SB) /* panic("msg %#p", LR) */
+bugloop:
+ WFI
+ B bugloop
+
+ DATA .string<>+0(SB)/8,$"bad cach"
+ DATA .string<>+8(SB)/8,$"e params"
+ DATA .string<>+16(SB)/8,$"\073 pc %\043p"
+ DATA .string<>+24(SB)/1,$"\z"
+ GLOBL .string<>+0(SB),$25
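
The set/way walk that wholecache performs is the usual armv7 pattern; the following C rendering of the same loop is an illustrative sketch (not code from this commit — it recomputes the shifts from the cache-size-id register instead of using the precomputed CACHECONF values, and the helper names come from this file and arm.h):

	/* apply op (e.g. cachedwb_sw) to every set/way of one cache level (1-origin) */
	static void
	wholecachec(void (*op)(ulong), int level)
	{
		ulong csize, nways, nsets, setsh, waysh, way, set;

		setcachelvl((level - 1) << 1);	/* select the data/unified cache at this level */
		csize = getwayssets();		/* cache size id register for that level */
		nways = ((csize >> 3) & MASK(10)) + 1;
		nsets = ((csize >> 13) & MASK(15)) + 1;
		setsh = (csize & MASK(3)) + 4;	/* log2(bytes per line) */
		waysh = 32 - log2(nways);	/* way number lives in the top bits */
		for (way = 0; way < nways; way++)
			for (set = 0; set < nsets; set++)
				(*op)(way << waysh | set << setsh | (level - 1) << 1);
	}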
diff --git a/sys/src/9/teg2/caches-v7.c b/sys/src/9/teg2/caches-v7.c
new file mode 100644
index 000000000..5d8a831e7
--- /dev/null
+++ b/sys/src/9/teg2/caches-v7.c
@@ -0,0 +1,106 @@
+/*
+ * caches defined by arm v7 architecture
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+#include "arm.h"
+
+static char *
+l1iptype(uint type)
+{
+ static char *types[] = {
+ "reserved",
+ "asid-tagged VIVT",
+ "VIPT",
+ "PIPT",
+ };
+
+ if (type >= nelem(types) || types[type] == nil)
+ return "GOK";
+ return types[type];
+}
+
+static char *catype[] = {
+ "none,",
+ "i,",
+ "d,",
+ "split i&d,",
+ "unified,",
+ "gok,",
+ "gok,",
+ "gok,",
+};
+
+void
+cacheinfo(int level, Memcache *cp, int ext, int type)
+{
+ ulong setsways;
+
+ memset(cp, 0, sizeof *cp);
+ if (type == Nocache)
+ return;
+ cp->level = level;
+ cp->type = type;
+ cp->external = ext;
+ if (level == 2) { /* external PL310 */
+ allcache->info(cp);
+ setsways = cp->setsways;
+ } else {
+ /* select internal cache level */
+ cpwrsc(CpIDcssel, CpID, CpIDid, 0, (level - 1) << 1);
+
+ setsways = cprdsc(CpIDcsize, CpID, CpIDid, 0);
+ cp->l1ip = cpctget();
+ cp->nways = ((setsways >> 3) & MASK(10)) + 1;
+ cp->nsets = ((setsways >> 13) & MASK(15)) + 1;
+ cp->log2linelen = (setsways & MASK(2)) + 2 + 2;
+ }
+ cp->linelen = 1 << cp->log2linelen;
+ cp->setsways = setsways;
+ cp->setsh = cp->log2linelen;
+ cp->waysh = 32 - log2(cp->nways);
+}
+
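+/*
+ * the cache level id register packs a 3-bit type code for each of up
+ * to 7 levels into its low 21 bits; walk them until a level reads as
+ * Nocache (0).
+ */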
+void
+allcacheinfo(Memcache *mc)
+{
+ int n;
+ ulong lvl;
+
+ lvl = cprdsc(CpIDcsize, CpID, CpIDidct, CpIDclvlid);
+ n = 1;
+	for (lvl &= MASK(21); lvl; lvl >>= 3){
+		cacheinfo(n, &mc[n], Intcache, lvl & MASK(3));
+		n++;
+	}
+// cacheinfo(2, &mc[2], Extcache, Unified); /* PL310 */
+}
+
+void
+prcachecfg(void)
+{
+ int cache;
+ Memcache *mc;
+
+ for (cache = 1; cache < 8 && cachel[cache].type; cache++) {
+ mc = &cachel[cache];
+ iprint("l%d: %s %-10s %2d ways %4d sets %d bytes/line; can W[",
+ mc->level, mc->external? "ext": "int", catype[mc->type],
+ mc->nways, mc->nsets, mc->linelen);
+ if (mc->linelen != CACHELINESZ)
+ iprint(" *should* be %d", CACHELINESZ);
+ if (mc->setsways & Cawt)
+ iprint("T");
+ if (mc->setsways & Cawb)
+ iprint("B");
+ if (mc->setsways & Cawa)
+ iprint("A");
+ iprint("]");
+ if (cache == 1)
+ iprint("; l1-i %s", l1iptype((mc->l1ip >> 14) & MASK(2)));
+ iprint("\n");
+ }
+}
diff --git a/sys/src/9/teg2/caches.c b/sys/src/9/teg2/caches.c
new file mode 100644
index 000000000..6c04f0557
--- /dev/null
+++ b/sys/src/9/teg2/caches.c
@@ -0,0 +1,198 @@
+/*
+ * operations on all memory data or unified caches, plus a no-op cache
+ * and a cache that performs l1-only operations.
+ * i-caches are not handled here.
+ *
+ * there are only three cache operations that we care about:
+ * force cache contents to memory (before dma out or shutdown),
+ * ignore cache contents in favour of memory (initialisation, after dma in),
+ * both (update page tables and force cpu to read new contents).
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+
+static Cacheimpl allcaches, nullcaches, l1caches;
+
+void
+cachesinfo(Memcache *cp)
+{
+ memset(cp, 0, sizeof *cp);
+ cp->setsways = Cara | Cawa | Cawt | Cawb;
+ cp->l1ip = 3<<14; /* PIPT */
+ cp->log2linelen = log2(CACHELINESZ);
+}
+
+void
+allcacheson(void)
+{
+ l2pl310init();
+ allcache = &allcaches;
+ nocache = &nullcaches;
+ l1cache = &l1caches;
+}
+
+void
+cachesoff(void)
+{
+ l2cache->off();
+}
+
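+/*
+ * invalidate l2 before l1, so that once l1 is invalidated any refill
+ * comes from memory rather than from a stale l2 line.
+ */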
+void
+cachesinvse(void *va, int bytes)
+{
+ int s;
+
+ s = splhi();
+ l2cache->invse(va, bytes);
+ cachedinvse(va, bytes);
+ splx(s);
+}
+
+void
+cacheswbse(void *va, int bytes)
+{
+ int s;
+
+ s = splhi();
+ cachedwbse(va, bytes);
+ l2cache->wbse(va, bytes);
+ splx(s);
+}
+
+void
+cacheswbinvse(void *va, int bytes)
+{
+ int s;
+
+ s = splhi();
+ cachedwbse(va, bytes);
+ l2cache->wbinvse(va, bytes);
+ cachedwbinvse(va, bytes);
+ splx(s);
+}
+
+
+void
+cachesinv(void)
+{
+ int s;
+
+ s = splhi();
+ l2cache->inv();
+ cachedinv();
+ splx(s);
+}
+
+void
+cacheswb(void)
+{
+ int s;
+
+ s = splhi();
+ cachedwb();
+ l2cache->wb();
+ splx(s);
+}
+
+void
+cacheswbinv(void)
+{
+ int s;
+
+ s = splhi();
+ cachedwb();
+ l2cache->wbinv();
+ cachedwbinv();
+ splx(s);
+}
+
+static Cacheimpl allcaches = {
+ .info = cachesinfo,
+ .on = allcacheson,
+ .off = cachesoff,
+
+ .inv = cachesinv,
+ .wb = cacheswb,
+ .wbinv = cacheswbinv,
+
+ .invse = cachesinvse,
+ .wbse = cacheswbse,
+ .wbinvse= cacheswbinvse,
+};
+
+
+/*
+ * null cache ops
+ */
+
+void
+nullinfo(Memcache *cp)
+{
+ memset(cp, 0, sizeof *cp);
+ cp->log2linelen = 2;
+}
+
+void
+nullon(void)
+{
+ nocache = &nullcaches;
+}
+
+void
+nullop(void)
+{
+}
+
+void
+nullse(void *, int)
+{
+}
+
+static Cacheimpl nullcaches = {
+ .info = nullinfo,
+ .on = nullon,
+ .off = nullop,
+
+ .inv = nullop,
+ .wb = nullop,
+ .wbinv = nullop,
+
+ .invse = nullse,
+ .wbse = nullse,
+ .wbinvse= nullse,
+};
+
+/*
+ * l1-only ops
+ */
+
+void
+l1cachesinfo(Memcache *)
+{
+}
+
+void
+l1cacheson(void)
+{
+ l1cache = &l1caches;
+}
+
+static Cacheimpl l1caches = {
+ .info = l1cachesinfo,
+ .on = l1cacheson,
+ .off = nullop,
+
+ .inv = cachedinv,
+ .wb = cachedwb,
+ .wbinv = cachedwbinv,
+
+ .invse = cachedinvse,
+ .wbse = cachedwbse,
+ .wbinvse= cachedwbinvse,
+};
diff --git a/sys/src/9/teg2/clock-tegra.c b/sys/src/9/teg2/clock-tegra.c
new file mode 100644
index 000000000..a90e9b973
--- /dev/null
+++ b/sys/src/9/teg2/clock-tegra.c
@@ -0,0 +1,138 @@
+/*
+ * tegra 2 SoC clocks; excludes cortex-a timers.
+ *
+ * SoC provides these shared clocks:
+ * 4 29-bit count-down `timers' @ 1MHz,
+ * 1 32-bit count-up time-stamp counter @ 1MHz,
+ * and a real-time clock @ 32KHz.
+ * the tegra watchdog (tegra 2 ref man §5.4.1) is tied to timers, not rtc.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "arm.h"
+
+typedef struct Shrdtmr Shrdtmr;
+typedef struct µscnt µscnt;
+
+/* tegra2 shared-intr timer registers */
+struct Shrdtmr { /* 29-bit count-down timer (4); unused */
+ ulong trigger;
+ ulong prescnt;
+};
+
+enum {
+ /* trigger bits */
+ Enable = 1u<<31,
+ Periodintr = 1<<30,
+ Countmask = MASK(29),
+
+ /* prescnt bits */
+ Intrclr = 1<<30,
+ /* Countmask is ro */
+};
+
+struct µscnt { /* tegra2 shared 32-bit count-up µs counter (1) */
+ ulong cntr;
+ /*
+ * oscillator clock fraction - 1; initially 0xb (11) from u-boot
+ * for 12MHz periphclk.
+ */
+ ulong cfg;
+ uchar _pad0[0x3c - 0x8];
+ ulong freeze;
+};
+
+enum {
+ /* cfg bits */
+ Dividendshift = 8,
+ Dividendmask = MASK(8),
+ Divisorshift = 0,
+ Divisormask = MASK(8),
+};
+
+void
+tegclockintr(void)
+{
+ int junk;
+ Shrdtmr *tmr;
+
+ /* appease the tegra dog */
+ tmr = (Shrdtmr *)soc.tmr[0];
+ junk = tmr->trigger;
+ USED(junk);
+}
+
+/*
+ * if on cpu0, shut down the shared tegra2 watchdog timer.
+ */
+void
+tegclockshutdown(void)
+{
+ Shrdtmr *tmr;
+
+ if (m->machno == 0) {
+ tmr = (Shrdtmr *)soc.tmr[0];
+ tmr->prescnt = tmr->trigger = 0;
+ coherence();
+ }
+}
+
+void
+tegwdogintr(Ureg *, void *v)
+{
+ int junk;
+ Shrdtmr *tmr;
+
+ tmr = (Shrdtmr *)v;
+ tmr->prescnt |= Intrclr;
+ coherence();
+ /* the lousy documentation says we also have to read trigger */
+ junk = tmr->trigger;
+ USED(junk);
+}
+
+/* start tegra2 shared watch dog */
+void
+tegclock0init(void)
+{
+ Shrdtmr *tmr;
+
+ tmr = (Shrdtmr *)soc.tmr[0];
+ irqenable(Tn0irq, tegwdogintr, tmr, "tegra watchdog");
+
+ /*
+ * tegra watchdog only fires on the second missed interrupt, thus /2.
+ */
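+	/*
+	 * e.g. with Dogsectimeout = 4: trigger count = 4*1000000/2 - 1,
+	 * i.e. an interrupt every 2 s on the 1MHz timer, and a reset
+	 * only after 4 s of missed interrupts.
+	 */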
+ tmr->trigger = (Dogsectimeout * Mhz / 2 - 1) | Periodintr | Enable;
+ coherence();
+}
+
+/*
+ * µscnt is a freerunning timer (cycle counter); it needs no
+ * initialisation, wraps and does not dispatch interrupts.
+ */
+void
+tegclockinit(void)
+{
+ ulong old;
+ µscnt *µs = (µscnt *)soc.µs;
+
+ /* verify µs counter sanity */
+ assert(µs->cfg == 0xb); /* set by u-boot */
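+	/* 0xb: divide the 12MHz periphclk by 11+1 to get the 1MHz µs clock */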
+ old = µs->cntr;
+ delay(1);
+ assert(old != µs->cntr);
+}
+
+ulong
+perfticks(void) /* MHz rate, assumed by timing loops */
+{
+ ulong v;
+
+ /* keep it non-zero to prevent m->fastclock ever going to zero. */
+ v = ((µscnt *)soc.µs)->cntr;
+ return v == 0? 1: v;
+}
diff --git a/sys/src/9/teg2/clock.c b/sys/src/9/teg2/clock.c
new file mode 100644
index 000000000..d54722957
--- /dev/null
+++ b/sys/src/9/teg2/clock.c
@@ -0,0 +1,624 @@
+/*
+ * cortex-a clocks; excludes tegra 2 SoC clocks
+ *
+ * cortex-a processors include private `global' and local timers
+ * at soc.scu + 0x200 (global) and + 0x600 (local).
+ * the global timer is a single count-up timer shared by all cores
+ * but with per-cpu comparator and auto-increment registers.
+ * a local count-down timer can be used as a watchdog.
+ *
+ * v7 arch provides a 32-bit count-up cycle counter (at about 1GHz in our case)
+ * but it's unsuitable as our source of fastticks, because it stops advancing
+ * when the cpu is suspended by WFI.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "arm.h"
+
+enum {
+ Debug = 0,
+
+ Basetickfreq = Mhz, /* soc.µs rate in Hz */
+ /* the local timers seem to run at half the expected rate */
+ Clockfreqbase = 250*Mhz / 2, /* private timer rate (PERIPHCLK/2) */
+ Tcycles = Clockfreqbase / HZ, /* cycles per clock tick */
+
+ MinPeriod = Tcycles / 100,
+ MaxPeriod = Tcycles,
+
+ Dogtimeout = Dogsectimeout * Clockfreqbase,
+};
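+/*
+ * Clockfreqbase is thus 125MHz; with HZ = 100, Tcycles is 1250000
+ * local-timer counts per clock tick.
+ */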
+
+typedef struct Ltimer Ltimer;
+typedef struct Pglbtmr Pglbtmr;
+typedef struct Ploctmr Ploctmr;
+
+/*
+ * cortex-a private-intr local timer registers. all cpus see their
+ * own local timers at the same base address.
+ */
+struct Ltimer {
+ ulong load; /* new value + 1 */
+ ulong cnt; /* counts down */
+ ulong ctl;
+ ulong isr;
+
+ /* watchdog only */
+ ulong wdrst;
+ ulong wddis; /* wo */
+
+ ulong _pad0[2];
+};
+struct Ploctmr {
+ Ltimer loc;
+ Ltimer wd;
+};
+
+enum {
+ /* ctl bits */
+ Tmrena = 1<<0, /* timer enabled */
+ Wdogena = Tmrena, /* watchdog enabled */
+ Xreload = 1<<1, /* reload on intr; periodic interrupts */
+ Tintena = 1<<2, /* enable irq 29 at cnt==0 (30 for watchdog) */
+ Wdog = 1<<3, /* watchdog, not timer, mode */
+ Xsclrshift = 8,
+ Xsclrmask = MASK(8),
+
+ /* isr bits */
+ Xisrclk = 1<<0, /* write to clear */
+
+ /* wdrst bits */
+ Wdrst = 1<<0,
+
+ /* wddis values */
+ Wdon = 1,
+ Wdoff1 = 0x12345678, /* send these two to switch to timer mode */
+ Wdoff2 = 0x87654321,
+};
+
+/* cortex-a private-intr global timer registers */
+struct Pglbtmr {
+ ulong cnt[2]; /* counts up; little-endian uvlong */
+ ulong ctl;
+ ulong isr;
+ ulong cmp[2]; /* little-endian uvlong */
+ ulong inc;
+};
+
+enum {
+ /* unique ctl bits (otherwise see X* above) */
+ Gcmp = 1<<1,
+// Gtintena= 1<<2, /* enable irq 27 */
+ Gincr = 1<<3,
+};
+
+/*
+ * until 5[cl] inline vlong ops, avoid them where possible;
+ * they are currently slow function calls.
+ */
+typedef union Vlong Vlong;
+union Vlong {
+ uvlong uvl;
+ struct { /* little-endian */
+ ulong low;
+ ulong high;
+ };
+};
+
+static int fired;
+static int ticking[MAXMACH];
+
+/* no lock is needed to update our local timer. splhi keeps it tight. */
+static void
+setltimer(Ltimer *tn, ulong ticks)
+{
+ int s;
+
+ assert(ticks <= Clockfreqbase);
+ s = splhi();
+ tn->load = ticks - 1;
+ coherence();
+ tn->ctl = Tmrena | Tintena | Xreload;
+ coherence();
+ splx(s);
+}
+
+static void
+ckstuck(int cpu, long myticks, long histicks)
+{
+ if (labs(histicks - myticks) > HZ) {
+// iprint("cpu%d: clock ticks %ld (vs myticks %ld cpu0 %ld); "
+// "apparently stopped\n",
+// cpu, histicks, myticks, MACHP(0)->ticks);
+ if (!ticking[cpu])
+ panic("cpu%d: clock not interrupting", cpu);
+ }
+}
+
+static void
+mpclocksanity(void)
+{
+ int cpu, mycpu;
+ long myticks, histicks;
+
+ if (conf.nmach <= 1 || active.exiting || navailcpus == 0)
+ return;
+
+ mycpu = m->machno;
+ myticks = m->ticks;
+ if (myticks == HZ)
+ ticking[mycpu] = 1;
+
+ if (myticks < 5*HZ)
+ return;
+
+ for (cpu = 0; cpu < navailcpus; cpu++) {
+ if (cpu == mycpu)
+ continue;
+ histicks = MACHP(cpu)->ticks;
+ if (myticks == 5*HZ || histicks > 1)
+ ckstuck(cpu, myticks, histicks);
+ }
+}
+
+static void
+clockintr(Ureg* ureg, void *arg)
+{
+ Ltimer *wd, *tn;
+ Ploctmr *lt;
+
+ lt = (Ploctmr *)arg;
+ tn = &lt->loc;
+ tn->isr = Xisrclk;
+ coherence();
+
+ timerintr(ureg, 0);
+
+#ifdef watchdog_not_bloody_useless
+ /* appease the dogs */
+ wd = &lt->wd;
+ if (wd->cnt == 0 &&
+ (wd->ctl & (Wdog | Wdogena | Tintena)) == (Wdog | Wdogena))
+ panic("cpu%d: zero watchdog count but no system reset",
+ m->machno);
+ wd->load = Dogtimeout - 1;
+ coherence();
+#endif
+ SET(wd); USED(wd);
+ tegclockintr();
+
+ mpclocksanity();
+}
+
+void
+clockprod(Ureg *ureg)
+{
+ Ltimer *tn;
+
+ timerintr(ureg, 0);
+ tegclockintr();
+ if (m->machno != 0) { /* cpu1 gets stuck */
+ tn = &((Ploctmr *)soc.loctmr)->loc;
+ setltimer(tn, Tcycles);
+ }
+}
+
+static void
+clockreset(Ltimer *tn)
+{
+ if (probeaddr((uintptr)tn) < 0)
+ panic("no clock at %#p", tn);
+ tn->ctl = 0;
+ coherence();
+}
+
+void
+watchdogoff(Ltimer *wd)
+{
+ wd->ctl &= ~Wdogena;
+ coherence();
+ wd->wddis = Wdoff1;
+ coherence();
+ wd->wddis = Wdoff2;
+ coherence();
+}
+
+/* clear any pending watchdog intrs or causes */
+void
+wdogclrintr(Ltimer *wd)
+{
+#ifdef watchdog_not_bloody_useless
+ wd->isr = Xisrclk;
+ coherence();
+ wd->wdrst = Wdrst;
+ coherence();
+#endif
+ USED(wd);
+}
+
+/*
+ * stop clock interrupts on this cpu and disable the local watchdog timer,
+ * and, if on cpu0, shut down the shared tegra2 watchdog timer.
+ */
+void
+clockshutdown(void)
+{
+ Ploctmr *lt;
+
+ lt = (Ploctmr *)soc.loctmr;
+ clockreset(&lt->loc);
+ watchdogoff(&lt->wd);
+
+ tegclockshutdown();
+}
+
+enum {
+ Instrs = 10*Mhz,
+};
+
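+/*
+ * each issue1loop body is 100 decrements (99 explicit plus the loop
+ * test), so a run executes about Instrs instructions.  issue2loop
+ * interleaves two independent counters, which a dual-issue pipeline
+ * can pair, so it should take roughly half as long.
+ */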
+/* we assume that perfticks are microseconds */
+static long
+issue1loop(void)
+{
+ register int i;
+ long st;
+
+ i = Instrs;
+ st = perfticks();
+ do {
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ --i; --i; --i; --i; --i; --i; --i; --i; --i;
+ } while(--i >= 0);
+ return perfticks() - st;
+}
+
+static long
+issue2loop(void)
+{
+ register int i, j;
+ long st;
+
+ i = Instrs / 2; /* j gets half the decrements */
+ j = 0;
+ st = perfticks();
+ do {
+ --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ --i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+ } while(--i >= 0);
+ return perfticks() - st;
+}
+
+/* estimate instructions/s. */
+static void
+guessmips(long (*loop)(void), char *lab)
+{
+ int s;
+ long tcks;
+
+ do {
+ s = splhi();
+ tcks = loop();
+ splx(s);
+ if (tcks < 0)
+ iprint("again...");
+ } while (tcks < 0);
+ /*
+ * Instrs instructions took tcks ticks @ Basetickfreq Hz.
+ * round the result.
+ */
+ s = (((vlong)Basetickfreq * Instrs) / tcks + 500000) / 1000000;
+ if (Debug)
+ iprint("%ud mips (%s-issue)", s, lab);
+ USED(s);
+}
+
+void
+wdogintr(Ureg *, void *ltmr)
+{
+#ifdef watchdog_not_bloody_useless
+ Ltimer *wd;
+
+ wd = ltmr;
+ fired++;
+ wdogclrintr(wd);
+#endif
+ USED(ltmr);
+}
+
+static void
+ckcounting(Ltimer *lt)
+{
+ ulong old;
+
+ old = lt->cnt;
+ if (old == lt->cnt)
+ delay(1);
+ if (old == lt->cnt)
+ panic("cpu%d: watchdog timer not counting down", m->machno);
+}
+
+/* test fire with interrupt to see that it's working */
+static void
+ckwatchdog(Ltimer *wd)
+{
+#ifdef watchdog_not_bloody_useless
+ int s;
+
+ fired = 0;
+ wd->load = Tcycles - 1;
+ coherence();
+ /* Tintena is supposed to be ignored in watchdog mode */
+ wd->ctl |= Wdogena | Tintena;
+ coherence();
+
+ ckcounting(wd);
+
+ s = spllo();
+ delay(2 * 1000/HZ);
+ splx(s);
+ if (!fired)
+ /* useless local watchdog */
+ iprint("cpu%d: local watchdog failed to interrupt\n", m->machno);
+ /* clean up */
+ wd->ctl &= ~Wdogena;
+ coherence();
+#endif
+ USED(wd);
+}
+
+static void
+startwatchdog(void)
+{
+#ifdef watchdog_not_bloody_useless
+ Ltimer *wd;
+ Ploctmr *lt;
+
+ lt = (Ploctmr *)soc.loctmr;
+ wd = &lt->wd;
+ watchdogoff(wd);
+ wdogclrintr(wd);
+ irqenable(Wdtmrirq, wdogintr, wd, "watchdog");
+
+ ckwatchdog(wd);
+
+ /* set up for normal use, causing reset */
+ wd->ctl &= ~Tintena; /* reset, don't interrupt */
+ coherence();
+ wd->ctl |= Wdog;
+ coherence();
+ wd->load = Dogtimeout - 1;
+ coherence();
+ wd->ctl |= Wdogena;
+ coherence();
+
+ ckcounting(wd);
+#endif
+}
+
+static void
+clock0init(Ltimer *tn)
+{
+ int s;
+ ulong old, fticks;
+
+ /*
+ * calibrate fastclock
+ */
+ s = splhi();
+ tn->load = ~0ul >> 1;
+ coherence();
+ tn->ctl = Tmrena;
+ coherence();
+
+ old = perfticks();
+ fticks = tn->cnt;
+ delay(1);
+ fticks = abs(tn->cnt - fticks);
+ old = perfticks() - old;
+ splx(s);
+ if (Debug)
+ iprint("cpu%d: fastclock %ld/%ldµs = %ld fastticks/µs (MHz)\n",
+ m->machno, fticks, old, (fticks + old/2 - 1) / old);
+ USED(fticks, old);
+
+ if (Debug)
+ iprint("cpu%d: ", m->machno);
+ guessmips(issue1loop, "single");
+ if (Debug)
+ iprint(", ");
+ guessmips(issue2loop, "dual");
+ if (Debug)
+ iprint("\n");
+
+ /*
+ * m->delayloop should be the number of delay loop iterations
+ * needed to consume 1 ms. 2 is instr'ns in the delay loop.
+ */
+ m->delayloop = m->cpuhz / (1000 * 2);
+// iprint("cpu%d: m->delayloop = %lud\n", m->machno, m->delayloop);
+
+ tegclock0init();
+}
+
+/*
+ * the local timer is the interrupting timer and does not
+ * participate in measuring time. It is initially set to HZ.
+ */
+void
+clockinit(void)
+{
+ ulong old;
+ Ltimer *tn;
+ Ploctmr *lt;
+
+ clockshutdown();
+
+ /* turn my cycle counter on */
+ cpwrsc(0, CpCLD, CpCLDena, CpCLDenacyc, 1<<31);
+
+ /* turn all my counters on and clear my cycle counter */
+ cpwrsc(0, CpCLD, CpCLDena, CpCLDenapmnc, 1<<2 | 1);
+
+ /* let users read my cycle counter directly */
+ cpwrsc(0, CpCLD, CpCLDuser, CpCLDenapmnc, 1);
+
+ /* verify µs counter sanity */
+ tegclockinit();
+
+ lt = (Ploctmr *)soc.loctmr;
+ tn = &lt->loc;
+ if (m->machno == 0)
+ irqenable(Loctmrirq, clockintr, lt, "clock");
+ else
+ intcunmask(Loctmrirq);
+
+ /*
+ * verify sanity of local timer
+ */
+ tn->load = Clockfreqbase / 1000;
+ tn->isr = Xisrclk;
+ coherence();
+ tn->ctl = Tmrena;
+ coherence();
+
+ old = tn->cnt;
+ delay(5);
+ /* m->ticks won't be incremented here because timersinit hasn't run. */
+ if (tn->cnt == old)
+ panic("cpu%d: clock not ticking at all", m->machno);
+ else if ((long)tn->cnt > 0)
+ panic("cpu%d: clock ticking slowly", m->machno);
+
+ if (m->machno == 0)
+ clock0init(tn);
+
+ /* if pci gets stuck, maybe one of the many watchdogs will nuke us. */
+ startwatchdog();
+
+ /*
+ * desynchronize the processor clocks so that they all don't
+ * try to resched at the same time.
+ */
+ delay(m->machno*2);
+ setltimer(tn, Tcycles);
+}
+
+/* our fastticks are at 1MHz (Basetickfreq), so the conversion is trivial. */
+ulong
+µs(void)
+{
+ return fastticks2us(fastticks(nil));
+}
+
+/* Tval is supposed to be in fastticks units. */
+void
+timerset(Tval next)
+{
+ int s;
+ long offset;
+ Ltimer *tn;
+
+ tn = &((Ploctmr *)soc.loctmr)->loc;
+ s = splhi();
+ offset = fastticks2us(next - fastticks(nil));
+ /* offset is now in µs (MHz); convert to Clockfreqbase Hz. */
+ offset *= Clockfreqbase / Mhz;
+ if(offset < MinPeriod)
+ offset = MinPeriod;
+ else if(offset > MaxPeriod)
+ offset = MaxPeriod;
+
+ setltimer(tn, offset);
+ splx(s);
+}
+
+static ulong
+cpucycles(void) /* cpu clock rate, except when waiting for intr (unused) */
+{
+ ulong v;
+
+ /* reads 32-bit cycle counter (counting up) */
+// v = cprdsc(0, CpCLD, CpCLDcyc, 0);
+ v = getcyc(); /* fast asm */
+ /* keep it non-negative; prevent m->fastclock ever going to 0 */
+ return v == 0? 1: v;
+}
+
+long
+lcycles(void)
+{
+ return perfticks();
+}
+
+uvlong
+fastticks(uvlong *hz)
+{
+ int s;
+ ulong newticks;
+ Vlong *fcp;
+
+ if(hz)
+ *hz = Basetickfreq;
+
+ fcp = (Vlong *)&m->fastclock;
+ /* avoid reentry on interrupt or trap, to prevent recursion */
+ s = splhi();
+ newticks = perfticks();
+ if(newticks < fcp->low) /* low word must have wrapped */
+ fcp->high++;
+ fcp->low = newticks;
+ splx(s);
+
+ if (fcp->low == 0 && fcp->high == 0 && m->ticks > HZ/10)
+ panic("fastticks: zero m->fastclock; ticks %lud fastclock %#llux",
+ m->ticks, m->fastclock);
+ return m->fastclock;
+}
+
+void
+microdelay(int l)
+{
+ for (l = l * (vlong)m->delayloop / 1000; --l >= 0; )
+ ;
+}
+
+void
+delay(int l)
+{
+ int i, d;
+
+ d = m->delayloop;
+ while(--l >= 0)
+ for (i = d; --i >= 0; )
+ ;
+}
diff --git a/sys/src/9/teg2/coproc.c b/sys/src/9/teg2/coproc.c
new file mode 100644
index 000000000..27612bd97
--- /dev/null
+++ b/sys/src/9/teg2/coproc.c
@@ -0,0 +1,200 @@
+/*
+ * arm co-processors
+ * mainly to cope with arm hard-wiring register numbers into instructions.
+ *
+ * CP15 (system control) is the one that gets used the most in practice.
+ * these routines must be callable from KZERO space or the 0 segment.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#include "arm.h"
+
+enum {
+ /* alternates: 0xe12fff1e BX (R14); last e is R14 */
+ /* 0xe28ef000 B 0(R14); second e is R14 (ken) */
+ Retinst = 0xe1a0f00e, /* MOV R14, R15 */
+
+ Opmask = MASK(3),
+ Regmask = MASK(4),
+};
+
+typedef ulong (*Pufv)(void);
+typedef void (*Pvfu)(ulong);
+
+static void
+setupcpop(ulong instr[2], ulong opcode, int cp, int op1, int crn, int crm,
+ int op2)
+{
+ ulong instrsz[2];
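+	/* sizeof instr would be a pointer's size; instrsz supplies the array size */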
+
+ op1 &= Opmask;
+ op2 &= Opmask;
+ crn &= Regmask;
+ crm &= Regmask;
+ cp &= Regmask;
+ instr[0] = opcode | op1 << 21 | crn << 16 | cp << 8 | op2 << 5 | crm;
+ instr[1] = Retinst;
+
+ cachedwbse(instr, sizeof instrsz);
+ cacheiinv();
+}
+
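+/*
+ * cprd and cpwr build a two-instruction routine on the stack (the
+ * coprocessor access followed by a return), flush it to memory and
+ * invalidate the i-cache, then call it; this avoids a separate
+ * assembly stub for every register combination.
+ */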
+ulong
+cprd(int cp, int op1, int crn, int crm, int op2)
+{
+ int s, r;
+ volatile ulong instr[2];
+ Pufv fp;
+
+ s = splhi();
+ /*
+ * MRC. return value will be in R0, which is convenient.
+ * Rt will be R0.
+ */
+ setupcpop(instr, 0xee100010, cp, op1, crn, crm, op2);
+ fp = (Pufv)instr;
+ r = fp();
+ splx(s);
+ return r;
+}
+
+void
+cpwr(int cp, int op1, int crn, int crm, int op2, ulong val)
+{
+ int s;
+ volatile ulong instr[2];
+ Pvfu fp;
+
+ s = splhi();
+ setupcpop(instr, 0xee000010, cp, op1, crn, crm, op2); /* MCR, Rt is R0 */
+ fp = (Pvfu)instr;
+ fp(val);
+ coherence();
+ splx(s);
+}
+
+ulong
+cprdsc(int op1, int crn, int crm, int op2)
+{
+ return cprd(CpSC, op1, crn, crm, op2);
+}
+
+void
+cpwrsc(int op1, int crn, int crm, int op2, ulong val)
+{
+ cpwr(CpSC, op1, crn, crm, op2, val);
+}
+
+/* floating point */
+
+/* fp coproc control */
+static void
+setupfpctlop(ulong instr[2], int opcode, int fpctlreg)
+{
+ ulong instrsz[2];
+
+ fpctlreg &= Nfpctlregs - 1;
+ instr[0] = opcode | fpctlreg << 16 | 0 << 12 | CpFP << 8;
+ instr[1] = Retinst;
+
+ cachedwbse(instr, sizeof instrsz);
+ cacheiinv();
+}
+
+ulong
+fprd(int fpreg)
+{
+ int s, r;
+ volatile ulong instr[2];
+ Pufv fp;
+
+ if (!m->fpon) {
+ dumpstack();
+ panic("fprd: cpu%d fpu off", m->machno);
+ }
+ s = splhi();
+ /*
+ * VMRS. return value will be in R0, which is convenient.
+ * Rt will be R0.
+ */
+ setupfpctlop(instr, 0xeef00010, fpreg);
+ fp = (Pufv)instr;
+ r = fp();
+ splx(s);
+ return r;
+}
+
+void
+fpwr(int fpreg, ulong val)
+{
+ int s;
+ volatile ulong instr[2];
+ Pvfu fp;
+
+ /* fpu might be off and this VMSR might enable it */
+ s = splhi();
+ setupfpctlop(instr, 0xeee00010, fpreg); /* VMSR, Rt is R0 */
+ fp = (Pvfu)instr;
+ fp(val);
+ coherence();
+ splx(s);
+}
+
+/* fp register access; don't bother with single precision */
+static void
+setupfpop(ulong instr[2], int opcode, int fpreg)
+{
+ ulong instrsz[2];
+
+ instr[0] = opcode | 0 << 16 | (fpreg & (16 - 1)) << 12;
+ if (fpreg >= 16)
+ instr[0] |= 1 << 22; /* high bit of dfp reg # */
+ instr[1] = Retinst;
+
+ cachedwbse(instr, sizeof instrsz);
+ cacheiinv();
+}
+
+ulong
+fpsavereg(int fpreg, uvlong *fpp)
+{
+ int s, r;
+ volatile ulong instr[2];
+ ulong (*fp)(uvlong *);
+
+ if (!m->fpon)
+ panic("fpsavereg: cpu%d fpu off", m->machno);
+ s = splhi();
+ /*
+ * VSTR. pointer will be in R0, which is convenient.
+ * Rt will be R0.
+ */
+ setupfpop(instr, 0xed000000 | CpDFP << 8, fpreg);
+ fp = (ulong (*)(uvlong *))instr;
+ r = fp(fpp);
+ splx(s);
+ coherence();
+ return r; /* not too meaningful */
+}
+
+void
+fprestreg(int fpreg, uvlong val)
+{
+ int s;
+ volatile ulong instr[2];
+ void (*fp)(uvlong *);
+
+ if (!m->fpon)
+ panic("fprestreg: cpu%d fpu off", m->machno);
+ s = splhi();
+ setupfpop(instr, 0xed100000 | CpDFP << 8, fpreg); /* VLDR, Rt is R0 */
+ fp = (void (*)(uvlong *))instr;
+ fp(&val);
+ coherence();
+ splx(s);
+}
diff --git a/sys/src/9/teg2/dat.h b/sys/src/9/teg2/dat.h
new file mode 100644
index 000000000..83420dfbd
--- /dev/null
+++ b/sys/src/9/teg2/dat.h
@@ -0,0 +1,478 @@
+/*
+ * Time.
+ *
+ * HZ should divide 1000 evenly, ideally.
+ * 100, 125, 200, 250 and 333 are okay.
+ */
+#define HZ 100 /* clock frequency */
+#define MS2HZ (1000/HZ) /* millisec per clock tick */
+#define TK2SEC(t) ((t)/HZ) /* ticks to seconds */
+
+enum {
+ Mhz = 1000 * 1000,
+ Dogsectimeout = 4, /* must be ≤ 34 s. to fit in a ulong */
+};
+
+/*
+ * More accurate time
+ */
+#define MS2TMR(t) ((ulong)(((uvlong)(t) * m->cpuhz)/1000))
+#define US2TMR(t) ((ulong)(((uvlong)(t) * m->cpuhz)/1000000))
+
+#define CONSOLE 0
+
+typedef struct Conf Conf;
+typedef struct Confmem Confmem;
+typedef struct FPsave FPsave;
+typedef struct ISAConf ISAConf;
+typedef struct Isolated Isolated;
+typedef struct Label Label;
+typedef struct Lock Lock;
+typedef struct Lowmemcache Lowmemcache;
+typedef struct Memcache Memcache;
+typedef struct MMMU MMMU;
+typedef struct Mach Mach;
+typedef u32int Mreg; /* Msr - bloody UART */
+typedef struct Notsave Notsave;
+typedef struct Page Page;
+typedef struct Pcisiz Pcisiz;
+typedef struct Pcidev Pcidev;
+typedef struct PhysUart PhysUart;
+typedef struct PMMU PMMU;
+typedef struct Proc Proc;
+typedef u32int PTE;
+typedef struct Soc Soc;
+typedef struct Uart Uart;
+typedef struct Ureg Ureg;
+typedef uvlong Tval;
+
+#pragma incomplete Pcidev
+#pragma incomplete Ureg
+
+#define MAXSYSARG 5 /* for mount(fd, mpt, flag, arg, srv) */
+
+/*
+ * parameters for sysproc.c
+ */
+#define AOUT_MAGIC (E_MAGIC)
+
+struct Lock
+{
+ ulong key;
+ u32int sr;
+ uintptr pc;
+ Proc* p;
+ Mach* m;
+ int isilock;
+};
+
+struct Label
+{
+ uintptr sp;
+ uintptr pc;
+};
+
+enum {
+ Maxfpregs = 32, /* could be 16 or 32, see Mach.fpnregs */
+ Nfpctlregs = 16,
+};
+
+/*
+ * emulated or vfp3 floating point
+ */
+struct FPsave
+{
+ ulong status;
+ ulong control;
+ /*
+ * vfp3 with ieee fp regs; uvlong is sufficient for hardware but
+ * each must be able to hold an Internal from fpi.h for sw emulation.
+ */
+ ulong regs[Maxfpregs][3];
+
+ int fpstate;
+ uintptr pc; /* of failed fp instr. */
+};
+
+/*
+ * FPsave.fpstate
+ */
+enum
+{
+ FPinit,
+ FPactive,
+ FPinactive,
+ FPemu,
+
+ /* bit or'd with the state */
+ FPillegal= 0x100,
+};
+
+struct Confmem
+{
+ uintptr base;
+ usize npage;
+ uintptr limit;
+ uintptr kbase;
+ uintptr klimit;
+};
+
+struct Conf
+{
+ ulong nmach; /* processors */
+ ulong nproc; /* processes */
+ Confmem mem[1]; /* physical memory */
+ ulong npage; /* total physical pages of memory */
+ usize upages; /* user page pool */
+ ulong copymode; /* 0 is copy on write, 1 is copy on reference */
+ ulong ialloc; /* max interrupt time allocation in bytes */
+ ulong pipeqsize; /* size in bytes of pipe queues */
+ ulong nimage; /* number of page cache image headers */
+ ulong nswap; /* number of swap pages */
+ int nswppo; /* max # of pageouts per segment pass */
+ ulong hz; /* processor cycle freq */
+ ulong mhz;
+ int monitor; /* flag */
+};
+
+/*
+ * things saved in the Proc structure during a notify
+ */
+struct Notsave {
+ int emptiness;
+};
+
+/*
+ * MMU stuff in Mach.
+ */
+struct MMMU
+{
+ PTE* mmul1; /* l1 for this processor */
+ int mmul1lo;
+ int mmul1hi;
+ int mmupid;
+};
+
+/*
+ * MMU stuff in proc
+ */
+#define NCOLOR 1 /* 1 level cache, don't worry about VCE's */
+struct PMMU
+{
+ Page* mmul2;
+ Page* mmul2cache; /* free mmu pages */
+};
+
+#include "../port/portdat.h"
+
+struct Mach
+{
+ /* offsets known to asm */
+ int machno; /* physical id of processor */
+ uintptr splpc; /* pc of last caller to splhi */
+
+ Proc* proc; /* current process */
+
+ MMMU;
+ /* end of offsets known to asm */
+ int flushmmu; /* flush current proc mmu state */
+
+ ulong ticks; /* of the clock since boot time */
+ Label sched; /* scheduler wakeup */
+ Lock alarmlock; /* access to alarm list */
+ void* alarm; /* alarms bound to this clock */
+ int inclockintr;
+
+ Proc* readied; /* for runproc */
+ ulong schedticks; /* next forced context switch */
+
+ int cputype;
+ ulong delayloop;
+
+ /* stats */
+ int tlbfault;
+ int tlbpurge;
+ int pfault;
+ int cs;
+ int syscall;
+ int load;
+ int intr;
+ uvlong fastclock; /* last sampled value */
+ uvlong inidle; /* time spent in idlehands() */
+ ulong spuriousintr;
+ int lastintr;
+ int ilockdepth;
+ Perf perf; /* performance counters */
+
+ int probing; /* probeaddr() state */
+ int trapped;
+ Lock probelock;
+ int inidlehands;
+
+ int cpumhz;
+ uvlong cpuhz; /* speed of cpu */
+ uvlong cyclefreq; /* Frequency of user readable cycle counter */
+
+ /* vfp3 fpu */
+ int havefp;
+ int havefpvalid;
+ int fpon;
+ int fpconfiged;
+ int fpnregs;
+ ulong fpscr; /* sw copy */
+ int fppid; /* pid of last fault */
+ uintptr fppc; /* addr of last fault */
+ int fpcnt; /* how many consecutive at that addr */
+
+ /* save areas for exceptions, hold R0-R4 */
+ u32int sfiq[5];
+ u32int sirq[5];
+ u32int sund[5];
+ u32int sabt[5];
+ u32int smon[5]; /* probably not needed */
+ u32int ssys[5];
+
+ int stack[1];
+};
+
+/*
+ * Fake kmap.
+ */
+typedef void KMap;
+#define VA(k) ((uintptr)(k))
+#define kmap(p) (KMap*)((p)->pa|kseg0)
+#define kunmap(k)
+
+struct
+{
+ Lock;
+ int machs; /* bitmap of active CPUs */
+ int wfi; /* bitmap of CPUs in WFI state */
+ int stopped; /* bitmap of CPUs stopped */
+ int exiting; /* shutdown */
+ int ispanic; /* shutdown in response to a panic */
+ int thunderbirdsarego; /* lets the added processors continue to schedinit */
+}active;
+
+extern register Mach* m; /* R10 */
+extern register Proc* up; /* R9 */
+
+/* an object guaranteed to be in its own cache line */
+typedef uchar Cacheline[CACHELINESZ];
+struct Isolated {
+ Cacheline c0;
+ ulong word;
+ Cacheline c1;
+};
+
+extern Memcache cachel[]; /* arm arch v7 supports 1-7 */
+extern ulong intrcount[MAXMACH];
+extern int irqtooearly;
+extern uintptr kseg0;
+extern Isolated l1ptstable;
+extern uchar *l2pages;
+extern Mach* machaddr[MAXMACH];
+extern ulong memsize;
+extern int navailcpus;
+extern int normalprint;
+
+/*
+ * a parsed plan9.ini line
+ */
+#define NISAOPT 8
+
+struct ISAConf {
+ char *type;
+ ulong port;
+ int irq;
+ ulong dma;
+ ulong mem;
+ ulong size;
+ ulong freq;
+
+ int nopt;
+ char *opt[NISAOPT];
+};
+
+#define MACHP(n) machaddr[n]
+
+/*
+ * Horrid. But the alternative is 'defined'.
+ */
+#ifdef _DBGC_
+#define DBGFLG (dbgflg[_DBGC_])
+#else
+#define DBGFLG (0)
+#endif /* _DBGC_ */
+
+int vflag;
+extern char dbgflg[256];
+
+#define dbgprint print /* for now */
+
+/*
+ * hardware info about a device
+ */
+typedef struct {
+ ulong port;
+ int size;
+} Devport;
+
+struct DevConf
+{
+ ulong intnum; /* interrupt number */
+ char *type; /* card type, malloced */
+ int nports; /* Number of ports */
+ Devport *ports; /* The ports themselves */
+};
+
+/* characteristics of a given arm cache level */
+struct Memcache {
+ uint waysh; /* shifts for set/way register */
+ uint setsh;
+
+ uint log2linelen;
+
+ uint level; /* 1 is nearest processor, 2 further away */
+ uint type;
+ uint external; /* flag */
+ uint l1ip; /* l1 I policy */
+
+ uint nways; /* associativity */
+ uint nsets;
+ uint linelen; /* bytes per cache line */
+ uint setsways;
+};
+enum Cachetype {
+ Nocache,
+ Ionly,
+ Donly,
+ Splitid,
+ Unified,
+};
+enum {
+ Intcache,
+ Extcache,
+};
+
+/*
+ * characteristics of cache level, kept at low, fixed address (CACHECONF).
+ * all offsets are known to cache.v7.s.
+ */
+struct Lowmemcache {
+ uint l1waysh; /* shifts for set/way register */
+ uint l1setsh;
+ uint l2waysh;
+ uint l2setsh;
+};
+
+/*
+ * cache capabilities. write-back vs write-through is controlled
+ * by the Buffered bit in PTEs.
+ *
+ * see cache.v7.s and Memcache in dat.h
+ */
+enum {
+ Cawt = 1 << 31,
+ Cawb = 1 << 30,
+ Cara = 1 << 29,
+ Cawa = 1 << 28,
+};
+
+/* non-architectural L2 cache */
+typedef struct Cacheimpl Cacheimpl;
+struct Cacheimpl {
+ void (*info)(Memcache *);
+ void (*on)(void);
+ void (*off)(void);
+
+ void (*inv)(void);
+ void (*wb)(void);
+ void (*wbinv)(void);
+
+ void (*invse)(void *, int);
+ void (*wbse)(void *, int);
+ void (*wbinvse)(void *, int);
+};
+/* extern */ Cacheimpl *l2cache, *allcache, *nocache, *l1cache;
+
+enum Dmamode {
+ Const,
+ Postincr,
+ Index,
+ Index2,
+};
+
+/* pmu = power management unit */
+enum Irqs {
+ /*
+ * 1st 32 gic irqs reserved for cpu; private interrupts.
+ * 0—15 are software-generated by other cpus;
+ * 16—31 are private peripheral intrs.
+ */
+ Cpu0irq = 0,
+ Cpu1irq,
+ /* ... */
+ Cpu15irq = 15,
+ Glbtmrirq = 27,
+ Loctmrirq = 29,
+ Wdtmrirq = 30,
+
+ /* shared interrupts */
+ Ctlr0base = (1+0)*32, /* primary ctlr */
+ Tn0irq = Ctlr0base + 0, /* tegra timers */
+ Tn1irq = Ctlr0base + 1,
+ Rtcirq = Ctlr0base + 2,
+
+ Ctlr1base = (1+1)*32, /* secondary ctlr */
+ Uartirq = Ctlr1base + 4,
+ Tn2irq = Ctlr1base + 9, /* tegra timers */
+ Tn3irq = Ctlr1base + 10,
+	/* +24 is cpu0_pmu_intr, +25 is cpu1_pmu_intr */
+
+ Ctlr2base = (1+2)*32, /* ternary ctlr */
+ Extpmuirq = Ctlr2base + 22,
+
+ Ctlr3base = (1+3)*32, /* quad ctlr */
+ Pcieirq = Ctlr3base + 2,
+};
+
+struct Soc { /* addr's of SoC controllers */
+ uintptr clkrst;
+ uintptr power;
+ uintptr exceptvec;
+ uintptr sema;
+ uintptr l2cache;
+ uintptr flow;
+
+ /* private memory region */
+ uintptr scu;
+ uintptr intr; /* `cpu interface' */
+ /* private-peripheral-interrupt cortex-a clocks */
+ uintptr glbtmr;
+ uintptr loctmr;
+
+ uintptr intrdist;
+
+ uintptr uart[5];
+
+ /* shared-peripheral-interrupt tegra2 clocks */
+ uintptr rtc; /* real-time clock */
+ uintptr tmr[4];
+ uintptr µs;
+
+ uintptr pci;
+ uintptr ether;
+
+ uintptr ehci;
+ uintptr ide;
+
+ uintptr nand;
+ uintptr nor;
+
+ uintptr spi[4];
+ uintptr twsi;
+ uintptr mmc[4];
+ uintptr gpio[7];
+} soc;
+extern Soc soc;
diff --git a/sys/src/9/teg2/devarch.c b/sys/src/9/teg2/devarch.c
new file mode 100644
index 000000000..d89e888e5
--- /dev/null
+++ b/sys/src/9/teg2/devarch.c
@@ -0,0 +1,192 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+
+#include "../ip/ip.h"
+
+enum {
+ Qdir = 0,
+ Qbase,
+
+ Qmax = 16,
+};
+
+typedef long Rdwrfn(Chan*, void*, long, vlong);
+
+static Rdwrfn *readfn[Qmax];
+static Rdwrfn *writefn[Qmax];
+
+static Dirtab archdir[Qmax] = {
+ ".", { Qdir, 0, QTDIR }, 0, 0555,
+};
+
+Lock archwlock; /* the lock is only for changing archdir */
+int narchdir = Qbase;
+
+/*
+ * Add a file to the #P listing. Once added, you can't delete it.
+ * You can't add a file with the same name as one already there,
+ * and you get a pointer to the Dirtab entry so you can do things
+ * like change the Qid version. Changing the Qid path is disallowed.
+ */
+Dirtab*
+addarchfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn)
+{
+ int i;
+ Dirtab d;
+ Dirtab *dp;
+
+ memset(&d, 0, sizeof d);
+ strcpy(d.name, name);
+ d.perm = perm;
+
+ lock(&archwlock);
+ if(narchdir >= Qmax){
+ unlock(&archwlock);
+ return nil;
+ }
+
+ for(i=0; i<narchdir; i++)
+ if(strcmp(archdir[i].name, name) == 0){
+ unlock(&archwlock);
+ return nil;
+ }
+
+ d.qid.path = narchdir;
+ archdir[narchdir] = d;
+ readfn[narchdir] = rdfn;
+ writefn[narchdir] = wrfn;
+ dp = &archdir[narchdir++];
+ unlock(&archwlock);
+
+ return dp;
+}
+
+static Chan*
+archattach(char* spec)
+{
+ return devattach('P', spec);
+}
+
+Walkqid*
+archwalk(Chan* c, Chan *nc, char** name, int nname)
+{
+ return devwalk(c, nc, name, nname, archdir, narchdir, devgen);
+}
+
+static int
+archstat(Chan* c, uchar* dp, int n)
+{
+ return devstat(c, dp, n, archdir, narchdir, devgen);
+}
+
+static Chan*
+archopen(Chan* c, int omode)
+{
+ return devopen(c, omode, archdir, narchdir, devgen);
+}
+
+static void
+archclose(Chan*)
+{
+}
+
+static long
+archread(Chan *c, void *a, long n, vlong offset)
+{
+ Rdwrfn *fn;
+
+ switch((ulong)c->qid.path){
+ case Qdir:
+ return devdirread(c, a, n, archdir, narchdir, devgen);
+
+ default:
+ if(c->qid.path < narchdir && (fn = readfn[c->qid.path]))
+ return fn(c, a, n, offset);
+ error(Eperm);
+ break;
+ }
+
+ return 0;
+}
+
+static long
+archwrite(Chan *c, void *a, long n, vlong offset)
+{
+ Rdwrfn *fn;
+
+ if(c->qid.path < narchdir && (fn = writefn[c->qid.path]))
+ return fn(c, a, n, offset);
+ error(Eperm);
+
+ return 0;
+}
+
+void archinit(void);
+
+Dev archdevtab = {
+ 'P',
+ "arch",
+
+ devreset,
+ archinit,
+ devshutdown,
+ archattach,
+ archwalk,
+ archstat,
+ archopen,
+ devcreate,
+ archclose,
+ archread,
+ devbread,
+ archwrite,
+ devbwrite,
+ devremove,
+ devwstat,
+};
+
+static long
+cputyperead(Chan*, void *a, long n, vlong offset)
+{
+ char name[64], str[128];
+
+ cputype2name(name, sizeof name);
+ snprint(str, sizeof str, "ARM %s %llud\n", name, m->cpuhz / Mhz);
+ return readstr(offset, a, n, str);
+}
+
+static long
+tbread(Chan*, void *a, long n, vlong offset)
+{
+	char str[32];		/* 16 hex digits + NUL */
+ uvlong tb;
+
+ cycles(&tb);
+
+ snprint(str, sizeof(str), "%16.16llux", tb);
+ return readstr(offset, a, n, str);
+}
+
+static long
+nsread(Chan*, void *a, long n, vlong offset)
+{
+	char str[32];		/* 16 hex digits + NUL */
+ uvlong tb;
+
+ cycles(&tb);
+
+ snprint(str, sizeof(str), "%16.16llux", (tb/700)* 1000);
+ return readstr(offset, a, n, str);
+}
+
+void
+archinit(void)
+{
+ addarchfile("cputype", 0444, cputyperead, nil);
+ addarchfile("timebase",0444, tbread, nil);
+// addarchfile("nsec", 0444, nsread, nil);
+}
diff --git a/sys/src/9/teg2/devether.c b/sys/src/9/teg2/devether.c
new file mode 100644
index 000000000..ad314a125
--- /dev/null
+++ b/sys/src/9/teg2/devether.c
@@ -0,0 +1,530 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+
+#include "../port/netif.h"
+#include "etherif.h"
+
+static Ether *etherxx[MaxEther];
+
+Chan*
+etherattach(char* spec)
+{
+ int ctlrno;
+ char *p;
+ Chan *chan;
+
+ ctlrno = 0;
+ if(spec && *spec){
+ ctlrno = strtoul(spec, &p, 0);
+ if((ctlrno == 0 && p == spec) || *p != 0)
+ error(Ebadarg);
+ if(ctlrno < 0 || ctlrno >= MaxEther)
+ error(Ebadarg);
+ }
+ if(etherxx[ctlrno] == 0)
+ error(Enodev);
+
+ chan = devattach('l', spec);
+ if(waserror()){
+ chanfree(chan);
+ nexterror();
+ }
+ chan->dev = ctlrno;
+ if(etherxx[ctlrno]->attach)
+ etherxx[ctlrno]->attach(etherxx[ctlrno]);
+ poperror();
+ return chan;
+}
+
+static Walkqid*
+etherwalk(Chan* chan, Chan* nchan, char** name, int nname)
+{
+ return netifwalk(etherxx[chan->dev], chan, nchan, name, nname);
+}
+
+static int
+etherstat(Chan* chan, uchar* dp, int n)
+{
+ return netifstat(etherxx[chan->dev], chan, dp, n);
+}
+
+static Chan*
+etheropen(Chan* chan, int omode)
+{
+ return netifopen(etherxx[chan->dev], chan, omode);
+}
+
+static Chan*
+ethercreate(Chan*, char*, int, ulong)
+{
+ error(Eperm);
+ return 0;
+}
+
+static void
+etherclose(Chan* chan)
+{
+ netifclose(etherxx[chan->dev], chan);
+}
+
+static long
+etherread(Chan* chan, void* buf, long n, vlong off)
+{
+ Ether *ether;
+ ulong offset = off;
+
+ ether = etherxx[chan->dev];
+ if((chan->qid.type & QTDIR) == 0 && ether->ifstat){
+ /*
+ * With some controllers it is necessary to reach
+ * into the chip to extract statistics.
+ */
+ if(NETTYPE(chan->qid.path) == Nifstatqid)
+ return ether->ifstat(ether, buf, n, offset);
+ else if(NETTYPE(chan->qid.path) == Nstatqid)
+ ether->ifstat(ether, buf, 0, offset);
+ }
+
+ return netifread(ether, chan, buf, n, offset);
+}
+
+static Block*
+etherbread(Chan* chan, long n, ulong offset)
+{
+ return netifbread(etherxx[chan->dev], chan, n, offset);
+}
+
+static int
+etherwstat(Chan* chan, uchar* dp, int n)
+{
+ return netifwstat(etherxx[chan->dev], chan, dp, n);
+}
+
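+/*
+ * copy the first 58 bytes of the packet into a fixed 64-byte snoop
+ * record, followed by the 2-byte original length and a 4-byte
+ * millisecond timestamp, for headers-only readers of the interface.
+ */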
+static void
+etherrtrace(Netfile* f, Etherpkt* pkt, int len)
+{
+ int i, n;
+ Block *bp;
+
+ if(qwindow(f->in) <= 0)
+ return;
+ if(len > 58)
+ n = 58;
+ else
+ n = len;
+ bp = iallocb(64);
+ if(bp == nil)
+ return;
+ memmove(bp->wp, pkt->d, n);
+ i = TK2MS(MACHP(0)->ticks);
+ bp->wp[58] = len>>8;
+ bp->wp[59] = len;
+ bp->wp[60] = i>>24;
+ bp->wp[61] = i>>16;
+ bp->wp[62] = i>>8;
+ bp->wp[63] = i;
+ bp->wp += 64;
+ qpass(f->in, bp);
+}
+
+Block*
+etheriq(Ether* ether, Block* bp, int fromwire)
+{
+ Etherpkt *pkt;
+ ushort type;
+ int len, multi, tome, fromme;
+ Netfile **ep, *f, **fp, *fx;
+ Block *xbp;
+
+ ether->inpackets++;
+
+ pkt = (Etherpkt*)bp->rp;
+ len = BLEN(bp);
+ type = (pkt->type[0]<<8)|pkt->type[1];
+ fx = 0;
+ ep = &ether->f[Ntypes];
+
+ multi = pkt->d[0] & 1;
+ /* check for valid multicast addresses */
+ if(multi && memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) != 0 &&
+ ether->prom == 0){
+ if(!activemulti(ether, pkt->d, sizeof(pkt->d))){
+ if(fromwire){
+ freeb(bp);
+ bp = 0;
+ }
+ return bp;
+ }
+ }
+ /* is it for me? */
+ tome = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+ fromme = memcmp(pkt->s, ether->ea, sizeof(pkt->s)) == 0;
+
+ /*
+ * Multiplex the packet to all the connections which want it.
+ * If the packet is not to be used subsequently (fromwire != 0),
+ * attempt to simply pass it into one of the connections, thereby
+ * saving a copy of the data (usual case hopefully).
+ */
+ for(fp = ether->f; fp < ep; fp++){
+ if((f = *fp) != nil && (f->type == type || f->type < 0) &&
+ (tome || multi || f->prom)){
+ /* Don't want to hear bridged packets */
+ if(f->bridge && !fromwire && !fromme)
+ continue;
+ if(!f->headersonly){
+ if(fromwire && fx == 0)
+ fx = f;
+ else if(xbp = iallocb(len)){
+ memmove(xbp->wp, pkt, len);
+ xbp->wp += len;
+ if(qpass(f->in, xbp) < 0)
+ ether->soverflows++;
+ }
+ else
+ ether->soverflows++;
+ }
+ else
+ etherrtrace(f, pkt, len);
+ }
+ }
+
+ if(fx){
+ if(qpass(fx->in, bp) < 0)
+ ether->soverflows++;
+ return 0;
+ }
+ if(fromwire){
+ freeb(bp);
+ return 0;
+ }
+ return bp;
+}
+
+static int
+etheroq(Ether* ether, Block* bp)
+{
+ int len, loopback, s;
+ Etherpkt *pkt;
+
+ ether->outpackets++;
+
+ /*
+ * Check if the packet has to be placed back onto the input queue,
+ * i.e. if it's a loopback or broadcast packet or the interface is
+ * in promiscuous mode.
+ * If it's a loopback packet indicate to etheriq that the data isn't
+ * needed and return, etheriq will pass-on or free the block.
+ * To enable bridging to work, only packets that were originated
+ * by this interface are fed back.
+ */
+ pkt = (Etherpkt*)bp->rp;
+ len = BLEN(bp);
+ loopback = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+ if(loopback || memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) == 0 || ether->prom){
+ s = splhi();
+ etheriq(ether, bp, 0);
+ splx(s);
+ }
+
+ if(!loopback){
+ qbwrite(ether->oq, bp);
+ if(ether->transmit != nil)
+ ether->transmit(ether);
+ } else
+ freeb(bp);
+
+ return len;
+}
+
+static long
+etherwrite(Chan* chan, void* buf, long n, vlong)
+{
+ Ether *ether;
+ Block *bp;
+ int nn, onoff;
+ Cmdbuf *cb;
+
+ ether = etherxx[chan->dev];
+ if(NETTYPE(chan->qid.path) != Ndataqid) {
+ nn = netifwrite(ether, chan, buf, n);
+ if(nn >= 0)
+ return nn;
+ cb = parsecmd(buf, n);
+ if(cb->f[0] && strcmp(cb->f[0], "nonblocking") == 0){
+ if(cb->nf <= 1)
+ onoff = 1;
+ else
+ onoff = atoi(cb->f[1]);
+ qnoblock(ether->oq, onoff);
+ free(cb);
+ return n;
+ }
+ free(cb);
+ if(ether->ctl!=nil)
+ return ether->ctl(ether,buf,n);
+
+ error(Ebadctl);
+ }
+
+ if(n > ether->maxmtu)
+ error(Etoobig);
+ if(n < ether->minmtu)
+ error(Etoosmall);
+
+ bp = allocb(n);
+ if(waserror()){
+ freeb(bp);
+ nexterror();
+ }
+ memmove(bp->rp, buf, n);
+ memmove(bp->rp+Eaddrlen, ether->ea, Eaddrlen);
+ poperror();
+ bp->wp += n;
+
+ return etheroq(ether, bp);
+}
+
+static long
+etherbwrite(Chan* chan, Block* bp, ulong)
+{
+ Ether *ether;
+ long n;
+
+ n = BLEN(bp);
+ if(NETTYPE(chan->qid.path) != Ndataqid){
+ if(waserror()) {
+ freeb(bp);
+ nexterror();
+ }
+ n = etherwrite(chan, bp->rp, n, 0);
+ poperror();
+ freeb(bp);
+ return n;
+ }
+ ether = etherxx[chan->dev];
+
+ if(n > ether->maxmtu){
+ freeb(bp);
+ error(Etoobig);
+ }
+ if(n < ether->minmtu){
+ freeb(bp);
+ error(Etoosmall);
+ }
+
+ return etheroq(ether, bp);
+}
+
+static struct {
+ char* type;
+ int (*reset)(Ether*);
+} cards[MaxEther+1];
+
+void
+addethercard(char* t, int (*r)(Ether*))
+{
+ static int ncard;
+
+ if(ncard == MaxEther)
+ panic("too many ether cards");
+ cards[ncard].type = t;
+ cards[ncard].reset = r;
+ ncard++;
+}
+
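+/*
+ * parse a mac address of the form aabbccddeeff or aa:bb:cc:dd:ee:ff
+ * (two hex digits per byte, colons optional); returns -1 if short.
+ */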
+int
+parseether(uchar *to, char *from)
+{
+ char nip[4];
+ char *p;
+ int i;
+
+ p = from;
+ for(i = 0; i < Eaddrlen; i++){
+ if(*p == 0)
+ return -1;
+ nip[0] = *p++;
+ if(*p == 0)
+ return -1;
+ nip[1] = *p++;
+ nip[2] = 0;
+ to[i] = strtoul(nip, 0, 16);
+ if(*p == ':')
+ p++;
+ }
+ return 0;
+}
+
+static void
+etherreset(void)
+{
+ Ether *ether;
+ int i, n, ctlrno;
+ char name[KNAMELEN], buf[128];
+
+ for(ether = 0, ctlrno = 0; ctlrno < MaxEther; ctlrno++){
+ if(ether == 0)
+ ether = malloc(sizeof(Ether));
+ memset(ether, 0, sizeof(Ether));
+ ether->ctlrno = ctlrno;
+ ether->mbps = 10;
+ ether->minmtu = ETHERMINTU;
+ ether->maxmtu = ETHERMAXTU;
+
+ if(archether(ctlrno, ether) <= 0)
+ continue;
+
+ if(isaconfig("ether", ctlrno, ether) == 0){
+ free(ether);
+// return nil;
+ continue;
+ }
+ for(n = 0; cards[n].type; n++){
+ if(cistrcmp(cards[n].type, ether->type))
+ continue;
+ for(i = 0; i < ether->nopt; i++)
+ if(cistrncmp(ether->opt[i], "ea=", 3) == 0){
+ if(parseether(ether->ea,
+ &ether->opt[i][3]) == -1)
+ memset(ether->ea, 0, Eaddrlen);
+ } else if(cistrcmp(ether->opt[i],
+ "100BASE-TXFD") == 0)
+ ether->mbps = 100;
+ if(cards[n].reset(ether))
+ break;
+ snprint(name, sizeof(name), "ether%d", ctlrno);
+
+ if(ether->interrupt != nil && ether->irq >= 0)
+ intrenable(ether->irq, ether->interrupt,
+ ether, 0, name);
+
+ i = snprint(buf, sizeof buf,
+ "#l%d: %s: %dMbps port %#lux irq %d",
+ ctlrno, ether->type, ether->mbps, ether->port,
+ ether->irq);
+ if(ether->mem)
+ i += snprint(buf+i, sizeof buf - i,
+ " addr %#lux", PADDR(ether->mem));
+ if(ether->size)
+ i += snprint(buf+i, sizeof buf - i,
+ " size %#luX", ether->size);
+ i += snprint(buf+i, sizeof buf - i,
+ ": %2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux",
+ ether->ea[0], ether->ea[1], ether->ea[2],
+ ether->ea[3], ether->ea[4], ether->ea[5]);
+ snprint(buf+i, sizeof buf - i, "\n");
+ iprint("%s", buf); /* it may be too early for print */
+
+ if(ether->mbps >= 1000)
+ netifinit(ether, name, Ntypes, 4*1024*1024);
+ else if(ether->mbps >= 100)
+ netifinit(ether, name, Ntypes, 1024*1024);
+ else
+ netifinit(ether, name, Ntypes, 65*1024);
+ if(ether->oq == 0)
+ ether->oq = qopen(ether->limit, Qmsg, 0, 0);
+ if(ether->oq == 0)
+ panic("etherreset %s", name);
+ ether->alen = Eaddrlen;
+ memmove(ether->addr, ether->ea, Eaddrlen);
+ memset(ether->bcast, 0xFF, Eaddrlen);
+
+ etherxx[ctlrno] = ether;
+ ether = 0;
+ break;
+ }
+ }
+ if(ether)
+ free(ether);
+}
+
+static void
+ethershutdown(void)
+{
+ Ether *ether;
+ int i;
+
+ for(i = 0; i < MaxEther; i++){
+ ether = etherxx[i];
+ if(ether == nil)
+ continue;
+ if(ether->shutdown == nil) {
+ print("#l%d: no shutdown function\n", i);
+ continue;
+ }
+ (*ether->shutdown)(ether);
+ }
+}
+
+
+#define POLY 0xedb88320
+
+/* really slow 32 bit crc for ethers */
+ulong
+ethercrc(uchar *p, int len)
+{
+ int i, j;
+ ulong crc, b;
+
+ crc = 0xffffffff;
+ for(i = 0; i < len; i++){
+ b = *p++;
+ for(j = 0; j < 8; j++){
+ crc = (crc>>1) ^ (((crc^b) & 1) ? POLY : 0);
+ b >>= 1;
+ }
+ }
+ return crc;
+}
+
+void
+dumpoq(Queue *oq)
+{
+ if (oq == nil)
+ print("no outq! ");
+ else if (qisclosed(oq))
+ print("outq closed ");
+ else if (qfull(oq))
+ print("outq full ");
+ else
+ print("outq %d ", qlen(oq));
+}
+
+void
+dumpnetif(Netif *netif)
+{
+ print("netif %s ", netif->name);
+ print("limit %d mbps %d link %d ",
+ netif->limit, netif->mbps, netif->link);
+ print("inpkts %lld outpkts %lld errs %d\n",
+ netif->inpackets, netif->outpackets,
+ netif->crcs + netif->oerrs + netif->frames + netif->overflows +
+ netif->buffs + netif->soverflows);
+}
+
+Dev etherdevtab = {
+ 'l',
+ "ether",
+
+ etherreset,
+ devinit,
+ ethershutdown,
+ etherattach,
+ etherwalk,
+ etherstat,
+ etheropen,
+ ethercreate,
+ etherclose,
+ etherread,
+ etherbread,
+ etherwrite,
+ etherbwrite,
+ devremove,
+ etherwstat,
+};
diff --git a/sys/src/9/teg2/ether8169.c b/sys/src/9/teg2/ether8169.c
new file mode 100644
index 000000000..0f3b1ec9d
--- /dev/null
+++ b/sys/src/9/teg2/ether8169.c
@@ -0,0 +1,1675 @@
+/*
+ * Realtek RTL8110/8168/8169 Gigabit Ethernet Controllers.
+ * There are some magic register values used which are not described in
+ * any datasheet or driver but seem to be necessary.
+ * There are slight differences between the chips in the series so some
+ * tweaks may be needed.
+ *
+ * we use l1 and l2 cache ops; data must reach ram for dma.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+
+#include "etherif.h"
+#include "ethermii.h"
+
+typedef struct Ctlr Ctlr;
+typedef struct D D; /* Transmit/Receive Descriptor */
+typedef struct Dtcc Dtcc;
+
+enum {
+ Debug = 0, /* beware: > 1 interferes with correct operation */
+};
+
+enum { /* registers */
+ Idr0 = 0x00, /* MAC address */
+ Mar0 = 0x08, /* Multicast address */
+ Dtccr = 0x10, /* Dump Tally Counter Command */
+ Tnpds = 0x20, /* Transmit Normal Priority Descriptors */
+ Thpds = 0x28, /* Transmit High Priority Descriptors */
+ Flash = 0x30, /* Flash Memory Read/Write */
+ Erbcr = 0x34, /* Early Receive Byte Count */
+ Ersr = 0x36, /* Early Receive Status */
+ Cr = 0x37, /* Command Register */
+ Tppoll = 0x38, /* Transmit Priority Polling */
+ Imr = 0x3C, /* Interrupt Mask */
+ Isr = 0x3E, /* Interrupt Status */
+ Tcr = 0x40, /* Transmit Configuration */
+ Rcr = 0x44, /* Receive Configuration */
+ Tctr = 0x48, /* Timer Count */
+ Mpc = 0x4C, /* Missed Packet Counter */
+ Cr9346 = 0x50, /* 9346 Command Register */
+ Config0 = 0x51, /* Configuration Register 0 */
+ Config1 = 0x52, /* Configuration Register 1 */
+ Config2 = 0x53, /* Configuration Register 2 */
+ Config3 = 0x54, /* Configuration Register 3 */
+ Config4 = 0x55, /* Configuration Register 4 */
+ Config5 = 0x56, /* Configuration Register 5 */
+ Timerint = 0x58, /* Timer Interrupt */
+ Mulint = 0x5C, /* Multiple Interrupt Select */
+ Phyar = 0x60, /* PHY Access */
+ Tbicsr0 = 0x64, /* TBI Control and Status */
+	Tbianar		= 0x68,		/* TBI Auto-Negotiation Advertisement */
+ Tbilpar = 0x6A, /* TBI Auto-Negotiation Link Partner */
+ Phystatus = 0x6C, /* PHY Status */
+
+ Rms = 0xDA, /* Receive Packet Maximum Size */
+ Cplusc = 0xE0, /* C+ Command */
+ Coal = 0xE2, /* Interrupt Mitigation (Coalesce) */
+ Rdsar = 0xE4, /* Receive Descriptor Start Address */
+ Etx = 0xEC, /* 8169: Early Tx Threshold; 32-byte units */
+ Mtps = 0xEC, /* 8168: Maximum Transmit Packet Size */
+};
+
+enum { /* Dtccr */
+ Cmd = 0x00000008, /* Command */
+};
+
+enum { /* Cr */
+ Te = 0x04, /* Transmitter Enable */
+ Re = 0x08, /* Receiver Enable */
+ Rst = 0x10, /* Software Reset */
+};
+
+enum { /* Tppoll */
+ Fswint = 0x01, /* Forced Software Interrupt */
+ Npq = 0x40, /* Normal Priority Queue polling */
+ Hpq = 0x80, /* High Priority Queue polling */
+};
+
+enum { /* Imr/Isr */
+ Rok = 0x0001, /* Receive OK */
+ Rer = 0x0002, /* Receive Error */
+ Tok = 0x0004, /* Transmit OK */
+ Ter = 0x0008, /* Transmit Error */
+ Rdu = 0x0010, /* Receive Descriptor Unavailable */
+ Punlc = 0x0020, /* Packet Underrun or Link Change */
+ Fovw = 0x0040, /* Receive FIFO Overflow */
+ Tdu = 0x0080, /* Transmit Descriptor Unavailable */
+ Swint = 0x0100, /* Software Interrupt */
+ Timeout = 0x4000, /* Timer */
+ Serr = 0x8000, /* System Error */
+};
+
+enum { /* Tcr */
+ MtxdmaSHIFT = 8, /* Max. DMA Burst Size */
+ MtxdmaMASK = 0x00000700,
+ Mtxdmaunlimited = 0x00000700,
+ Acrc = 0x00010000, /* Append CRC (not) */
+ Lbk0 = 0x00020000, /* Loopback Test 0 */
+ Lbk1 = 0x00040000, /* Loopback Test 1 */
+ Ifg2 = 0x00080000, /* Interframe Gap 2 */
+ HwveridSHIFT = 23, /* Hardware Version ID */
+ HwveridMASK = 0x7C800000,
+ Macv01 = 0x00000000, /* RTL8169 */
+ Macv02 = 0x00800000, /* RTL8169S/8110S */
+ Macv03 = 0x04000000, /* RTL8169S/8110S */
+ Macv04 = 0x10000000, /* RTL8169SB/8110SB */
+ Macv05 = 0x18000000, /* RTL8169SC/8110SC */
+ Macv07 = 0x24800000, /* RTL8102e */
+// Macv8103e = 0x24C00000,
+ Macv25 = 0x28000000, /* RTL8168D */
+// Macv8168dp = 0x28800000,
+// Macv8168e = 0x2C000000,
+ Macv11 = 0x30000000, /* RTL8168B/8111B */
+ Macv14 = 0x30800000, /* RTL8100E */
+ Macv13 = 0x34000000, /* RTL8101E */
+ Macv07a = 0x34800000, /* RTL8102e */
+ Macv12 = 0x38000000, /* RTL8169B/8111B */
+// Macv8168spin3 = 0x38400000,
+ Macv15 = 0x38800000, /* RTL8100E */
+ Macv12a = 0x3c000000, /* RTL8169C/8111C */
+// Macv19 = 0x3c000000, /* dup Macv12a: RTL8111c-gr */
+// Macv8168cspin2 = 0x3c400000,
+// Macv8168cp = 0x3c800000,
+// Macv8139 = 0x60000000,
+// Macv8139a = 0x70000000,
+// Macv8139ag = 0x70800000,
+// Macv8139b = 0x78000000,
+// Macv8130 = 0x7C000000,
+// Macv8139c = 0x74000000,
+// Macv8139d = 0x74400000,
+// Macv8139cplus = 0x74800000,
+// Macv8101 = 0x74c00000,
+// Macv8100 = 0x78800000,
+// Macv8169_8110sbl= 0x7cc00000,
+// Macv8169_8110sce= 0x98000000,
+ Ifg0 = 0x01000000, /* Interframe Gap 0 */
+ Ifg1 = 0x02000000, /* Interframe Gap 1 */
+};
+
+enum { /* Rcr */
+ Aap = 0x00000001, /* Accept All Packets */
+ Apm = 0x00000002, /* Accept Physical Match */
+ Am = 0x00000004, /* Accept Multicast */
+ Ab = 0x00000008, /* Accept Broadcast */
+ Ar = 0x00000010, /* Accept Runt */
+ Aer = 0x00000020, /* Accept Error */
+ Sel9356 = 0x00000040, /* 9356 EEPROM used */
+ MrxdmaSHIFT = 8, /* Max. DMA Burst Size */
+ MrxdmaMASK = 0x00000700,
+ Mrxdmaunlimited = 0x00000700,
+ RxfthSHIFT = 13, /* Receive Buffer Length */
+ RxfthMASK = 0x0000E000,
+ Rxfth256 = 0x00008000,
+ Rxfthnone = 0x0000E000,
+ Rer8 = 0x00010000, /* Accept Error Packets > 8 bytes */
+ MulERINT = 0x01000000, /* Multiple Early Interrupt Select */
+};
+
+enum { /* Cr9346 */
+ Eedo = 0x01, /* */
+ Eedi = 0x02, /* */
+ Eesk = 0x04, /* */
+ Eecs = 0x08, /* */
+ Eem0 = 0x40, /* Operating Mode */
+ Eem1 = 0x80,
+};
+
+enum { /* Phyar */
+ DataMASK = 0x0000FFFF, /* 16-bit GMII/MII Register Data */
+ DataSHIFT = 0,
+ RegaddrMASK = 0x001F0000, /* 5-bit GMII/MII Register Address */
+ RegaddrSHIFT = 16,
+ Flag = 0x80000000, /* */
+};
+
+enum { /* Phystatus */
+ Fd = 0x01, /* Full Duplex */
+ Linksts = 0x02, /* Link Status */
+ Speed10 = 0x04, /* */
+ Speed100 = 0x08, /* */
+ Speed1000 = 0x10, /* */
+ Rxflow = 0x20, /* */
+ Txflow = 0x40, /* */
+ Entbi = 0x80, /* */
+};
+
+enum { /* Cplusc */
+ Init1 = 0x0001, /* 8168 */
+ Mulrw = 0x0008, /* PCI Multiple R/W Enable */
+ Dac = 0x0010, /* PCI Dual Address Cycle Enable */
+ Rxchksum = 0x0020, /* Receive Checksum Offload Enable */
+ Rxvlan = 0x0040, /* Receive VLAN De-tagging Enable */
+ Pktcntoff = 0x0080, /* 8168, 8101 */
+ Endian = 0x0200, /* Endian Mode */
+};
+
+struct D {
+ u32int control;
+ u32int vlan;
+ u32int addrlo;
+ u32int addrhi;
+};
+
+enum { /* Transmit Descriptor control */
+ TxflMASK = 0x0000FFFF, /* Transmit Frame Length */
+ TxflSHIFT = 0,
+ Tcps = 0x00010000, /* TCP Checksum Offload */
+ Udpcs = 0x00020000, /* UDP Checksum Offload */
+ Ipcs = 0x00040000, /* IP Checksum Offload */
+ Lgsen = 0x08000000, /* TSO; WARNING: contains lark's vomit */
+};
+
+enum { /* Receive Descriptor control */
+ RxflMASK = 0x00001FFF, /* Receive Frame Length */
+ Tcpf = 0x00004000, /* TCP Checksum Failure */
+ Udpf = 0x00008000, /* UDP Checksum Failure */
+ Ipf = 0x00010000, /* IP Checksum Failure */
+ Pid0 = 0x00020000, /* Protocol ID0 */
+ Pid1 = 0x00040000, /* Protocol ID1 */
+ Crce = 0x00080000, /* CRC Error */
+ Runt = 0x00100000, /* Runt Packet */
+ Res = 0x00200000, /* Receive Error Summary */
+ Rwt = 0x00400000, /* Receive Watchdog Timer Expired */
+ Fovf = 0x00800000, /* FIFO Overflow */
+ Bovf = 0x01000000, /* Buffer Overflow */
+ Bar = 0x02000000, /* Broadcast Address Received */
+ Pam = 0x04000000, /* Physical Address Matched */
+ Mar = 0x08000000, /* Multicast Address Received */
+};
+
+enum { /* General Descriptor control */
+ Ls = 0x10000000, /* Last Segment Descriptor */
+ Fs = 0x20000000, /* First Segment Descriptor */
+ Eor = 0x40000000, /* End of Descriptor Ring */
+ Own = 0x80000000, /* Ownership: belongs to hw */
+};
+
+/*
+ */
+enum { /* Ring sizes (<= 1024) */
+ Ntd = 1024, /* Transmit Ring */
+ /* at 1Gb/s, it only takes 12 ms. to fill a 1024-buffer ring */
+ Nrd = 1024, /* Receive Ring */
+ Nrb = 4096,
+
+ Mtu = ETHERMAXTU,
+ Mps = ROUNDUP(ETHERMAXTU+4, 128),
+// Mps = Mtu + 8 + 14, /* if(mtu>ETHERMAXTU) */
+};
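+/*
+ * rough arithmetic for the 12 ms. figure above: 1024 maximum-size
+ * frames are about 1024 * 1538 bytes, roughly 1.6 MB, and 1Gb/s
+ * delivers 125 MB/s, so the ring fills in 12-13 ms. under worst-case load.
+ */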
+
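+/*
+ * hardware dump-tally counters: rtl8169ifstat points Dtccr at this
+ * block and the chip dmas its statistics into it.
+ */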
+struct Dtcc {
+ u64int txok;
+ u64int rxok;
+ u64int txer;
+ u32int rxer;
+ u16int misspkt;
+ u16int fae;
+ u32int tx1col;
+ u32int txmcol;
+ u64int rxokph;
+ u64int rxokbrd;
+ u32int rxokmu;
+ u16int txabt;
+ u16int txundrn;
+};
+
+enum { /* Variants */
+ Rtl8100e = (0x8136<<16)|0x10EC, /* RTL810[01]E: pci-e */
+ Rtl8169c = (0x0116<<16)|0x16EC, /* RTL8169C+ (USR997902) */
+ Rtl8169sc = (0x8167<<16)|0x10EC, /* RTL8169SC */
+ Rtl8168b = (0x8168<<16)|0x10EC, /* RTL8168B: pci-e */
+ Rtl8169 = (0x8169<<16)|0x10EC, /* RTL8169 */
+ /*
+ * trimslice is 10ec/8168 (8168b) Macv25 (8168D) but
+ * compulab says 8111dl.
+ * oui 0x732 (aaeon) phyno 1, macv = 0x28000000 phyv = 0x0002
+ */
+};
+
+struct Ctlr {
+ void* nic;
+ int port;
+ Pcidev* pcidev;
+ Ctlr* next;
+ Ether* ether; /* points back to our Ether */
+ int active;
+
+ QLock alock; /* attach */
+ Lock ilock; /* init */
+ int init; /* flag: attach has set up rings, buffer pool and chip */
+
+ int pciv; /* did<<16 | vid; one of the Variants above */
+ int macv; /* MAC version */
+ int phyv; /* PHY version */
+ int pcie; /* flag: pci-express device? */
+
+ uvlong mchash; /* multicast hash */
+
+ Mii* mii;
+
+// Lock tlock; /* transmit */
+ Rendez trendez;
+ D* td; /* descriptor ring */
+ Block** tb; /* transmit buffers */
+ int ntd;
+
+ int tdh; /* head: host reclaims transmitted buffers here */
+ int tdt; /* tail: host queues new packets here for the NIC */
+ int ntdfree;
+ int ntq;
+
+ int nrb;
+
+// Lock rlock; /* receive */
+ Rendez rrendez;
+ D* rd; /* descriptor ring */
+ Block** rb; /* receive buffers */
+ int nrd;
+
+ int rdh; /* head - producer index (NIC) */
+ int rdt; /* tail - consumer index (host) */
+ int nrdfree;
+
+ Lock reglock;
+ int tcr; /* transmit configuration register */
+ int rcr; /* receive configuration register */
+ int imr;
+ int isr; /* sw copy for kprocs */
+
+ QLock slock; /* statistics */
+ Dtcc* dtcc;
+ uint txdu;
+ uint tcpf;
+ uint udpf;
+ uint ipf;
+ uint fovf;
+ uint ierrs;
+ uint rer;
+ uint rdu;
+ uint punlc;
+ uint fovw;
+ uint mcast;
+ uint frag; /* partial packets; rb was too small */
+};
+
+static Ctlr* rtl8169ctlrhead;
+static Ctlr* rtl8169ctlrtail;
+
+static Lock rblock; /* free receive Blocks */
+static Block* rbpool;
+
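+/*
+ * register accessors: the nic registers are memory-mapped at ctlr->nic;
+ * r is a byte offset, scaled for the 16- and 32-bit forms.  the write
+ * macros end with coherence() so the mmio store is ordered before
+ * whatever the caller does next.
+ */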
+#define csr8r(c, r) (*((uchar *) ((c)->nic)+(r)))
+#define csr16r(c, r) (*((u16int *)((c)->nic)+((r)/2)))
+#define csr32p(c, r) ((u32int *) ((c)->nic)+((r)/4))
+#define csr32r(c, r) (*csr32p(c, r))
+
+#define csr8w(c, r, b) (*((uchar *) ((c)->nic)+(r)) = (b), coherence())
+#define csr16w(c, r, w) (*((u16int *)((c)->nic)+((r)/2)) = (w), coherence())
+#define csr32w(c, r, v) (*csr32p(c, r) = (v), coherence())
+
+static int
+rtl8169miimir(Mii* mii, int pa, int ra)
+{
+ uint r;
+ int timeo;
+ Ctlr *ctlr;
+
+ if(pa != 1)
+ return -1;
+ ctlr = mii->ctlr;
+ r = (ra<<16) & RegaddrMASK;
+ csr32w(ctlr, Phyar, r);
+ delay(1);
+ for(timeo = 0; timeo < 2000; timeo++){
+ if((r = csr32r(ctlr, Phyar)) & Flag)
+ break;
+ microdelay(100);
+ }
+ if(!(r & Flag))
+ return -1;
+
+ return (r & DataMASK)>>DataSHIFT;
+}
+
+static int
+rtl8169miimiw(Mii* mii, int pa, int ra, int data)
+{
+ uint r;
+ int timeo;
+ Ctlr *ctlr;
+
+ if(pa != 1)
+ return -1;
+ ctlr = mii->ctlr;
+ r = Flag|((ra<<16) & RegaddrMASK)|((data<<DataSHIFT) & DataMASK);
+ csr32w(ctlr, Phyar, r);
+ delay(1);
+ for(timeo = 0; timeo < 2000; timeo++){
+ if(!((r = csr32r(ctlr, Phyar)) & Flag))
+ break;
+ microdelay(100);
+ }
+ if(r & Flag)
+ return -1;
+
+ return 0;
+}
+
+static int
+rtl8169mii(Ctlr* ctlr)
+{
+ MiiPhy *phy;
+
+ /*
+ * Link management.
+ */
+ if((ctlr->mii = malloc(sizeof(Mii))) == nil)
+ return -1;
+ ctlr->mii->mir = rtl8169miimir;
+ ctlr->mii->miw = rtl8169miimiw;
+ ctlr->mii->ctlr = ctlr;
+
+ /*
+ * Get rev number out of Phyidr2 so can config properly.
+ * There's probably more special stuff for Macv0[234] needed here.
+ */
+ ilock(&ctlr->reglock);
+ ctlr->phyv = rtl8169miimir(ctlr->mii, 1, Phyidr2) & 0x0F;
+ if(ctlr->macv == Macv02){
+ csr8w(ctlr, 0x82, 1); /* magic */
+ rtl8169miimiw(ctlr->mii, 1, 0x0B, 0x0000); /* magic */
+ }
+
+ if(mii(ctlr->mii, (1<<1)) == 0 || (phy = ctlr->mii->curphy) == nil){
+ iunlock(&ctlr->reglock);
+ free(ctlr->mii);
+ ctlr->mii = nil;
+ return -1;
+ }
+ print("rtl8169: oui %#ux phyno %d, macv = %#8.8ux phyv = %#4.4ux\n",
+ phy->oui, phy->phyno, ctlr->macv, ctlr->phyv);
+
+ miiane(ctlr->mii, ~0, ~0, ~0);
+ iunlock(&ctlr->reglock);
+
+ return 0;
+}
+
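+/*
+ * private pool of receive Blocks.  they are allocated once in
+ * rtl8169attach with bp->free set to rbfree, so freeb() returns them
+ * here instead of to the general allocator; rbfree resets the block
+ * pointers so a full Mps bytes are available again.
+ */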
+static Block*
+rballoc(void)
+{
+ Block *bp;
+
+ ilock(&rblock);
+ if((bp = rbpool) != nil){
+ rbpool = bp->next;
+ bp->next = nil;
+ _xinc(&bp->ref); /* prevent bp from being freed */
+ }
+ iunlock(&rblock);
+ return bp;
+}
+
+static void
+rbfree(Block *bp)
+{
+ bp->wp = bp->rp = bp->lim - Mps;
+ bp->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
+
+ ilock(&rblock);
+ bp->next = rbpool;
+ rbpool = bp;
+ iunlock(&rblock);
+}
+
+static void
+rtl8169promiscuous(void* arg, int on)
+{
+ Ether *edev;
+ Ctlr * ctlr;
+
+ edev = arg;
+ ctlr = edev->ctlr;
+ ilock(&ctlr->ilock);
+ ilock(&ctlr->reglock);
+
+ if(on)
+ ctlr->rcr |= Aap;
+ else
+ ctlr->rcr &= ~Aap;
+ csr32w(ctlr, Rcr, ctlr->rcr);
+ iunlock(&ctlr->reglock);
+ iunlock(&ctlr->ilock);
+}
+
+enum {
+ /* everyone else uses 0x04c11db7, but they both produce the same crc */
+ Etherpolybe = 0x04c11db6,
+ Bytemask = (1<<8) - 1,
+};
+
+static ulong
+ethercrcbe(uchar *addr, long len)
+{
+ int i, j;
+ ulong c, crc, carry;
+
+ crc = ~0UL;
+ for (i = 0; i < len; i++) {
+ c = addr[i];
+ for (j = 0; j < 8; j++) {
+ carry = ((crc & (1UL << 31))? 1: 0) ^ (c & 1);
+ crc <<= 1;
+ c >>= 1;
+ if (carry)
+ crc = (crc ^ Etherpolybe) | carry;
+ }
+ }
+ return crc;
+}
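+/*
+ * why Etherpolybe works: when there is a carry, crc has just been
+ * shifted left, so its bottom bit is 0; or-ing in the carry therefore
+ * gives the same result as xor-ing with the usual polynomial
+ * 0x04c11db7, which differs only in that bottom bit.
+ */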
+
+static ulong
+swabl(ulong l)
+{
+ return l>>24 | (l>>8) & (Bytemask<<8) |
+ (l<<8) & (Bytemask<<16) | l<<24;
+}
+
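+/*
+ * the multicast filter is a 64-bit hash: the top 6 bits of the
+ * big-endian crc of the address select one bit in the Mar0..Mar7
+ * registers.  addresses are only ever added, so stale bits simply
+ * stay set (see the comment below).
+ */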
+static void
+rtl8169multicast(void* ether, uchar *eaddr, int add)
+{
+ Ether *edev;
+ Ctlr *ctlr;
+
+ if (!add)
+ return; /* ok to keep receiving on old mcast addrs */
+
+ edev = ether;
+ ctlr = edev->ctlr;
+ ilock(&ctlr->ilock);
+ ilock(&ctlr->reglock);
+
+ ctlr->mchash |= 1ULL << (ethercrcbe(eaddr, Eaddrlen) >> 26);
+
+ ctlr->rcr |= Am;
+ csr32w(ctlr, Rcr, ctlr->rcr);
+
+ /* pci-e variants reverse the order of the hash byte registers */
+ if (ctlr->pcie) {
+ csr32w(ctlr, Mar0, swabl(ctlr->mchash>>32));
+ csr32w(ctlr, Mar0+4, swabl(ctlr->mchash));
+ } else {
+ csr32w(ctlr, Mar0, ctlr->mchash);
+ csr32w(ctlr, Mar0+4, ctlr->mchash>>32);
+ }
+
+ iunlock(&ctlr->reglock);
+ iunlock(&ctlr->ilock);
+}
+
+static long
+rtl8169ifstat(Ether* edev, void* a, long n, ulong offset)
+{
+ char *p;
+ Ctlr *ctlr;
+ Dtcc *dtcc;
+ int i, l, r, timeo;
+
+ ctlr = edev->ctlr;
+ qlock(&ctlr->slock);
+
+ p = nil;
+ if(waserror()){
+ qunlock(&ctlr->slock);
+ free(p);
+ nexterror();
+ }
+
+ /* copy hw statistics into ctlr->dtcc */
+ dtcc = ctlr->dtcc;
+ allcache->invse(dtcc, sizeof *dtcc);
+ ilock(&ctlr->reglock);
+ csr32w(ctlr, Dtccr+4, 0);
+ csr32w(ctlr, Dtccr, PCIWADDR(dtcc)|Cmd); /* initiate dma? */
+ for(timeo = 0; timeo < 1000; timeo++){
+ if(!(csr32r(ctlr, Dtccr) & Cmd))
+ break;
+ delay(1);
+ }
+ iunlock(&ctlr->reglock);
+ if(csr32r(ctlr, Dtccr) & Cmd)
+ error(Eio);
+
+ edev->oerrs = dtcc->txer;
+ edev->crcs = dtcc->rxer;
+ edev->frames = dtcc->fae;
+ edev->buffs = dtcc->misspkt;
+ edev->overflows = ctlr->txdu + ctlr->rdu;
+
+ if(n == 0){
+ qunlock(&ctlr->slock);
+ poperror();
+ return 0;
+ }
+
+ if((p = malloc(READSTR)) == nil)
+ error(Enomem);
+
+ l = snprint(p, READSTR, "TxOk: %llud\n", dtcc->txok);
+ l += snprint(p+l, READSTR-l, "RxOk: %llud\n", dtcc->rxok);
+ l += snprint(p+l, READSTR-l, "TxEr: %llud\n", dtcc->txer);
+ l += snprint(p+l, READSTR-l, "RxEr: %ud\n", dtcc->rxer);
+ l += snprint(p+l, READSTR-l, "MissPkt: %ud\n", dtcc->misspkt);
+ l += snprint(p+l, READSTR-l, "FAE: %ud\n", dtcc->fae);
+ l += snprint(p+l, READSTR-l, "Tx1Col: %ud\n", dtcc->tx1col);
+ l += snprint(p+l, READSTR-l, "TxMCol: %ud\n", dtcc->txmcol);
+ l += snprint(p+l, READSTR-l, "RxOkPh: %llud\n", dtcc->rxokph);
+ l += snprint(p+l, READSTR-l, "RxOkBrd: %llud\n", dtcc->rxokbrd);
+ l += snprint(p+l, READSTR-l, "RxOkMu: %ud\n", dtcc->rxokmu);
+ l += snprint(p+l, READSTR-l, "TxAbt: %ud\n", dtcc->txabt);
+ l += snprint(p+l, READSTR-l, "TxUndrn: %ud\n", dtcc->txundrn);
+
+ l += snprint(p+l, READSTR-l, "txdu: %ud\n", ctlr->txdu);
+ l += snprint(p+l, READSTR-l, "tcpf: %ud\n", ctlr->tcpf);
+ l += snprint(p+l, READSTR-l, "udpf: %ud\n", ctlr->udpf);
+ l += snprint(p+l, READSTR-l, "ipf: %ud\n", ctlr->ipf);
+ l += snprint(p+l, READSTR-l, "fovf: %ud\n", ctlr->fovf);
+ l += snprint(p+l, READSTR-l, "ierrs: %ud\n", ctlr->ierrs);
+ l += snprint(p+l, READSTR-l, "rer: %ud\n", ctlr->rer);
+ l += snprint(p+l, READSTR-l, "rdu: %ud\n", ctlr->rdu);
+ l += snprint(p+l, READSTR-l, "punlc: %ud\n", ctlr->punlc);
+ l += snprint(p+l, READSTR-l, "fovw: %ud\n", ctlr->fovw);
+
+ l += snprint(p+l, READSTR-l, "tcr: %#8.8ux\n", ctlr->tcr);
+ l += snprint(p+l, READSTR-l, "rcr: %#8.8ux\n", ctlr->rcr);
+ l += snprint(p+l, READSTR-l, "multicast: %ud\n", ctlr->mcast);
+
+ if(ctlr->mii != nil && ctlr->mii->curphy != nil){
+ l += snprint(p+l, READSTR-l, "phy: ");
+ for(i = 0; i < NMiiPhyr; i++){
+ if(i && ((i & 0x07) == 0))
+ l += snprint(p+l, READSTR-l, "\n ");
+ r = miimir(ctlr->mii, i);
+ l += snprint(p+l, READSTR-l, " %4.4ux", r);
+ }
+ snprint(p+l, READSTR-l, "\n");
+ }
+
+ n = readstr(offset, a, n, p);
+
+ qunlock(&ctlr->slock);
+ poperror();
+ free(p);
+
+ return n;
+}
+
+static void
+rtl8169halt(Ctlr* ctlr)
+{
+ ilock(&ctlr->reglock);
+ csr32w(ctlr, Timerint, 0);
+ csr8w(ctlr, Cr, 0);
+ csr16w(ctlr, Imr, 0);
+ csr16w(ctlr, Isr, ~0);
+ iunlock(&ctlr->reglock);
+}
+
+static int
+rtl8169reset(Ctlr* ctlr)
+{
+ u32int r;
+ int timeo;
+
+ /*
+ * Soft reset the controller.
+ */
+ ilock(&ctlr->reglock);
+ csr8w(ctlr, Cr, Rst);
+ for(r = timeo = 0; timeo < 1000; timeo++){
+ r = csr8r(ctlr, Cr);
+ if(!(r & Rst))
+ break;
+ delay(1);
+ }
+ iunlock(&ctlr->reglock);
+
+ rtl8169halt(ctlr);
+
+ if(r & Rst)
+ return -1;
+ return 0;
+}
+
+static void
+rtl8169shutdown(Ether *ether)
+{
+ rtl8169reset(ether->ctlr);
+}
+
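+/*
+ * refill the receive ring with Blocks from the pool and hand the
+ * descriptors back to the nic by setting Own.  stops one slot short
+ * of rdh, presumably so a full ring can't be mistaken for an empty one.
+ */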
+static int
+rtl8169replenish(Ether *edev)
+{
+ int rdt;
+ Block *bp;
+ Ctlr *ctlr;
+ D *d;
+
+ ctlr = edev->ctlr;
+ if (ctlr->nrd == 0) {
+ iprint("rtl8169replenish: not yet initialised\n");
+ return -1;
+ }
+ rdt = ctlr->rdt;
+ assert(ctlr->rb);
+ assert(ctlr->rd);
+ while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){
+ d = &ctlr->rd[rdt];
+ if (d == nil)
+ panic("rtl8169replenish: nil ctlr->rd[%d]", rdt);
+ if (d->control & Own) { /* ctlr owns it? shouldn't happen */
+ iprint("replenish: descriptor owned by hw\n");
+ break;
+ }
+ if(ctlr->rb[rdt] == nil){
+ bp = rballoc();
+ if(bp == nil){
+ iprint("rtl8169: no available buffers\n");
+ break;
+ }
+ ctlr->rb[rdt] = bp;
+ d->addrhi = 0;
+ coherence();
+ d->addrlo = PCIWADDR(bp->rp);
+ coherence();
+ } else
+ iprint("8169: replenish: rx overrun\n");
+ d->control = (d->control & ~RxflMASK) | Mps | Own;
+ coherence();
+
+ rdt = NEXT(rdt, ctlr->nrd);
+ ctlr->nrdfree++;
+ }
+ ctlr->rdt = rdt;
+ coherence();
+ return 0;
+}
+
+static void
+ckrderrs(Ctlr *ctlr, Block *bp, ulong control)
+{
+ if(control & Fovf)
+ ctlr->fovf++;
+ if(control & Mar)
+ ctlr->mcast++;
+
+ switch(control & (Pid1|Pid0)){
+ case Pid0:
+ if(control & Tcpf){
+ iprint("8169: bad tcp checksum\n");
+ ctlr->tcpf++;
+ break;
+ }
+ bp->flag |= Btcpck;
+ break;
+ case Pid1:
+ if(control & Udpf){
+ iprint("8169: bad udp checksum\n");
+ ctlr->udpf++;
+ break;
+ }
+ bp->flag |= Budpck;
+ break;
+ case Pid1|Pid0:
+ if(control & Ipf){
+ iprint("8169: bad ip checksum\n");
+ ctlr->ipf++;
+ break;
+ }
+ bp->flag |= Bipck;
+ break;
+ }
+}
+
+static void
+badpkt(Ether *edev, int rdh, ulong control)
+{
+ Ctlr *ctlr;
+
+ ctlr = edev->ctlr;
+ /* Res is only valid if Fs is set */
+ if(control & Res)
+ iprint("8169: rcv error; d->control %#.8lux\n", control);
+ else if (control == 0) { /* buggered? */
+ if (edev->link)
+ iprint("8169: rcv: d->control==0 (wtf?)\n");
+ } else {
+ ctlr->frag++;
+ iprint("8169: rcv'd frag; d->control %#.8lux\n", control);
+ }
+ if (ctlr->rb[rdh])
+ freeb(ctlr->rb[rdh]);
+}
+
+void
+qpkt(Ether *edev, int rdh, ulong control)
+{
+ int len;
+ Block *bp;
+ Ctlr *ctlr;
+
+ ctlr = edev->ctlr;
+ len = (control & RxflMASK) - 4;
+ if ((uint)len > Mps){
+ if (len < 0)
+ panic("8169: received pkt non-existent");
+ else if (len > Mps)
+ panic("8169: received pkt too big");
+ }
+ bp = ctlr->rb[rdh];
+ bp->wp = bp->rp + len;
+ bp->next = nil;
+
+ allcache->invse(bp->rp, len); /* clear any stale cached packet */
+ ckrderrs(ctlr, bp, control);
+ etheriq(edev, bp, 1);
+
+ if(Debug > 1)
+ iprint("R%d ", len);
+}
+
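+/*
+ * receive and transmit are handled by kprocs (rproc, tproc).  the
+ * interrupt handler masks the relevant Imr bits, merges the causes
+ * into ctlr->isr and wakes the kproc; the kproc clears its bits from
+ * ctlr->isr, drains the ring, then re-enables its interrupts.
+ */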
+static int
+pktstoread(void* v)
+{
+ Ctlr *ctlr = v;
+
+ return ctlr->isr & (Fovw|Rdu|Rer|Rok) &&
+ !(ctlr->rd[ctlr->rdh].control & Own);
+}
+
+static void
+rproc(void* arg)
+{
+ int rdh;
+ ulong control;
+ Ctlr *ctlr;
+ D *rd;
+ Ether *edev;
+
+ edev = arg;
+ ctlr = edev->ctlr;
+ for(;;){
+ /* wait for next interrupt */
+ ilock(&ctlr->reglock);
+ ctlr->imr |= Fovw|Rdu|Rer|Rok;
+ csr16w(ctlr, Imr, ctlr->imr);
+ iunlock(&ctlr->reglock);
+
+ sleep(&ctlr->rrendez, pktstoread, ctlr);
+
+ /* clear saved isr bits */
+ ilock(&ctlr->reglock);
+ ctlr->isr &= ~(Fovw|Rdu|Rer|Rok);
+ iunlock(&ctlr->reglock);
+
+ rdh = ctlr->rdh;
+ for (rd = &ctlr->rd[rdh]; !(rd->control & Own);
+ rd = &ctlr->rd[rdh]){
+ control = rd->control;
+ if((control & (Fs|Ls|Res)) == (Fs|Ls))
+ qpkt(edev, rdh, control);
+ else
+ badpkt(edev, rdh, control);
+ ctlr->rb[rdh] = nil;
+ coherence();
+ rd->control &= Eor;
+ coherence();
+
+ ctlr->nrdfree--;
+ rdh = NEXT(rdh, ctlr->nrd);
+ if(ctlr->nrdfree < ctlr->nrd/2) {
+ /* replenish reads ctlr->rdh */
+ ctlr->rdh = rdh;
+ rtl8169replenish(edev);
+ /* if replenish called restart, rdh is reset */
+ rdh = ctlr->rdh;
+ }
+ }
+ ctlr->rdh = rdh;
+ }
+}
+
+static int
+pktstosend(void* v)
+{
+ Ether *edev = v;
+ Ctlr *ctlr = edev->ctlr;
+
+ return ctlr->isr & (Ter|Tok) &&
+ !(ctlr->td[ctlr->tdh].control & Own) && edev->link;
+}
+
+static void
+tproc(void* arg)
+{
+ int x, len;
+ Block *bp;
+ Ctlr *ctlr;
+ D *d;
+ Ether *edev;
+
+ edev = arg;
+ ctlr = edev->ctlr;
+ for(;;){
+ /* wait for next interrupt */
+ ilock(&ctlr->reglock);
+ ctlr->imr |= Ter|Tok;
+ csr16w(ctlr, Imr, ctlr->imr);
+ iunlock(&ctlr->reglock);
+
+ sleep(&ctlr->trendez, pktstosend, edev);
+
+ /* clear saved isr bits */
+ ilock(&ctlr->reglock);
+ ctlr->isr &= ~(Ter|Tok);
+ iunlock(&ctlr->reglock);
+
+ /* reclaim transmitted Blocks */
+ for(x = ctlr->tdh; ctlr->ntq > 0; x = NEXT(x, ctlr->ntd)){
+ d = &ctlr->td[x];
+ if(d == nil || d->control & Own)
+ break;
+
+ /*
+ * Free it up.
+ * Need to clean the descriptor here? Not really.
+ * Simple freeb for now (no chain and freeblist).
+ * Use ntq count for now.
+ */
+ freeb(ctlr->tb[x]);
+ ctlr->tb[x] = nil;
+ d->control &= Eor;
+ coherence();
+
+ ctlr->ntq--;
+ }
+ ctlr->tdh = x;
+
+ if (ctlr->ntq > 0)
+ csr8w(ctlr, Tppoll, Npq); /* kick xmiter to keep it going */
+ /* copy as much of my output q as possible into output ring */
+ x = ctlr->tdt;
+ while(ctlr->ntq < (ctlr->ntd-1)){
+ if((bp = qget(edev->oq)) == nil)
+ break;
+
+ /* make sure the whole packet is in ram */
+ len = BLEN(bp);
+ allcache->wbse(bp->rp, len);
+
+ d = &ctlr->td[x];
+ assert(d);
+ assert(!(d->control & Own));
+ d->addrhi = 0;
+ d->addrlo = PCIWADDR(bp->rp);
+ ctlr->tb[x] = bp;
+ coherence();
+ d->control = (d->control & ~TxflMASK) |
+ Own | Fs | Ls | len;
+ coherence();
+
+ if(Debug > 1)
+ iprint("T%d ", len);
+
+ x = NEXT(x, ctlr->ntd);
+ ctlr->ntq++;
+
+ ctlr->tdt = x;
+ coherence();
+ csr8w(ctlr, Tppoll, Npq); /* kick xmiter again */
+ }
+ if(x != ctlr->tdt){ /* added new packet(s)? */
+ ctlr->tdt = x;
+ coherence();
+ csr8w(ctlr, Tppoll, Npq);
+ }
+ else if(ctlr->ntq >= (ctlr->ntd-1))
+ ctlr->txdu++;
+ }
+}
+
+static int
+rtl8169init(Ether* edev)
+{
+ u32int r;
+ Ctlr *ctlr;
+ ushort cplusc;
+
+ ctlr = edev->ctlr;
+ ilock(&ctlr->ilock);
+ rtl8169reset(ctlr);
+
+ ilock(&ctlr->reglock);
+ switch(ctlr->pciv){
+ case Rtl8169sc:
+ csr8w(ctlr, Cr, 0);
+ break;
+ case Rtl8168b:
+ case Rtl8169c:
+ /* 8168b manual says set c+ reg first, then command */
+ csr16w(ctlr, Cplusc, 0x2000); /* magic */
+ csr8w(ctlr, Cr, 0);
+ break;
+ }
+
+ /*
+ * MAC Address is not settable on some (all?) chips.
+ * Must put chip into config register write enable mode.
+ */
+ csr8w(ctlr, Cr9346, Eem1|Eem0);
+
+ /*
+ * Transmitter.
+ */
+ memset(ctlr->td, 0, sizeof(D)*ctlr->ntd);
+ ctlr->tdh = ctlr->tdt = 0;
+ ctlr->ntq = 0;
+ ctlr->td[ctlr->ntd-1].control = Eor;
+
+ /*
+ * Receiver.
+ * Need to do something here about the multicast filter.
+ */
+ memset(ctlr->rd, 0, sizeof(D)*ctlr->nrd);
+ ctlr->nrdfree = ctlr->rdh = ctlr->rdt = 0;
+ ctlr->rd[ctlr->nrd-1].control = Eor;
+
+ rtl8169replenish(edev);
+
+ switch(ctlr->pciv){
+ default:
+ ctlr->rcr = Rxfthnone|Mrxdmaunlimited|Ab|Apm;
+ break;
+ case Rtl8168b:
+ case Rtl8169c:
+ ctlr->rcr = Rxfthnone|6<<MrxdmaSHIFT|Ab|Apm; /* DMA max 1024 */
+ break;
+ }
+
+ /*
+ * Setting Mulrw in Cplusc disables the Tx/Rx DMA burst
+ * settings in Tcr/Rcr; the (1<<14) is magic.
+ */
+ cplusc = csr16r(ctlr, Cplusc) & ~(1<<14);
+ switch(ctlr->pciv){
+ case Rtl8168b:
+ case Rtl8169c:
+ cplusc |= Pktcntoff | Init1;
+ break;
+ }
+ cplusc |= /*Rxchksum|*/Mulrw;
+ switch(ctlr->macv){
+ default:
+ panic("ether8169: unknown macv %#08ux for vid %#ux did %#ux",
+ ctlr->macv, ctlr->pcidev->vid, ctlr->pcidev->did);
+ case Macv01:
+ break;
+ case Macv02:
+ case Macv03:
+ cplusc |= 1<<14; /* magic */
+ break;
+ case Macv05:
+ /*
+ * This is interpreted from clearly bogus code
+ * in the manufacturer-supplied driver, it could
+ * be wrong. Untested.
+ */
+ r = csr8r(ctlr, Config2) & 0x07;
+ if(r == 0x01) /* 66MHz PCI */
+ csr32w(ctlr, 0x7C, 0x0007FFFF); /* magic */
+ else
+ csr32w(ctlr, 0x7C, 0x0007FF00); /* magic */
+ pciclrmwi(ctlr->pcidev);
+ break;
+ case Macv13:
+ /*
+ * This is interpreted from clearly bogus code
+ * in the manufacturer-supplied driver, it could
+ * be wrong. Untested.
+ */
+ pcicfgw8(ctlr->pcidev, 0x68, 0x00); /* magic */
+ pcicfgw8(ctlr->pcidev, 0x69, 0x08); /* magic */
+ break;
+ case Macv04:
+ case Macv07:
+ case Macv07a:
+ case Macv11:
+ case Macv12:
+ case Macv12a:
+ case Macv14:
+ case Macv15:
+ case Macv25:
+ break;
+ }
+
+ /*
+ * Enable receiver/transmitter.
+ * Need to do this first or some of the settings below
+ * won't take.
+ */
+ switch(ctlr->pciv){
+ default:
+ csr8w(ctlr, Cr, Te|Re);
+ csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+ csr32w(ctlr, Rcr, ctlr->rcr);
+ break;
+ case Rtl8169sc:
+ case Rtl8168b:
+ break;
+ }
+ ctlr->mchash = 0;
+ csr32w(ctlr, Mar0, 0);
+ csr32w(ctlr, Mar0+4, 0);
+
+ /*
+ * Interrupts.
+ * Disable Tdu for now; the transmit routine will tidy up.
+ * Tdu means the NIC ran out of descriptors to send (i.e., the
+ * output ring is empty), so it doesn't really need to ever be on.
+ *
+ * The timer runs at the PCI(-E) clock frequency, 125MHz for PCI-E,
+ * presumably 66MHz for PCI. Thus the units for PCI-E controllers
+ * (e.g., 8168) are 8ns, and only the buggy 8168 seems to need to use
+ * timeouts to keep from stalling.
+ */
+ csr32w(ctlr, Tctr, 0);
+ /* Tok makes the whole system run faster */
+ ctlr->imr = Serr|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok;
+ switch(ctlr->pciv){
+ case Rtl8169sc:
+ case Rtl8168b:
+ /* alleged workaround for rx fifo overflow on 8168[bd] */
+ ctlr->imr &= ~Rdu;
+ break;
+ }
+ csr16w(ctlr, Imr, ctlr->imr);
+
+ /*
+ * Clear missed-packet counter;
+ * clear early transmit threshold value;
+ * set the descriptor ring base addresses;
+ * set the maximum receive packet size;
+ * no early-receive interrupts.
+ *
+ * note: the maximum rx size is a filter. the size of the buffer
+ * in the descriptor ring is still honored. we will toss >Mtu
+ * packets because they've been fragmented into multiple
+ * rx buffers.
+ */
+ csr32w(ctlr, Mpc, 0);
+ if (ctlr->pcie)
+ csr8w(ctlr, Mtps, Mps / 128);
+ else
+ csr8w(ctlr, Etx, 0x3f); /* max; no early transmission */
+ csr32w(ctlr, Tnpds+4, 0);
+ csr32w(ctlr, Tnpds, PCIWADDR(ctlr->td));
+ csr32w(ctlr, Rdsar+4, 0);
+ csr32w(ctlr, Rdsar, PCIWADDR(ctlr->rd));
+ csr16w(ctlr, Rms, 2048); /* was Mps; see above comment */
+ r = csr16r(ctlr, Mulint) & 0xF000; /* no early rx interrupts */
+ csr16w(ctlr, Mulint, r);
+ csr16w(ctlr, Cplusc, cplusc);
+ csr16w(ctlr, Coal, 0);
+
+ /*
+ * Set configuration.
+ */
+ switch(ctlr->pciv){
+ case Rtl8169sc:
+ csr8w(ctlr, Cr, Te|Re);
+ csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+ csr32w(ctlr, Rcr, ctlr->rcr);
+ break;
+ case Rtl8168b:
+ case Rtl8169c:
+ csr16w(ctlr, Cplusc, 0x2000); /* magic */
+ csr8w(ctlr, Cr, Te|Re);
+ csr32w(ctlr, Tcr, Ifg1|Ifg0|6<<MtxdmaSHIFT); /* DMA max 1024 */
+ csr32w(ctlr, Rcr, ctlr->rcr);
+ break;
+ }
+ ctlr->tcr = csr32r(ctlr, Tcr);
+ csr8w(ctlr, Cr9346, 0);
+
+ iunlock(&ctlr->reglock);
+ iunlock(&ctlr->ilock);
+
+// rtl8169mii(ctlr);
+
+ return 0;
+}
+
+static void
+rtl8169attach(Ether* edev)
+{
+ int timeo, s, i;
+ char name[KNAMELEN];
+ Block *bp;
+ Ctlr *ctlr;
+
+ ctlr = edev->ctlr;
+ s = splhi();
+ qlock(&ctlr->alock);
+ if(ctlr->init || waserror()) {
+ qunlock(&ctlr->alock);
+ splx(s);
+ return;
+ }
+ ctlr->td = ucallocalign(sizeof(D)*Ntd, 256, 0);
+ ctlr->tb = malloc(Ntd*sizeof(Block*));
+ ctlr->ntd = Ntd;
+
+ ctlr->rd = ucallocalign(sizeof(D)*Nrd, 256, 0);
+ ctlr->rb = malloc(Nrd*sizeof(Block*));
+ ctlr->nrd = Nrd;
+
+ ctlr->dtcc = mallocalign(sizeof(Dtcc), 64, 0, 0);
+ if(waserror()){
+ free(ctlr->td);
+ free(ctlr->tb);
+ free(ctlr->rd);
+ free(ctlr->rb);
+ free(ctlr->dtcc);
+ nexterror();
+ }
+ if(ctlr->td == nil || ctlr->tb == nil || ctlr->rd == nil ||
+ ctlr->rb == nil || ctlr->dtcc == nil)
+ error(Enomem);
+
+ /* allocate private receive-buffer pool */
+ ctlr->nrb = Nrb;
+ for(i = 0; i < Nrb; i++){
+ if((bp = allocb(Mps)) == nil)
+ error(Enomem);
+ bp->free = rbfree;
+ freeb(bp);
+ }
+
+ rtl8169init(edev);
+ ctlr->init = 1;
+ qunlock(&ctlr->alock);
+ splx(s);
+ poperror(); /* free */
+ poperror(); /* qunlock */
+
+ /* signal secondary cpus that l1 ptes are stable */
+ l1ptstable.word = 1;
+ allcache->wbse(&l1ptstable, sizeof l1ptstable);
+
+ s = spllo();
+ /* Don't wait long for link to be ready. */
+ for(timeo = 0; timeo < 50 && miistatus(ctlr->mii) != 0; timeo++)
+// tsleep(&up->sleep, return0, 0, 100); /* fewer miistatus msgs */
+ delay(100);
+
+ while (!edev->link)
+ tsleep(&up->sleep, return0, 0, 10);
+ splx(s);
+
+ snprint(name, KNAMELEN, "#l%drproc", edev->ctlrno);
+ kproc(name, rproc, edev);
+
+ snprint(name, KNAMELEN, "#l%dtproc", edev->ctlrno);
+ kproc(name, tproc, edev);
+}
+
+/* call with ctlr->reglock held */
+static void
+rtl8169link(Ether* edev)
+{
+ uint r;
+ int limit;
+ Ctlr *ctlr;
+
+ ctlr = edev->ctlr;
+
+ if(!((r = csr8r(ctlr, Phystatus)) & Linksts)){
+ if (edev->link) {
+ edev->link = 0;
+ csr8w(ctlr, Cr, Re);
+ iprint("#l%d: link down\n", edev->ctlrno);
+ }
+ return;
+ }
+ if (edev->link == 0) {
+ edev->link = 1;
+ csr8w(ctlr, Cr, Te|Re);
+ iprint("#l%d: link up\n", edev->ctlrno);
+ }
+ limit = 256*1024;
+ if(r & Speed10){
+ edev->mbps = 10;
+ limit = 65*1024;
+ } else if(r & Speed100)
+ edev->mbps = 100;
+ else if(r & Speed1000)
+ edev->mbps = 1000;
+
+ if(edev->oq != nil)
+ qsetlimit(edev->oq, limit);
+}
+
+static void
+rtl8169transmit(Ether* edev)
+{
+ Ctlr *ctlr;
+
+ ctlr = edev->ctlr;
+ if (ctlr == nil || ctlr->ntd == 0) {
+ iprint("rtl8169transmit: not yet initialised\n");
+ return;
+ }
+ wakeup(&ctlr->trendez);
+}
+
+/*
+ * the controller has lost its mind, so reset it.
+ * call with ctlr->reglock held.
+ */
+static void
+restart(Ether *edev, char *why)
+{
+ int i, s, del;
+ Ctlr *ctlr;
+ static int inrestart;
+ static Lock rstrtlck;
+
+ /* keep other cpus out */
+ s = splhi();
+ if (inrestart) {
+ splx(s);
+ return;
+ }
+ ilock(&rstrtlck);
+
+ ctlr = edev->ctlr;
+ if (ctlr == nil || !ctlr->init) {
+ iunlock(&rstrtlck);
+ splx(s);
+ return;
+ }
+
+ if (Debug)
+ iprint("#l%d: restart due to %s\n", edev->ctlrno, why);
+ inrestart = 1;
+
+ /* process any pkts in the rings */
+ wakeup(&ctlr->rrendez);
+ coherence();
+ rtl8169transmit(edev);
+ /* allow time to drain 1024-buffer ring */
+ for (del = 0; del < 13 && ctlr->ntq > 0; del++)
+ delay(1);
+
+ iunlock(&ctlr->reglock);
+ rtl8169reset(ctlr);
+ /* free any remaining unprocessed input buffers */
+ for (i = 0; i < ctlr->nrd; i++) {
+ freeb(ctlr->rb[i]);
+ ctlr->rb[i] = nil;
+ }
+ rtl8169init(edev);
+ ilock(&ctlr->reglock);
+
+ rtl8169link(edev);
+ rtl8169transmit(edev); /* drain any output queue */
+ wakeup(&ctlr->rrendez);
+
+ inrestart = 0;
+
+ iunlock(&rstrtlck);
+ splx(s);
+}
+
+static ulong
+rcvdiag(Ether *edev, ulong isr)
+{
+ Ctlr *ctlr;
+
+ ctlr = edev->ctlr;
+ if(!(isr & (Punlc|Rok)))
+ ctlr->ierrs++;
+ if(isr & Rer)
+ ctlr->rer++;
+ if(isr & Rdu)
+ ctlr->rdu++;
+ if(isr & Punlc)
+ ctlr->punlc++;
+ if(isr & Fovw)
+ ctlr->fovw++;
+ if (isr & (Fovw|Rdu|Rer)) {
+ if (isr & ~(Tdu|Tok|Rok)) /* harmless */
+ iprint("#l%d: isr %8.8#lux\n", edev->ctlrno, isr);
+ restart(edev, "rcv error");
+ isr = ~0;
+ }
+ return isr;
+}
+
+void
+rtl8169interrupt(Ureg*, void* arg)
+{
+ Ctlr *ctlr;
+ Ether *edev;
+ u32int isr;
+
+ edev = arg;
+ ctlr = edev->ctlr;
+ ilock(&ctlr->reglock);
+
+ while((isr = csr16r(ctlr, Isr)) != 0 && isr != 0xFFFF){
+ ctlr->isr |= isr; /* merge bits for [rt]proc */
+ csr16w(ctlr, Isr, isr); /* dismiss? */
+ if((isr & ctlr->imr) == 0)
+ break;
+ if(isr & Fovw && ctlr->pciv == Rtl8168b) {
+ /*
+ * Fovw means we got behind; relatively common on 8168.
+ * this is a big hammer, but it gets things going again.
+ */
+ ctlr->fovw++;
+ restart(edev, "rx fifo overrun");
+ break;
+ }
+ if(isr & (Fovw|Punlc|Rdu|Rer|Rok)) {
+ ctlr->imr &= ~(Fovw|Rdu|Rer|Rok);
+ csr16w(ctlr, Imr, ctlr->imr);
+ wakeup(&ctlr->rrendez);
+
+ if (isr & (Fovw|Punlc|Rdu|Rer)) {
+ isr = rcvdiag(edev, isr);
+ if (isr == ~0)
+ break; /* restarted */
+ }
+ isr &= ~(Fovw|Rdu|Rer|Rok);
+ }
+ if(isr & (Ter|Tok)){
+ ctlr->imr &= ~(Ter|Tok);
+ csr16w(ctlr, Imr, ctlr->imr);
+ wakeup(&ctlr->trendez);
+
+ if (isr & Ter)
+ iprint("xmit err; isr %8.8#ux\n", isr);
+ isr &= ~(Ter|Tok);
+ }
+
+ if(isr & Punlc){
+ rtl8169link(edev);
+ isr &= ~Punlc;
+ }
+
+ /*
+ * Some of the reserved bits get set sometimes...
+ */
+ if(isr & (Serr|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok))
+ panic("rtl8169interrupt: imr %#4.4ux isr %#4.4ux",
+ csr16r(ctlr, Imr), isr);
+ }
+ if (edev->link && ctlr->ntq > 0)
+ csr8w(ctlr, Tppoll, Npq); /* kick xmiter to keep it going */
+ iunlock(&ctlr->reglock);
+ /*
+ * extinguish pci-e controller interrupt source.
+ * should be done more cleanly.
+ */
+ if (ctlr->pcie)
+ pcieintrdone();
+}
+
+int
+vetmacv(Ctlr *ctlr, uint *macv)
+{
+ *macv = csr32r(ctlr, Tcr) & HwveridMASK;
+ switch(*macv){
+ default:
+ return -1;
+ case Macv01:
+ case Macv02:
+ case Macv03:
+ case Macv04:
+ case Macv05:
+ case Macv07:
+ case Macv07a:
+ case Macv11:
+ case Macv12:
+ case Macv12a:
+ case Macv13:
+ case Macv14:
+ case Macv15:
+ case Macv25:
+ break;
+ }
+ return 0;
+}
+
+static void
+rtl8169pci(void)
+{
+ Pcidev *p;
+ Ctlr *ctlr;
+ int i, pcie;
+ uint macv, bar;
+ void *mem;
+
+ p = nil;
+ while(p = pcimatch(p, 0, 0)){
+ if(p->ccrb != 0x02 || p->ccru != 0)
+ continue;
+
+ pcie = 0;
+ switch(i = ((p->did<<16)|p->vid)){
+ default:
+ continue;
+ case Rtl8100e: /* RTL810[01]E ? */
+ case Rtl8168b: /* RTL8168B */
+ pcie = 1;
+ break;
+ case Rtl8169c: /* RTL8169C */
+ case Rtl8169sc: /* RTL8169SC */
+ case Rtl8169: /* RTL8169 */
+ break;
+ case (0xC107<<16)|0x1259: /* Corega CG-LAPCIGT */
+ i = Rtl8169;
+ break;
+ }
+
+ bar = p->mem[2].bar & ~0x0F;
+ assert(bar != 0);
+ assert(!(p->mem[2].bar & Barioaddr));
+ if(0) iprint("rtl8169: %d-bit register accesses\n",
+ ((p->mem[2].bar >> Barwidthshift) & Barwidthmask) ==
+ Barwidth32? 32: 64);
+ mem = (void *)bar; /* don't need to vmap on trimslice */
+ if(mem == 0){
+ print("rtl8169: can't map %#ux\n", bar);
+ continue;
+ }
+ ctlr = malloc(sizeof(Ctlr));
+ if(ctlr == nil)
+ error(Enomem);
+ ctlr->nic = mem;
+ ctlr->port = bar;
+ ctlr->pcidev = p;
+ ctlr->pciv = i;
+ ctlr->pcie = pcie;
+
+ if(vetmacv(ctlr, &macv) == -1){
+ free(ctlr);
+ print("rtl8169: unknown mac %.4ux %.8ux\n", p->did, macv);
+ continue;
+ }
+
+ if(pcigetpms(p) > 0){
+ pcisetpms(p, 0);
+
+ for(i = 0; i < 6; i++)
+ pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar);
+ pcicfgw8(p, PciINTL, p->intl);
+ pcicfgw8(p, PciLTR, p->ltr);
+ pcicfgw8(p, PciCLS, p->cls);
+ pcicfgw16(p, PciPCR, p->pcr);
+ }
+
+ if(rtl8169reset(ctlr)){
+ free(ctlr);
+ continue;
+ }
+
+ /*
+ * Extract the chip hardware version,
+ * needed to configure each properly.
+ */
+ ctlr->macv = macv;
+
+ rtl8169mii(ctlr);
+ pcisetbme(p);
+
+ if(rtl8169ctlrhead != nil)
+ rtl8169ctlrtail->next = ctlr;
+ else
+ rtl8169ctlrhead = ctlr;
+ rtl8169ctlrtail = ctlr;
+ }
+}
+
+static int
+rtl8169pnp(Ether* edev)
+{
+ u32int r;
+ Ctlr *ctlr;
+ uchar ea[Eaddrlen];
+ static int once;
+
+ if(once == 0){
+ once = 1;
+ rtl8169pci();
+ }
+
+ /*
+ * Any adapter matches if no edev->port is supplied,
+ * otherwise the ports must match.
+ */
+ for(ctlr = rtl8169ctlrhead; ctlr != nil; ctlr = ctlr->next){
+ if(ctlr->active)
+ continue;
+ if(edev->port == 0 || edev->port == ctlr->port){
+ ctlr->active = 1;
+ break;
+ }
+ }
+ if(ctlr == nil)
+ return -1;
+
+ edev->ctlr = ctlr;
+ ctlr->ether = edev;
+ edev->port = ctlr->port;
+// edev->irq = ctlr->pcidev->intl; /* incorrect on trimslice */
+ edev->irq = Pcieirq; /* trimslice: non-msi pci-e intr */
+ edev->tbdf = ctlr->pcidev->tbdf;
+ edev->mbps = 1000;
+ edev->maxmtu = Mtu;
+
+ /*
+ * Check if the adapter's station address is to be overridden.
+ * If not, read it from the device and set in edev->ea.
+ */
+ memset(ea, 0, Eaddrlen);
+ if(memcmp(ea, edev->ea, Eaddrlen) == 0){
+ r = csr32r(ctlr, Idr0);
+ edev->ea[0] = r;
+ edev->ea[1] = r>>8;
+ edev->ea[2] = r>>16;
+ edev->ea[3] = r>>24;
+ r = csr32r(ctlr, Idr0+4);
+ edev->ea[4] = r;
+ edev->ea[5] = r>>8;
+ }
+
+ edev->attach = rtl8169attach;
+ edev->transmit = rtl8169transmit;
+ edev->interrupt = rtl8169interrupt;
+ edev->ifstat = rtl8169ifstat;
+
+ edev->arg = edev;
+ edev->promiscuous = rtl8169promiscuous;
+ edev->multicast = rtl8169multicast;
+ edev->shutdown = rtl8169shutdown;
+
+ ilock(&ctlr->reglock);
+ rtl8169link(edev);
+ iunlock(&ctlr->reglock);
+ return 0;
+}
+
+void
+ether8169link(void)
+{
+ addethercard("rtl8169", rtl8169pnp);
+}
diff --git a/sys/src/9/teg2/etherif.h b/sys/src/9/teg2/etherif.h
new file mode 100644
index 000000000..bae31be07
--- /dev/null
+++ b/sys/src/9/teg2/etherif.h
@@ -0,0 +1,42 @@
+enum
+{
+ MaxEther = 4,
+ Ntypes = 8,
+};
+
+typedef struct Ether Ether;
+struct Ether {
+ RWlock;
+ ISAConf; /* hardware info */
+
+ int ctlrno;
+ ulong tbdf;
+ int minmtu;
+ int maxmtu;
+
+ Netif;
+
+ void (*attach)(Ether*); /* filled in by reset routine */
+ void (*detach)(Ether*);
+ void (*transmit)(Ether*);
+ void (*interrupt)(Ureg*, void*);
+ long (*ifstat)(Ether*, void*, long, ulong);
+ long (*ctl)(Ether*, void*, long); /* custom ctl messages */
+ void (*power)(Ether*, int); /* power on/off */
+ void (*shutdown)(Ether*); /* shutdown hardware before reboot */
+
+ void* ctlr;
+ uchar ea[Eaddrlen];
+ void* address;
+ int irq;
+
+ Queue* oq;
+};
+
+extern Block* etheriq(Ether*, Block*, int);
+extern void addethercard(char*, int(*)(Ether*));
+extern ulong ethercrc(uchar*, int);
+extern int parseether(uchar*, char*);
+
+#define NEXT(x, l) (((x)+1)%(l))
+#define PREV(x, l) (((x) == 0) ? (l)-1: (x)-1)
diff --git a/sys/src/9/teg2/ethermii.c b/sys/src/9/teg2/ethermii.c
new file mode 100644
index 000000000..90b219b3f
--- /dev/null
+++ b/sys/src/9/teg2/ethermii.c
@@ -0,0 +1,235 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+
+#include "etherif.h"
+#include "ethermii.h"
+
+int
+mii(Mii* mii, int mask)
+{
+ MiiPhy *miiphy;
+ int bit, oui, phyno, r, rmask;
+
+ /*
+ * Probe through mii for PHYs in mask;
+ * return the mask of those found in the current probe.
+ * If the PHY has not already been probed, update
+ * the Mii information.
+ */
+ rmask = 0;
+ for(phyno = 0; phyno < NMiiPhy; phyno++){
+ bit = 1<<phyno;
+ if(!(mask & bit))
+ continue;
+ if(mii->mask & bit){
+ rmask |= bit;
+ continue;
+ }
+ if(mii->mir(mii, phyno, Bmsr) == -1)
+ continue;
+ r = mii->mir(mii, phyno, Phyidr1);
+ oui = (r & 0x3FFF)<<6;
+ r = mii->mir(mii, phyno, Phyidr2);
+ oui |= r>>10;
+ if(oui == 0xFFFFF || oui == 0)
+ continue;
+
+ if((miiphy = malloc(sizeof(MiiPhy))) == nil)
+ continue;
+
+ miiphy->mii = mii;
+ miiphy->oui = oui;
+ miiphy->phyno = phyno;
+
+ miiphy->anar = ~0;
+ miiphy->fc = ~0;
+ miiphy->mscr = ~0;
+
+ mii->phy[phyno] = miiphy;
+ if(mii->curphy == nil)
+ mii->curphy = miiphy;
+ mii->mask |= bit;
+ mii->nphy++;
+
+ rmask |= bit;
+ }
+ return rmask;
+}
+
+int
+miimir(Mii* mii, int r)
+{
+ if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+ return -1;
+ return mii->mir(mii, mii->curphy->phyno, r);
+}
+
+int
+miimiw(Mii* mii, int r, int data)
+{
+ if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+ return -1;
+ return mii->miw(mii, mii->curphy->phyno, r, data);
+}
+
+int
+miireset(Mii* mii)
+{
+ int bmcr;
+
+ if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+ return -1;
+ bmcr = mii->mir(mii, mii->curphy->phyno, Bmcr);
+ bmcr |= BmcrR;
+ mii->miw(mii, mii->curphy->phyno, Bmcr, bmcr);
+ microdelay(1);
+
+ return 0;
+}
+
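+/*
+ * enable and restart autonegotiation.  a, p and e select the
+ * advertised abilities, pause flags and 1000BASE-T modes; ~0 means
+ * use the previously stored or hardware-reported values.
+ */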
+int
+miiane(Mii* mii, int a, int p, int e)
+{
+ int anar, bmsr, mscr, r, phyno;
+
+ if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+ return -1;
+ phyno = mii->curphy->phyno;
+
+ bmsr = mii->mir(mii, phyno, Bmsr);
+ if(!(bmsr & BmsrAna))
+ return -1;
+
+ if(a != ~0)
+ anar = (AnaTXFD|AnaTXHD|Ana10FD|Ana10HD) & a;
+ else if(mii->curphy->anar != ~0)
+ anar = mii->curphy->anar;
+ else{
+ anar = mii->mir(mii, phyno, Anar);
+ anar &= ~(AnaAP|AnaP|AnaT4|AnaTXFD|AnaTXHD|Ana10FD|Ana10HD);
+ if(bmsr & Bmsr10THD)
+ anar |= Ana10HD;
+ if(bmsr & Bmsr10TFD)
+ anar |= Ana10FD;
+ if(bmsr & Bmsr100TXHD)
+ anar |= AnaTXHD;
+ if(bmsr & Bmsr100TXFD)
+ anar |= AnaTXFD;
+ }
+ mii->curphy->anar = anar;
+
+ if(p != ~0)
+ anar |= (AnaAP|AnaP) & p;
+ else if(mii->curphy->fc != ~0)
+ anar |= mii->curphy->fc;
+ mii->curphy->fc = (AnaAP|AnaP) & anar;
+
+ if(bmsr & BmsrEs){
+ mscr = mii->mir(mii, phyno, Mscr);
+ mscr &= ~(Mscr1000TFD|Mscr1000THD);
+ if(e != ~0)
+ mscr |= (Mscr1000TFD|Mscr1000THD) & e;
+ else if(mii->curphy->mscr != ~0)
+ mscr = mii->curphy->mscr;
+ else{
+ r = mii->mir(mii, phyno, Esr);
+ if(r & Esr1000THD)
+ mscr |= Mscr1000THD;
+ if(r & Esr1000TFD)
+ mscr |= Mscr1000TFD;
+ }
+ mii->curphy->mscr = mscr;
+ mii->miw(mii, phyno, Mscr, mscr);
+ }
+ mii->miw(mii, phyno, Anar, anar);
+
+ r = mii->mir(mii, phyno, Bmcr);
+ if(!(r & BmcrR)){
+ r |= BmcrAne|BmcrRan;
+ mii->miw(mii, phyno, Bmcr, r);
+ }
+
+ return 0;
+}
+
+int
+miistatus(Mii* mii)
+{
+ MiiPhy *phy;
+ int anlpar, bmsr, p, r, phyno;
+
+ if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+ return -1;
+ phy = mii->curphy;
+ phyno = phy->phyno;
+
+ /*
+ * Check Auto-Negotiation is complete and link is up.
+ * (Read status twice as the Ls bit is sticky).
+ */
+ bmsr = mii->mir(mii, phyno, Bmsr);
+ if(!(bmsr & (BmsrAnc|BmsrAna))) {
+ // print("miistatus: auto-neg incomplete\n");
+ return -1;
+ }
+
+ bmsr = mii->mir(mii, phyno, Bmsr);
+ if(!(bmsr & BmsrLs)){
+ // print("miistatus: link down\n");
+ phy->link = 0;
+ return -1;
+ }
+
+ phy->speed = phy->fd = phy->rfc = phy->tfc = 0;
+ if(phy->mscr){
+ r = mii->mir(mii, phyno, Mssr);
+ if((phy->mscr & Mscr1000TFD) && (r & Mssr1000TFD)){
+ phy->speed = 1000;
+ phy->fd = 1;
+ }
+ else if((phy->mscr & Mscr1000THD) && (r & Mssr1000THD))
+ phy->speed = 1000;
+ }
+
+ anlpar = mii->mir(mii, phyno, Anlpar);
+ if(phy->speed == 0){
+ r = phy->anar & anlpar;
+ if(r & AnaTXFD){
+ phy->speed = 100;
+ phy->fd = 1;
+ }
+ else if(r & AnaTXHD)
+ phy->speed = 100;
+ else if(r & Ana10FD){
+ phy->speed = 10;
+ phy->fd = 1;
+ }
+ else if(r & Ana10HD)
+ phy->speed = 10;
+ }
+ if(phy->speed == 0) {
+ // print("miistatus: phy speed 0\n");
+ return -1;
+ }
+
+ if(phy->fd){
+ p = phy->fc;
+ r = anlpar & (AnaAP|AnaP);
+ if(p == AnaAP && r == (AnaAP|AnaP))
+ phy->tfc = 1;
+ else if(p == (AnaAP|AnaP) && r == AnaAP)
+ phy->rfc = 1;
+ else if((p & AnaP) && (r & AnaP))
+ phy->rfc = phy->tfc = 1;
+ }
+
+ phy->link = 1;
+
+ return 0;
+}
diff --git a/sys/src/9/teg2/ethermii.h b/sys/src/9/teg2/ethermii.h
new file mode 100644
index 000000000..02a45ee5f
--- /dev/null
+++ b/sys/src/9/teg2/ethermii.h
@@ -0,0 +1,116 @@
+typedef struct Mii Mii;
+typedef struct MiiPhy MiiPhy;
+
+enum { /* registers */
+ Bmcr = 0x00, /* Basic Mode Control */
+ Bmsr = 0x01, /* Basic Mode Status */
+ Phyidr1 = 0x02, /* PHY Identifier #1 */
+ Phyidr2 = 0x03, /* PHY Identifier #2 */
+ Anar = 0x04, /* Auto-Negotiation Advertisement */
+ Anlpar = 0x05, /* AN Link Partner Ability */
+ Aner = 0x06, /* AN Expansion */
+ Annptr = 0x07, /* AN Next Page TX */
+ Annprr = 0x08, /* AN Next Page RX */
+ Mscr = 0x09, /* MASTER-SLAVE Control */
+ Mssr = 0x0A, /* MASTER-SLAVE Status */
+ Esr = 0x0F, /* Extended Status */
+
+ NMiiPhyr = 32,
+ NMiiPhy = 32,
+};
+
+enum { /* Bmcr */
+ BmcrSs1 = 0x0040, /* Speed Select[1] */
+ BmcrCte = 0x0080, /* Collision Test Enable */
+ BmcrDm = 0x0100, /* Duplex Mode */
+ BmcrRan = 0x0200, /* Restart Auto-Negotiation */
+ BmcrI = 0x0400, /* Isolate */
+ BmcrPd = 0x0800, /* Power Down */
+ BmcrAne = 0x1000, /* Auto-Negotiation Enable */
+ BmcrSs0 = 0x2000, /* Speed Select[0] */
+ BmcrLe = 0x4000, /* Loopback Enable */
+ BmcrR = 0x8000, /* Reset */
+};
+
+enum { /* Bmsr */
+ BmsrEc = 0x0001, /* Extended Capability */
+ BmsrJd = 0x0002, /* Jabber Detect */
+ BmsrLs = 0x0004, /* Link Status */
+ BmsrAna = 0x0008, /* Auto-Negotiation Ability */
+ BmsrRf = 0x0010, /* Remote Fault */
+ BmsrAnc = 0x0020, /* Auto-Negotiation Complete */
+ BmsrPs = 0x0040, /* Preamble Suppression Capable */
+ BmsrEs = 0x0100, /* Extended Status */
+ Bmsr100T2HD = 0x0200, /* 100BASE-T2 HD Capable */
+ Bmsr100T2FD = 0x0400, /* 100BASE-T2 FD Capable */
+ Bmsr10THD = 0x0800, /* 10BASE-T HD Capable */
+ Bmsr10TFD = 0x1000, /* 10BASE-T FD Capable */
+ Bmsr100TXHD = 0x2000, /* 100BASE-TX HD Capable */
+ Bmsr100TXFD = 0x4000, /* 100BASE-TX FD Capable */
+ Bmsr100T4 = 0x8000, /* 100BASE-T4 Capable */
+};
+
+enum { /* Anar/Anlpar */
+ Ana10HD = 0x0020, /* Advertise 10BASE-T */
+ Ana10FD = 0x0040, /* Advertise 10BASE-T FD */
+ AnaTXHD = 0x0080, /* Advertise 100BASE-TX */
+ AnaTXFD = 0x0100, /* Advertise 100BASE-TX FD */
+ AnaT4 = 0x0200, /* Advertise 100BASE-T4 */
+ AnaP = 0x0400, /* Pause */
+ AnaAP = 0x0800, /* Asymmetrical Pause */
+ AnaRf = 0x2000, /* Remote Fault */
+ AnaAck = 0x4000, /* Acknowledge */
+ AnaNp = 0x8000, /* Next Page Indication */
+};
+
+enum { /* Mscr */
+ Mscr1000THD = 0x0100, /* Advertise 1000BASE-T HD */
+ Mscr1000TFD = 0x0200, /* Advertise 1000BASE-T FD */
+};
+
+enum { /* Mssr */
+ Mssr1000THD = 0x0400, /* Link Partner 1000BASE-T HD able */
+ Mssr1000TFD = 0x0800, /* Link Partner 1000BASE-T FD able */
+};
+
+enum { /* Esr */
+ Esr1000THD = 0x1000, /* 1000BASE-T HD Capable */
+ Esr1000TFD = 0x2000, /* 1000BASE-T FD Capable */
+ Esr1000XHD = 0x4000, /* 1000BASE-X HD Capable */
+ Esr1000XFD = 0x8000, /* 1000BASE-X FD Capable */
+};
+
+typedef struct Mii {
+ Lock;
+ int nphy;
+ int mask;
+ MiiPhy* phy[NMiiPhy];
+ MiiPhy* curphy;
+
+ void* ctlr;
+ int (*mir)(Mii*, int, int);
+ int (*miw)(Mii*, int, int, int);
+} Mii;
+
+typedef struct MiiPhy {
+ Mii* mii;
+ int oui;
+ int phyno;
+
+ int anar;
+ int fc;
+ int mscr;
+
+ int link;
+ int speed;
+ int fd;
+ int rfc;
+ int tfc;
+} MiiPhy;
+
+extern int mii(Mii*, int);
+extern int miiane(Mii*, int, int, int);
+extern int miimir(Mii*, int);
+extern int miimiw(Mii*, int, int);
+extern int miireset(Mii*);
+extern int miistatus(Mii*);
diff --git a/sys/src/9/teg2/fns.h b/sys/src/9/teg2/fns.h
new file mode 100644
index 000000000..1e2e55fe5
--- /dev/null
+++ b/sys/src/9/teg2/fns.h
@@ -0,0 +1,231 @@
+#define checkmmu(a, b)
+#define countpagerefs(a, b)
+
+#include "../port/portfns.h"
+
+typedef struct Ether Ether;
+struct Ether;
+
+extern int led(int, int);
+extern void ledexit(int);
+extern void delay(int);
+extern void _uartputs(char*, int);
+extern int _uartprint(char*, ...);
+
+#pragma varargck argpos _uartprint 1
+
+extern long ainc(long *);
+extern long adec(long *);
+extern void allcacheinfo(Memcache *);
+extern void allcacheson(void);
+extern int archether(unsigned, Ether *);
+extern void archreboot(void);
+extern void archreset(void);
+extern void cachedinv(void);
+extern void cachedinvse(void*, int);
+extern void cachedwb(void);
+extern void cachedwbinv(void);
+extern void cachedwbinvse(void*, int);
+extern void cachedwbse(void*, int);
+extern void cacheiinv(void);
+extern void cacheuwbinv(void);
+extern uintptr cankaddr(uintptr pa);
+extern void chkmissing(void);
+extern void clockprod(Ureg *);
+extern void clockshutdown(void);
+extern int clz(ulong);
+extern int cmpswap(long*, long, long);
+extern void coherence(void);
+extern void configscreengpio(void);
+extern u32int controlget(void);
+extern void cortexa9cachecfg(void);
+extern u32int cpctget(void);
+extern u32int cpidget(void);
+extern ulong cprd(int cp, int op1, int crn, int crm, int op2);
+extern ulong cprdsc(int op1, int crn, int crm, int op2);
+extern void cpuidprint(void);
+extern char *cputype2name(char *buf, int size);
+extern void cpwr(int cp, int op1, int crn, int crm, int op2, ulong val);
+extern void cpwrsc(int op1, int crn, int crm, int op2, ulong val);
+#define cycles(vlp) *(vlp) = (ulong)lcycles()
+extern u32int dacget(void);
+extern void dacput(u32int);
+extern void dmainit(void);
+extern int dmastart(void *, int, void *, int, uint, Rendez *, int *);
+extern void dmatest(void);
+extern void dump(void *vaddr, int words);
+extern u32int farget(void);
+extern void fpclear(void);
+extern void fpoff(void);
+extern void fpon(void);
+extern ulong fprd(int fpreg);
+extern void fprestreg(int fpreg, uvlong val);
+extern void fpsave(FPsave *);
+extern ulong fpsavereg(int fpreg, uvlong *fpp);
+extern void fpwr(int fpreg, ulong val);
+extern u32int fsrget(void);
+extern ulong getauxctl(void);
+extern ulong getclvlid(void);
+extern ulong getcyc(void);
+extern int getncpus(void);
+extern u32int getpsr(void);
+extern u32int getscr(void);
+extern ulong getwayssets(void);
+extern void intcmask(uint);
+extern void intcunmask(uint);
+extern void intrcpu(int);
+extern void intrcpushutdown(void);
+extern void intrshutdown(void);
+extern void intrsoff(void);
+extern int isaconfig(char*, int, ISAConf*);
+extern int isdmadone(int);
+extern int ispow2(uvlong);
+extern void l1diag(void);
+extern void l2pl310init(void);
+extern int log2(ulong);
+extern void machoff(uint cpu);
+extern void machon(uint cpu);
+extern void memdiag(ulong *);
+extern void mmuidmap(uintptr phys, int mbs);
+extern void mmuinvalidate(void); /* 'mmu' or 'tlb'? */
+extern void mmuinvalidateaddr(u32int); /* 'mmu' or 'tlb'? */
+extern void mousectl(Cmdbuf *cb);
+extern ulong pcibarsize(Pcidev*, int);
+extern void pcibussize(Pcidev*, ulong*, ulong*);
+extern int pcicfgr8(Pcidev*, int);
+extern int pcicfgr16(Pcidev*, int);
+extern int pcicfgr32(Pcidev*, int);
+extern void pcicfgw8(Pcidev*, int, int);
+extern void pcicfgw16(Pcidev*, int, int);
+extern void pcicfgw32(Pcidev*, int, int);
+extern void pciclrbme(Pcidev*);
+extern void pciclrioe(Pcidev*);
+extern void pciclrmwi(Pcidev*);
+extern void pcieintrdone(void);
+extern int pcigetpms(Pcidev*);
+extern void pcihinv(Pcidev*);
+extern uchar pciipin(Pcidev*, uchar);
+extern Pcidev* pcimatch(Pcidev*, int, int);
+extern Pcidev* pcimatchtbdf(int);
+extern void pcireset(void);
+extern void pcisetbme(Pcidev*);
+extern void pcisetioe(Pcidev*);
+extern void pcisetmwi(Pcidev*);
+extern int pcisetpms(Pcidev*, int);
+extern u32int pidget(void);
+extern void pidput(u32int);
+extern void prcachecfg(void);
+extern vlong probeaddr(uintptr);
+extern void procrestore(Proc *);
+extern void procsave(Proc*);
+extern void procfork(Proc*);
+extern void procsetup(Proc*);
+extern void putauxctl(ulong);
+extern void _reset(void);
+extern void screenclockson(void);
+extern void screeninit(void);
+extern void serialputc(int c);
+extern void serialputs(char* s, int n);
+extern void setcachelvl(int);
+extern void setsp(uintptr);
+extern void setr13(int, u32int*);
+extern ulong smpon(void);
+extern int startcpu(uint);
+extern void stopcpu(uint);
+extern int tas(void *);
+extern void tegclock0init(void);
+extern void tegclockinit(void);
+extern void tegclockintr(void);
+extern void tegclockshutdown(void);
+extern void tegwdogintr(Ureg *, void *);
+extern u32int ttbget(void);
+extern void ttbput(u32int);
+extern void _vrst(void);
+extern void wakewfi(void);
+extern void watchdoginit(void);
+extern void wfi(void);
+
+extern int irqenable(uint, void (*)(Ureg*, void*), void*, char*);
+extern int irqdisable(uint, void (*)(Ureg*, void*), void*, char*);
+#define intrenable(i, f, a, b, n) irqenable((i), (f), (a), (n))
+#define intrdisable(i, f, a, b, n) irqdisable((i), (f), (a), (n))
+extern void vectors(void);
+extern void vtable(void);
+
+/*
+ * Things called in main.
+ */
+extern void archconfinit(void);
+extern void clockinit(void);
+extern int i8250console(void);
+extern void links(void);
+extern void mmuinit(void);
+extern void touser(uintptr);
+extern void trapinit(void);
+
+
+extern int fpiarm(Ureg*);
+extern int fpudevprocio(Proc*, void*, long, uintptr, int);
+extern void fpuinit(void);
+extern void fpunoted(void);
+extern void fpunotify(Ureg*);
+extern void fpuprocrestore(Proc*);
+extern void fpuprocsave(Proc*);
+extern void fpusysprocsetup(Proc*);
+extern void fpusysrfork(Ureg*);
+extern void fpusysrforkchild(Proc*, Ureg*, Proc*);
+extern int fpuemu(Ureg*);
+
+/*
+ * Miscellaneous machine dependent stuff.
+ */
+extern int cas(int *, int, int);
+extern char* getenv(char*, char*, int);
+char* getconf(char*);
+uintptr mmukmap(uintptr, uintptr, usize);
+uintptr mmukunmap(uintptr, uintptr, usize);
+extern void* mmuuncache(void*, usize);
+extern void* ucalloc(usize);
+extern Block* ucallocb(int);
+extern void* ucallocalign(usize size, int align, int span);
+extern void ucfree(void*);
+extern void ucfreeb(Block*);
+
+/*
+ * Things called from port.
+ */
+extern void delay(int); /* only scheddump() */
+extern int islo(void);
+extern void microdelay(int); /* only edf.c */
+extern void evenaddr(uintptr);
+extern void idlehands(void);
+extern void setkernur(Ureg*, Proc*); /* only devproc.c */
+extern void* sysexecregs(uintptr, ulong, int);
+extern void sysprocsetup(Proc*);
+
+/* libc */
+long labs(long);
+
+/*
+ * PCI stuff.
+ */
+
+extern void forkret(void);
+extern int userureg(Ureg*);
+void* vmap(uintptr, usize);
+void vunmap(void*, usize);
+
+extern void kexit(Ureg*);
+
+#define getpgcolor(a) 0
+#define kmapinval()
+
+#define PTR2UINT(p) ((uintptr)(p))
+#define UINT2PTR(i) ((void*)(i))
+
+#define waserror() (up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
+
+#define KADDR(pa) UINT2PTR(KZERO | ((uintptr)(pa) & ~KSEGM))
+#define PADDR(va) PTR2UINT(PHYSDRAM | ((uintptr)(va) & ~KSEGM))
+
+#define MASK(v) ((1UL << (v)) - 1) /* mask `v' bits wide */
diff --git a/sys/src/9/teg2/fpi.c b/sys/src/9/teg2/fpi.c
new file mode 100644
index 000000000..f341f2e4a
--- /dev/null
+++ b/sys/src/9/teg2/fpi.c
@@ -0,0 +1,300 @@
+/*
+ * Floating Point Interpreter.
+ * shamelessly stolen from an original by ark.
+ */
+#include "fpi.h"
+
+void
+fpiround(Internal *i)
+{
+ unsigned long guard;
+
+ guard = i->l & GuardMask;
+ i->l &= ~GuardMask;
+ if(guard > (LsBit>>1) || (guard == (LsBit>>1) && (i->l & LsBit))){
+ i->l += LsBit;
+ if(i->l & CarryBit){
+ i->l &= ~CarryBit;
+ i->h++;
+ if(i->h & CarryBit){
+ if (i->h & 0x01)
+ i->l |= CarryBit;
+ i->l >>= 1;
+ i->h >>= 1;
+ i->e++;
+ }
+ }
+ }
+}
+
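+/*
+ * matchexponents shifts x's fraction right until its exponent equals
+ * y's (x has the smaller exponent); bits shifted out are folded back
+ * into the low bit so they remain sticky for rounding.
+ */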
+static void
+matchexponents(Internal *x, Internal *y)
+{
+ int count;
+
+ count = y->e - x->e;
+ x->e = y->e;
+ if(count >= 2*FractBits){
+ x->l = x->l || x->h;
+ x->h = 0;
+ return;
+ }
+ if(count >= FractBits){
+ count -= FractBits;
+ x->l = x->h|(x->l != 0);
+ x->h = 0;
+ }
+ while(count > 0){
+ count--;
+ if(x->h & 0x01)
+ x->l |= CarryBit;
+ if(x->l & 0x01)
+ x->l |= 2;
+ x->l >>= 1;
+ x->h >>= 1;
+ }
+}
+
+static void
+shift(Internal *i)
+{
+ i->e--;
+ i->h <<= 1;
+ i->l <<= 1;
+ if(i->l & CarryBit){
+ i->l &= ~CarryBit;
+ i->h |= 0x01;
+ }
+}
+
+static void
+normalise(Internal *i)
+{
+ while((i->h & HiddenBit) == 0)
+ shift(i);
+}
+
+static void
+renormalise(Internal *i)
+{
+ if(i->e < -2 * FractBits)
+ i->e = -2 * FractBits;
+ while(i->e < 1){
+ i->e++;
+ if(i->h & 0x01)
+ i->l |= CarryBit;
+ i->h >>= 1;
+ i->l = (i->l>>1)|(i->l & 0x01);
+ }
+ if(i->e >= ExpInfinity)
+ SetInfinity(i);
+}
+
+void
+fpinormalise(Internal *x)
+{
+ if(!IsWeird(x) && !IsZero(x))
+ normalise(x);
+}
+
+void
+fpiadd(Internal *x, Internal *y, Internal *i)
+{
+ Internal *t;
+
+ i->s = x->s;
+ if(IsWeird(x) || IsWeird(y)){
+ if(IsNaN(x) || IsNaN(y))
+ SetQNaN(i);
+ else
+ SetInfinity(i);
+ return;
+ }
+ if(x->e > y->e){
+ t = x;
+ x = y;
+ y = t;
+ }
+ matchexponents(x, y);
+ i->e = x->e;
+ i->h = x->h + y->h;
+ i->l = x->l + y->l;
+ if(i->l & CarryBit){
+ i->h++;
+ i->l &= ~CarryBit;
+ }
+ if(i->h & (HiddenBit<<1)){
+ if(i->h & 0x01)
+ i->l |= CarryBit;
+ i->l = (i->l>>1)|(i->l & 0x01);
+ i->h >>= 1;
+ i->e++;
+ }
+ if(IsWeird(i))
+ SetInfinity(i);
+}
+
+void
+fpisub(Internal *x, Internal *y, Internal *i)
+{
+ Internal *t;
+
+ if(y->e < x->e
+ || (y->e == x->e && (y->h < x->h || (y->h == x->h && y->l < x->l)))){
+ t = x;
+ x = y;
+ y = t;
+ }
+ i->s = y->s;
+ if(IsNaN(y)){
+ SetQNaN(i);
+ return;
+ }
+ if(IsInfinity(y)){
+ if(IsInfinity(x))
+ SetQNaN(i);
+ else
+ SetInfinity(i);
+ return;
+ }
+ matchexponents(x, y);
+ i->e = y->e;
+ i->h = y->h - x->h;
+ i->l = y->l - x->l;
+ if(i->l < 0){
+ i->l += CarryBit;
+ i->h--;
+ }
+ if(i->h == 0 && i->l == 0)
+ SetZero(i);
+ else while(i->e > 1 && (i->h & HiddenBit) == 0)
+ shift(i);
+}
+
+#define CHUNK (FractBits/2)
+#define CMASK ((1<<CHUNK)-1)
+#define HI(x) ((short)((x)>>CHUNK) & CMASK)
+#define LO(x) ((short)(x) & CMASK)
+#define SPILL(x) ((x)>>CHUNK)
+#define M(x, y) ((long)a[x]*(long)b[y])
+#define C(h, l) (((long)((h) & CMASK)<<CHUNK)|((l) & CMASK))
+
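+/*
+ * fpimul splits each fraction into four 14-bit chunks (a[0]/b[0] most
+ * significant), forms the partial products c[6]..c[0] schoolbook-style
+ * with SPILL carrying between chunks, recombines them into 28-bit
+ * words f[0..3], and keeps f[2]/f[3] only as a sticky bit.
+ */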
+void
+fpimul(Internal *x, Internal *y, Internal *i)
+{
+ long a[4], b[4], c[7], f[4];
+
+ i->s = x->s^y->s;
+ if(IsWeird(x) || IsWeird(y)){
+ if(IsNaN(x) || IsNaN(y) || IsZero(x) || IsZero(y))
+ SetQNaN(i);
+ else
+ SetInfinity(i);
+ return;
+ }
+ else if(IsZero(x) || IsZero(y)){
+ SetZero(i);
+ return;
+ }
+ normalise(x);
+ normalise(y);
+ i->e = x->e + y->e - (ExpBias - 1);
+
+ a[0] = HI(x->h); b[0] = HI(y->h);
+ a[1] = LO(x->h); b[1] = LO(y->h);
+ a[2] = HI(x->l); b[2] = HI(y->l);
+ a[3] = LO(x->l); b[3] = LO(y->l);
+
+ c[6] = M(3, 3);
+ c[5] = M(2, 3) + M(3, 2) + SPILL(c[6]);
+ c[4] = M(1, 3) + M(2, 2) + M(3, 1) + SPILL(c[5]);
+ c[3] = M(0, 3) + M(1, 2) + M(2, 1) + M(3, 0) + SPILL(c[4]);
+ c[2] = M(0, 2) + M(1, 1) + M(2, 0) + SPILL(c[3]);
+ c[1] = M(0, 1) + M(1, 0) + SPILL(c[2]);
+ c[0] = M(0, 0) + SPILL(c[1]);
+
+ f[0] = c[0];
+ f[1] = C(c[1], c[2]);
+ f[2] = C(c[3], c[4]);
+ f[3] = C(c[5], c[6]);
+
+ if((f[0] & HiddenBit) == 0){
+ f[0] <<= 1;
+ f[1] <<= 1;
+ f[2] <<= 1;
+ f[3] <<= 1;
+ if(f[1] & CarryBit){
+ f[0] |= 1;
+ f[1] &= ~CarryBit;
+ }
+ if(f[2] & CarryBit){
+ f[1] |= 1;
+ f[2] &= ~CarryBit;
+ }
+ if(f[3] & CarryBit){
+ f[2] |= 1;
+ f[3] &= ~CarryBit;
+ }
+ i->e--;
+ }
+ i->h = f[0];
+ i->l = f[1];
+ if(f[2] || f[3])
+ i->l |= 1;
+ renormalise(i);
+}
+
+void
+fpidiv(Internal *x, Internal *y, Internal *i)
+{
+ i->s = x->s^y->s;
+ if(IsNaN(x) || IsNaN(y)
+ || (IsInfinity(x) && IsInfinity(y)) || (IsZero(x) && IsZero(y))){
+ SetQNaN(i);
+ return;
+ }
+ else if(IsZero(x) || IsInfinity(y)){
+ SetInfinity(i);
+ return;
+ }
+ else if(IsInfinity(x) || IsZero(y)){
+ SetZero(i);
+ return;
+ }
+ normalise(x);
+ normalise(y);
+ i->h = 0;
+ i->l = 0;
+ i->e = y->e - x->e + (ExpBias + 2*FractBits - 1);
+ do{
+ if(y->h > x->h || (y->h == x->h && y->l >= x->l)){
+ i->l |= 0x01;
+ y->h -= x->h;
+ y->l -= x->l;
+ if(y->l < 0){
+ y->l += CarryBit;
+ y->h--;
+ }
+ }
+ shift(y);
+ shift(i);
+ }while ((i->h & HiddenBit) == 0);
+ if(y->h || y->l)
+ i->l |= 0x01;
+ renormalise(i);
+}
+
+int
+fpicmp(Internal *x, Internal *y)
+{
+ if(IsNaN(x) && IsNaN(y))
+ return 0;
+ if(IsInfinity(x) && IsInfinity(y))
+ return y->s - x->s;
+ if(x->e == y->e && x->h == y->h && x->l == y->l)
+ return y->s - x->s;
+ if(x->e < y->e
+ || (x->e == y->e && (x->h < y->h || (x->h == y->h && x->l < y->l))))
+ return y->s ? 1: -1;
+ return x->s ? -1: 1;
+}
diff --git a/sys/src/9/teg2/fpi.h b/sys/src/9/teg2/fpi.h
new file mode 100644
index 000000000..abaa7c120
--- /dev/null
+++ b/sys/src/9/teg2/fpi.h
@@ -0,0 +1,61 @@
+typedef long Word;
+typedef unsigned long Single;
+typedef struct {
+ unsigned long l;
+ unsigned long h;
+} Double;
+
+enum {
+ FractBits = 28,
+ CarryBit = 0x10000000,
+ HiddenBit = 0x08000000,
+ MsBit = HiddenBit,
+ NGuardBits = 3,
+ GuardMask = 0x07,
+ LsBit = (1<<NGuardBits),
+
+ SingleExpBias = 127,
+ SingleExpMax = 255,
+ DoubleExpBias = 1023,
+ DoubleExpMax = 2047,
+
+ ExpBias = DoubleExpBias,
+ ExpInfinity = DoubleExpMax,
+};
+
+typedef struct {
+ unsigned char s;
+ short e;
+ long l; /* 0000FFFFFFFFFFFFFFFFFFFFFFFFFGGG */
+ long h; /* 0000HFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+} Internal;
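+/*
+ * Internal holds a number as sign s, exponent e biased by ExpBias,
+ * and a 53-bit significand: the hidden bit and top 27 fraction bits
+ * in h, the remaining 25 fraction bits in l, with NGuardBits guard
+ * bits below them for rounding.
+ */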
+
+#define IsWeird(n) ((n)->e >= ExpInfinity)
+#define IsInfinity(n) (IsWeird(n) && (n)->h == HiddenBit && (n)->l == 0)
+#define SetInfinity(n) ((n)->e = ExpInfinity, (n)->h = HiddenBit, (n)->l = 0)
+#define IsNaN(n) (IsWeird(n) && (((n)->h & ~HiddenBit) || (n)->l))
+#define SetQNaN(n) ((n)->s = 0, (n)->e = ExpInfinity, \
+ (n)->h = HiddenBit|(LsBit<<1), (n)->l = 0)
+#define IsZero(n) ((n)->e == 1 && (n)->h == 0 && (n)->l == 0)
+#define SetZero(n) ((n)->e = 1, (n)->h = 0, (n)->l = 0)
+
+/*
+ * fpi.c
+ */
+extern void fpiround(Internal *);
+extern void fpiadd(Internal *, Internal *, Internal *);
+extern void fpisub(Internal *, Internal *, Internal *);
+extern void fpimul(Internal *, Internal *, Internal *);
+extern void fpidiv(Internal *, Internal *, Internal *);
+extern int fpicmp(Internal *, Internal *);
+extern void fpinormalise(Internal*);
+
+/*
+ * fpimem.c
+ */
+extern void fpis2i(Internal *, void *);
+extern void fpid2i(Internal *, void *);
+extern void fpiw2i(Internal *, void *);
+extern void fpii2s(void *, Internal *);
+extern void fpii2d(void *, Internal *);
+extern void fpii2w(Word *, Internal *);
diff --git a/sys/src/9/teg2/fpiarm.c b/sys/src/9/teg2/fpiarm.c
new file mode 100644
index 000000000..571e89fa6
--- /dev/null
+++ b/sys/src/9/teg2/fpiarm.c
@@ -0,0 +1,506 @@
+/*
+ * this doesn't attempt to implement ARM floating-point properties
+ * that aren't visible in the Inferno environment.
+ * all arithmetic is done in double precision.
+ * the FP trap status isn't updated.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "ureg.h"
+
+#include "arm.h"
+#include "fpi.h"
+
+#define ARM7500 /* emulate old pre-VFP opcodes */
+
+/* undef this if correct kernel r13 isn't in Ureg;
+ * check calculation in fpiarm below
+ */
+
+#define REG(ur, x) (*(long*)(((char*)(ur))+roff[(x)]))
+#ifdef ARM7500
+#define FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&7])
+#else
+#define FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&(Nfpregs - 1)])
+#endif
+
+typedef struct FP2 FP2;
+typedef struct FP1 FP1;
+
+struct FP2 {
+ char* name;
+ void (*f)(Internal, Internal, Internal*);
+};
+
+struct FP1 {
+ char* name;
+ void (*f)(Internal*, Internal*);
+};
+
+enum {
+ N = 1<<31,
+ Z = 1<<30,
+ C = 1<<29,
+ V = 1<<28,
+ REGPC = 15,
+};
+
+enum {
+ fpemudebug = 0,
+};
+
+#undef OFR
+#define OFR(X) ((ulong)&((Ureg*)0)->X)
+
+static int roff[] = {
+ OFR(r0), OFR(r1), OFR(r2), OFR(r3),
+ OFR(r4), OFR(r5), OFR(r6), OFR(r7),
+ OFR(r8), OFR(r9), OFR(r10), OFR(r11),
+ OFR(r12), OFR(r13), OFR(r14), OFR(pc),
+};
+
+static Internal fpconst[8] = { /* indexed by op&7 (ARM 7500 FPA) */
+ /* s, e, l, h */
+ {0, 0x1, 0x00000000, 0x00000000}, /* 0.0 */
+ {0, 0x3FF, 0x00000000, 0x08000000}, /* 1.0 */
+ {0, 0x400, 0x00000000, 0x08000000}, /* 2.0 */
+ {0, 0x400, 0x00000000, 0x0C000000}, /* 3.0 */
+ {0, 0x401, 0x00000000, 0x08000000}, /* 4.0 */
+ {0, 0x401, 0x00000000, 0x0A000000}, /* 5.0 */
+ {0, 0x3FE, 0x00000000, 0x08000000}, /* 0.5 */
+ {0, 0x402, 0x00000000, 0x0A000000}, /* 10.0 */
+};
+
+/*
+ * arm binary operations
+ */
+
+static void
+fadd(Internal m, Internal n, Internal *d)
+{
+ (m.s == n.s? fpiadd: fpisub)(&m, &n, d);
+}
+
+static void
+fsub(Internal m, Internal n, Internal *d)
+{
+ m.s ^= 1;
+ (m.s == n.s? fpiadd: fpisub)(&m, &n, d);
+}
+
+static void
+fsubr(Internal m, Internal n, Internal *d)
+{
+ n.s ^= 1;
+ (n.s == m.s? fpiadd: fpisub)(&n, &m, d);
+}
+
+static void
+fmul(Internal m, Internal n, Internal *d)
+{
+ fpimul(&m, &n, d);
+}
+
+static void
+fdiv(Internal m, Internal n, Internal *d)
+{
+ fpidiv(&m, &n, d);
+}
+
+static void
+fdivr(Internal m, Internal n, Internal *d)
+{
+ fpidiv(&n, &m, d);
+}
+
+/*
+ * arm unary operations
+ */
+
+static void
+fmov(Internal *m, Internal *d)
+{
+ *d = *m;
+}
+
+static void
+fmovn(Internal *m, Internal *d)
+{
+ *d = *m;
+ d->s ^= 1;
+}
+
+static void
+fabsf(Internal *m, Internal *d)
+{
+ *d = *m;
+ d->s = 0;
+}
+
+static void
+frnd(Internal *m, Internal *d)
+{
+ short e;
+
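+	/* add 0.5 of like sign, then clear the fraction: round to nearest, halves away from zero */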
+ (m->s? fsub: fadd)(fpconst[6], *m, d);
+ if(IsWeird(d))
+ return;
+ fpiround(d);
+ e = (d->e - ExpBias) + 1;
+ if(e <= 0)
+ SetZero(d);
+ else if(e > FractBits){
+ if(e < 2*FractBits)
+ d->l &= ~((1<<(2*FractBits - e))-1);
+ }else{
+ d->l = 0;
+ if(e < FractBits)
+ d->h &= ~((1<<(FractBits-e))-1);
+ }
+}
+
+/*
+ * ARM 7500 FPA opcodes
+ */
+
+static FP1 optab1[16] = { /* Fd := OP Fm */
+[0] {"MOVF", fmov},
+[1] {"NEGF", fmovn},
+[2] {"ABSF", fabsf},
+[3] {"RNDF", frnd},
+[4] {"SQTF", /*fsqt*/0},
+/* LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN all `deprecated' */
+/* URD and NRM aren't implemented */
+};
+
+static FP2 optab2[16] = { /* Fd := Fn OP Fm */
+[0] {"ADDF", fadd},
+[1] {"MULF", fmul},
+[2] {"SUBF", fsub},
+[3] {"RSUBF", fsubr},
+[4] {"DIVF", fdiv},
+[5] {"RDIVF", fdivr},
+/* POW, RPW deprecated */
+[8] {"REMF", /*frem*/0},
+[9] {"FMF", fmul}, /* fast multiply */
+[10] {"FDV", fdiv}, /* fast divide */
+[11] {"FRD", fdivr}, /* fast reverse divide */
+/* POL deprecated */
+};
+
+static ulong
+fcmp(Internal *n, Internal *m)
+{
+ int i;
+ Internal rm, rn;
+
+ if(IsWeird(m) || IsWeird(n)){
+ /* BUG: should trap if not masked */
+ return V|C;
+ }
+ rn = *n;
+ rm = *m;
+ fpiround(&rn);
+ fpiround(&rm);
+ i = fpicmp(&rn, &rm);
+ if(i > 0)
+ return C;
+ else if(i == 0)
+ return C|Z;
+ else
+ return N;
+}
+
+static void
+fld(void (*f)(Internal*, void*), int d, ulong ea, int n, FPsave *ufp)
+{
+ void *mem;
+
+ mem = (void*)ea;
+ (*f)(&FR(ufp, d), mem);
+ if(fpemudebug)
+ print("MOV%c #%lux, F%d\n", n==8? 'D': 'F', ea, d);
+}
+
+static void
+fst(void (*f)(void*, Internal*), ulong ea, int s, int n, FPsave *ufp)
+{
+ Internal tmp;
+ void *mem;
+
+ mem = (void*)ea;
+ tmp = FR(ufp, s);
+ if(fpemudebug)
+ print("MOV%c F%d,#%lux\n", n==8? 'D': 'F', s, ea);
+ (*f)(mem, &tmp);
+}
+
+static int
+condok(int cc, int c)
+{
+ switch(c){
+ case 0: /* Z set */
+ return cc&Z;
+ case 1: /* Z clear */
+ return (cc&Z) == 0;
+ case 2: /* C set */
+ return cc&C;
+ case 3: /* C clear */
+ return (cc&C) == 0;
+ case 4: /* N set */
+ return cc&N;
+ case 5: /* N clear */
+ return (cc&N) == 0;
+ case 6: /* V set */
+ return cc&V;
+ case 7: /* V clear */
+ return (cc&V) == 0;
+ case 8: /* C set and Z clear */
+ return cc&C && (cc&Z) == 0;
+ case 9: /* C clear or Z set */
+ return (cc&C) == 0 || cc&Z;
+ case 10: /* N set and V set, or N clear and V clear */
+ return (~cc&(N|V))==0 || (cc&(N|V)) == 0;
+ case 11: /* N set and V clear, or N clear and V set */
+ return (cc&(N|V))==N || (cc&(N|V))==V;
+ case 12: /* Z clear, and either N set and V set or N clear and V clear */
+ return (cc&Z) == 0 && ((~cc&(N|V))==0 || (cc&(N|V))==0);
+ case 13: /* Z set, or N set and V clear or N clear and V set */
+ return (cc&Z) || (cc&(N|V))==N || (cc&(N|V))==V;
+ case 14: /* always */
+ return 1;
+ case 15: /* never (reserved) */
+ return 0;
+ }
+ return 0; /* not reached */
+}
+
+static void
+unimp(ulong pc, ulong op)
+{
+ char buf[60];
+
+ snprint(buf, sizeof(buf), "sys: fp: pc=%lux unimp fp 0x%.8lux", pc, op);
+ if(fpemudebug)
+ print("FPE: %s\n", buf);
+ error(buf);
+ /* no return */
+}
+
+static void
+fpemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
+{
+ int rn, rd, tag, o;
+ long off;
+ ulong ea;
+ Internal tmp, *fm, *fn;
+
+ /* note: would update fault status here if we noted numeric exceptions */
+
+ /*
+ * LDF, STF; 10.1.1
+ */
+ if(((op>>25)&7) == 6){
+ if(op & (1<<22))
+ unimp(pc, op); /* packed or extended */
+ rn = (op>>16)&0xF;
+ off = (op&0xFF)<<2;
+ if((op & (1<<23)) == 0)
+ off = -off;
+ ea = REG(ur, rn);
+ if(rn == REGPC)
+ ea += 8;
+ if(op & (1<<24))
+ ea += off;
+ rd = (op>>12)&7;
+ if(op & (1<<20)){
+ if(op & (1<<15))
+ fld(fpid2i, rd, ea, 8, ufp);
+ else
+ fld(fpis2i, rd, ea, 4, ufp);
+ }else{
+ if(op & (1<<15))
+ fst(fpii2d, ea, rd, 8, ufp);
+ else
+ fst(fpii2s, ea, rd, 4, ufp);
+ }
+ if((op & (1<<24)) == 0)
+ ea += off;
+ if(op & (1<<21))
+ REG(ur, rn) = ea;
+ return;
+ }
+
+ /*
+ * CPRT/transfer, 10.3
+ */
+ if(op & (1<<4)){
+ rd = (op>>12) & 0xF;
+
+ /*
+ * compare, 10.3.1
+ */
+ if(rd == 15 && op & (1<<20)){
+ rn = (op>>16)&7;
+ fn = &FR(ufp, rn);
+ if(op & (1<<3)){
+ fm = &fpconst[op&7];
+ if(fpemudebug)
+ tag = 'C';
+ }else{
+ fm = &FR(ufp, op&7);
+ if(fpemudebug)
+ tag = 'F';
+ }
+ switch((op>>21)&7){
+ default:
+ unimp(pc, op);
+ case 4: /* CMF: Fn :: Fm */
+ case 6: /* CMFE: Fn :: Fm (with exception) */
+ ur->psr &= ~(N|C|Z|V);
+ ur->psr |= fcmp(fn, fm);
+ break;
+ case 5: /* CNF: Fn :: -Fm */
+ case 7: /* CNFE: Fn :: -Fm (with exception) */
+ tmp = *fm;
+ tmp.s ^= 1;
+ ur->psr &= ~(N|C|Z|V);
+ ur->psr |= fcmp(fn, &tmp);
+ break;
+ }
+ if(fpemudebug)
+ print("CMPF %c%d,F%ld =%#lux\n",
+ tag, rn, op&7, ur->psr>>28);
+ return;
+ }
+
+ /*
+ * other transfer, 10.3
+ */
+ switch((op>>20)&0xF){
+ default:
+ unimp(pc, op);
+ case 0: /* FLT */
+ rn = (op>>16) & 7;
+ fpiw2i(&FR(ufp, rn), &REG(ur, rd));
+ if(fpemudebug)
+ print("MOVW[FD] R%d, F%d\n", rd, rn);
+ break;
+ case 1: /* FIX */
+ if(op & (1<<3))
+ unimp(pc, op);
+ rn = op & 7;
+ tmp = FR(ufp, rn);
+ fpii2w(&REG(ur, rd), &tmp);
+ if(fpemudebug)
+ print("MOV[FD]W F%d, R%d =%ld\n", rn, rd, REG(ur, rd));
+ break;
+ case 2: /* FPSR := Rd */
+ ufp->status = REG(ur, rd);
+ if(fpemudebug)
+ print("MOVW R%d, FPSR\n", rd);
+ break;
+ case 3: /* Rd := FPSR */
+ REG(ur, rd) = ufp->status;
+ if(fpemudebug)
+ print("MOVW FPSR, R%d\n", rd);
+ break;
+ case 4: /* FPCR := Rd */
+ ufp->control = REG(ur, rd);
+ if(fpemudebug)
+ print("MOVW R%d, FPCR\n", rd);
+ break;
+ case 5: /* Rd := FPCR */
+ REG(ur, rd) = ufp->control;
+ if(fpemudebug)
+ print("MOVW FPCR, R%d\n", rd);
+ break;
+ }
+ return;
+ }
+
+ /*
+ * arithmetic
+ */
+
+ if(op & (1<<3)){ /* constant */
+ fm = &fpconst[op&7];
+ if(fpemudebug)
+ tag = 'C';
+ }else{
+ fm = &FR(ufp, op&7);
+ if(fpemudebug)
+ tag = 'F';
+ }
+ rd = (op>>12)&7;
+ o = (op>>20)&0xF;
+ if(op & (1<<15)){ /* monadic */
+ FP1 *fp;
+ fp = &optab1[o];
+ if(fp->f == nil)
+ unimp(pc, op);
+ if(fpemudebug)
+ print("%s %c%ld,F%d\n", fp->name, tag, op&7, rd);
+ (*fp->f)(fm, &FR(ufp, rd));
+ } else {
+ FP2 *fp;
+ fp = &optab2[o];
+ if(fp->f == nil)
+ unimp(pc, op);
+ rn = (op>>16)&7;
+ if(fpemudebug)
+ print("%s %c%ld,F%d,F%d\n", fp->name, tag, op&7, rn, rd);
+ (*fp->f)(*fm, FR(ufp, rn), &FR(ufp, rd));
+ }
+}
+
+/*
+ * returns the number of FP instructions emulated
+ */
+int
+fpiarm(Ureg *ur)
+{
+ ulong op, o, cp;
+ FPsave *ufp;
+ int n;
+
+ if(up == nil)
+ panic("fpiarm not in a process");
+ ufp = &up->fpsave;
+ /*
+ * because all the emulated fp state is in the proc structure,
+ * it need not be saved/restored
+ */
+ switch(up->fpstate){
+ case FPactive:
+ case FPinactive:
+ error("illegal instruction: emulated fpu opcode in VFP mode");
+ case FPinit:
+ assert(sizeof(Internal) <= sizeof(ufp->regs[0]));
+ up->fpstate = FPemu;
+ ufp->control = 0;
+ ufp->status = (0x01<<28)|(1<<12); /* sw emulation, alt. C flag */
+ for(n = 0; n < 8; n++)
+ FR(ufp, n) = fpconst[0];
+ }
+ for(n=0; ;n++){
+ validaddr(ur->pc, 4, 0);
+ op = *(ulong*)(ur->pc);
+ if(fpemudebug)
+ print("%#lux: %#8.8lux ", ur->pc, op);
+ o = (op>>24) & 0xF;
+ cp = (op>>8) & 0xF;
+ if(!ISFPAOP(cp, o))
+ break;
+ if(condok(ur->psr, op>>28))
+ fpemu(ur->pc, op, ur, ufp);
+ ur->pc += 4; /* pretend cpu executed the instr */
+ }
+ if(fpemudebug)
+ print("\n");
+ return n;
+}
diff --git a/sys/src/9/teg2/fpimem.c b/sys/src/9/teg2/fpimem.c
new file mode 100644
index 000000000..627ab6355
--- /dev/null
+++ b/sys/src/9/teg2/fpimem.c
@@ -0,0 +1,136 @@
+#include "fpi.h"
+
+/*
+ * the following routines depend on memory format, not the machine
+ */
+
+void
+fpis2i(Internal *i, void *v)
+{
+ Single *s = v;
+
+ i->s = (*s & 0x80000000) ? 1: 0;
+ if((*s & ~0x80000000) == 0){
+ SetZero(i);
+ return;
+ }
+ i->e = ((*s>>23) & 0x00FF) - SingleExpBias + ExpBias;
+ i->h = (*s & 0x007FFFFF)<<(1+NGuardBits);
+ i->l = 0;
+ if(i->e)
+ i->h |= HiddenBit;
+ else
+ i->e++;
+}
+
+void
+fpid2i(Internal *i, void *v)
+{
+ Double *d = v;
+
+ i->s = (d->h & 0x80000000) ? 1: 0;
+ i->e = (d->h>>20) & 0x07FF;
+ i->h = ((d->h & 0x000FFFFF)<<(4+NGuardBits))|((d->l>>25) & 0x7F);
+ i->l = (d->l & 0x01FFFFFF)<<NGuardBits;
+ if(i->e)
+ i->h |= HiddenBit;
+ else
+ i->e++;
+}
+
+void
+fpiw2i(Internal *i, void *v)
+{
+ Word w, word = *(Word*)v;
+ short e;
+
+ if(word < 0){
+ i->s = 1;
+ word = -word;
+ }
+ else
+ i->s = 0;
+ if(word == 0){
+ SetZero(i);
+ return;
+ }
+ if(word > 0){
+ for (e = 0, w = word; w; w >>= 1, e++)
+ ;
+ } else
+ e = 32;
+ if(e > FractBits){
+ i->h = word>>(e - FractBits);
+ i->l = (word & ((1<<(e - FractBits)) - 1))<<(2*FractBits - e);
+ }
+ else {
+ i->h = word<<(FractBits - e);
+ i->l = 0;
+ }
+ i->e = (e - 1) + ExpBias;
+}
+
+void
+fpii2s(void *v, Internal *i)
+{
+ short e;
+ Single *s = (Single*)v;
+
+ fpiround(i);
+ if(i->h & HiddenBit)
+ i->h &= ~HiddenBit;
+ else
+ i->e--;
+ *s = i->s ? 0x80000000: 0;
+ e = i->e;
+ if(e < ExpBias){
+ if(e <= (ExpBias - SingleExpBias))
+ return;
+ e = SingleExpBias - (ExpBias - e);
+ }
+ else if(e >= (ExpBias + (SingleExpMax-SingleExpBias))){
+ *s |= SingleExpMax<<23;
+ return;
+ }
+ else
+ e = SingleExpBias + (e - ExpBias);
+ *s |= (e<<23)|(i->h>>(1+NGuardBits));
+}
+
+void
+fpii2d(void *v, Internal *i)
+{
+ Double *d = (Double*)v;
+
+ fpiround(i);
+ if(i->h & HiddenBit)
+ i->h &= ~HiddenBit;
+ else
+ i->e--;
+ i->l = ((i->h & GuardMask)<<25)|(i->l>>NGuardBits);
+ i->h >>= NGuardBits;
+ d->h = i->s ? 0x80000000: 0;
+ d->h |= (i->e<<20)|((i->h & 0x00FFFFFF)>>4);
+ d->l = (i->h<<28)|i->l;
+}
+
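+/* convert Internal to a 32-bit integer word, saturating on overflow */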
+void
+fpii2w(Word *word, Internal *i)
+{
+ Word w;
+ short e;
+
+ fpiround(i);
+ e = (i->e - ExpBias) + 1;
+ if(e <= 0)
+ w = 0;
+ else if(e > 31)
+ w = 0x7FFFFFFF;
+ else if(e > FractBits)
+ w = (i->h<<(e - FractBits))|(i->l>>(2*FractBits - e));
+ else
+ w = i->h>>(FractBits-e);
+ if(i->s)
+ w = -w;
+ *word = w;
+}
diff --git a/sys/src/9/teg2/init9.s b/sys/src/9/teg2/init9.s
new file mode 100644
index 000000000..1d7f2bec3
--- /dev/null
+++ b/sys/src/9/teg2/init9.s
@@ -0,0 +1,25 @@
+/*
+ * This is the same as the C programme:
+ *
+ * void
+ * main(char* argv0)
+ * {
+ * startboot(argv0, &argv0);
+ * }
+ *
+ * It is in assembler because SB needs to be
+ * set and doing this in C drags in too many
+ * other routines.
+ */
+TEXT main(SB), 1, $8
+ MOVW $setR12(SB), R12 /* load the SB */
+ MOVW $boot(SB), R0
+
+ ADD $12, R13, R1 /* pointer to 0(FP) */
+
+	MOVW	R0, 4(R13)			/* pass argv0 and &argv0 */
+ MOVW R1, 8(R13)
+
+ BL startboot(SB)
+_loop:
+ B _loop
diff --git a/sys/src/9/teg2/io.h b/sys/src/9/teg2/io.h
new file mode 100644
index 000000000..703e0cbe4
--- /dev/null
+++ b/sys/src/9/teg2/io.h
@@ -0,0 +1,219 @@
+#pragma varargck type "T" int
+#pragma varargck type "T" uint
+
+/*
+ * PCI
+ */
+
+enum {
+ BusCBUS = 0, /* Corollary CBUS */
+ BusCBUSII, /* Corollary CBUS II */
+ BusEISA, /* Extended ISA */
+ BusFUTURE, /* IEEE Futurebus */
+ BusINTERN, /* Internal bus */
+ BusISA, /* Industry Standard Architecture */
+ BusMBI, /* Multibus I */
+ BusMBII, /* Multibus II */
+ BusMCA, /* Micro Channel Architecture */
+ BusMPI, /* MPI */
+ BusMPSA, /* MPSA */
+ BusNUBUS, /* Apple Macintosh NuBus */
+ BusPCI, /* Peripheral Component Interconnect */
+ BusPCMCIA, /* PC Memory Card International Association */
+ BusTC, /* DEC TurboChannel */
+ BusVL, /* VESA Local bus */
+ BusVME, /* VMEbus */
+ BusXPRESS, /* Express System Bus */
+};
+
+#define MKBUS(t,b,d,f) (((t)<<24)|(((b)&0xFF)<<16)|(((d)&0x1F)<<11)|(((f)&0x07)<<8))
+#define BUSFNO(tbdf) (((tbdf)>>8)&0x07)
+#define BUSDNO(tbdf) (((tbdf)>>11)&0x1F)
+#define BUSBNO(tbdf) (((tbdf)>>16)&0xFF)
+#define BUSTYPE(tbdf) ((tbdf)>>24)
+#define BUSBDF(tbdf) ((tbdf)&0x00FFFF00)
+#define BUSUNKNOWN (-1)
+
+enum { /* type 0 & type 1 pre-defined header */
+ PciVID = 0x00, /* vendor ID */
+ PciDID = 0x02, /* device ID */
+ PciPCR = 0x04, /* command */
+ PciPSR = 0x06, /* status */
+ PciRID = 0x08, /* revision ID */
+ PciCCRp = 0x09, /* programming interface class code */
+ PciCCRu = 0x0A, /* sub-class code */
+ PciCCRb = 0x0B, /* base class code */
+ PciCLS = 0x0C, /* cache line size */
+ PciLTR = 0x0D, /* latency timer */
+ PciHDT = 0x0E, /* header type */
+ PciBST = 0x0F, /* BIST */
+
+ PciBAR0 = 0x10, /* base address */
+ PciBAR1 = 0x14,
+
+ PciINTL = 0x3C, /* interrupt line */
+ PciINTP = 0x3D, /* interrupt pin */
+};
+
+/* ccrb (base class code) values; controller types */
+enum {
+ Pcibcpci1 = 0, /* pci 1.0; no class codes defined */
+ Pcibcstore = 1, /* mass storage */
+ Pcibcnet = 2, /* network */
+ Pcibcdisp = 3, /* display */
+ Pcibcmmedia = 4, /* multimedia */
+ Pcibcmem = 5, /* memory */
+ Pcibcbridge = 6, /* bridge */
+ Pcibccomm = 7, /* simple comms (e.g., serial) */
+ Pcibcbasesys = 8, /* base system */
+ Pcibcinput = 9, /* input */
+ Pcibcdock = 0xa, /* docking stations */
+ Pcibcproc = 0xb, /* processors */
+ Pcibcserial = 0xc, /* serial bus (e.g., USB) */
+ Pcibcwireless = 0xd, /* wireless */
+ Pcibcintell = 0xe, /* intelligent i/o */
+ Pcibcsatcom = 0xf, /* satellite comms */
+ Pcibccrypto = 0x10, /* encryption/decryption */
+ Pcibcdacq = 0x11, /* data acquisition & signal proc. */
+};
+
+/* ccru (sub-class code) values; common cases only */
+enum {
+ /* mass storage */
+ Pciscscsi = 0, /* SCSI */
+ Pciscide = 1, /* IDE (ATA) */
+ Pciscsata = 6, /* SATA */
+
+ /* network */
+ Pciscether = 0, /* Ethernet */
+
+ /* display */
+ Pciscvga = 0, /* VGA */
+ Pciscxga = 1, /* XGA */
+ Pcisc3d = 2, /* 3D */
+
+ /* bridges */
+ Pcischostpci = 0, /* host/pci */
+ Pciscpcicpci = 1, /* pci/pci */
+
+ /* simple comms */
+ Pciscserial = 0, /* 16450, etc. */
+ Pciscmultiser = 1, /* multiport serial */
+
+ /* serial bus */
+ Pciscusb = 3, /* USB */
+};
+
+enum { /* type 0 pre-defined header */
+ PciCIS = 0x28, /* cardbus CIS pointer */
+ PciSVID = 0x2C, /* subsystem vendor ID */
+	PciSID		= 0x2E,		/* subsystem ID */
+ PciEBAR0 = 0x30, /* expansion ROM base address */
+ PciMGNT = 0x3E, /* burst period length */
+ PciMLT = 0x3F, /* maximum latency between bursts */
+};
+
+enum { /* type 1 pre-defined header */
+ PciPBN = 0x18, /* primary bus number */
+ PciSBN = 0x19, /* secondary bus number */
+ PciUBN = 0x1A, /* subordinate bus number */
+ PciSLTR = 0x1B, /* secondary latency timer */
+ PciIBR = 0x1C, /* I/O base */
+ PciILR = 0x1D, /* I/O limit */
+ PciSPSR = 0x1E, /* secondary status */
+ PciMBR = 0x20, /* memory base */
+ PciMLR = 0x22, /* memory limit */
+ PciPMBR = 0x24, /* prefetchable memory base */
+ PciPMLR = 0x26, /* prefetchable memory limit */
+ PciPUBR = 0x28, /* prefetchable base upper 32 bits */
+ PciPULR = 0x2C, /* prefetchable limit upper 32 bits */
+ PciIUBR = 0x30, /* I/O base upper 16 bits */
+ PciIULR = 0x32, /* I/O limit upper 16 bits */
+ PciEBAR1 = 0x28, /* expansion ROM base address */
+ PciBCR = 0x3E, /* bridge control register */
+};
+
+enum { /* type 2 pre-defined header */
+ PciCBExCA = 0x10,
+ PciCBSPSR = 0x16,
+ PciCBPBN = 0x18, /* primary bus number */
+ PciCBSBN = 0x19, /* secondary bus number */
+ PciCBUBN = 0x1A, /* subordinate bus number */
+ PciCBSLTR = 0x1B, /* secondary latency timer */
+ PciCBMBR0 = 0x1C,
+ PciCBMLR0 = 0x20,
+ PciCBMBR1 = 0x24,
+ PciCBMLR1 = 0x28,
+ PciCBIBR0 = 0x2C, /* I/O base */
+ PciCBILR0 = 0x30, /* I/O limit */
+ PciCBIBR1 = 0x34, /* I/O base */
+ PciCBILR1 = 0x38, /* I/O limit */
+ PciCBSVID = 0x40, /* subsystem vendor ID */
+ PciCBSID = 0x42, /* subsystem ID */
+ PciCBLMBAR = 0x44, /* legacy mode base address */
+};
+
+enum {
+ /* bar bits */
+ Barioaddr = 1<<0, /* vs. memory addr */
+ Barwidthshift = 1,
+ Barwidthmask = MASK(2),
+ Barwidth32 = 0,
+ Barwidth64 = 2,
+ Barprefetch = 1<<3,
+};
+
+struct Pcisiz
+{
+ Pcidev* dev;
+ int siz;
+ int bar;
+};
+
+struct Pcidev
+{
+ int tbdf; /* type+bus+device+function */
+ ushort vid; /* vendor ID */
+ ushort did; /* device ID */
+
+ ushort pcr;
+
+ uchar rid;
+ uchar ccrp;
+ uchar ccru;
+ uchar ccrb;
+ uchar cls;
+ uchar ltr;
+
+ struct {
+ ulong bar; /* base address */
+ int size;
+ } mem[6];
+
+ struct {
+ ulong bar;
+ int size;
+ } rom;
+ uchar intl; /* interrupt line */
+
+ Pcidev* list;
+ Pcidev* link; /* next device on this bno */
+
+ Pcidev* bridge; /* down a bus */
+
+ int pmrb; /* power management register block */
+};
+
+enum {
+ /* vendor ids */
+ Vatiamd = 0x1002,
+ Vintel = 0x8086,
+ Vjmicron= 0x197b,
+ Vmarvell= 0x1b4b,
+ Vmyricom= 0x14c1,
+ Vnvidia = 0x10de,
+ Vrealtek= 0x10ec,
+};
+
+#define PCIWINDOW 0
+#define PCIWADDR(va) (PADDR(va)+PCIWINDOW)
diff --git a/sys/src/9/teg2/l.s b/sys/src/9/teg2/l.s
new file mode 100644
index 000000000..182d3ca57
--- /dev/null
+++ b/sys/src/9/teg2/l.s
@@ -0,0 +1,873 @@
+/*
+ * tegra 2 SoC machine assist
+ * dual arm cortex-a9 processors
+ *
+ * ARM v7 arch. ref. man. §B1.3.3 says that we don't need barriers
+ * around writes to CPSR.
+ *
+ * LDREX/STREX use an exclusive monitor, which is part of the data cache unit
+ * for the L1 cache, so they won't work right if the L1 cache is disabled.
+ */
+
+#include "arm.s"
+
+#define LDREX(fp,t) WORD $(0xe<<28|0x01900f9f | (fp)<<16 | (t)<<12)
+/* `The order of operands is from left to right in dataflow order' - asm man */
+#define STREX(f,tp,r) WORD $(0xe<<28|0x01800f90 | (tp)<<16 | (r)<<12 | (f)<<0)
+
+#define MAXMB (KiB-1) /* last MB has vectors */
+#define TMPSTACK (DRAMSIZE - 64*MiB) /* used only during cpu startup */
+/* tas/cas strex debugging limits; started at 10000 */
+#define MAXSC 100000
+
+GLOBL testmem(SB), $4
+
+/*
+ * Entered here from Das U-Boot or another Plan 9 kernel with MMU disabled.
+ * Until the MMU is enabled it is OK to call functions provided
+ * they are within ±32MiB relative and do not require any
+ * local variables or more than one argument (i.e. there is
+ * no stack).
+ */
+TEXT _start(SB), 1, $-4
+ CPSMODE(PsrMsvc)
+ CPSID /* interrupts off */
+ CPSAE
+ SETEND(0) /* little-endian */
+ BARRIERS
+ CLREX
+ SETZSB
+
+ MOVW CPSR, R0
+ ORR $PsrDfiq, R0
+ MOVW R0, CPSR
+
+ /* invalidate i-cache and branch-target cache */
+ MTCP CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+ BARRIERS
+
+ /* put cpus other than 0 to sleep until cpu 0 is ready */
+ CPUID(R1)
+ BEQ cpuinit
+
+ /* not cpu 0 */
+PUTC('Z')
+PUTC('Z')
+ BARRIERS
+dowfi:
+ WFI
+ MOVW cpus_proceed(SB), R1
+ CMP $0, R1
+ BEQ dowfi
+ BL cpureset(SB)
+ B dowfi
+
+cpuinit:
+ DELAY(printloopret, 1)
+PUTC('\r')
+ DELAY(printloopnl, 1)
+PUTC('\n')
+
+ DELAY(printloops, 1)
+PUTC('P')
+ /* disable the PL310 L2 cache on cpu0 */
+ MOVW $(PHYSL2BAG+0x100), R1
+ MOVW $0, R2
+ MOVW R2, (R1)
+ BARRIERS
+ /* invalidate it */
+ MOVW $((1<<16)-1), R2
+ MOVW R2, 0x77c(R1)
+ BARRIERS
+
+ /*
+ * disable my MMU & caches
+ */
+ MFCP CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
+ ORR $CpCsbo, R1
+ BIC $(CpCsbz|CpCmmu|CpCdcache|CpCicache|CpCpredict), R1
+ MTCP CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
+ BARRIERS
+
+ /* cortex-a9 model-specific initial configuration */
+ MOVW $0, R1
+ MTCP CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
+ BARRIERS
+
+PUTC('l')
+ DELAY(printloop3, 1)
+
+ MOVW $testmem-KZERO(SB), R0
+ BL memdiag(SB)
+
+PUTC('a')
+ /* clear Mach for cpu 0 */
+ MOVW $PADDR(MACHADDR), R4 /* address of Mach for cpu 0 */
+ MOVW $0, R0
+_machZ:
+ MOVW R0, (R4)
+ ADD $4, R4
+ CMP.S $PADDR(L1+L1X(0)), R4 /* end at top-level page table */
+ BNE _machZ
+
+ /*
+ * set up the MMU page table for cpu 0
+ */
+
+PUTC('n')
+ /* clear all PTEs first, to provide a default */
+// MOVW $PADDR(L1+L1X(0)), R4 /* address of PTE for 0 */
+_ptenv0:
+ ZEROPTE()
+ CMP.S $PADDR(L1+16*KiB), R4
+ BNE _ptenv0
+
+ DELAY(printloop4, 2)
+PUTC(' ')
+ /*
+ * set up double map of PHYSDRAM, KZERO to PHYSDRAM for first few MBs,
+ * but only if KZERO and PHYSDRAM differ.
+ */
+ MOVW $PTEDRAM, R2 /* PTE bits */
+ MOVW $PHYSDRAM, R3 /* pa */
+ CMP $KZERO, R3
+ BEQ no2map
+ MOVW $PADDR(L1+L1X(PHYSDRAM)), R4 /* address of PTE for PHYSDRAM */
+ MOVW $DOUBLEMAPMBS, R5
+_ptdbl:
+ FILLPTE()
+ SUB.S $1, R5
+ BNE _ptdbl
+no2map:
+
+ /*
+ * back up and fill in PTEs for memory at KZERO.
+ * trimslice has 1 bank of 1GB at PHYSDRAM.
+ * Map the maximum.
+ */
+PUTC('9')
+ MOVW $PTEDRAM, R2 /* PTE bits */
+ MOVW $PHYSDRAM, R3
+ MOVW $PADDR(L1+L1X(KZERO)), R4 /* start with PTE for KZERO */
+ MOVW $MAXMB, R5 /* inner loop count (MBs) */
+_ptekrw: /* set PTEs */
+ FILLPTE()
+ SUB.S $1, R5 /* decrement inner loop count */
+ BNE _ptekrw
+
+ /*
+ * back up and fill in PTEs for MMIO
+ */
+PUTC(' ')
+ MOVW $PTEIO, R2 /* PTE bits */
+ MOVW $PHYSIO, R3
+ MOVW $PADDR(L1+L1X(VIRTIO)), R4 /* start with PTE for VIRTIO */
+_ptenv2:
+ FILLPTE()
+ CMP.S $PADDR(L1+L1X(PHYSIOEND)), R4
+ BNE _ptenv2
+
+ /* mmu.c sets up the trap vectors later */
+
+ MOVW $(PHYSDRAM | TMPSTACK), SP
+
+ /*
+ * learn l1 cache characteristics (on cpu 0 only).
+ */
+
+ MOVW $(1-1), R0 /* l1 */
+ SLL $1, R0 /* R0 = (cache - 1) << 1 */
+ MTCP CpSC, CpIDcssel, R0, C(CpID), C(CpIDid), 0 /* select l1 cache */
+ BARRIERS
+ MFCP CpSC, CpIDcsize, R0, C(CpID), C(CpIDid), 0 /* get sets & ways */
+ MOVW $CACHECONF, R8
+
+ /* get log2linelen into l1setsh */
+ MOVW R0, R1
+ AND $3, R1
+ ADD $4, R1
+ /* l1 & l2 must have same cache line size, thus same set shift */
+ MOVW R1, 4(R8) /* +4 = l1setsh */
+ MOVW R1, 12(R8) /* +12 = l2setsh */
+
+ /* get nways in R1 */
+ SRA $3, R0, R1
+ AND $((1<<10)-1), R1
+ ADD $1, R1
+
+ /* get log2(nways) in R2 (assume nways is 2^n) */
+ MOVW $(BI2BY*BY2WD - 1), R2
+ CLZ(1, 1)
+ SUB.S R1, R2 /* R2 = 31 - clz(nways) */
+ ADD.EQ $1, R2
+// MOVW R2, R3 /* print log2(nways): 2 */
+
+ MOVW $32, R1
+ SUB R2, R1 /* R1 = 32 - log2(nways) */
+ MOVW R1, 0(R8) /* +0 = l1waysh */
+
+ BARRIERS
+
+ MOVW $testmem-KZERO(SB), R0
+ BL memdiag(SB)
+
+ /*
+ * the mpcore manual says invalidate d-cache, scu, pl310 in that order,
+ * but says nothing about when to disable them.
+ *
+ * invalidate my caches before enabling
+ */
+ BL cachedinv(SB)
+ MTCP CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+ BARRIERS
+
+PUTC('f')
+ /*
+ * the mpcore manual says enable scu, d-cache, pl310, smp mode
+ * in that order. we have to reverse the last two; see main().
+ */
+ BL scuon(SB)
+
+ /*
+ * turn my L1 cache on; need it for tas below.
+ */
+ MFCP CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
+ ORR $(CpCdcache|CpCicache|CpCalign|CpCpredict), R1
+ MTCP CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
+ BARRIERS
+
+ /* cortex-a9 model-specific configuration */
+ MOVW $CpACl1pref, R1
+ MTCP CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
+ BARRIERS
+
+ /* we're supposed to wait until l1 & l2 are on before calling smpon */
+
+PUTC('r')
+ /* set the domain access control */
+ MOVW $Client, R0
+ BL dacput(SB)
+
+ DELAY(printloop5, 2)
+PUTC('o')
+ BL mmuinvalidate(SB)
+
+ MOVW $0, R0
+ BL pidput(SB)
+
+ /* set the translation table base */
+ MOVW $PADDR(L1), R0
+ BL ttbput(SB)
+
+PUTC('m')
+ /*
+ * the little dance to turn the MMU on
+ */
+ BL cacheuwbinv(SB)
+ BL mmuinvalidate(SB)
+ BL mmuenable(SB)
+
+PUTC(' ')
+ /* warp the PC into the virtual map */
+ MOVW $KZERO, R0
+ BL _r15warp(SB)
+ /*
+ * cpu 0 is now running at KZERO+something!
+ */
+
+ BARRIERS
+ MOVW $setR12(SB), R12 /* reload kernel SB */
+ MOVW $(KZERO | TMPSTACK), SP
+
+ BL cacheuwbinv(SB)
+
+PUTC('B')
+ MOVW $PHYSDRAM, R3 /* pa */
+ CMP $KZERO, R3
+ BEQ no2unmap
+ /* undo double map of PHYSDRAM, KZERO & first few MBs */
+ MOVW $(L1+L1X(PHYSDRAM)), R4 /* addr. of PTE for PHYSDRAM */
+ MOVW $0, R0
+ MOVW $DOUBLEMAPMBS, R5
+_ptudbl:
+ ZEROPTE()
+ SUB.S $1, R5
+ BNE _ptudbl
+no2unmap:
+
+ BL cachedwb(SB)
+ BL mmuinvalidate(SB)
+
+ /*
+ * call main in C
+ * pass Mach to main and set up the stack in it
+ */
+ MOVW $MACHADDR, R0 /* cpu 0 Mach */
+ MOVW R0, R(MACH) /* m = MACHADDR */
+ ADD $(MACHSIZE-4), R0, SP /* leave space for link register */
+PUTC('e')
+ BL main(SB) /* main(m) */
+limbo:
+ BL idlehands(SB)
+ B limbo
+
+ BL _div(SB) /* hack to load _div, etc. */
+
+
+/*
+ * called on cpu(s) other than 0, to start them, from _vrst
+ * (reset vector) in lexception.s, with interrupts disabled
+ * and in SVC mode, running in the zero segment (pc is in lower 256MB).
+ * SB is set for the zero segment.
+ */
+TEXT cpureset(SB), 1, $-4
+ CLREX
+ MOVW CPSR, R0
+ ORR $PsrDfiq, R0
+ MOVW R0, CPSR
+
+ MOVW $(PHYSDRAM | TMPSTACK), SP /* stack for cache ops */
+
+ /* paranoia: turn my mmu and caches off. */
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ ORR $CpCsbo, R0
+ BIC $(CpCsbz|CpCmmu|CpCdcache|CpCicache|CpCpredict), R0
+ MTCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ BARRIERS
+
+ /* cortex-a9 model-specific initial configuration */
+ MOVW $0, R1
+ MTCP CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
+ ISB
+
+ /* invalidate my caches before enabling */
+ BL cachedinv(SB)
+ MTCP CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+ BARRIERS
+
+ /*
+ * turn my L1 cache on; need it (and mmu) for tas below.
+ * need branch prediction to make delay() timing right.
+ */
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ ORR $(CpCdcache|CpCicache|CpCalign|CpCpredict), R0
+ MTCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ BARRIERS
+
+ /* enable l1 caches coherency, at minimum for ldrex/strex. */
+ BL smpon(SB)
+ BARRIERS
+
+ /*
+ * we used to write to PHYSEVP here; now we do it in C, which offers
+ * more assurance that we're up and won't go off the rails.
+ */
+
+ /* set the domain access control */
+ MOVW $Client, R0
+ BL dacput(SB)
+
+ BL setmach(SB)
+
+ /*
+ * redo double map of PHYSDRAM, KZERO in this cpu's ptes.
+ * mmuinit will undo this later.
+ */
+
+ MOVW $PHYSDRAM, R3
+ CMP $KZERO, R3
+ BEQ noun2map
+
+ /* launchinit set m->mmul1 to a copy of cpu0's l1 page table */
+ MOVW 12(R(MACH)), R0 /* m->mmul1 (virtual addr) */
+ BL k2paddr(SB) /* R0 = PADDR(m->mmul1) */
+ ADD $L1X(PHYSDRAM), R0, R4 /* R4 = address of PHYSDRAM's PTE */
+
+ MOVW $PTEDRAM, R2 /* PTE bits */
+ MOVW $DOUBLEMAPMBS, R5
+_ptrdbl:
+ ORR R3, R2, R1 /* first identity-map 0 to 0, etc. */
+ MOVW R1, (R4)
+ ADD $4, R4 /* bump PTE address */
+ ADD $MiB, R3 /* bump pa */
+ SUB.S $1, R5
+ BNE _ptrdbl
+noun2map:
+
+ MOVW $0, R0
+ BL pidput(SB)
+
+ /* set the translation table base to PADDR(m->mmul1) */
+ MOVW 12(R(MACH)), R0 /* m->mmul1 */
+ BL k2paddr(SB) /* R0 = PADDR(m->mmul1) */
+ BL ttbput(SB)
+
+ /*
+ * the little dance to turn the MMU on
+ */
+ BL cacheuwbinv(SB)
+ BL mmuinvalidate(SB)
+ BL mmuenable(SB)
+
+ /*
+ * mmu is now on, with l1 pt at m->mmul1.
+ */
+
+ /* warp the PC into the virtual map */
+ MOVW $KZERO, R0
+ BL _r15warp(SB)
+
+ /*
+ * now running at KZERO+something!
+ */
+
+ BARRIERS
+ MOVW $setR12(SB), R12 /* reload kernel's SB */
+ MOVW $(KZERO | TMPSTACK), SP /* stack for cache ops*/
+ BL setmach(SB)
+ ADD $(MACHSIZE-4), R(MACH), SP /* leave space for link register */
+ BL cpustart(SB)
+
+
+/*
+ * converts virtual address in R0 to a physical address.
+ */
+TEXT k2paddr(SB), 1, $-4
+ BIC $KSEGM, R0
+ ADD $PHYSDRAM, R0
+ RET
+
+/*
+ * converts physical address in R0 to a virtual address.
+ */
+TEXT p2kaddr(SB), 1, $-4
+ BIC $KSEGM, R0
+ ORR $KZERO, R0
+ RET
+
+/*
+ * converts address in R0 to the current segment, as defined by the PC.
+ * clobbers R1.
+ */
+TEXT addr2pcseg(SB), 1, $-4
+ BIC $KSEGM, R0
+ MOVW PC, R1
+ AND $KSEGM, R1 /* segment PC is in */
+ ORR R1, R0
+ RET
+
+/* sets R(MACH), preserves other registers */
+TEXT setmach(SB), 1, $-4
+ MOVM.DB.W [R14], (R13)
+ MOVM.DB.W [R0-R2], (R13)
+
+ CPUID(R2)
+ SLL $2, R2 /* convert to word index */
+
+ MOVW $machaddr(SB), R0
+ BL addr2pcseg(SB)
+ ADD R2, R0 /* R0 = &machaddr[cpuid] */
+ MOVW (R0), R0 /* R0 = machaddr[cpuid] */
+ CMP $0, R0
+ MOVW.EQ $MACHADDR, R0 /* paranoia: use MACHADDR if 0 */
+ BL addr2pcseg(SB)
+ MOVW R0, R(MACH) /* m = machaddr[cpuid] */
+
+ MOVM.IA.W (R13), [R0-R2]
+ MOVM.IA.W (R13), [R14]
+ RET
+
+
+/*
+ * memory diagnostic
+ * tests word at (R0); modifies R7 and R8
+ */
+TEXT memdiag(SB), 1, $-4
+ MOVW $0xabcdef89, R7
+ MOVW R7, (R0)
+ MOVW (R0), R8
+ CMP R7, R8
+ BNE mbuggery /* broken memory */
+
+ BARRIERS
+ MOVW (R0), R8
+ CMP R7, R8
+ BNE mbuggery /* broken memory */
+
+ MOVW $0, R7
+ MOVW R7, (R0)
+ BARRIERS
+ RET
+
+/* modifies R0, R3—R6 */
+TEXT printhex(SB), 1, $-4
+ MOVW R0, R3
+ PUTC('0')
+ PUTC('x')
+ MOVW $(32-4), R5 /* bits to shift right */
+nextdig:
+ SRA R5, R3, R4
+ AND $0xf, R4
+ ADD $'0', R4
+ CMP.S $'9', R4
+ BLE nothex /* if R4 <= 9, jump */
+ ADD $('a'-('9'+1)), R4
+nothex:
+ PUTC(R4)
+ SUB.S $4, R5
+ BGE nextdig
+
+ PUTC('\r')
+ PUTC('\n')
+ DELAY(proct, 50)
+ RET
+
+mbuggery:
+ PUTC('?')
+ PUTC('m')
+mtopanic:
+ MOVW $membmsg(SB), R0
+ MOVW R14, R1 /* get R14's segment ... */
+ AND $KSEGM, R1
+ BIC $KSEGM, R0 /* strip segment from address */
+ ORR R1, R0 /* combine them */
+ BL panic(SB)
+mbugloop:
+ WFI
+ B mbugloop
+
+ DATA membmsg+0(SB)/8,$"memory b"
+ DATA membmsg+8(SB)/6,$"roken\z"
+ GLOBL membmsg(SB), $14
+
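+/*
+ * shift the return address and stack pointer into the address segment
+ * given in R0 (e.g., KZERO), so that the caller resumes in that segment.
+ */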
+TEXT _r15warp(SB), 1, $-4
+ BIC $KSEGM, R14 /* link reg, will become PC */
+ ORR R0, R14
+ BIC $KSEGM, SP
+ ORR R0, SP
+ RET
+
+/*
+ * `single-element' cache operations.
+ * in arm arch v7, they operate on all architected cache levels, so separate
+ * l2 functions are usually unnecessary.
+ */
+
+TEXT cachedwbse(SB), $-4 /* D writeback SE */
+ MOVW R0, R2
+
+ MOVW CPSR, R3
+ CPSID /* splhi */
+
+ BARRIERS /* force outstanding stores to cache */
+ MOVW R2, R0
+ MOVW 4(FP), R1
+ ADD R0, R1 /* R1 is end address */
+ BIC $(CACHELINESZ-1), R0 /* cache line start */
+_dwbse:
+ MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEse
+ ADD $CACHELINESZ, R0
+ CMP.S R0, R1
+ BGT _dwbse
+ B _wait
+
+TEXT cachedwbinvse(SB), $-4 /* D writeback+invalidate SE */
+ MOVW R0, R2
+
+ MOVW CPSR, R3
+ CPSID /* splhi */
+
+ BARRIERS /* force outstanding stores to cache */
+ MOVW R2, R0
+ MOVW 4(FP), R1
+ ADD R0, R1 /* R1 is end address */
+ BIC $(CACHELINESZ-1), R0 /* cache line start */
+_dwbinvse:
+ MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEse
+ ADD $CACHELINESZ, R0
+ CMP.S R0, R1
+ BGT _dwbinvse
+_wait: /* drain write buffer */
+ BARRIERS
+
+ MOVW R3, CPSR /* splx */
+ RET
+
+TEXT cachedinvse(SB), $-4 /* D invalidate SE */
+ MOVW R0, R2
+
+ MOVW CPSR, R3
+ CPSID /* splhi */
+
+ BARRIERS /* force outstanding stores to cache */
+ MOVW R2, R0
+ MOVW 4(FP), R1
+ ADD R0, R1 /* R1 is end address */
+
+ /*
+ * if start & end addresses are not on cache-line boundaries,
+ * flush first & last cache lines before invalidating.
+ */
+ AND.S $(CACHELINESZ-1), R0, R4
+ BEQ stok
+ BIC $(CACHELINESZ-1), R0, R4 /* cache line start */
+ MTCP CpSC, 0, R4, C(CpCACHE), C(CpCACHEwb), CpCACHEse
+stok:
+ AND.S $(CACHELINESZ-1), R1, R4
+ BEQ endok
+ BIC $(CACHELINESZ-1), R1, R4 /* cache line start */
+ MTCP CpSC, 0, R4, C(CpCACHE), C(CpCACHEwb), CpCACHEse
+endok:
+ BIC $(CACHELINESZ-1), R0 /* cache line start */
+_dinvse:
+ MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEse
+ ADD $CACHELINESZ, R0
+ CMP.S R0, R1
+ BGT _dinvse
+ B _wait
+
+/*
+ * enable mmu and high vectors
+ */
+TEXT mmuenable(SB), 1, $-4
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ ORR $CpCmmu, R0
+ MTCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ BARRIERS
+ RET
+
+TEXT mmudisable(SB), 1, $-4
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ BIC $CpCmmu, R0
+ MTCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ BARRIERS
+ RET
+
+/*
+ * If one of these MCR instructions crashes or hangs the machine,
+ * check your Level 1 page table (at TTB) closely.
+ */
+TEXT mmuinvalidate(SB), $-4 /* invalidate all */
+ MOVW CPSR, R2
+ CPSID /* interrupts off */
+ BARRIERS
+ MTCP CpSC, 0, PC, C(CpTLB), C(CpTLBinvu), CpTLBinv
+ BARRIERS
+ MOVW R2, CPSR /* interrupts restored */
+ RET
+
+TEXT mmuinvalidateaddr(SB), $-4 /* invalidate single entry */
+ MTCP CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse
+ BARRIERS
+ RET
+
+TEXT cpidget(SB), 1, $-4 /* main ID */
+ MFCP CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDid
+ RET
+
+TEXT cpctget(SB), 1, $-4 /* cache type */
+ MFCP CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDct
+ RET
+
+TEXT controlget(SB), 1, $-4 /* system control (sctlr) */
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+ RET
+
+TEXT ttbget(SB), 1, $-4 /* translation table base */
+ MFCP CpSC, 0, R0, C(CpTTB), C(0), CpTTB0
+ RET
+
+TEXT ttbput(SB), 1, $-4 /* translation table base */
+ MOVW CPSR, R2
+ CPSID
+ MOVW R0, R1
+ BARRIERS /* finish prior accesses before changing ttb */
+ MTCP CpSC, 0, R1, C(CpTTB), C(0), CpTTB0
+ MTCP CpSC, 0, R1, C(CpTTB), C(0), CpTTB1 /* non-secure too */
+ MOVW $0, R0
+ MTCP CpSC, 0, R0, C(CpTTB), C(0), CpTTBctl
+ BARRIERS
+ MOVW R2, CPSR
+ RET
+
+TEXT dacget(SB), 1, $-4 /* domain access control */
+ MFCP CpSC, 0, R0, C(CpDAC), C(0)
+ RET
+
+TEXT dacput(SB), 1, $-4 /* domain access control */
+ MOVW R0, R1
+ BARRIERS
+ MTCP CpSC, 0, R1, C(CpDAC), C(0)
+ ISB
+ RET
+
+TEXT fsrget(SB), 1, $-4 /* fault status */
+ MFCP CpSC, 0, R0, C(CpFSR), C(0), CpDFSR
+ RET
+
+TEXT farget(SB), 1, $-4 /* fault address */
+ MFCP CpSC, 0, R0, C(CpFAR), C(0), CpDFAR
+ RET
+
+TEXT getpsr(SB), 1, $-4
+ MOVW CPSR, R0
+ RET
+
+TEXT getscr(SB), 1, $-4 /* secure configuration */
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(CpCONTROLscr), CpSCRscr
+ RET
+
+TEXT pidget(SB), 1, $-4 /* address translation pid */
+ MFCP CpSC, 0, R0, C(CpPID), C(0x0)
+ RET
+
+TEXT pidput(SB), 1, $-4 /* address translation pid */
+ MTCP CpSC, 0, R0, C(CpPID), C(0), 0 /* pid, v7a deprecated */
+ MTCP CpSC, 0, R0, C(CpPID), C(0), 1 /* context id, errata 754322 */
+ ISB
+ RET
+
+/*
+ * access to yet more coprocessor registers
+ */
+
+TEXT getauxctl(SB), 1, $-4 /* get cortex-a9 aux. ctl. */
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(0), CpAuxctl
+ RET
+
+TEXT putauxctl(SB), 1, $-4 /* put cortex-a9 aux. ctl. */
+ BARRIERS
+ MTCP CpSC, 0, R0, C(CpCONTROL), C(0), CpAuxctl
+ BARRIERS
+ RET
+
+TEXT getclvlid(SB), 1, $-4
+ MFCP CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), CpIDclvlid
+ RET
+
+TEXT getcyc(SB), 1, $-4
+ MFCP CpSC, 0, R0, C(CpCLD), C(CpCLDcyc), 0
+ RET
+
+TEXT getdebug(SB), 1, $-4 /* get cortex-a9 debug enable register */
+ MFCP CpSC, 0, R0, C(1), C(1), 1
+ RET
+
+TEXT getpc(SB), 1, $-4
+ MOVW PC, R0
+ RET
+
+TEXT getsb(SB), 1, $-4
+ MOVW R12, R0
+ RET
+
+TEXT setsp(SB), 1, $-4
+ MOVW R0, SP
+ RET
+
+
+TEXT splhi(SB), 1, $-4
+ MOVW CPSR, R0 /* return old CPSR */
+ CPSID /* turn off interrupts */
+ CMP.S $0, R(MACH)
+ MOVW.NE R14, 4(R(MACH)) /* save caller pc in m->splpc */
+ RET
+
+TEXT spllo(SB), 1, $-4 /* start marker for devkprof.c */
+ MOVW CPSR, R0 /* return old CPSR */
+ MOVW $0, R1
+ CMP.S R1, R(MACH)
+ MOVW.NE R1, 4(R(MACH)) /* clear m->splpc */
+ CPSIE
+ RET
+
+TEXT splx(SB), 1, $-4
+ MOVW CPSR, R3 /* must return old CPSR */
+ CPSID
+
+ CMP.S $0, R(MACH)
+ MOVW.NE R14, 4(R(MACH)) /* save caller pc in m->splpc */
+ MOVW R0, CPSR /* reset interrupt level */
+ MOVW R3, R0 /* must return old CPSR */
+ RET
+
+TEXT spldone(SB), 1, $0 /* end marker for devkprof.c */
+ RET
+
+TEXT islo(SB), 1, $-4
+ MOVW CPSR, R0
+ AND $(PsrDirq), R0
+ EOR $(PsrDirq), R0
+ RET
+
+TEXT clz(SB), $-4
+ CLZ(0, 0) /* 0 is R0 */
+ RET
+
+TEXT setlabel(SB), 1, $-4
+ MOVW SP, 0(R0)
+ MOVW R14, 4(R0) /* pc */
+ MOVW $0, R0
+ RET
+
+TEXT gotolabel(SB), 1, $-4
+ MOVW 0(R0), SP
+ MOVW 4(R0), R14 /* pc */
+ MOVW $1, R0
+ RET
+
+TEXT getcallerpc(SB), 1, $-4
+ MOVW 0(SP), R0
+ RET
+
+TEXT wfi(SB), $-4
+ MOVW CPSR, R1
+ /*
+ * an interrupt should break us out of wfi. masking interrupts
+ * slows interrupt response slightly but prevents recursion.
+ */
+// CPSIE
+ CPSID
+
+ BARRIERS
+ WFI
+
+ MOVW R1, CPSR
+ RET
+
+TEXT coherence(SB), $-4
+ BARRIERS
+ RET
+
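+/* set non-zero by cpu0 to release the other cpus from their wfi loop in _start */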
+GLOBL cpus_proceed+0(SB), $4
+
+#include "cache.v7.s"
+
+TEXT tas(SB), $-4 /* _tas(ulong *) */
+ /* returns old (R0) after modifying (R0) */
+ MOVW R0,R5
+ DMB
+
+ MOVW $1,R2 /* new value of (R0) */
+ MOVW $MAXSC, R8
+tas1:
+ LDREX(5,7) /* LDREX 0(R5),R7 */
+ CMP.S $0, R7 /* old value non-zero (lock taken)? */
+ BNE lockbusy /* we lose */
+ SUB.S $1, R8
+ BEQ lockloop2
+ STREX(2,5,4) /* STREX R2,(R5),R4 */
+ CMP.S $0, R4
+ BNE tas1 /* strex failed? try again */
+ DMB
+ B tas0
+lockloop2:
+ PUTC('?')
+ PUTC('l')
+ PUTC('t')
+ BL abort(SB)
+lockbusy:
+ CLREX
+tas0:
+ MOVW R7, R0 /* return old value */
+ RET
diff --git a/sys/src/9/teg2/lexception.s b/sys/src/9/teg2/lexception.s
new file mode 100644
index 000000000..2fbcfade9
--- /dev/null
+++ b/sys/src/9/teg2/lexception.s
@@ -0,0 +1,325 @@
+/*
+ * arm exception handlers
+ */
+#include "arm.s"
+
+#undef B /* B is for 'botch' */
+
+/*
+ * exception vectors, copied by trapinit() to somewhere useful
+ */
+TEXT vectors(SB), 1, $-4
+ MOVW 0x18(R15), R15 /* reset */
+ MOVW 0x18(R15), R15 /* undefined instr. */
+ MOVW 0x18(R15), R15 /* SWI & SMC */
+ MOVW 0x18(R15), R15 /* prefetch abort */
+ MOVW 0x18(R15), R15 /* data abort */
+ MOVW 0x18(R15), R15 /* hypervisor call */
+ MOVW 0x18(R15), R15 /* IRQ */
+ MOVW 0x18(R15), R15 /* FIQ */
+
+TEXT vtable(SB), 1, $-4
+ WORD $_vrst-KZERO(SB) /* reset, in svc mode already */
+ WORD $_vund(SB) /* undefined, switch to svc mode */
+ WORD $_vsvc(SB) /* swi, in svc mode already */
+ WORD $_vpabt(SB) /* prefetch abort, switch to svc mode */
+ WORD $_vdabt(SB) /* data abort, switch to svc mode */
+ WORD $_vhype(SB) /* hypervisor call */
+ WORD $_virq(SB) /* IRQ, switch to svc mode */
+ WORD $_vfiq(SB) /* FIQ, switch to svc mode */
+
+/*
+ * reset - start additional cpus
+ */
+TEXT _vrst(SB), 1, $-4
+ /* running in the zero segment (pc is lower 256MB) */
+ CPSMODE(PsrMsvc) /* should be redundant */
+ CPSID
+ CPSAE
+ SETEND(0) /* force little-endian */
+ BARRIERS
+ SETZSB
+ MOVW $PsrMsvc, SPSR
+ MOVW $0, R14
+
+ /* invalidate i-cache and branch-target cache */
+ MTCP CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+ BARRIERS
+
+ BL cpureset(SB)
+spin:
+ B spin
+
+/*
+ * system call
+ */
+TEXT _vsvc(SB), 1, $-4 /* SWI */
+ CLREX
+ BARRIERS
+ /* stack is m->stack */
+ MOVW.W R14, -4(R13) /* ureg->pc = interrupted PC */
+ MOVW SPSR, R14 /* ureg->psr = SPSR */
+ MOVW.W R14, -4(R13) /* ... */
+ MOVW $PsrMsvc, R14 /* ureg->type = PsrMsvc */
+ MOVW.W R14, -4(R13) /* ... */
+
+ /* avoid the ambiguity described in notes/movm.w. */
+ MOVM.DB.S [R0-R14], (R13) /* save user level registers */
+ SUB $(NREGS*4), R13 /* r13 now points to ureg */
+
+ MOVW $setR12(SB), R12 /* Make sure we've got the kernel's SB loaded */
+
+ /*
+ * set up m and up registers since user registers could contain anything
+ */
+ CPUID(R1)
+ SLL $2, R1 /* convert to word index */
+ MOVW $machaddr(SB), R2
+ ADD R1, R2
+ MOVW (R2), R(MACH) /* m = machaddr[cpuid] */
+ CMP $0, R(MACH)
+	MOVW.EQ	$MACHADDR, R(MACH)	/* paranoia: use MACHADDR if 0 */
+ MOVW 8(R(MACH)), R(USER) /* up = m->proc */
+
+ MOVW ((NREGS+1)*4)(R13), R2 /* saved SPSR (user mode) */
+
+ MOVW R13, R0 /* first arg is pointer to ureg */
+ SUB $8, R13 /* space for argument+link */
+
+ BL syscall(SB)
+ /*
+ * caller saves on plan 9, so registers other than 9, 10, 13 & 14
+ * may have been trashed when we get here.
+ */
+
+ MOVW $setR12(SB), R12 /* reload kernel's SB */
+
+ ADD $(8+4*NREGS), R13 /* make r13 point to ureg->type */
+
+ MOVW 8(R13), R14 /* restore link */
+ MOVW 4(R13), R0 /* restore SPSR */
+/*
+ * return from user-mode exception.
+ * expects new SPSR in R0. R13 must point to ureg->type.
+ */
+_rfue:
+TEXT rfue(SB), 1, $-4
+ MOVW R0, SPSR /* ... */
+
+ /*
+ * order on stack is type, psr, pc, but RFEV7 needs pc, psr.
+ * step on type and previous word to hold temporary values.
+ * we could instead change the order in which psr & pc are pushed.
+ */
+ MOVW 4(R13), R1 /* psr */
+ MOVW 8(R13), R2 /* pc */
+ MOVW R2, 4(R13) /* pc */
+ MOVW R1, 8(R13) /* psr */
+
+ MOVM.DB.S (R13), [R0-R14] /* restore user registers */
+ ADD $4, R13 /* pop type, sp -> pc */
+ RFEV7W(13)
+
+
+TEXT _vund(SB), 1, $-4 /* undefined */
+ /* sp is m->sund */
+ MOVM.IA [R0-R4], (R13) /* free some working space */
+ MOVW $PsrMund, R0
+ B _vswitch
+
+TEXT _vpabt(SB), 1, $-4 /* prefetch abort */
+ /* sp is m->sabt */
+ MOVM.IA [R0-R4], (R13) /* free some working space */
+ MOVW $PsrMabt, R0 /* r0 = type */
+ B _vswitch
+
+TEXT _vdabt(SB), 1, $-4 /* data abort */
+ /* sp is m->sabt */
+ MOVM.IA [R0-R4], (R13) /* free some working space */
+ MOVW $(PsrMabt+1), R0 /* r0 = type */
+ B _vswitch
+
+TEXT _virq(SB), 1, $-4 /* IRQ */
+ /* sp is m->sirq */
+ MOVM.IA [R0-R4], (R13) /* free some working space */
+ MOVW $PsrMirq, R0 /* r0 = type */
+ B _vswitch
+
+ /*
+ * come here with type in R0 and R13 pointing above saved [r0-r4].
+ * we'll switch to SVC mode and then call trap.
+ */
+_vswitch:
+// TEXT _vswtch(SB), 1, $-4 /* make symbol visible to debuggers */
+ CLREX
+ BARRIERS
+ MOVW SPSR, R1 /* save SPSR for ureg */
+ /*
+ * R12 needs to be set before using PsrMbz, so BIGENDCHECK code has
+ * been moved below.
+ */
+ MOVW R14, R2 /* save interrupted pc for ureg */
+ MOVW R13, R3 /* save pointer to where the original [R0-R4] are */
+
+ /*
+ * switch processor to svc mode. this switches the banked registers
+ * (r13 [sp] and r14 [link]) to those of svc mode (so we must be sure
+ * to never get here already in svc mode).
+ */
+ CPSMODE(PsrMsvc) /* switch! */
+ CPSID
+
+ AND.S $0xf, R1, R4 /* interrupted code kernel or user? */
+ BEQ _userexcep
+
+ /*
+ * here for trap from SVC mode
+ */
+
+ /* push ureg->{type, psr, pc} onto Msvc stack.
+ * r13 points to ureg->type after.
+ */
+ MOVM.DB.W [R0-R2], (R13)
+ MOVM.IA (R3), [R0-R4] /* restore [R0-R4] from previous mode's stack */
+
+ /*
+ * avoid the ambiguity described in notes/movm.w.
+ * In order to get a predictable value in R13 after the stores,
+ * separate the store-multiple from the stack-pointer adjustment.
+ * We'll assume that the old value of R13 should be stored on the stack.
+ */
+ /* save kernel level registers, at end r13 points to ureg */
+ MOVM.DB [R0-R14], (R13)
+ SUB $(NREGS*4), R13 /* SP now points to saved R0 */
+
+ MOVW $setR12(SB), R12 /* Make sure we've got the kernel's SB loaded */
+ /* previous mode was svc, so the saved spsr should be sane. */
+ MOVW ((NREGS+1)*4)(R13), R1
+
+	MOVM.IA	(R13), [R0-R8]		/* restore a few registers */
+
+ MOVW R13, R0 /* first arg is pointer to ureg */
+ SUB $(4*2), R13 /* space for argument+link (for debugger) */
+ MOVW $0xdeaddead, R11 /* marker */
+
+ BL trap(SB) /* trap(ureg) */
+ /*
+ * caller saves on plan 9, so registers other than 9, 10, 13 & 14
+ * may have been trashed when we get here.
+ */
+
+ MOVW $setR12(SB), R12 /* reload kernel's SB */
+
+ ADD $(4*2+4*NREGS), R13 /* make r13 point to ureg->type */
+
+ /*
+ * if we interrupted a previous trap's handler and are now
+ * returning to it, we need to propagate the current R(MACH) (R10)
+ * by overriding the saved one on the stack, since we may have
+ * been rescheduled and be on a different processor now than
+ * at entry.
+ */
+ MOVW R(MACH), (-(NREGS-MACH)*4)(R13) /* restore current cpu's MACH */
+
+ MOVW 8(R13), R14 /* restore link */
+ MOVW 4(R13), R0 /* restore SPSR */
+
+ /* return from kernel-mode exception */
+ MOVW R0, SPSR /* ... */
+
+ /*
+ * order on stack is type, psr, pc, but RFEV7 needs pc, psr.
+ * step on type and previous word to hold temporary values.
+ * we could instead change the order in which psr & pc are pushed.
+ */
+ MOVW 4(R13), R1 /* psr */
+ MOVW 8(R13), R2 /* pc */
+ MOVW R2, 4(R13) /* pc */
+ MOVW R1, 8(R13) /* psr */
+
+ /* restore kernel regs other than SP; we're using it */
+ SUB $(NREGS*4), R13
+ MOVM.IA.W (R13), [R0-R12]
+ ADD $4, R13 /* skip saved kernel SP */
+ MOVM.IA.W (R13), [R14]
+ ADD $4, R13 /* pop type, sp -> pc */
+ BARRIERS
+ RFEV7W(13)
+
+ /*
+ * here for trap from USER mode
+ */
+_userexcep:
+ MOVM.DB.W [R0-R2], (R13) /* set ureg->{type, psr, pc}; r13 points to ureg->type */
+ MOVM.IA (R3), [R0-R4] /* restore [R0-R4] from previous mode's stack */
+
+ /* avoid the ambiguity described in notes/movm.w. */
+ MOVM.DB.S [R0-R14], (R13) /* save kernel level registers */
+ SUB $(NREGS*4), R13 /* r13 now points to ureg */
+
+ MOVW $setR12(SB), R12 /* Make sure we've got the kernel's SB loaded */
+
+ /*
+ * set up m and up registers since user registers could contain anything
+ */
+ CPUID(R1)
+ SLL $2, R1 /* convert to word index */
+ MOVW $machaddr(SB), R2
+ ADD R1, R2
+ MOVW (R2), R(MACH) /* m = machaddr[cpuid] */
+ CMP $0, R(MACH)
+	MOVW.EQ	$MACHADDR, R(MACH)	/* paranoia: use MACHADDR if 0 */
+ MOVW 8(R(MACH)), R(USER) /* up = m->proc */
+
+ MOVW ((NREGS+1)*4)(R13), R2 /* saved SPSR */
+
+ MOVW R13, R0 /* first arg is pointer to ureg */
+ SUB $(4*2), R13 /* space for argument+link (for debugger) */
+
+ BL trap(SB) /* trap(ureg) */
+ /*
+ * caller saves on plan 9, so registers other than 9, 10, 13 & 14
+ * may have been trashed when we get here.
+ */
+
+ ADD $(4*2+4*NREGS), R13 /* make r13 point to ureg->type */
+
+ MOVW 8(R13), R14 /* restore link */
+ MOVW 4(R13), R0 /* restore SPSR */
+
+ B _rfue
+
+
+TEXT _vfiq(SB), 1, $-4 /* FIQ */
+ PUTC('?')
+ PUTC('f')
+ PUTC('i')
+ PUTC('q')
+ RFE /* FIQ is special, ignore it for now */
+
+TEXT _vhype(SB), 1, $-4
+ PUTC('?')
+ PUTC('h')
+ PUTC('y')
+ PUTC('p')
+ RFE
+
+/*
+ * set the stack value for the mode passed in R0
+ */
+TEXT setr13(SB), 1, $-4
+ MOVW 4(FP), R1
+
+ MOVW CPSR, R2
+ BIC $(PsrMask|PsrMbz), R2, R3
+ ORR $(PsrDirq|PsrDfiq), R3
+ ORR R0, R3
+
+ MOVW R3, CPSR /* switch to new mode */
+
+ MOVW R13, R0 /* return old sp */
+ MOVW R1, R13 /* install new one */
+
+ MOVW R2, CPSR /* switch back to old mode */
+ RET
diff --git a/sys/src/9/teg2/lproc.s b/sys/src/9/teg2/lproc.s
new file mode 100644
index 000000000..a1ff29dcd
--- /dev/null
+++ b/sys/src/9/teg2/lproc.s
@@ -0,0 +1,38 @@
+#include "arm.s"
+
+/*
+ * This is the first jump from kernel to user mode.
+ * Fake a return from interrupt.
+ *
+ * Enter with R0 containing the user stack pointer.
+ * UTZERO + 0x20 is always the entry point.
+ *
+ */
+TEXT touser(SB), 1, $-4
+ /* store the user stack pointer into the USR_r13 */
+ MOVM.DB.W [R0], (R13)
+ /* avoid the ambiguity described in notes/movm.w. */
+ MOVM.S (R13), [R13]
+ ADD $4, R13 /* pop new user SP */
+
+ /* set up a PSR for user level */
+ MOVW $(PsrMusr), R0
+ MOVW R0, SPSR
+
+ /* push new user PSR */
+ MOVM.DB.W [R0], (R13)
+
+ /* push the new user PC on the stack */
+ MOVW $(UTZERO+0x20), R0
+ MOVM.DB.W [R0], (R13)
+
+ RFEV7W(13)
+
+/*
+ * here to jump to a newly forked process
+ */
+TEXT forkret(SB), 1, $-4
+ ADD $(4*NREGS), R13 /* make r13 point to ureg->type */
+ MOVW 8(R13), R14 /* restore link */
+ MOVW 4(R13), R0 /* restore SPSR */
+ B rfue(SB)
diff --git a/sys/src/9/teg2/main.c b/sys/src/9/teg2/main.c
new file mode 100644
index 000000000..b3bf81e17
--- /dev/null
+++ b/sys/src/9/teg2/main.c
@@ -0,0 +1,982 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#include "init.h"
+#include <pool.h>
+
+#include "arm.h"
+#include "reboot.h"
+
+/*
+ * Where configuration info is left for the loaded programme.
+ * This will turn into a structure as more is done by the boot loader
+ * (e.g. why parse the .ini file twice?).
+ * There are 3584 bytes available at CONFADDR.
+ */
+#define BOOTARGS ((char*)CONFADDR)
+#define BOOTARGSLEN (16*KiB) /* limit in devenv.c */
+#define MAXCONF 64
+#define MAXCONFLINE 160
+
+enum {
+ Minmem = 256*MB, /* conservative default */
+};
+
+#define isascii(c) ((uchar)(c) > 0 && (uchar)(c) < 0177)
+
+extern char bdata[], edata[], end[], etext[];
+
+uintptr kseg0 = KZERO;
+Mach* machaddr[MAXMACH];
+uchar *l2pages;
+
+Memcache cachel[8]; /* arm arch v7 supports 1-7 */
+/*
+ * these are used by the cache.v7.s routines.
+ */
+Lowmemcache *cacheconf;
+
+/*
+ * Option arguments from the command line.
+ * oargv[0] is the boot file.
+ * Optionsinit() is called from multiboot()
+ * or some other machine-dependent place
+ * to set it all up.
+ */
+static int oargc;
+static char* oargv[20];
+static char oargb[128];
+static int oargblen;
+static char oenv[4096];
+
+static uintptr sp; /* XXX - must go - user stack of init proc */
+
+int vflag;
+int normalprint;
+char debug[256];
+
+static Lock testlock;
+
+/* store plan9.ini contents here at least until we stash them in #ec */
+static char confname[MAXCONF][KNAMELEN];
+static char confval[MAXCONF][MAXCONFLINE];
+static int nconf;
+
+static int
+findconf(char *name)
+{
+ int i;
+
+ for(i = 0; i < nconf; i++)
+ if(cistrcmp(confname[i], name) == 0)
+ return i;
+ return -1;
+}
+
+char*
+getconf(char *name)
+{
+ int i;
+
+ i = findconf(name);
+ if(i >= 0)
+ return confval[i];
+ return nil;
+}
+
+void
+addconf(char *name, char *val)
+{
+ int i;
+
+ i = findconf(name);
+ if(i < 0){
+ if(val == nil || nconf >= MAXCONF)
+ return;
+ i = nconf++;
+ strecpy(confname[i], confname[i]+sizeof(confname[i]), name);
+ }
+// confval[i] = val;
+ strecpy(confval[i], confval[i]+sizeof(confval[i]), val);
+}
+
+static void
+writeconf(void)
+{
+ char *p, *q;
+ int n;
+
+ p = getconfenv();
+
+ if(waserror()) {
+ free(p);
+ nexterror();
+ }
+
+ /* convert to name=value\n format */
+ for(q=p; *q; q++) {
+ q += strlen(q);
+ *q = '=';
+ q += strlen(q);
+ *q = '\n';
+ }
+ n = q - p + 1;
+ if(n >= BOOTARGSLEN)
+ error("kernel configuration too large");
+ memmove(BOOTARGS, p, n);
+ memset(BOOTARGS + n, '\n', BOOTARGSLEN - n);
+ poperror();
+ free(p);
+}
+
+/*
+ * assumes that we have loaded our /cfg/pxe/mac file at CONFADDR
+ * (usually 0x1000) with tftp in u-boot. no longer uses malloc, so
+ * can be called early.
+ */
+static void
+plan9iniinit(void)
+{
+ char *k, *v, *next;
+
+ k = (char *)CONFADDR;
+ if(!isascii(*k))
+ return;
+
+ for(; k && *k != '\0'; k = next) {
+ if (!isascii(*k)) /* sanity check */
+ break;
+ next = strchr(k, '\n');
+ if (next)
+ *next++ = '\0';
+
+ if (*k == '\0' || *k == '\n' || *k == '#')
+ continue;
+ v = strchr(k, '=');
+ if(v == nil)
+ continue; /* mal-formed line */
+ *v++ = '\0';
+
+ addconf(k, v);
+ }
+}
+
+static void
+optionsinit(char* s)
+{
+ char *o;
+
+ strcpy(oenv, "");
+ o = strecpy(oargb, oargb+sizeof(oargb), s)+1;
+ if(getenv("bootargs", o, o - oargb) != nil)
+ *(o-1) = ' ';
+
+ oargblen = strlen(oargb);
+ oargc = tokenize(oargb, oargv, nelem(oargv)-1);
+ oargv[oargc] = nil;
+}
+
+char*
+getenv(char* name, char* buf, int n)
+{
+ char *e, *p, *q;
+
+ p = oenv;
+ while(*p != 0){
+ if((e = strchr(p, '=')) == nil)
+ break;
+ for(q = name; p < e; p++){
+ if(*p != *q)
+ break;
+ q++;
+ }
+ if(p == e && *q == 0){
+ strecpy(buf, buf+n, e+1);
+ return buf;
+ }
+ p += strlen(p)+1;
+ }
+
+ return nil;
+}
+
+/* enable scheduling of this cpu */
+void
+machon(uint cpu)
+{
+ ulong cpubit;
+
+ cpubit = 1 << cpu;
+ lock(&active);
+ if ((active.machs & cpubit) == 0) { /* currently off? */
+ conf.nmach++;
+ active.machs |= cpubit;
+ }
+ unlock(&active);
+}
+
+/* disable scheduling of this cpu */
+void
+machoff(uint cpu)
+{
+ ulong cpubit;
+
+ cpubit = 1 << cpu;
+ lock(&active);
+ if (active.machs & cpubit) { /* currently on? */
+ conf.nmach--;
+ active.machs &= ~cpubit;
+ }
+ unlock(&active);
+}
+
+void
+machinit(void)
+{
+ Mach *m0;
+
+ if (m == 0) {
+ serialputc('?');
+ serialputc('m');
+ serialputc('0');
+ }
+ if(machaddr[m->machno] != m) {
+ serialputc('?');
+ serialputc('m');
+ serialputc('m');
+ }
+
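+	/* mach0init holds testlock forever; if we can acquire it, tas() is broken */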
+ if (canlock(&testlock)) {
+ serialputc('?');
+ serialputc('l');
+ panic("cpu%d: locks don't work", m->machno);
+ }
+
+ m->ticks = 1;
+ m->perf.period = 1;
+ m0 = MACHP(0);
+ if (m->machno != 0) {
+ /* synchronise with cpu 0 */
+ m->ticks = m0->ticks;
+ m->fastclock = m0->fastclock;
+ m->cpuhz = m0->cpuhz;
+ m->delayloop = m0->delayloop;
+ }
+ if (m->machno != 0 &&
+ (m->fastclock == 0 || m->cpuhz == 0 || m->delayloop == 0))
+ panic("buggered cpu 0 Mach");
+
+ machon(m->machno);
+ fpoff();
+}
+
+/* l.s has already zeroed Mach, which now contains our stack. */
+void
+mach0init(void)
+{
+ if (m == 0) {
+ serialputc('?');
+ serialputc('m');
+ }
+ conf.nmach = 0;
+
+ m->machno = 0;
+ machaddr[0] = m;
+
+ lock(&testlock); /* hold this forever */
+ machinit();
+
+ active.exiting = 0;
+ l1cache->wbse(&active, sizeof active);
+ up = nil;
+}
+
+/*
+ * count CPU's, set up their mach structures and l1 ptes.
+ * we're running on cpu 0 and our data structures were
+ * statically allocated.
+ */
+void
+launchinit(void)
+{
+ int mach;
+ Mach *mm;
+ PTE *l1;
+
+ for(mach = 1; mach < MAXMACH; mach++){
+ machaddr[mach] = mm = mallocalign(MACHSIZE, MACHSIZE, 0, 0);
+ l1 = mallocalign(L1SIZE, L1SIZE, 0, 0);
+ if(mm == nil || l1 == nil)
+ panic("launchinit");
+ memset(mm, 0, MACHSIZE);
+ mm->machno = mach;
+
+ memmove(l1, (void *)L1, L1SIZE); /* clone cpu0's l1 table */
+ l1cache->wbse(l1, L1SIZE);
+
+ mm->mmul1 = l1;
+ l1cache->wbse(mm, MACHSIZE);
+ }
+ l1cache->wbse(machaddr, sizeof machaddr);
+ conf.nmach = 1;
+}
+
+void
+dump(void *vaddr, int words)
+{
+ ulong *addr;
+
+ addr = vaddr;
+ while (words-- > 0)
+ iprint("%.8lux%c", *addr++, words % 8 == 0? '\n': ' ');
+}
+
+static void
+cacheinit(void)
+{
+ allcacheinfo(cachel);
+ cacheconf = (Lowmemcache *)CACHECONF;
+ cacheconf->l1waysh = cachel[1].waysh;
+ cacheconf->l1setsh = cachel[1].setsh;
+ /* on the tegra 2, l2 is unarchitected */
+ cacheconf->l2waysh = cachel[2].waysh;
+ cacheconf->l2setsh = cachel[2].setsh;
+
+ l2pl310init();
+ allcacheson();
+ allcache->wb();
+}
+
+void
+l2pageinit(void)
+{
+ l2pages = KADDR(PHYSDRAM + DRAMSIZE - RESRVDHIMEM);
+}
+
+/*
+ * at entry, l.s has set m for cpu0 and printed "Plan 9 from Be"
+ * but has not zeroed bss.
+ */
+void
+main(void)
+{
+ int cpu;
+ static ulong vfy = 0xcafebabe;
+
+ up = nil;
+ if (vfy != 0xcafebabe) {
+ serialputc('?');
+ serialputc('d');
+ panic("data segment misaligned");
+ }
+
+ memset(edata, 0, end - edata);
+
+ /*
+ * we can't lock until smpon has run, but we're supposed to wait
+ * until l1 & l2 are on. too bad. l1 is on, l2 will soon be.
+ */
+ smpon();
+ iprint("ll Labs ");
+ cacheinit();
+
+ /*
+ * data segment is aligned, bss is zeroed, caches' characteristics
+ * are known. begin initialisation.
+ */
+ mach0init();
+ l2pageinit();
+ mmuinit();
+
+ optionsinit("/boot/boot boot");
+ quotefmtinstall();
+
+ /* want plan9.ini to be able to affect memory sizing in confinit */
+ plan9iniinit(); /* before we step on plan9.ini in low memory */
+
+ /* l2 looks for *l2off= in plan9.ini */
+ l2cache->on(); /* l2->on requires locks to work, thus smpon */
+ l2cache->info(&cachel[2]);
+ allcache->on();
+
+ cortexa9cachecfg();
+
+ trapinit(); /* so confinit can probe memory to size it */
+ confinit(); /* figures out amount of memory */
+ /* xinit prints (if it can), so finish up the banner here. */
+ delay(100);
+ navailcpus = getncpus();
+ iprint("(mp arm; %d cpus)\n\n", navailcpus);
+ delay(100);
+
+ for (cpu = 1; cpu < navailcpus; cpu++)
+ stopcpu(cpu);
+
+ xinit();
+ irqtooearly = 0; /* now that xinit and trapinit have run */
+
+ mainmem->flags |= POOL_ANTAGONISM /* | POOL_PARANOIA */ ;
+
+ /*
+ * Printinit will cause the first malloc call.
+ * (printinit->qopen->malloc) unless any of the
+ * above (like clockinit) do an irqenable, which
+ * will call malloc.
+ * If the system dies here it's probably due
+ * to malloc(->xalloc) not being initialised
+ * correctly, or the data segment is misaligned
+ * (it's amazing how far you can get with
+ * things like that completely broken).
+ *
+ * (Should be) boilerplate from here on.
+ */
+
+ archreset(); /* cfg clock signals, print cache cfg */
+ clockinit(); /* start clocks */
+ timersinit();
+
+ delay(50); /* let uart catch up */
+ printinit();
+
+ cpuidprint();
+ chkmissing();
+
+ procinit0();
+ initseg();
+
+// dmainit();
+ links();
+ conf.monitor = 1;
+// screeninit();
+
+ iprint("pcireset...");
+ pcireset(); /* this tends to hang after a reboot */
+ iprint("ok\n");
+
+ chandevreset(); /* most devices are discovered here */
+// i8250console(); /* too early; see init0 */
+
+ pageinit(); /* prints "1020M memory: ⋯ */
+ swapinit();
+ userinit();
+
+ /*
+ * starting a cpu will eventually result in it calling schedinit,
+ * so everything necessary to run user processes should be set up
+ * before starting secondary cpus.
+ */
+ launchinit();
+ /* SMP & FW are already on when we get here; u-boot set them? */
+ for (cpu = 1; cpu < navailcpus; cpu++)
+ if (startcpu(cpu) < 0)
+ panic("cpu%d didn't start", cpu);
+ l1diag();
+
+ schedinit();
+ panic("cpu%d: schedinit returned", m->machno);
+}
+
+static void
+shutdown(int ispanic)
+{
+ int ms, once;
+
+ lock(&active);
+ if(ispanic)
+ active.ispanic = ispanic;
+ else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0)
+ active.ispanic = 0;
+ once = active.machs & (1<<m->machno);
+ /*
+ * setting exiting will make hzclock() on each processor call exit(0),
+ * which calls shutdown(0) and idles non-bootstrap cpus and returns
+ * on bootstrap processors (to permit a reboot). clearing our bit
+ * in machs avoids calling exit(0) from hzclock() on this processor.
+ */
+ active.machs &= ~(1<<m->machno);
+ active.exiting = 1;
+ unlock(&active);
+
+ if(once) {
+ delay(m->machno*1000); /* stagger them */
+ iprint("cpu%d: exiting\n", m->machno);
+ }
+ spllo();
+ if (m->machno == 0)
+ ms = 5*1000;
+ else
+ ms = 2*1000;
+ for(; ms > 0; ms -= TK2MS(2)){
+ delay(TK2MS(2));
+ if(active.machs == 0 && consactive() == 0)
+ break;
+ }
+ delay(500);
+}
+
+/*
+ * exit kernel either on a panic or user request
+ */
+void
+exit(int code)
+{
+ shutdown(code);
+ splhi();
+ if (m->machno == 0)
+ archreboot();
+ else {
+ intrcpushutdown();
+ stopcpu(m->machno);
+ for (;;)
+ idlehands();
+ }
+}
+
+int
+isaconfig(char *class, int ctlrno, ISAConf *isa)
+{
+ char cc[32], *p;
+ int i;
+
+ snprint(cc, sizeof cc, "%s%d", class, ctlrno);
+ p = getconf(cc);
+ if(p == nil)
+ return 0;
+
+ isa->type = "";
+ isa->nopt = tokenize(p, isa->opt, NISAOPT);
+ for(i = 0; i < isa->nopt; i++){
+ p = isa->opt[i];
+ if(cistrncmp(p, "type=", 5) == 0)
+ isa->type = p + 5;
+ else if(cistrncmp(p, "port=", 5) == 0)
+ isa->port = strtoul(p+5, &p, 0);
+ else if(cistrncmp(p, "irq=", 4) == 0)
+ isa->irq = strtoul(p+4, &p, 0);
+ else if(cistrncmp(p, "dma=", 4) == 0)
+ isa->dma = strtoul(p+4, &p, 0);
+ else if(cistrncmp(p, "mem=", 4) == 0)
+ isa->mem = strtoul(p+4, &p, 0);
+ else if(cistrncmp(p, "size=", 5) == 0)
+ isa->size = strtoul(p+5, &p, 0);
+ else if(cistrncmp(p, "freq=", 5) == 0)
+ isa->freq = strtoul(p+5, &p, 0);
+ }
+ return 1;
+}
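+
+/*
+ * for illustration (hypothetical values): a plan9.ini line such as
+ *	ether0=type=rtl8169 irq=130
+ * makes isaconfig("ether", 0, &isa) return 1 with isa->type set to
+ * "rtl8169" and isa->irq set to 130.
+ */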
+
+/*
+ * the new kernel is already loaded at address `code'
+ * of size `size' and entry point `entry'.
+ */
+void
+reboot(void *entry, void *code, ulong size)
+{
+ int cpu, nmach, want, ms;
+ void (*f)(ulong, ulong, ulong);
+
+ nmach = conf.nmach;
+ writeconf();
+
+ /*
+ * the boot processor is cpu0. execute this function on it
+ * so that the new kernel has the same cpu0.
+ */
+ if (m->machno != 0) {
+ procwired(up, 0);
+ sched();
+ }
+ if (m->machno != 0)
+ print("on cpu%d (not 0)!\n", m->machno);
+
+ /*
+ * the other cpus could be holding locks that will never get
+ * released (e.g., in the print path) if we put them into
+ * reset now, so force them to shutdown gracefully first.
+ */
+ for (want = 0, cpu = 1; cpu < navailcpus; cpu++)
+ want |= 1 << cpu;
+ active.stopped = 0;
+ shutdown(0);
+ for (ms = 15*1000; ms > 0 && active.stopped != want; ms -= 10)
+ delay(10);
+ delay(20);
+ if (active.stopped != want) {
+ for (cpu = 1; cpu < nmach; cpu++)
+ stopcpu(cpu); /* make really sure */
+ delay(20);
+ }
+
+ /*
+ * should be the only processor running now
+ */
+ pcireset();
+// print("reboot entry %#lux code %#lux size %ld\n",
+// PADDR(entry), PADDR(code), size);
+
+ /* turn off buffered serial console */
+ serialoq = nil;
+ kprintoq = nil;
+ screenputs = nil;
+
+ /* shutdown devices */
+ chandevshutdown();
+
+ /* call off the dog */
+ clockshutdown();
+
+ splhi();
+ intrshutdown();
+
+ /* setup reboot trampoline function */
+ f = (void*)REBOOTADDR;
+ memmove(f, rebootcode, sizeof(rebootcode));
+ cachedwb();
+ l2cache->wbinv();
+ l2cache->off();
+ cacheuwbinv();
+
+ /* off we go - never to return */
+ (*f)(PADDR(entry), PADDR(code), size);
+
+ iprint("loaded kernel returned!\n");
+ archreboot();
+}
+
+/*
+ * starting place for first process
+ */
+void
+init0(void)
+{
+ int i;
+ char buf[2*KNAMELEN];
+
+ up->nerrlab = 0;
+ coherence();
+ spllo();
+
+ /*
+ * These are o.k. because rootinit is null.
+ * Then early kproc's will have a root and dot.
+ */
+ up->slash = namec("#/", Atodir, 0, 0);
+ pathclose(up->slash->path);
+ up->slash->path = newpath("/");
+ up->dot = cclone(up->slash);
+
+ chandevinit();
+ i8250console(); /* might be redundant, but harmless */
+ if(serialoq == nil)
+ panic("init0: nil serialoq");
+ normalprint = 1;
+
+ if(!waserror()){
+ snprint(buf, sizeof(buf), "%s %s", "ARM", conffile);
+ ksetenv("terminal", buf, 0);
+ ksetenv("cputype", "arm", 0);
+ if(cpuserver)
+ ksetenv("service", "cpu", 0);
+ else
+ ksetenv("service", "terminal", 0);
+
+ /* convert plan9.ini variables to #e and #ec */
+ for(i = 0; i < nconf; i++) {
+ ksetenv(confname[i], confval[i], 0);
+ ksetenv(confname[i], confval[i], 1);
+ }
+ poperror();
+ }
+ kproc("alarm", alarmkproc, 0);
+// kproc("startcpusproc", startcpusproc, nil);
+
+ touser(sp);
+}
+
+static void
+bootargs(uintptr base)
+{
+ int i;
+ ulong ssize;
+ char **av, *p;
+
+ /*
+ * Push the boot args onto the stack.
+ * The initial value of the user stack must be such
+ * that the total used is larger than the maximum size
+ * of the argument list checked in syscall.
+ */
+ i = oargblen+1;
+ p = UINT2PTR(STACKALIGN(base + BY2PG - sizeof(up->s.args) - i));
+ memmove(p, oargb, i);
+
+ /*
+ * Now push argc and the argv pointers.
+ * This isn't strictly correct as the code jumped to by
+ * touser in init9.s calls startboot (port/initcode.c) which
+ * expects arguments
+ * startboot(char *argv0, char **argv)
+ * not the usual (int argc, char* argv[]), but argv0 is
+ * unused so it doesn't matter (at the moment...).
+ */
+ av = (char**)(p - (oargc+2)*sizeof(char*));
+ ssize = base + BY2PG - PTR2UINT(av);
+ *av++ = (char*)oargc;
+ for(i = 0; i < oargc; i++)
+ *av++ = (oargv[i] - oargb) + (p - base) + (USTKTOP - BY2PG);
+ *av = nil;
+
+ /*
+ * Leave space for the return PC of the
+ * caller of initcode.
+ */
+ sp = USTKTOP - ssize - sizeof(void*);
+}
+
+/*
+ * create the first process
+ */
+void
+userinit(void)
+{
+ Proc *p;
+ Segment *s;
+ KMap *k;
+ Page *pg;
+
+ /* no processes yet */
+ up = nil;
+
+ p = newproc();
+ p->pgrp = newpgrp();
+ p->egrp = smalloc(sizeof(Egrp));
+ p->egrp->ref = 1;
+ p->fgrp = dupfgrp(nil);
+ p->rgrp = newrgrp();
+ p->procmode = 0640;
+
+ kstrdup(&eve, "");
+ kstrdup(&p->text, "*init*");
+ kstrdup(&p->user, eve);
+
+ /*
+ * Kernel Stack
+ */
+ p->sched.pc = PTR2UINT(init0);
+ p->sched.sp = PTR2UINT(p->kstack+KSTACK-sizeof(up->s.args)-sizeof(uintptr));
+ p->sched.sp = STACKALIGN(p->sched.sp);
+
+ /*
+ * User Stack
+ *
+ * Technically, newpage can't be called here because it
+ * should only be called when in a user context as it may
+ * try to sleep if there are no pages available, but that
+ * shouldn't be the case here.
+ */
+ s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
+ s->flushme++;
+ p->seg[SSEG] = s;
+ pg = newpage(1, 0, USTKTOP-BY2PG);
+ segpage(s, pg);
+ k = kmap(pg);
+ bootargs(VA(k));
+ kunmap(k);
+
+ /*
+ * Text
+ */
+ s = newseg(SG_TEXT, UTZERO, 1);
+ p->seg[TSEG] = s;
+ pg = newpage(1, 0, UTZERO);
+ memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
+ segpage(s, pg);
+ k = kmap(s->map[0]->pages[0]);
+ memmove(UINT2PTR(VA(k)), initcode, sizeof initcode);
+ kunmap(k);
+
+ ready(p);
+}
+
+Conf conf; /* XXX - must go - gag */
+
+Confmem tsmem[nelem(conf.mem)] = {
+ /*
+ * Memory available to Plan 9:
+ */
+ { .base = PHYSDRAM, .limit = PHYSDRAM + Minmem, },
+};
+ulong memsize = DRAMSIZE;
+
+static int
+gotmem(uintptr sz)
+{
+ uintptr addr;
+
+ /* back off a little from the end */
+ addr = (uintptr)KADDR(PHYSDRAM + sz - BY2WD);
+ if (probeaddr(addr) >= 0) { /* didn't trap? memory present */
+ memsize = sz;
+ return 0;
+ }
+ return -1;
+}
+
+void
+confinit(void)
+{
+ int i;
+ ulong kpages;
+ uintptr pa;
+ char *p;
+
+ /*
+ * Copy the physical memory configuration to Conf.mem.
+ */
+ if(nelem(tsmem) > nelem(conf.mem)){
+ iprint("memory configuration botch\n");
+ exit(1);
+ }
+ if(0 && (p = getconf("*maxmem")) != nil) {
+ memsize = strtoul(p, 0, 0) - PHYSDRAM;
+ if (memsize < 16*MB) /* sanity */
+ memsize = 16*MB;
+ }
+
+ /*
+ * see if all that memory exists; if not, find out how much does.
+ * trapinit must have been called first.
+ */
+ if (gotmem(memsize - RESRVDHIMEM) < 0)
+ panic("can't find 1GB of memory");
+
+ tsmem[0].limit = PHYSDRAM + memsize;
+ memmove(conf.mem, tsmem, sizeof(tsmem));
+
+ conf.npage = 0;
+ pa = PADDR(PGROUND(PTR2UINT(end)));
+
+ /*
+ * we assume that the kernel is at the beginning of one of the
+ * contiguous chunks of memory and fits therein.
+ */
+ for(i=0; i<nelem(conf.mem); i++){
+ /* take kernel out of allocatable space */
+ if(pa > conf.mem[i].base && pa < conf.mem[i].limit)
+ conf.mem[i].base = pa;
+
+ conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG;
+ conf.npage += conf.mem[i].npage;
+ }
+
+ conf.upages = (conf.npage*80)/100;
+ conf.ialloc = ((conf.npage-conf.upages)/2)*BY2PG;
+
+ /* set up other configuration parameters */
+ conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
+ if(cpuserver)
+ conf.nproc *= 3;
+ if(conf.nproc > 2000)
+ conf.nproc = 2000;
+ conf.nswap = conf.npage*3;
+ conf.nswppo = 4096;
+ conf.nimage = 200;
+
+ /*
+ * it's simpler on mp systems to take page-faults early,
+ * on reference, rather than later, on write, which might
+ * require tlb shootdowns.
+ */
+ conf.copymode = 1; /* copy on reference */
+
+ /*
+ * Guess how much is taken by the large permanent
+ * datastructures. Mntcache and Mntrpc are not accounted for
+ * (probably ~300KB).
+ */
+ kpages = conf.npage - conf.upages;
+ kpages *= BY2PG;
+ kpages -= conf.upages*sizeof(Page)
+ + conf.nproc*sizeof(Proc)
+ + conf.nimage*sizeof(Image)
+ + conf.nswap
+ + conf.nswppo*sizeof(Page);
+ mainmem->maxsize = kpages;
+ if(!cpuserver)
+ /*
+ * give terminals lots of image memory, too; the dynamic
+ * allocation will balance the load properly, hopefully.
+ * be careful with 32-bit overflow.
+ */
+ imagmem->maxsize = kpages;
+
+// archconfinit();
+}
+
+int
+cmpswap(long *addr, long old, long new)
+{
+ return cas((int *)addr, old, new);
+}
+
+void
+advertwfi(void) /* advertise my wfi status */
+{
+ ilock(&active);
+ active.wfi |= 1 << m->machno;
+ iunlock(&active);
+}
+
+void
+unadvertwfi(void) /* do not advertise my wfi status */
+{
+ ilock(&active);
+ active.wfi &= ~(1 << m->machno);
+ iunlock(&active);
+}
+
+void
+idlehands(void)
+{
+#ifdef use_ipi
+ int advertised;
+
+ /* don't go into wfi until my local timer is ticking */
+ if (m->ticks <= 1)
+ return;
+
+ advertised = 0;
+ m->inidlehands++;
+ /* avoid recursion via ilock, advertise iff this cpu is initialised */
+ if (m->inidlehands == 1 && m->syscall > 0) {
+ advertwfi();
+ advertised = 1;
+ }
+
+ wfi();
+
+ if (advertised)
+ unadvertwfi();
+ m->inidlehands--;
+#endif
+}
+
+void
+wakewfi(void)
+{
+#ifdef use_ipi
+ uint cpu;
+
+ /*
+ * find any cpu other than me currently in wfi.
+ * need not be exact.
+ */
+ cpu = BI2BY*BY2WD - 1 - clz(active.wfi & ~(1 << m->machno));
+ if (cpu < MAXMACH)
+ intrcpu(cpu);
+#endif
+}
diff --git a/sys/src/9/teg2/mem.h b/sys/src/9/teg2/mem.h
new file mode 100644
index 000000000..aeb24d4e1
--- /dev/null
+++ b/sys/src/9/teg2/mem.h
@@ -0,0 +1,150 @@
+/*
+ * Memory and machine-specific definitions. Used in C and assembler.
+ */
+#define KiB 1024u /* Kibi 0x0000000000000400 */
+#define MiB 1048576u /* Mebi 0x0000000000100000 */
+#define GiB 1073741824u /* Gibi 0x0000000040000000 */
+
+#define HOWMANY(x, y) (((x)+((y)-1))/(y))
+#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) /* ceiling */
+#define ROUNDDN(x, y) (((x)/(y))*(y)) /* floor */
+#define MIN(a, b) ((a) < (b)? (a): (b))
+#define MAX(a, b) ((a) > (b)? (a): (b))
+
+/*
+ * Not sure where these macros should go.
+ * This probably isn't right but will do for now.
+ * The macro names are problematic too.
+ */
+/*
+ * In B(o), 'o' is the bit offset in the register.
+ * For multi-bit fields use F(v, o, w) where 'v' is the value
+ * of the bit-field of width 'w' with LSb at bit offset 'o'.
+ */
+#define B(o) (1<<(o))
+#define F(v, o, w) (((v) & ((1<<(w))-1))<<(o))
+
+#define FCLR(d, o, w) ((d) & ~(((1<<(w))-1)<<(o)))
+#define FEXT(d, o, w) (((d)>>(o)) & ((1<<(w))-1))
+#define FINS(d, o, w, v) (FCLR((d), (o), (w))|F((v), (o), (w)))
+#define FSET(d, o, w) ((d)|(((1<<(w))-1)<<(o)))
+
+#define FMASK(o, w) (((1<<(w))-1)<<(o))
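+
+/*
+ * for example, B(3) == 0x8, F(5, 4, 3) == 0x50,
+ * FEXT(0x50, 4, 3) == 5 and FMASK(4, 3) == 0x70.
+ */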
+
+/*
+ * Sizes
+ */
+#define BY2PG (4*KiB) /* bytes per page */
+#define PGSHIFT 12 /* log(BY2PG) */
+#define PGROUND(s) ROUNDUP(s, BY2PG)
+#define ROUND(s, sz) (((s)+(sz-1))&~(sz-1))
+
+/* max # of cpus system can run. tegra2 cpu ids are two bits wide. */
+#define MAXMACH 4
+#define MACHSIZE BY2PG
+#define L1SIZE (4 * BY2PG)
+
+#define KSTKSIZE (16*KiB) /* was 8K */
+#define STACKALIGN(sp) ((sp) & ~7) /* bug: assure with alloc */
+
+/*
+ * Magic registers
+ */
+
+#define USER 9 /* R9 is up-> */
+#define MACH 10 /* R10 is m-> */
+
+/*
+ * Address spaces.
+ * KTZERO is used by kprof and dumpstack (if any).
+ *
+ * KZERO (0xc0000000) is mapped to physical 0 (start of dram).
+ * u-boot claims to occupy the first 4 MB of dram, but we're willing to
+ * step on it once we're loaded.
+ *
+ * L2 PTEs are stored in 4K before cpu0's Mach (8K to 12K above KZERO).
+ * cpu0's Mach struct is at L1 - MACHSIZE(4K) to L1 (12K to 16K above KZERO).
+ * L1 PTEs are stored from L1 to L1+32K (16K to 48K above KZERO).
+ * plan9.ini is loaded at CONFADDR (4MB).
+ * KTZERO may be anywhere after that.
+ */
+#define KSEG0 0xC0000000 /* kernel segment */
+/* mask to check segment; good for 1GB dram */
+#define KSEGM 0xC0000000
+#define KZERO KSEG0 /* kernel address space */
+#define L1 (KZERO+16*KiB) /* cpu0 l1 page table; 16KiB aligned */
+#define CONFADDR (KZERO+0x400000) /* unparsed plan9.ini */
+#define CACHECONF (CONFADDR+48*KiB)
+/* KTZERO must match loadaddr in mkfile */
+#define KTZERO (KZERO+0x410000) /* kernel text start */
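+/*
+ * with the values above, L1 == 0xc0004000, CONFADDR == 0xc0400000,
+ * CACHECONF == 0xc040c000 and KTZERO == 0xc0410000.
+ */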
+
+#define L2pages (2*MiB) /* high memory reserved for l2 page tables */
+#define RESRVDHIMEM (64*KiB + MiB + L2pages) /* avoid HVECTOR, l2 pages */
+/* we assume that we have 1 GB of ram, which is true for all trimslices. */
+#define DRAMSIZE GiB
+
+#define UZERO 0 /* user segment */
+#define UTZERO (UZERO+BY2PG) /* user text start */
+#define UTROUND(t) ROUNDUP((t), BY2PG)
+/*
+ * moved USTKTOP down to 1GB to keep MMIO space out of user space.
+ * moved it down another MB to utterly avoid KADDR(stack_base) mapping
+ * to high exception vectors. see confinit().
+ */
+#define USTKTOP (0x40000000 - 64*KiB - MiB) /* user segment end +1 */
+#define USTKSIZE (8*1024*1024) /* user stack size */
+#define TSTKTOP (USTKTOP-USTKSIZE) /* sysexec temporary stack */
+#define TSTKSIZ 256
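+/* with the values above, USTKTOP == 0x3fef0000 and TSTKTOP == 0x3f6f0000 */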
+
+/* address at which to copy and execute rebootcode */
+#define REBOOTADDR KADDR(0x100)
+
+/*
+ * Legacy...
+ */
+#define BLOCKALIGN CACHELINESZ /* only used in allocb.c */
+#define KSTACK KSTKSIZE
+
+/*
+ * Sizes
+ */
+#define BI2BY 8 /* bits per byte */
+#define BY2SE 4
+#define BY2WD 4
+#define BY2V 8 /* only used in xalloc.c */
+
+#define CACHELINESZ 32 /* bytes per cache line */
+#define PTEMAPMEM (1024*1024)
+#define PTEPERTAB (PTEMAPMEM/BY2PG)
+#define SEGMAPSIZE 1984 /* magic 16*124 */
+#define SSEGMAPSIZE 16 /* magic */
+#define PPN(x) ((x)&~(BY2PG-1)) /* pure page number? */
+
+/*
+ * With a little work these move to port.
+ */
+#define PTEVALID (1<<0)
+#define PTERONLY 0
+#define PTEWRITE (1<<1)
+#define PTEUNCACHED (1<<2)
+#define PTEKERNEL (1<<3)
+
+/*
+ * Physical machine information from here on.
+ */
+
+#define PHYSDRAM 0
+
+#define PHYSIO 0x50000000 /* cpu */
+#define VIRTIO PHYSIO
+#define PHYSL2BAG 0x50043000 /* l2 cache bag-on-the-side */
+#define PHYSEVP 0x6000f100 /* undocumented `exception vector' */
+#define PHYSCONS 0x70006000 /* uart console */
+#define PHYSIOEND 0xc0000000 /* end of ahb mem & pcie */
+
+#define PHYSAHB 0xc0000000 /* ahb bus */
+#define VIRTAHB 0xb0000000
+#define P2VAHB(pa) ((pa) - PHYSAHB + VIRTAHB)
+
+#define PHYSNOR 0xd0000000
+#define VIRTNOR 0x40000000
diff --git a/sys/src/9/teg2/mkfile b/sys/src/9/teg2/mkfile
new file mode 100644
index 000000000..af3ed317f
--- /dev/null
+++ b/sys/src/9/teg2/mkfile
@@ -0,0 +1,151 @@
+CONF=ts
+CONFLIST=ts
+EXTRACOPIES=
+
+# allegedly u-boot uses the bottom 4MB (up to 0x400000) so avoid that,
+# and leave 64K for plan9.ini. loadaddr must match KTZERO in mem.h
+# and CONFADDR must be 0x10000 lower.
+loadaddr=0xc0410000
+
+objtype=arm
+</$objtype/mkfile
+p=9
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+
+PORT=\
+ alarm.$O\
+ alloc.$O\
+ allocb.$O\
+ auth.$O\
+ cache.$O\
+ chan.$O\
+ dev.$O\
+ edf.$O\
+ fault.$O\
+ mul64fract.$O\
+ rebootcmd.$O\
+ page.$O\
+ parse.$O\
+ pgrp.$O\
+ portclock.$O\
+ print.$O\
+ proc.$O\
+ qio.$O\
+ qlock.$O\
+ segment.$O\
+ swap.$O\
+ sysfile.$O\
+ sysproc.$O\
+ taslock.$O\
+ tod.$O\
+ xalloc.$O\
+
+OBJ=\
+ l.$O\
+ lexception.$O\
+ lproc.$O\
+ arch.$O\
+ clock.$O\
+ clock-tegra.$O\
+ main.$O\
+ mmu.$O\
+ random.$O\
+ trap.$O\
+ $CONF.root.$O\
+ $CONF.rootc.$O\
+ $DEVS\
+ $PORT\
+
+HFILES=\
+ arm.h\
+ dat.h\
+ ../port/error.h\
+ errstr.h\
+ fns.h\
+ io.h\
+ ../port/lib.h\
+ mem.h\
+ ../port/portdat.h\
+ ../port/portfns.h\
+ /$objtype/include/u.h\
+
+LIB=\
+ /$objtype/lib/libmemlayer.a\
+ /$objtype/lib/libmemdraw.a\
+ /$objtype/lib/libdraw.a\
+ /$objtype/lib/libip.a\
+ /$objtype/lib/libsec.a\
+ /$objtype/lib/libmp.a\
+ /$objtype/lib/libc.a\
+
+9:V: $p$CONF s$p$CONF
+
+$p$CONF:DQ: $CONF.c $OBJ $LIB mkfile
+ $CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+ echo '# linking raw kernel' # H6: no headers, data segment aligned
+ $LD -o $target -H6 -R4096 -T$loadaddr -l $OBJ $CONF.$O $LIB
+
+s$p$CONF:DQ: $CONF.$O $OBJ $LIB
+ echo '# linking 9 kernel with symbols'
+# $LD -o $target -R4096 -T$loadaddr -l -a $OBJ $CONF.$O $LIB >$target.list
+ $LD -o $target -R4096 -T$loadaddr -l $OBJ $CONF.$O $LIB
+ size $target
+
+$p$CONF.gz:D: $p$CONF
+ gzip -9 <$p$CONF >$target
+
+$OBJ: $HFILES
+
+install:V: /$objtype/$p$CONF
+
+/$objtype/$p$CONF:D: $p$CONF s$p$CONF
+ cp -x $p$CONF s$p$CONF /$objtype/ &
+ for(i in $EXTRACOPIES)
+ { 9fs $i && cp $p$CONF s$p$CONF /n/$i/$objtype && echo -n $i... & }
+ wait
+ echo
+ touch $target
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+CFLAGS= -I. -I../port $CFLAGS # hack to compile private sysproc.c (e.g.)
+
+arch.$O clock.$O fpiarm.$O main.$O mmu.$O screen.$O sdscsi.$O syscall.$O \
+ trap.$O: /$objtype/include/ureg.h
+
+archtegra.$O devether.$O ether9221.$O: etherif.h ../port/netif.h
+archtegra.$O devflash.$O flashtegra.$O flashigep.$O: ../port/flashif.h
+ecc.$O flashtegra.$O flashigep.$O: ../port/nandecc.h io.h
+fpi.$O fpiarm.$O fpimem.$O: fpi.h
+l.$O lexception.$O lproc.$O mmu.$O: arm.s mem.h
+l.$O rebootcode.$O: cache.v7.s
+main.$O: errstr.h init.h reboot.h
+devusb.$O: ../port/usb.h
+usbehci.$O usbohci.$O usbuhci.$O: ../port/usb.h usbehci.h uncached.h
+
+init.h:D: ../port/initcode.c init9.s
+ $CC ../port/initcode.c
+ $AS init9.s
+ $LD -l -R1 -s -o init.out init9.$O initcode.$O /$objtype/lib/libc.a
+ {echo 'uchar initcode[]={'
+ xd -1x <init.out |
+ sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+ echo '};'} > init.h
+
+reboot.h:D: rebootcode.s cache.v7.s arm.s arm.h mem.h
+ $AS rebootcode.s
+ # -lc is only for memmove. -T arg is PADDR(REBOOTADDR)
+# $LD -l -a -s -T0x100 -R4 -o reboot.out rebootcode.$O -lc >reboot.list
+ $LD -l -s -T0x100 -R4 -o reboot.out rebootcode.$O -lc
+ {echo 'uchar rebootcode[]={'
+ xd -1x reboot.out |
+ sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+ echo '};'} > reboot.h
+errstr.h:D: ../port/mkerrstr ../port/error.h
+ rc ../port/mkerrstr > errstr.h
+
+$CONF.clean:
+ rm -rf $p$CONF s$p$CONF errstr.h reboot.h $CONF.c boot$CONF.c
diff --git a/sys/src/9/teg2/mmu.c b/sys/src/9/teg2/mmu.c
new file mode 100644
index 000000000..19065fd6a
--- /dev/null
+++ b/sys/src/9/teg2/mmu.c
@@ -0,0 +1,750 @@
+/*
+ * arm arch v7 mmu
+ *
+ * we initially thought that we needn't flush the l2 cache since external
+ * devices needn't see page tables. sadly, reality does not agree with
+ * the manuals.
+ *
+ * we use l1 and l2 cache ops here because they are empirically needed.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "arm.h"
+
+#define L1X(va) FEXT((va), 20, 12)
+#define L2X(va) FEXT((va), 12, 8)
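+
+/* e.g., for va 0xc0410000 (KTZERO), L1X(va) == 0xc04 and L2X(va) == 0x10 */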
+
+enum {
+ Debug = 0,
+
+ L1lo = UZERO/MiB, /* L1X(UZERO)? */
+#ifdef SMALL_ARM /* well under 1GB of RAM? */
+ L1hi = (USTKTOP+MiB-1)/MiB, /* L1X(USTKTOP+MiB-1)? */
+#else
+ /*
+	 * on trimslice, the top of 1GB ram can't be addressed, as high
+ * virtual memory (0xfff.....) contains high vectors. We
+ * moved USTKTOP down another MB to utterly avoid KADDR(stack_base)
+ * mapping to high exception vectors. USTKTOP is thus
+ * (0x40000000 - 64*KiB - MiB), which in kernel virtual space is
+ * (0x100000000ull - 64*KiB - MiB), but we need the whole user
+ * virtual address space to be unmapped in a new process.
+ */
+ L1hi = DRAMSIZE/MiB,
+#endif
+};
+
+#define ISHOLE(type) ((type) == 0)
+
+typedef struct Range Range;
+struct Range {
+ uintptr startva;
+ uvlong endva;
+ uintptr startpa;
+ uvlong endpa;
+ ulong attrs;
+ int type; /* L1 Section or Coarse? */
+};
+
+static void mmul1empty(void);
+
+static char *
+typename(int type)
+{
+ static char numb[20];
+
+ switch(type) {
+ case Coarse:
+ return "4KB-page table(s)";
+ case Section:
+ return "1MB section(s)";
+ default:
+ snprint(numb, sizeof numb, "type %d", type);
+ return numb;
+ }
+}
+
+static void
+prl1range(Range *rp)
+{
+ int attrs;
+
+ iprint("l1 maps va (%#8.8lux-%#llux) -> ", rp->startva, rp->endva-1);
+ if (rp->startva == rp->startpa)
+ iprint("identity-mapped");
+ else
+ iprint("pa %#8.8lux", rp->startpa);
+ iprint(" attrs ");
+ attrs = rp->attrs;
+ if (attrs) {
+ if (attrs & Cached)
+ iprint("C");
+ if (attrs & Buffered)
+ iprint("B");
+ if (attrs & L1sharable)
+ iprint("S1");
+ if (attrs & L1wralloc)
+ iprint("A1");
+ } else
+ iprint("\"\"");
+ iprint(" %s\n", typename(rp->type));
+ delay(100);
+ rp->endva = 0;
+}
+
+static void
+l2dump(Range *rp, PTE pte)
+{
+ USED(rp, pte);
+}
+
+/* dump level 1 page table at virtual addr l1 */
+void
+mmudump(PTE *l1)
+{
+ int i, type, attrs;
+ uintptr pa;
+ uvlong va;
+ PTE pte;
+ Range rng;
+
+ /* dump first level of ptes */
+ iprint("cpu%d l1 pt @ %#p:\n", m->machno, PADDR(l1));
+ memset(&rng, 0, sizeof rng);
+ for (va = i = 0; i < 4096; i++, va += MB) {
+ pte = l1[i];
+ type = pte & (Section|Coarse);
+ if (type == Section)
+ pa = pte & ~(MB - 1);
+ else
+ pa = pte & ~(KiB - 1);
+ attrs = 0;
+ if (!ISHOLE(type) && type == Section)
+ attrs = pte & L1ptedramattrs;
+
+ /* if a range is open but this pte isn't part, close & open */
+ if (!ISHOLE(type) &&
+ (pa != rng.endpa || type != rng.type || attrs != rng.attrs))
+ if (rng.endva != 0) { /* range is open? close it */
+ prl1range(&rng);
+ rng.type = 0;
+ rng.attrs = 0;
+ }
+
+ if (ISHOLE(type)) { /* end of any open range? */
+ if (rng.endva != 0) /* range is open? close it */
+ prl1range(&rng);
+ } else { /* continuation or new range */
+ if (rng.endva == 0) { /* no open range? start one */
+ rng.startva = va;
+ rng.startpa = pa;
+ rng.type = type;
+ rng.attrs = attrs;
+ }
+ rng.endva = va + MB; /* continue the open range */
+ rng.endpa = pa + MB;
+ }
+ if (type == Coarse)
+ l2dump(&rng, pte);
+ }
+ if (rng.endva != 0) /* close any open range */
+ prl1range(&rng);
+ iprint("\n");
+}
+
+/*
+ * map `mbs' megabytes from virt to phys, uncached.
+ * device registers are sharable, except the private memory region:
+ * 2 4K pages, at 0x50040000 on the tegra2.
+ */
+void
+mmumap(uintptr virt, uintptr phys, int mbs)
+{
+ uint off;
+ PTE *l1;
+
+ phys &= ~(MB-1);
+ virt &= ~(MB-1);
+ l1 = KADDR(ttbget());
+ for (off = 0; mbs-- > 0; off += MB)
+ l1[L1X(virt + off)] = (phys + off) | Dom0 | L1AP(Krw) |
+ Section | L1sharable;
+ allcache->wbse(l1, L1SIZE);
+ mmuinvalidate();
+}
+
+/* identity map `mbs' megabytes from phys */
+void
+mmuidmap(uintptr phys, int mbs)
+{
+ mmumap(phys, phys, mbs);
+}
+
+PTE *
+newl2page(void)
+{
+ PTE *p;
+
+ if ((uintptr)l2pages >= HVECTORS - BY2PG)
+ panic("l2pages");
+ p = (PTE *)l2pages;
+ l2pages += BY2PG;
+ return p;
+}
+
+/*
+ * replace an L1 section pte with an L2 page table and an L1 coarse pte,
+ * with the same attributes as the original pte and covering the same
+ * region of memory.
+ */
+static void
+expand(uintptr va)
+{
+ int x;
+ uintptr tva, pa;
+ PTE oldpte;
+ PTE *l1, *l2;
+
+ va &= ~(MB-1);
+ x = L1X(va);
+ l1 = &m->mmul1[x];
+ oldpte = *l1;
+ if (oldpte == Fault || (oldpte & (Coarse|Section)) != Section)
+ return; /* make idempotent */
+
+ /* wasteful - l2 pages only have 256 entries - fix */
+ /*
+ * it may be very early, before any memory allocators are
+ * configured, so do a crude allocation from the top of memory.
+ */
+ l2 = newl2page();
+ memset(l2, 0, BY2PG);
+
+ /* write new L1 l2 entry back into L1 descriptors */
+ *l1 = PPN(PADDR(l2))|Dom0|Coarse;
+
+ /* fill l2 page with l2 ptes with equiv attrs; copy AP bits */
+ x = Small | oldpte & (Cached|Buffered) | (oldpte & (1<<15 | 3<<10)) >> 6;
+ if (oldpte & L1sharable)
+ x |= L2sharable;
+ if (oldpte & L1wralloc)
+ x |= L2wralloc;
+ pa = oldpte & ~(MiB - 1);
+ for(tva = va; tva < va + MiB; tva += BY2PG, pa += BY2PG)
+ l2[L2X(tva)] = PPN(pa) | x;
+
+ /* force l2 page to memory */
+ allcache->wbse(l2, BY2PG);
+
+ /* clear out the current entry */
+ mmuinvalidateaddr(PPN(va));
+
+ allcache->wbinvse(l1, sizeof *l1);
+ if ((*l1 & (Coarse|Section)) != Coarse)
+ panic("explode %#p", va);
+}
+
+/*
+ * cpu0's l1 page table has likely changed since we copied it in
+ * launchinit, notably to allocate uncached sections for ucalloc.
+ * so copy it again from cpu0's.
+ */
+void
+mmuninit(void)
+{
+ int s;
+ PTE *l1, *newl1;
+
+ s = splhi();
+ l1 = m->mmul1;
+ newl1 = mallocalign(L1SIZE, L1SIZE, 0, 0);
+ assert(newl1);
+
+ allcache->wbinvse((PTE *)L1, L1SIZE); /* get cpu0's up-to-date copy */
+ memmove(newl1, (PTE *)L1, L1SIZE);
+ allcache->wbse(newl1, L1SIZE);
+
+ mmuinvalidate();
+ coherence();
+
+ ttbput(PADDR(newl1)); /* switch */
+ coherence();
+ mmuinvalidate();
+ coherence();
+ m->mmul1 = newl1;
+ coherence();
+
+ mmul1empty();
+ coherence();
+ mmuinvalidate();
+ coherence();
+
+// mmudump(m->mmul1); /* DEBUG */
+ splx(s);
+ free(l1);
+}
+
+/* l1 is base of my l1 descriptor table */
+static PTE *
+l2pteaddr(PTE *l1, uintptr va)
+{
+ uintptr l2pa;
+ PTE pte;
+ PTE *l2;
+
+ expand(va);
+ pte = l1[L1X(va)];
+ if ((pte & (Coarse|Section)) != Coarse)
+ panic("l2pteaddr l1 pte %#8.8ux @ %#p not Coarse",
+ pte, &l1[L1X(va)]);
+ l2pa = pte & ~(KiB - 1);
+ l2 = (PTE *)KADDR(l2pa);
+ return &l2[L2X(va)];
+}
+
+void
+mmuinit(void)
+{
+ ulong va;
+ uintptr pa;
+ PTE *l1, *l2;
+
+ if (m->machno != 0) {
+ mmuninit();
+ return;
+ }
+
+ pa = ttbget();
+ l1 = KADDR(pa);
+
+ /* identity map most of the io space */
+ mmuidmap(PHYSIO, (PHYSIOEND - PHYSIO + MB - 1) / MB);
+ /* move the rest to more convenient addresses */
+ mmumap(VIRTNOR, PHYSNOR, 256); /* 0x40000000 v -> 0xd0000000 p */
+ mmumap(VIRTAHB, PHYSAHB, 256); /* 0xb0000000 v -> 0xc0000000 p */
+
+ /* map high vectors to start of dram, but only 4K, not 1MB */
+ pa -= MACHSIZE+BY2PG; /* page tables must be page aligned */
+ l2 = KADDR(pa);
+ memset(l2, 0, 1024);
+
+ m->mmul1 = l1; /* used by explode in l2pteaddr */
+
+ /* map private mem region (8K at soc.scu) without sharable bits */
+ va = soc.scu;
+ *l2pteaddr(l1, va) &= ~L2sharable;
+ va += BY2PG;
+ *l2pteaddr(l1, va) &= ~L2sharable;
+
+ /*
+ * below (and above!) the vectors in virtual space may be dram.
+ * populate the rest of l2 for the last MB.
+ */
+ for (va = -MiB; va != 0; va += BY2PG)
+ l2[L2X(va)] = PADDR(va) | L2AP(Krw) | Small | L2ptedramattrs;
+ /* map high vectors page to 0; must match attributes of KZERO->0 map */
+ l2[L2X(HVECTORS)] = PHYSDRAM | L2AP(Krw) | Small | L2ptedramattrs;
+ coherence();
+ l1[L1X(HVECTORS)] = pa | Dom0 | Coarse; /* l1 -> ttb-machsize-4k */
+
+ /* make kernel text unwritable */
+ for(va = KTZERO; va < (ulong)etext; va += BY2PG)
+ *l2pteaddr(l1, va) |= L2apro;
+
+ allcache->wbinv();
+ mmuinvalidate();
+
+ m->mmul1 = l1;
+ coherence();
+ mmul1empty();
+ coherence();
+// mmudump(l1); /* DEBUG */
+}
+
+static void
+mmul2empty(Proc* proc, int clear)
+{
+ PTE *l1;
+ Page **l2, *page;
+
+ l1 = m->mmul1;
+ l2 = &proc->mmul2;
+ for(page = *l2; page != nil; page = page->next){
+ if(clear)
+ memset(UINT2PTR(page->va), 0, BY2PG);
+ l1[page->daddr] = Fault;
+ allcache->wbse(l1, sizeof *l1);
+ l2 = &page->next;
+ }
+ *l2 = proc->mmul2cache;
+ proc->mmul2cache = proc->mmul2;
+ proc->mmul2 = nil;
+}
+
+static void
+mmul1empty(void)
+{
+#ifdef notdef
+/* there's a bug in here */
+ PTE *l1;
+
+ /* clean out any user mappings still in l1 */
+ if(m->mmul1lo > L1lo){
+ if(m->mmul1lo == 1)
+ m->mmul1[L1lo] = Fault;
+ else
+ memset(&m->mmul1[L1lo], 0, m->mmul1lo*sizeof(PTE));
+ m->mmul1lo = L1lo;
+ }
+ if(m->mmul1hi < L1hi){
+ l1 = &m->mmul1[m->mmul1hi];
+ if((L1hi - m->mmul1hi) == 1)
+ *l1 = Fault;
+ else
+ memset(l1, 0, (L1hi - m->mmul1hi)*sizeof(PTE));
+ m->mmul1hi = L1hi;
+ }
+#else
+ memset(&m->mmul1[L1lo], 0, (L1hi - L1lo)*sizeof(PTE));
+#endif /* notdef */
+ allcache->wbse(&m->mmul1[L1lo], (L1hi - L1lo)*sizeof(PTE));
+}
+
+void
+mmuswitch(Proc* proc)
+{
+ int x;
+ PTE *l1;
+ Page *page;
+
+ /* do kprocs get here and if so, do they need to? */
+ if(m->mmupid == proc->pid && !proc->newtlb)
+ return;
+ m->mmupid = proc->pid;
+
+ /* write back dirty and invalidate caches */
+ l1cache->wbinv();
+
+ if(proc->newtlb){
+ mmul2empty(proc, 1);
+ proc->newtlb = 0;
+ }
+
+ mmul1empty();
+
+ /* move in new map */
+ l1 = m->mmul1;
+ for(page = proc->mmul2; page != nil; page = page->next){
+ x = page->daddr;
+ l1[x] = PPN(page->pa)|Dom0|Coarse;
+ /* know here that L1lo < x < L1hi */
+ if(x+1 - m->mmul1lo < m->mmul1hi - x)
+ m->mmul1lo = x+1;
+ else
+ m->mmul1hi = x;
+ }
+
+ /* make sure map is in memory */
+ /* could be smarter about how much? */
+ allcache->wbse(&l1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
+
+ /* lose any possible stale tlb entries */
+ mmuinvalidate();
+
+ //print("mmuswitch l1lo %d l1hi %d %d\n",
+ // m->mmul1lo, m->mmul1hi, proc->kp);
+
+ wakewfi(); /* in case there's another runnable proc */
+}
+
+void
+flushmmu(void)
+{
+ int s;
+
+ s = splhi();
+ up->newtlb = 1;
+ mmuswitch(up);
+ splx(s);
+}
+
+void
+mmurelease(Proc* proc)
+{
+ Page *page, *next;
+
+ /* write back dirty and invalidate caches */
+ l1cache->wbinv();
+
+ mmul2empty(proc, 0);
+ for(page = proc->mmul2cache; page != nil; page = next){
+ next = page->next;
+ if(--page->ref)
+ panic("mmurelease: page->ref %d", page->ref);
+ pagechainhead(page);
+ }
+ if(proc->mmul2cache && palloc.r.p)
+ wakeup(&palloc.r);
+ proc->mmul2cache = nil;
+
+ mmul1empty();
+
+ /* make sure map is in memory */
+ /* could be smarter about how much? */
+ allcache->wbse(&m->mmul1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
+
+ /* lose any possible stale tlb entries */
+ mmuinvalidate();
+}
+
+void
+putmmu(uintptr va, uintptr pa, Page* page)
+{
+ int x;
+ Page *pg;
+ PTE *l1, *pte;
+
+ x = L1X(va);
+ l1 = &m->mmul1[x];
+ if (Debug) {
+ iprint("putmmu(%#p, %#p, %#p) ", va, pa, page->pa);
+ iprint("mmul1 %#p l1 %#p *l1 %#ux x %d pid %ld\n",
+ m->mmul1, l1, *l1, x, up->pid);
+ if (*l1)
+ panic("putmmu: old l1 pte non-zero; stuck?");
+ }
+ if(*l1 == Fault){
+ /* wasteful - l2 pages only have 256 entries - fix */
+ if(up->mmul2cache == nil){
+ /* auxpg since we don't need much? memset if so */
+ pg = newpage(1, 0, 0);
+ pg->va = VA(kmap(pg));
+ }
+ else{
+ pg = up->mmul2cache;
+ up->mmul2cache = pg->next;
+ memset(UINT2PTR(pg->va), 0, BY2PG);
+ }
+ pg->daddr = x;
+ pg->next = up->mmul2;
+ up->mmul2 = pg;
+
+ /* force l2 page to memory */
+ allcache->wbse((void *)pg->va, BY2PG);
+
+ *l1 = PPN(pg->pa)|Dom0|Coarse;
+ allcache->wbse(l1, sizeof *l1);
+
+ if (Debug)
+ iprint("l1 %#p *l1 %#ux x %d pid %ld\n", l1, *l1, x, up->pid);
+
+ if(x >= m->mmul1lo && x < m->mmul1hi){
+ if(x+1 - m->mmul1lo < m->mmul1hi - x)
+ m->mmul1lo = x+1;
+ else
+ m->mmul1hi = x;
+ }
+ }
+ pte = UINT2PTR(KADDR(PPN(*l1)));
+ if (Debug) {
+ iprint("pte %#p index %ld was %#ux\n", pte, L2X(va), *(pte+L2X(va)));
+ if (*(pte+L2X(va)))
+ panic("putmmu: old l2 pte non-zero; stuck?");
+ }
+
+ /* protection bits are
+ * PTERONLY|PTEVALID;
+ * PTEWRITE|PTEVALID;
+ * PTEWRITE|PTEUNCACHED|PTEVALID;
+ */
+ x = Small;
+ if(!(pa & PTEUNCACHED))
+ x |= L2ptedramattrs;
+ if(pa & PTEWRITE)
+ x |= L2AP(Urw);
+ else
+ x |= L2AP(Uro);
+ pte[L2X(va)] = PPN(pa)|x;
+ allcache->wbse(&pte[L2X(va)], sizeof pte[0]);
+
+ /* clear out the current entry */
+ mmuinvalidateaddr(PPN(va));
+
+ /* write back dirty entries - we need this because the pio() in
+ * fault.c is writing via a different virt addr and won't clean
+ * its changes out of the dcache. Page coloring doesn't work
+ * on this mmu because the virtual cache is set associative
+ * rather than direct mapped.
+ */
+ l1cache->wb();
+
+ if(page->cachectl[0] == PG_TXTFLUSH){
+ /* pio() sets PG_TXTFLUSH whenever a text pg has been written */
+ cacheiinv();
+ page->cachectl[0] = PG_NOFLUSH;
+ }
+ if (Debug)
+ iprint("putmmu %#p %#p %#p\n", va, pa, PPN(pa)|x);
+}
+
+void*
+mmuuncache(void* v, usize size)
+{
+ int x;
+ PTE *pte;
+ uintptr va;
+
+ /*
+ * Simple helper for ucalloc().
+ * Uncache a Section, must already be
+ * valid in the MMU.
+ */
+ va = PTR2UINT(v);
+ assert(!(va & (1*MiB-1)) && size == 1*MiB);
+
+ x = L1X(va);
+ pte = &m->mmul1[x];
+ if((*pte & (Section|Coarse)) != Section)
+ return nil;
+ *pte &= ~L1ptedramattrs;
+ *pte |= L1sharable;
+ mmuinvalidateaddr(va);
+ allcache->wbse(pte, 4);
+
+ return v;
+}
+
+uintptr
+mmukmap(uintptr va, uintptr pa, usize size)
+{
+ int x;
+ PTE *pte;
+
+ /*
+ * Stub.
+ */
+ assert(!(va & (1*MiB-1)) && !(pa & (1*MiB-1)) && size == 1*MiB);
+
+ x = L1X(va);
+ pte = &m->mmul1[x];
+ if(*pte != Fault)
+ return 0;
+ *pte = pa|Dom0|L1AP(Krw)|Section;
+ mmuinvalidateaddr(va);
+ allcache->wbse(pte, 4);
+
+ return va;
+}
+
+uintptr
+mmukunmap(uintptr va, uintptr pa, usize size)
+{
+ int x;
+ PTE *pte;
+
+ /*
+ * Stub.
+ */
+ assert(!(va & (1*MiB-1)) && !(pa & (1*MiB-1)) && size == 1*MiB);
+
+ x = L1X(va);
+ pte = &m->mmul1[x];
+ if(*pte != (pa|Dom0|L1AP(Krw)|Section))
+ return 0;
+ *pte = Fault;
+ mmuinvalidateaddr(va);
+ allcache->wbse(pte, 4);
+
+ return va;
+}
+
+/*
+ * Return the number of bytes that can be accessed via KADDR(pa).
+ * If pa is not a valid argument to KADDR, return 0.
+ */
+uintptr
+cankaddr(uintptr pa)
+{
+ if((PHYSDRAM == 0 || pa >= PHYSDRAM) && pa < PHYSDRAM+memsize)
+ return PHYSDRAM+memsize - pa;
+ return 0;
+}
+
+/* from 386 */
+void*
+vmap(uintptr pa, usize size)
+{
+ uintptr pae, va;
+ usize o, osize;
+
+ /*
+ * XXX - replace with new vm stuff.
+ * Crock after crock - the first 4MB is mapped with 2MB pages
+ * so catch that and return good values because the current mmukmap
+ * will fail.
+ */
+ if(pa+size < 4*MiB)
+ return UINT2PTR(kseg0|pa);
+
+ osize = size;
+ o = pa & (BY2PG-1);
+ pa -= o;
+ size += o;
+ size = ROUNDUP(size, BY2PG);
+
+ va = kseg0|pa;
+ pae = mmukmap(va, pa, size);
+ if(pae == 0 || pae-size != pa)
+ panic("vmap(%#p, %ld) called from %#p: mmukmap fails %#p",
+ pa+o, osize, getcallerpc(&pa), pae);
+
+ return UINT2PTR(va+o);
+}
+
+/* from 386 */
+void
+vunmap(void* v, usize size)
+{
+ /*
+ * XXX - replace with new vm stuff.
+ * Can't do this until do real vmap for all space that
+ * might be used, e.g. stuff below 1MB which is currently
+ * mapped automagically at boot but that isn't used (or
+ * at least shouldn't be used) by the kernel.
+ upafree(PADDR(v), size);
+ */
+ USED(v, size);
+}
+
+/*
+ * Notes.
+ * Everything is in domain 0;
+ * domain 0 access bits in the DAC register are set
+ * to Client, which means access is controlled by the
+ * permission values set in the PTE.
+ *
+ * L1 access control for the kernel is set to 1 (RW,
+ * no user mode access);
+ * L2 access control for the kernel is set to 1 (ditto)
+ * for all 4 AP sets;
+ * L1 user mode access is never set;
+ * L2 access control for user mode is set to either
+ * 2 (RO) or 3 (RW) depending on whether text or data,
+ * for all 4 AP sets.
+ * (To get kernel RO set AP to 0 and S bit in control
+ * register c1).
+ * Coarse L1 page-tables are used. They have 256 entries
+ * and so consume 1024 bytes per table.
+ * Small L2 page-tables are used. They have 1024 entries
+ * and so consume 4096 bytes per table.
+ *
+ * 4KiB. That's the size of 1) a page, 2) the
+ * size allocated for an L2 page-table page (note only 1KiB
+ * is needed per L2 page - to be dealt with later) and
+ * 3) the size of the area in L1 needed to hold the PTEs
+ * to map 1GiB of user space (0 -> 0x3fffffff, 1024 entries).
+ */
diff --git a/sys/src/9/teg2/notes/assumes-hz-under-1000 b/sys/src/9/teg2/notes/assumes-hz-under-1000
new file mode 100644
index 000000000..10dd12390
--- /dev/null
+++ b/sys/src/9/teg2/notes/assumes-hz-under-1000
@@ -0,0 +1,4 @@
+./dat.h:9: #define MS2HZ (1000/HZ) /* millisec per clock tick */
+./random.c:87: addclock0link(randomclock, 1000/HZ);
+../port/portclock.c:255: ms = 1000/HZ;
+../port/portfns.h:335: #define TK2MS(x) ((x)*(1000/HZ))
diff --git a/sys/src/9/teg2/notes/bug.rfe b/sys/src/9/teg2/notes/bug.rfe
new file mode 100644
index 000000000..c9577a410
--- /dev/null
+++ b/sys/src/9/teg2/notes/bug.rfe
@@ -0,0 +1,41 @@
+/*
+ * return from user-mode exception.
+ * expects new SPSR in R0. R13 must point to ureg->type.
+ */
+_rfue:
+TEXT rfue(SB), 1, $-4
+// CPSID
+// BIC $PsrMbz, R0 /* force little-endian upon return */
+ MOVW R0, SPSR /* ... */
+
+ /*
+ * order on stack is type, psr, pc, but RFEV7 needs pc, psr.
+ * step on type and previous word to hold temporary values.
+ * we could instead change the order in which psr & pc are pushed.
+ */
+ MOVW 4(R13), R1 /* psr */
+ MOVW 8(R13), R2 /* pc */
+ MOVW R2, 4(R13) /* pc */
+ MOVW R1, 8(R13) /* psr */
+
+ MOVM.DB.S (R13), [R0-R14] /* restore user registers */
+ ADD $4, R13 /* pop type, sp -> pc */
+
+#ifdef OLDWAY
+ ADD $(2*4), R13 /* pop past ureg->{type+psr} to pc */
+ /*
+ * this used to work on arm arch v[567] and still works on cpu 0.
+ * for some reason it sometimes sets PsrBigend on cpu 1.
+ * Ureg's tail was:
+ *
+ * typedef struct Ureg {
+ * ⋯
+ * ulong type; /* of exception */
+ * ulong psr;
+ * ulong pc; /* interrupted addr */
+ * } Ureg;
+ */
+ RFE /* MOVM.IA.S.W (R13), [R15] */
+#endif
+// SETEND(0)
+ RFEV7W(13)
diff --git a/sys/src/9/teg2/notes/byte-order b/sys/src/9/teg2/notes/byte-order
new file mode 100644
index 000000000..0c9aee2af
--- /dev/null
+++ b/sys/src/9/teg2/notes/byte-order
@@ -0,0 +1,59 @@
+static void
+forcele(void)
+{
+#ifdef BIGENDCHECK
+ union {
+ ulong ul;
+ uchar uc[sizeof(ulong)];
+ } u;
+
+ u.ul = 0;
+ coherence();
+ u.uc[0] = 1;
+ coherence();
+ if (u.ul == 1)
+ return;
+
+ emerge('?');
+ emerge('e');
+ if ((u.ul & MASK(8)) == 0) {
+ emerge('B');
+ panic("rdbaseticks: cpu%d is big-endian", m->machno);
+ } else {
+ emerge('W');
+ panic("rdbaseticks: cpu%d is whacked-endian", m->machno);
+ }
+#endif
+}
+
+void
+ckbigendian(char *state)
+{
+ int wrong;
+
+ wrong = 0;
+ if (getpsr() & PsrBigend) {
+ setendlittle();
+ wrong++;
+ wave('?');
+ wave('e');
+ wave('p');
+ if (state == nil)
+ state = "running";
+ iprint("cpu%d: %s in big-endian mode\n", m->machno, state);
+ }
+ if (controlget() & CpCee) {
+ wrong++;
+ wave('?');
+ wave('e');
+ wave('e');
+ if (state == nil)
+ state = "running";
+ iprint("cpu%d: %s with big-endian exceptions\n", m->machno, state);
+ }
+ if (wrong) {
+ dumpstack();
+ delay(3000);
+ panic("cpu%d: big-endian", m->machno);
+ }
+}
diff --git a/sys/src/9/teg2/notes/clks b/sys/src/9/teg2/notes/clks
new file mode 100644
index 000000000..14ffc459a
--- /dev/null
+++ b/sys/src/9/teg2/notes/clks
@@ -0,0 +1,19 @@
+see §5.4.40 (p.142) pllx_* (2 regs)
+
+out of u-boot, these are the settings:
+---
+	enabled, not locked
+ enabled, no locked
+ divp == 0 (post divider == 2^0 == 1)
+ divn == 1000 (feedback divider)
+ divm == 12 (input divider)
+ misc 0x100: pllx_cpcon == 1 [ should be 12 ]
+super cclk divider 0x80000000:
+ enabled
+ dividend == 0 (thus 1)
+ divisor == 0 (thus 1)
+super sclk divider 0x0:
+ disabled
+ dividend == 0 (thus 1)
+ divisor == 0 (thus 1)
+---
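+
+assuming the usual 12MHz reference oscillator on the trimslice,
+these settings give
+	pllx out = 12MHz * divn / (divm * 2^divp) = 12 * 1000 / (12 * 1) = 1000MHz
+which matches the 1000MHz cpu clock reported at boot.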
diff --git a/sys/src/9/teg2/notes/movm.w b/sys/src/9/teg2/notes/movm.w
new file mode 100644
index 000000000..a2b22f1e1
--- /dev/null
+++ b/sys/src/9/teg2/notes/movm.w
@@ -0,0 +1,22 @@
+gorka writes:
+---
+I have userspace on the gumstix [xscale, not omap]. The problem that
+got me in trouble was that in lexception.s (or l.s),
+
+ MOVM.DB.W [R0-R14], (R13)
+
+works differently for this architecture (and probably for others, as
+it is unclear how it should behave by reading the arm specs). This
+happens only for kernel faults as the others (syscall, user faults)
+use MOVM.DB.W.S which uses the banked user registers.
+
+The problem is that in this arch the value of R13 saved is the value
+after R13 itself has been modified, whereas in the others (bitsy,
+pico...), it was the value before. Adding 4*15 to the stack before
+the RFE solves the problem.
+---
+
+In fact, the 2005 ARM arch. ref. man. (ARM DDI 0100I) says, under STM (1),
+that if Rn appears in the set of registers (and isn't the first one)
+and .W is specified, the stored value of Rn is unpredictable.
+The arm v7-ar arch. ref. man. says such usage is obsolete.
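+
+so (as a sketch only, per the above) the workaround on the kernel-fault
+return path is to compensate just before the RFE, e.g.:
+
+	ADD	$(4*15), R13	/* stored R13 was the post-decrement value */
+	RFE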
diff --git a/sys/src/9/teg2/notes/pci b/sys/src/9/teg2/notes/pci
new file mode 100644
index 000000000..ee2063eb5
--- /dev/null
+++ b/sys/src/9/teg2/notes/pci
@@ -0,0 +1,29 @@
+
+Plan 9 from Bell Labs
+
+127 holes free 213327872 bytes free
+l1: int split i&d, 4 ways 256 sets 32 bytes/line; can WB; can write-allocate; l1 I policy VIPT
+l2: ext unified, 8 ways 512 sets 32 bytes/line; can WT; can WB; can write-allocate
+fp: arm arch VFPv3+ with null subarch
+1000 mips (single-issue), 1980 mips (dual-issue)
+cpu0: 1000MHz ARM Cortex-A9
+pci: 0x80000000: nvidia, rev 0xa0 class 0x060000 misc 0x00010008
+ cfg sp ecfg sp downstream pref. !pref.
+axi bar sz 00000100 00000100 00000010 00010000 00010000 00000000
+axi bar start 80004000 80104000 80400000 a0000000 90000000 00000000
+fcpi bar fdff0000 fe100000 fdfc0000 00a00001 00900001 00000000
+cache bar 00000000 00000000 00000000 00000000
+ 00000000 fc000000
+msi bar 00000000 00000000 00000000
+ 00000000 00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00008e05 00000000 00000001 00000004 a0024001
+00000000 00007fff 0000003f 00000000 00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000 00000840 00000000 00103020 00000000
+3f3f003f 00000332 00000000 00100000 00000009 00000000 00000009 00000001
+00000000 00000000 00000001
+panic: external abort 0x8 pc 0xc0486b40 addr 0x8000392c
+cpu0: exiting
+archreboot: reset!
+
+
diff --git a/sys/src/9/teg2/notes/pci.2.buses b/sys/src/9/teg2/notes/pci.2.buses
new file mode 100644
index 000000000..b3e8de2e4
--- /dev/null
+++ b/sys/src/9/teg2/notes/pci.2.buses
@@ -0,0 +1,78 @@
+
+Plan 9 from Bell Labs
+
+127 holes free 213327872 bytes free
+l1: int split i&d, 4 ways 256 sets 32 bytes/line; can WB; can write-allocate; l1 I policy VIPT
+l2: ext unified, 8 ways 512 sets 32 bytes/line; can WT; can WB; can write-allocate
+fp: arm arch VFPv3+ with null subarch
+1000 mips (single-issue), 1980 mips (dual-issue)
+cpu0: 1000MHz ARM Cortex-A9
+pci: 0x80000000: nvidia, rev 0xa0 class 0x060000 misc 0x00010008
+pci->ioaddrs 0x20000101
+pci->ioaddrhi 0x80408040
+pci->memaddrs 0x8ff09000
+scanning pci bus 0...tbdf 0xc000000 probe 0x80104000 failed
+tbdf 0xc000800 probe 0x80104800 failed
+tbdf 0xc001000 probe 0x80105000 failed
+tbdf 0xc001800 probe 0x80105800 failed
+tbdf 0xc002000 probe 0x80106000 failed
+tbdf 0xc002800 probe 0x80106800 failed
+tbdf 0xc003000 probe 0x80107000 failed
+tbdf 0xc003800 probe 0x80107800 failed
+tbdf 0xc004000 probe 0x80108000 failed
+tbdf 0xc004800 probe 0x80108800 failed
+tbdf 0xc005000 probe 0x80109000 failed
+tbdf 0xc005800 probe 0x80109800 failed
+tbdf 0xc006000 probe 0x8010a000 failed
+tbdf 0xc006800 probe 0x8010a800 failed
+tbdf 0xc007000 probe 0x8010b000 failed
+tbdf 0xc007800 probe 0x8010b800 failed
+scanning pci bus 1...tbdf 0xc010000 probe 0x80114000 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010800 probe 0x80114800 failed
+tbdf 0xc011000 probe 0x80115000 failed
+tbdf 0xc011800 probe 0x80115800 failed
+tbdf 0xc012000 probe 0x80116000 failed
+tbdf 0xc012800 probe 0x80116800 failed
+tbdf 0xc013000 probe 0x80117000 failed
+tbdf 0xc013800 probe 0x80117800 failed
+tbdf 0xc014000 probe 0x80118000 failed
+tbdf 0xc014800 probe 0x80118800 failed
+tbdf 0xc015000 probe 0x80119000 failed
+tbdf 0xc015800 probe 0x80119800 failed
+tbdf 0xc016000 probe 0x8011a000 failed
+tbdf 0xc016800 probe 0x8011a800 failed
+tbdf 0xc017000 probe 0x8011b000 failed
+tbdf 0xc017800 probe 0x8011b800 failed
+scanning pci bus 2...tbdf 0xc020000 probe 0x80124000 failed
+tbdf 0xc020800 probe 0x80124800
+
diff --git a/sys/src/9/teg2/nvram b/sys/src/9/teg2/nvram
new file mode 100644
index 000000000..a64a5a93f
--- /dev/null
+++ b/sys/src/9/teg2/nvram
Binary files differ
diff --git a/sys/src/9/teg2/pci.c b/sys/src/9/teg2/pci.c
new file mode 100644
index 000000000..cd13902e8
--- /dev/null
+++ b/sys/src/9/teg2/pci.c
@@ -0,0 +1,853 @@
+/*
+ * PCI support code.
+ * Needs a massive rewrite.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#define DBG if(0) pcilog
+
+typedef struct Pci Pci;
+
+struct
+{
+ char output[16*1024];
+ int ptr;
+}PCICONS;
+
+int
+pcilog(char *fmt, ...)
+{
+ int n;
+ va_list arg;
+ char buf[PRINTSIZE];
+
+ va_start(arg, fmt);
+ n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+ va_end(arg);
+
+ memmove(PCICONS.output+PCICONS.ptr, buf, n);
+ PCICONS.ptr += n;
+ return n;
+}
+
+enum
+{
+ MaxFNO = 7,
+ MaxUBN = 255,
+};
+
+enum
+{ /* command register */
+ IOen = (1<<0),
+ MEMen = (1<<1),
+ MASen = (1<<2),
+ MemWrInv = (1<<4),
+ PErrEn = (1<<6),
+ SErrEn = (1<<8),
+};
+
+typedef struct {
+ ulong cap;
+ ulong ctl;
+} Capctl;
+typedef struct {
+ Capctl dev;
+ Capctl link;
+ Capctl slot;
+} Devlinkslot;
+
+/* capability list id 0x10 is pci-e */
+struct Pci {
+ /* pci-compatible config */
+ /* what io.h calls type 0 & type 1 pre-defined header */
+ ulong id;
+ ulong cs;
+ ulong revclass;
+ ulong misc; /* cache line size, latency timer, header type, bist */
+ ulong bar[2]; /* always 0 on tegra 2 */
+
+ /* types 1 & 2 pre-defined header */
+ ulong bus;
+ ulong ioaddrs;
+ ulong memaddrs;
+ ulong prefmem;
+ ulong prefbasehi;
+ ulong preflimhi;
+ /* type 2 pre-defined header only */
+ ulong ioaddrhi;
+ ulong cfgcapoff; /* offset in cfg. space to cap. list (0x40) */
+ ulong rom;
+ ulong intr; /* PciINT[LP] */
+ /* subsystem capability regs */
+ ulong subsysid;
+ ulong subsyscap;
+ /* */
+
+ Capctl pwrmgmt;
+
+ /* msi */
+ ulong msictlcap;
+ ulong msimsgaddr[2]; /* little-endian */
+ ulong msimsgdata;
+
+ /* pci-e cap. */
+ uchar _pad0[0x80-0x60];
+ ulong pciecap;
+ Devlinkslot port0;
+ ulong rootctl;
+ ulong rootsts;
+ Devlinkslot port1;
+
+ /* 0xbc */
+
+};
+
+enum {
+ /* offsets from soc.pci */
+ Port0 = 0,
+ Port1 = 0x1000,
+ Pads = 0x3000,
+ Afi = 0x3800,
+ Aficfg = Afi + 0xac,
+ Cfgspace = 0x4000,
+ Ecfgspace = 0x104000,
+
+ /* cs bits */
+ Iospace = 1<<0,
+ Memspace = 1<<1,
+ Busmaster = 1<<2,
+
+ /* Aficfg bits */
+ Fpcion = 1<<0,
+};
+
+struct Pcictlr {
+ union {
+ uchar _padpci[0x1000];
+ Pci;
+ } ports[2];
+ uchar _padpads[0x1000];
+ uchar pads[0x800];
+ uchar afi[0x800];
+ ulong cfg[0x1000];
+ ulong extcfg[0x1000];
+};
+
+static Lock pcicfglock;
+static Lock pcicfginitlock;
+static int pcicfgmode = -1;
+static int pcimaxbno = 1; /* was 7; only 2 pci buses; touching 3rd hangs */
+static int pcimaxdno;
+static Pcidev* pciroot;
+static Pcidev* pcilist;
+static Pcidev* pcitail;
+
+static int pcicfgrw8(int, int, int, int);
+static int pcicfgrw16(int, int, int, int);
+static int pcicfgrw32(int, int, int, int);
+
+static char* bustypes[] = {
+ "CBUSI",
+ "CBUSII",
+ "EISA",
+ "FUTURE",
+ "INTERN",
+ "ISA",
+ "MBI",
+ "MBII",
+ "MCA",
+ "MPI",
+ "MPSA",
+ "NUBUS",
+ "PCI",
+ "PCMCIA",
+ "TC",
+ "VL",
+ "VME",
+ "XPRESS",
+};
+
+static int
+tbdffmt(Fmt* fmt)
+{
+ char *p;
+ int l, r;
+ uint type, tbdf;
+
+ if((p = malloc(READSTR)) == nil)
+ return fmtstrcpy(fmt, "(tbdfconv)");
+
+ switch(fmt->r){
+ case 'T':
+ tbdf = va_arg(fmt->args, int);
+ if(tbdf == BUSUNKNOWN)
+ snprint(p, READSTR, "unknown");
+ else{
+ type = BUSTYPE(tbdf);
+ if(type < nelem(bustypes))
+ l = snprint(p, READSTR, bustypes[type]);
+ else
+ l = snprint(p, READSTR, "%d", type);
+ snprint(p+l, READSTR-l, ".%d.%d.%d",
+ BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf));
+ }
+ break;
+
+ default:
+ snprint(p, READSTR, "(tbdfconv)");
+ break;
+ }
+ r = fmtstrcpy(fmt, p);
+ free(p);
+
+ return r;
+}
+
+ulong
+pcibarsize(Pcidev *p, int rno)
+{
+ ulong v, size;
+
+ v = pcicfgrw32(p->tbdf, rno, 0, 1);
+ pcicfgrw32(p->tbdf, rno, 0xFFFFFFF0, 0);
+ size = pcicfgrw32(p->tbdf, rno, 0, 1);
+ if(v & 1)
+ size |= 0xFFFF0000;
+ pcicfgrw32(p->tbdf, rno, v, 0);
+
+ return -(size & ~0x0F);
+}
+
+static int
+pcilscan(int bno, Pcidev** list)
+{
+ Pcidev *p, *head, *tail;
+ int dno, fno, i, hdt, l, maxfno, maxubn, rno, sbn, tbdf, ubn;
+
+ maxubn = bno;
+ head = nil;
+ tail = nil;
+ for(dno = 0; dno <= pcimaxdno; dno++){
+ maxfno = 0;
+ for(fno = 0; fno <= maxfno; fno++){
+ /*
+ * For this possible device, form the
+ * bus+device+function triplet needed to address it
+ * and try to read the vendor and device ID.
+ * If successful, allocate a device struct and
+ * start to fill it in with some useful information
+ * from the device's configuration space.
+ */
+ tbdf = MKBUS(BusPCI, bno, dno, fno);
+ l = pcicfgrw32(tbdf, PciVID, 0, 1);
+ if(l == 0xFFFFFFFF || l == 0)
+ continue;
+ p = malloc(sizeof(*p));
+ if(p == nil)
+ panic("pcilscan: no memory");
+ p->tbdf = tbdf;
+ p->vid = l;
+ p->did = l>>16;
+
+ if(pcilist != nil)
+ pcitail->list = p;
+ else
+ pcilist = p;
+ pcitail = p;
+
+ p->pcr = pcicfgr16(p, PciPCR);
+ p->rid = pcicfgr8(p, PciRID);
+ p->ccrp = pcicfgr8(p, PciCCRp);
+ p->ccru = pcicfgr8(p, PciCCRu);
+ p->ccrb = pcicfgr8(p, PciCCRb);
+ p->cls = pcicfgr8(p, PciCLS);
+ p->ltr = pcicfgr8(p, PciLTR);
+
+ p->intl = pcicfgr8(p, PciINTL);
+
+ /*
+ * If the device is a multi-function device adjust the
+ * loop count so all possible functions are checked.
+ */
+ hdt = pcicfgr8(p, PciHDT);
+ if(hdt & 0x80)
+ maxfno = MaxFNO;
+
+ /*
+ * If appropriate, read the base address registers
+ * and work out the sizes.
+ */
+ switch(p->ccrb) {
+ case 0x03: /* display controller */
+ /* fall through */
+ case 0x01: /* mass storage controller */
+ case 0x02: /* network controller */
+ case 0x04: /* multimedia device */
+ case 0x07: /* simple comm. controllers */
+ case 0x08: /* base system peripherals */
+ case 0x09: /* input devices */
+ case 0x0A: /* docking stations */
+ case 0x0B: /* processors */
+ case 0x0C: /* serial bus controllers */
+ if((hdt & 0x7F) != 0)
+ break;
+ rno = PciBAR0 - 4;
+ for(i = 0; i < nelem(p->mem); i++) {
+ rno += 4;
+ p->mem[i].bar = pcicfgr32(p, rno);
+ p->mem[i].size = pcibarsize(p, rno);
+ }
+ break;
+
+ case 0x00:
+ case 0x05: /* memory controller */
+ case 0x06: /* bridge device */
+ default:
+ break;
+ }
+
+ if(head != nil)
+ tail->link = p;
+ else
+ head = p;
+ tail = p;
+ }
+ }
+
+ *list = head;
+ for(p = head; p != nil; p = p->link){
+ /*
+ * Find PCI-PCI bridges and recursively descend the tree.
+ */
+ if(p->ccrb != 0x06 || p->ccru != 0x04)
+ continue;
+
+ /*
+ * If the secondary or subordinate bus number is not
+ * initialised try to do what the PCI BIOS should have
+ * done and fill in the numbers as the tree is descended.
+ * On the way down the subordinate bus number is set to
+ * the maximum as it's not known how many buses are behind
+ * this one; the final value is set on the way back up.
+ */
+ sbn = pcicfgr8(p, PciSBN);
+ ubn = pcicfgr8(p, PciUBN);
+
+ if(sbn == 0 || ubn == 0) {
+ sbn = maxubn+1;
+ /*
+ * Make sure memory, I/O and master enables are
+ * off, set the primary, secondary and subordinate
+ * bus numbers and clear the secondary status before
+ * attempting to scan the secondary bus.
+ *
+ * Initialisation of the bridge should be done here.
+ */
+ pcicfgw32(p, PciPCR, 0xFFFF0000);
+ l = (MaxUBN<<16)|(sbn<<8)|bno;
+ pcicfgw32(p, PciPBN, l);
+ pcicfgw16(p, PciSPSR, 0xFFFF);
+ maxubn = pcilscan(sbn, &p->bridge);
+ l = (maxubn<<16)|(sbn<<8)|bno;
+
+ pcicfgw32(p, PciPBN, l);
+ }
+ else {
+ if(ubn > maxubn)
+ maxubn = ubn;
+ pcilscan(sbn, &p->bridge);
+ }
+ }
+
+ return maxubn;
+}
+
+extern void rtl8169interrupt(Ureg*, void* arg);
+
+/* not used yet */
+static void
+pciintr(Ureg *ureg, void *p)
+{
+ rtl8169interrupt(ureg, p); /* HACK */
+}
+
+static void
+pcicfginit(void)
+{
+ char *p;
+ Pci *pci = (Pci *)soc.pci;
+ Pcidev **list;
+ int bno, n;
+
+ lock(&pcicfginitlock);
+ if(pcicfgmode != -1) {
+ unlock(&pcicfginitlock);
+ return;
+ }
+
+ /*
+ * TrimSlice # pci 0 1
+ * Scanning PCI devices on bus 0 1
+ * BusDevFun VendorId DeviceId Device Class Sub-Class
+ * _____________________________________________________________
+ * 00.00.00 0x10de 0x0bf0 Bridge device 0x04
+ * 01.00.00 0x10ec 0x8168 Network controller 0x00
+ *
+ * thus pci bus 0 has a bridge with, perhaps, an ide/sata ctlr behind,
+ * and pci bus 1 has the realtek 8169 on it:
+ *
+ * TrimSlice # pci 1 long
+ * Scanning PCI devices on bus 1
+ *
+ * Found PCI device 01.00.00:
+ * vendor ID = 0x10ec
+ * device ID = 0x8168
+ * command register = 0x0007
+ * status register = 0x0010
+ * revision ID = 0x03
+ * class code = 0x02 (Network controller)
+ * sub class code = 0x00
+ * programming interface = 0x00
+ * cache line = 0x08
+ * base address 0 = 0x80400001 config
+ * base address 1 = 0x00000000 (ext. config)
+ * base address 2 = 0xa000000c "downstream"
+ * base address 3 = 0x00000000 (prefetchable)
+ * base address 4 = 0xa000400c not "
+ * base address 5 = 0x00000000 (unused)
+ */
+ n = pci->id >> 16;
+ if (((pci->id & MASK(16)) != Vnvidia || (n != 0xbf0 && n != 0xbf1)) &&
+ (pci->id & MASK(16)) != Vrealtek) {
+ print("no pci controller at %#p\n", pci);
+ unlock(&pcicfginitlock);
+ return;
+ }
+ if (0)
+ iprint("pci: %#p: nvidia, rev %#ux class %#6.6lux misc %#8.8lux\n",
+ pci, (uchar)pci->revclass, pci->revclass >> 8,
+ pci->misc);
+
+ pci->cs &= Iospace;
+ pci->cs |= Memspace | Busmaster;
+ coherence();
+
+ pcicfgmode = 1;
+// pcimaxdno = 31;
+ pcimaxdno = 15; /* for trimslice */
+
+ fmtinstall('T', tbdffmt);
+
+ if(p = getconf("*pcimaxbno")){
+ n = strtoul(p, 0, 0);
+ if(n < pcimaxbno)
+ pcimaxbno = n;
+ }
+ if(p = getconf("*pcimaxdno")){
+ n = strtoul(p, 0, 0);
+ if(n < pcimaxdno)
+ pcimaxdno = n;
+ }
+
+ list = &pciroot;
+ /* was bno = 0; trimslice needs to start at 1 */
+ for(bno = 1; bno <= pcimaxbno; bno++) {
+ bno = pcilscan(bno, list);
+ while(*list)
+ list = &(*list)->link;
+ }
+ unlock(&pcicfginitlock);
+
+ if(getconf("*pcihinv"))
+ pcihinv(nil);
+}
+
+enum {
+ Afiintrcode = 0xb8,
+};
+
+void
+pcieintrdone(void) /* dismiss pci-e intr */
+{
+ ulong *afi;
+
+ afi = (ulong *)(soc.pci + Afi);
+ afi[Afiintrcode/sizeof *afi] = 0; /* magic */
+ coherence();
+}
+
+/*
+ * whole config space for tbdf should be at (return address - rno).
+ */
+static void *
+tegracfgaddr(int tbdf, int rno)
+{
+ uintptr addr;
+
+ addr = soc.pci + (rno < 256? Cfgspace: Ecfgspace) + BUSBDF(tbdf) + rno;
+// if (BUSBNO(tbdf) == 1)
+// addr += Port1;
+ return (void *)addr;
+}
+
+static int
+pcicfgrw8(int tbdf, int rno, int data, int read)
+{
+ int x;
+ void *addr;
+
+ if(pcicfgmode == -1)
+ pcicfginit();
+
+ x = -1;
+ if(BUSDNO(tbdf) > pcimaxdno)
+ return x;
+
+ addr = tegracfgaddr(tbdf, rno);
+
+ lock(&pcicfglock);
+ if(read)
+ x = *(uchar *)addr;
+ else
+ *(uchar *)addr = data;
+ unlock(&pcicfglock);
+
+ return x;
+}
+
+int
+pcicfgr8(Pcidev* pcidev, int rno)
+{
+ return pcicfgrw8(pcidev->tbdf, rno, 0, 1);
+}
+
+void
+pcicfgw8(Pcidev* pcidev, int rno, int data)
+{
+ pcicfgrw8(pcidev->tbdf, rno, data, 0);
+}
+
+static int
+pcicfgrw16(int tbdf, int rno, int data, int read)
+{
+ int x;
+ void *addr;
+
+ if(pcicfgmode == -1)
+ pcicfginit();
+
+ x = -1;
+ if(BUSDNO(tbdf) > pcimaxdno)
+ return x;
+
+ addr = tegracfgaddr(tbdf, rno);
+
+ lock(&pcicfglock);
+ if(read)
+ x = *(ushort *)addr;
+ else
+ *(ushort *)addr = data;
+ unlock(&pcicfglock);
+
+ return x;
+}
+
+int
+pcicfgr16(Pcidev* pcidev, int rno)
+{
+ return pcicfgrw16(pcidev->tbdf, rno, 0, 1);
+}
+
+void
+pcicfgw16(Pcidev* pcidev, int rno, int data)
+{
+ pcicfgrw16(pcidev->tbdf, rno, data, 0);
+}
+
+static int
+pcicfgrw32(int tbdf, int rno, int data, int read)
+{
+ int x;
+ vlong v;
+ void *addr;
+
+ if(pcicfgmode == -1)
+ pcicfginit();
+
+ x = -1;
+ if(BUSDNO(tbdf) > pcimaxdno)
+ return x;
+
+ addr = tegracfgaddr(tbdf, rno);
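+	/* probe first: touching config space of an absent device raises an external abort */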
+ v = probeaddr((uintptr)addr);
+ if (v < 0)
+ return -1;
+
+ lock(&pcicfglock);
+ if(read)
+ x = *(ulong *)addr;
+ else
+ *(ulong *)addr = data;
+ unlock(&pcicfglock);
+
+ return x;
+}
+
+int
+pcicfgr32(Pcidev* pcidev, int rno)
+{
+ return pcicfgrw32(pcidev->tbdf, rno, 0, 1);
+}
+
+void
+pcicfgw32(Pcidev* pcidev, int rno, int data)
+{
+ pcicfgrw32(pcidev->tbdf, rno, data, 0);
+}
+
+Pcidev*
+pcimatch(Pcidev* prev, int vid, int did)
+{
+ if(pcicfgmode == -1)
+ pcicfginit();
+
+ if(prev == nil)
+ prev = pcilist;
+ else
+ prev = prev->list;
+
+ while(prev != nil){
+ if((vid == 0 || prev->vid == vid)
+ && (did == 0 || prev->did == did))
+ break;
+ prev = prev->list;
+ }
+ return prev;
+}
+
+Pcidev*
+pcimatchtbdf(int tbdf)
+{
+ Pcidev *pcidev;
+
+ if(pcicfgmode == -1)
+ pcicfginit();
+
+ for(pcidev = pcilist; pcidev != nil; pcidev = pcidev->list) {
+ if(pcidev->tbdf == tbdf)
+ break;
+ }
+ return pcidev;
+}
+
+static void
+pcilhinv(Pcidev* p)
+{
+ int i;
+ Pcidev *t;
+
+ if(p == nil) {
+ putstrn(PCICONS.output, PCICONS.ptr);
+ p = pciroot;
+ print("bus dev type vid did intl memory\n");
+ }
+ for(t = p; t != nil; t = t->link) {
+ print("%d %2d/%d %.2ux %.2ux %.2ux %.4ux %.4ux %3d ",
+ BUSBNO(t->tbdf), BUSDNO(t->tbdf), BUSFNO(t->tbdf),
+ t->ccrb, t->ccru, t->ccrp, t->vid, t->did, t->intl);
+
+ for(i = 0; i < nelem(p->mem); i++) {
+ if(t->mem[i].size == 0)
+ continue;
+ print("%d:%.8lux %d ", i,
+ t->mem[i].bar, t->mem[i].size);
+ }
+ if(t->bridge)
+ print("->%d", BUSBNO(t->bridge->tbdf));
+ print("\n");
+ }
+ while(p != nil) {
+ if(p->bridge != nil)
+ pcilhinv(p->bridge);
+ p = p->link;
+ }
+}
+
+void
+pcihinv(Pcidev* p)
+{
+ if(pcicfgmode == -1)
+ pcicfginit();
+ lock(&pcicfginitlock);
+ pcilhinv(p);
+ unlock(&pcicfginitlock);
+}
+
+void
+pcireset(void)
+{
+ Pcidev *p;
+
+ if(pcicfgmode == -1)
+ pcicfginit();
+
+ for(p = pcilist; p != nil; p = p->list) {
+ /* don't mess with the bridges */
+ if(p->ccrb == 0x06)
+ continue;
+ pciclrbme(p);
+ }
+}
+
+void
+pcisetioe(Pcidev* p)
+{
+ p->pcr |= IOen;
+ pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pciclrioe(Pcidev* p)
+{
+ p->pcr &= ~IOen;
+ pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pcisetbme(Pcidev* p)
+{
+ p->pcr |= MASen;
+ pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pciclrbme(Pcidev* p)
+{
+ p->pcr &= ~MASen;
+ pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pcisetmwi(Pcidev* p)
+{
+ p->pcr |= MemWrInv;
+ pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pciclrmwi(Pcidev* p)
+{
+ p->pcr &= ~MemWrInv;
+ pcicfgw16(p, PciPCR, p->pcr);
+}
+
+static int
+pcigetpmrb(Pcidev* p)
+{
+ int ptr;
+
+ if(p->pmrb != 0)
+ return p->pmrb;
+ p->pmrb = -1;
+
+ /*
+ * If there are no extended capabilities implemented,
+ * (bit 4 in the status register) assume there's no standard
+ * power management method.
+ * Find the capabilities pointer based on PCI header type.
+ */
+ if(!(pcicfgr16(p, PciPSR) & 0x0010))
+ return -1;
+ switch(pcicfgr8(p, PciHDT)){
+ default:
+ return -1;
+ case 0: /* all other */
+ case 1: /* PCI to PCI bridge */
+ ptr = 0x34;
+ break;
+ case 2: /* CardBus bridge */
+ ptr = 0x14;
+ break;
+ }
+ ptr = pcicfgr32(p, ptr);
+
+ while(ptr != 0){
+ /*
+ * Check for validity.
+ * Can't be in standard header and must be double
+ * word aligned.
+ */
+ if(ptr < 0x40 || (ptr & ~0xFC))
+ return -1;
+ if(pcicfgr8(p, ptr) == 0x01){
+ p->pmrb = ptr;
+ return ptr;
+ }
+
+ ptr = pcicfgr8(p, ptr+1);
+ }
+
+ return -1;
+}
+
+int
+pcigetpms(Pcidev* p)
+{
+ int pmcsr, ptr;
+
+ if((ptr = pcigetpmrb(p)) == -1)
+ return -1;
+
+ /*
+ * Power Management Register Block:
+ * offset 0: Capability ID
+ * 1: next item pointer
+ * 2: capabilities
+ * 4: control/status
+ * 6: bridge support extensions
+ * 7: data
+ */
+ pmcsr = pcicfgr16(p, ptr+4);
+
+ return pmcsr & 0x0003;
+}
+
+int
+pcisetpms(Pcidev* p, int state)
+{
+ int ostate, pmc, pmcsr, ptr;
+
+ if((ptr = pcigetpmrb(p)) == -1)
+ return -1;
+
+ pmc = pcicfgr16(p, ptr+2);
+ pmcsr = pcicfgr16(p, ptr+4);
+ ostate = pmcsr & 0x0003;
+ pmcsr &= ~0x0003;
+
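+	/* D0 and D3 are mandatory; D1 and D2 are optional, advertised by pmc bits 9 and 10 */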
+ switch(state){
+ default:
+ return -1;
+ case 0:
+ break;
+ case 1:
+ if(!(pmc & 0x0200))
+ return -1;
+ break;
+ case 2:
+ if(!(pmc & 0x0400))
+ return -1;
+ break;
+ case 3:
+ break;
+ }
+ pmcsr |= state;
+ pcicfgw16(p, ptr+4, pmcsr);
+
+ return ostate;
+}
diff --git a/sys/src/9/teg2/rebootcode.s b/sys/src/9/teg2/rebootcode.s
new file mode 100644
index 000000000..c806009b7
--- /dev/null
+++ b/sys/src/9/teg2/rebootcode.s
@@ -0,0 +1,208 @@
+/*
+ * arm v7 reboot code
+ *
+ * must fit in 11K to avoid stepping on PTEs; see mem.h.
+ * cache parameters are at CACHECONF.
+ */
+#include "arm.s"
+
+/*
+ * All caches but L1 should be off before calling this.
+ * Turn off MMU, then copy the new kernel to its correct location
+ * in physical memory. Then jump to the start of the kernel.
+ */
+
+/* main(PADDR(entry), PADDR(code), size); */
+TEXT main(SB), 1, $-4
+ MOVW $setR12(SB), R12
+ MOVW R0, p1+0(FP) /* destination, passed in R0 */
+ CPSID /* splhi */
+
+PUTC('R')
+ BL cachesoff(SB)
+ /* now back in 29- or 26-bit addressing, mainly for SB */
+ /* double mapping of PHYSDRAM & KZERO now in effect */
+
+PUTC('e')
+ /* before turning MMU off, switch to PHYSDRAM-based addresses */
+ DMB
+
+ MOVW $KSEGM, R7 /* clear segment bits */
+ MOVW $PHYSDRAM, R0 /* set dram base bits */
+ BIC R7, R12 /* adjust SB */
+ ORR R0, R12
+
+ BL _r15warp(SB)
+ /* don't care about saving R14; we're not returning */
+
+ /*
+ * now running in PHYSDRAM segment, not KZERO.
+ */
+
+PUTC('b')
+ /* invalidate mmu mappings */
+ MOVW $KZERO, R0 /* some valid virtual address */
+ MTCP CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+ BARRIERS
+
+PUTC('o')
+ /*
+ * turn the MMU off
+ */
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(0)
+ BIC $CpCmmu, R0
+ MTCP CpSC, 0, R0, C(CpCONTROL), C(0)
+ BARRIERS
+
+PUTC('o')
+ /* copy in arguments from stack frame before moving stack */
+ MOVW p2+4(FP), R4 /* phys source */
+ MOVW n+8(FP), R5 /* byte count */
+ MOVW p1+0(FP), R6 /* phys destination */
+
+ /* set up a new stack for local vars and memmove args */
+ MOVW R6, SP /* tiny trampoline stack */
+ SUB $(0x20 + 4), SP /* back up before a.out header */
+
+// MOVW R14, -48(SP) /* store return addr */
+ SUB $48, SP /* allocate stack frame */
+
+ MOVW R5, 40(SP) /* save count */
+ MOVW R6, 44(SP) /* save dest/entry */
+
+ /* copy the new kernel into place */
+ DELAY(printloop2, 2)
+PUTC('t')
+ MOVW 40(SP), R5 /* restore count */
+ MOVW 44(SP), R6 /* restore dest/entry */
+ MOVW R6, 0(SP) /* normally saved LR goes here */
+ MOVW R6, 4(SP) /* push dest */
+ MOVW R6, R0
+ MOVW R4, 8(SP) /* push src */
+ MOVW R5, 12(SP) /* push size */
+ BL memmove(SB)
+
+PUTC('-')
+PUTC('>')
+ DELAY(printloopret, 1)
+PUTC('\r')
+ DELAY(printloopnl, 1)
+PUTC('\n')
+/*
+ * jump to kernel entry point. Note the true kernel entry point is
+ * the virtual address KZERO|R6, but this must wait until
+ * the MMU is enabled by the kernel in l.s
+ */
+ MOVW 44(SP), R6 /* restore R6 (dest/entry) */
+ ORR R6, R6 /* NOP: avoid link bug */
+ B (R6)
+PUTC('?')
+PUTC('?')
+ B 0(PC)
+
+/*
+ * turn the caches off, double map PHYSDRAM & KZERO, invalidate TLBs, revert
+ * to tiny addresses. upon return, it will be safe to turn off the mmu.
+ */
+TEXT cachesoff(SB), 1, $-4
+ MOVM.DB.W [R14,R1-R10], (R13) /* save regs on stack */
+ CPSID
+ BARRIERS
+
+ SUB $12, SP /* paranoia */
+ BL cacheuwbinv(SB)
+ ADD $12, SP /* paranoia */
+
+ MFCP CpSC, 0, R0, C(CpCONTROL), C(0)
+ BIC $(CpCicache|CpCdcache), R0
+ MTCP CpSC, 0, R0, C(CpCONTROL), C(0) /* caches off */
+ BARRIERS
+
+ /*
+ * caches are off
+ */
+
+ /* invalidate stale TLBs before changing them */
+ MOVW $KZERO, R0 /* some valid virtual address */
+ MTCP CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+ BARRIERS
+
+ /* redo double map of PHYSDRAM, KZERO */
+ MOVW $PHYSDRAM, R3
+ CMP $KZERO, R3
+ BEQ noun2map
+ MOVW $(L1+L1X(PHYSDRAM)), R4 /* address of PHYSDRAM's PTE */
+ MOVW $PTEDRAM, R2 /* PTE bits */
+ MOVW $DOUBLEMAPMBS, R5
+_ptrdbl:
+ ORR R3, R2, R1 /* first identity-map 0 to 0, etc. */
+ MOVW R1, (R4)
+ ADD $4, R4 /* bump PTE address */
+ ADD $MiB, R3 /* bump pa */
+ SUB.S $1, R5
+ BNE _ptrdbl
+noun2map:
+
+ /*
+ * flush stale TLB entries
+ */
+
+ BARRIERS
+ MOVW $KZERO, R0 /* some valid virtual address */
+ MTCP CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+ BARRIERS
+
+ /* switch back to PHYSDRAM addressing, mainly for SB */
+ MOVW $KSEGM, R7 /* clear segment bits */
+ MOVW $PHYSDRAM, R0 /* set dram base bits */
+ BIC R7, R12 /* adjust SB */
+ ORR R0, R12
+ BIC R7, SP
+ ORR R0, SP
+
+ MOVM.IA.W (R13), [R14,R1-R10] /* restore regs from stack */
+
+ MOVW $KSEGM, R0 /* clear segment bits */
+ BIC R0, R14 /* adjust link */
+ MOVW $PHYSDRAM, R0 /* set dram base bits */
+ ORR R0, R14
+
+ RET
+
+TEXT _r15warp(SB), 1, $-4
+ BIC R7, R14 /* link */
+ ORR R0, R14
+
+ BIC R7, R13 /* SP */
+ ORR R0, R13
+ RET
+
+TEXT panic(SB), 1, $-4 /* stub */
+PUTC('?')
+PUTC('!')
+ RET
+TEXT pczeroseg(SB), 1, $-4 /* stub */
+ RET
+
+#include "cache.v7.s"
+
+/* modifies R0, R3—R6 */
+TEXT printhex(SB), 1, $-4
+ MOVW R0, R3
+ MOVW $(32-4), R5 /* bits to shift right */
+nextdig:
+ SRA R5, R3, R4
+ AND $0xf, R4
+ ADD $'0', R4
+ CMP.S $'9', R4
+ BLE nothex /* if R4 <= 9, jump */
+ ADD $('a'-('9'+1)), R4
+nothex:
+ PUTC(R4)
+ SUB.S $4, R5
+ BGE nextdig
+
+ PUTC('\r')
+ PUTC('\n')
+ DELAY(proct, 50)
+ RET
diff --git a/sys/src/9/teg2/softfpu.c b/sys/src/9/teg2/softfpu.c
new file mode 100644
index 000000000..752391f2c
--- /dev/null
+++ b/sys/src/9/teg2/softfpu.c
@@ -0,0 +1,129 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+int
+fpudevprocio(Proc* proc, void* a, long n, uintptr offset, int write)
+{
+ /*
+ * Called from procdevtab.read and procdevtab.write
+	 * to allow user processes access to the FPU registers.
+ * This is the only FPU routine which is called directly
+ * from the port code; it would be nice to have dynamic
+ * creation of entries in the device file trees...
+ */
+ USED(proc, a, n, offset, write);
+
+ return 0;
+}
+
+void
+fpunotify(Ureg*)
+{
+ /*
+ * Called when a note is about to be delivered to a
+ * user process, usually at the end of a system call.
+ * Note handlers are not allowed to use the FPU so
+ * the state is marked (after saving if necessary) and
+ * checked in the Device Not Available handler.
+ */
+}
+
+void
+fpunoted(void)
+{
+ /*
+ * Called from sysnoted() via the machine-dependent
+ * noted() routine.
+ * Clear the flag set above in fpunotify().
+ */
+}
+
+void
+fpusysrfork(Ureg*)
+{
+ /*
+ * Called early in the non-interruptible path of
+ * sysrfork() via the machine-dependent syscall() routine.
+ * Save the state so that it can be easily copied
+ * to the child process later.
+ */
+}
+
+void
+fpusysrforkchild(Proc*, Ureg *, Proc*)
+{
+ /*
+ * Called later in sysrfork() via the machine-dependent
+ * sysrforkchild() routine.
+ * Copy the parent FPU state to the child.
+ */
+}
+
+void
+fpuprocsave(Proc*)
+{
+ /*
+ * Called from sched() and sleep() via the machine-dependent
+ * procsave() routine.
+	 * About to go into the scheduler.
+ * If the process wasn't using the FPU
+ * there's nothing to do.
+ */
+}
+
+void
+fpuprocrestore(Proc*)
+{
+ /*
+ * The process has been rescheduled and is about to run.
+ * Nothing to do here right now. If the process tries to use
+ * the FPU again it will cause a Device Not Available
+ * exception and the state will then be restored.
+ */
+}
+
+void
+fpusysprocsetup(Proc*)
+{
+ /*
+ * Disable the FPU.
+ * Called from sysexec() via sysprocsetup() to
+ * set the FPU for the new process.
+ */
+}
+
+void
+fpuinit(void)
+{
+}
+
+int
+fpuemu(Ureg* ureg)
+{
+ int nfp;
+
+ if(waserror()){
+ splhi();
+ postnote(up, 1, up->errstr, NDebug);
+ return 1;
+ }
+ spllo();
+ nfp = fpiarm(ureg);
+ splhi();
+ poperror();
+
+ return nfp;
+}
+
+void
+fpon(void)
+{
+}
+
+void
+fpoff(void)
+{
+}
diff --git a/sys/src/9/teg2/syscall.c b/sys/src/9/teg2/syscall.c
new file mode 100644
index 000000000..adaa39962
--- /dev/null
+++ b/sys/src/9/teg2/syscall.c
@@ -0,0 +1,366 @@
+/* we use l1 and l2 cache ops to help stability. */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../port/systab.h"
+
+#include <tos.h>
+#include "ureg.h"
+
+#include "arm.h"
+
+enum {
+ Psrsysbits = PsrMask | PsrDfiq | PsrDirq | PsrDasabt | PsrMbz,
+};
+
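+/*
+ * frame that notify() pushes on the user stack: the note handler is
+ * entered with sp pointing here, arg0 pointing at the saved Ureg and
+ * arg1 at the note message; noted() uses it to restore the old state.
+ */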
+typedef struct {
+ uintptr ip;
+ Ureg* arg0;
+ char* arg1;
+ char msg[ERRMAX];
+ Ureg* old;
+ Ureg ureg;
+} NFrame;
+
+/*
+ * Return user to state before notify()
+ */
+static void
+noted(Ureg* cur, uintptr arg0)
+{
+ NFrame *nf;
+ Ureg *nur;
+
+ qlock(&up->debug);
+ if(arg0 != NRSTR && !up->notified){
+ qunlock(&up->debug);
+ pprint("call to noted() when not notified\n");
+ pexit("Suicide", 0);
+ }
+ up->notified = 0;
+ fpunoted();
+
+ nf = up->ureg;
+
+ /* sanity clause */
+ if(!okaddr(PTR2UINT(nf), sizeof(NFrame), 0)){
+ qunlock(&up->debug);
+ pprint("bad ureg in noted %#p\n", nf);
+ pexit("Suicide", 0);
+ }
+
+ /* don't let user change system flags */
+ nur = &nf->ureg;
+ nur->psr &= Psrsysbits;
+ nur->psr |= cur->psr & ~Psrsysbits;
+
+ memmove(cur, nur, sizeof(Ureg));
+
+ switch((int)arg0){
+ case NCONT:
+ case NRSTR:
+ if(!okaddr(nur->pc, BY2WD, 0) || !okaddr(nur->sp, BY2WD, 0)){
+ qunlock(&up->debug);
+ pprint("suicide: trap in noted\n");
+ pexit("Suicide", 0);
+ }
+ up->ureg = nf->old;
+ qunlock(&up->debug);
+ break;
+ case NSAVE:
+ if(!okaddr(nur->pc, BY2WD, 0) || !okaddr(nur->sp, BY2WD, 0)){
+ qunlock(&up->debug);
+ pprint("suicide: trap in noted\n");
+ pexit("Suicide", 0);
+ }
+ qunlock(&up->debug);
+
+ splhi();
+ nf->arg1 = nf->msg;
+ nf->arg0 = &nf->ureg;
+ nf->ip = 0;
+ cur->sp = PTR2UINT(nf);
+ break;
+ default:
+ pprint("unknown noted arg %#p\n", arg0);
+ up->lastnote.flag = NDebug;
+ /*FALLTHROUGH*/
+ case NDFLT:
+ if(up->lastnote.flag == NDebug){
+ qunlock(&up->debug);
+ pprint("suicide: %s\n", up->lastnote.msg);
+ }
+ else
+ qunlock(&up->debug);
+ pexit(up->lastnote.msg, up->lastnote.flag != NDebug);
+ }
+}
+
+/*
+ * Call user, if necessary, with note.
+ * Pass user the Ureg struct and the note on his stack.
+ */
+int
+notify(Ureg* ureg)
+{
+ int l;
+ Note *n;
+ u32int s;
+ uintptr sp;
+ NFrame *nf;
+
+ if(up->procctl)
+ procctl(up);
+ if(up->nnote == 0)
+ return 0;
+
+ fpunotify(ureg);
+
+ s = spllo();
+ qlock(&up->debug);
+
+ up->notepending = 0;
+ n = &up->note[0];
+ if(strncmp(n->msg, "sys:", 4) == 0){
+ l = strlen(n->msg);
+ if(l > ERRMAX-23) /* " pc=0x0123456789abcdef\0" */
+ l = ERRMAX-23;
+ snprint(n->msg + l, sizeof n->msg - l, " pc=%#lux", ureg->pc);
+ }
+
+ if(n->flag != NUser && (up->notified || up->notify == 0)){
+ if(n->flag == NDebug)
+ pprint("suicide: %s\n", n->msg);
+ qunlock(&up->debug);
+ pexit(n->msg, n->flag != NDebug);
+ }
+
+ if(up->notified){
+ qunlock(&up->debug);
+ splhi();
+ return 0;
+ }
+
+ if(up->notify == nil){
+ qunlock(&up->debug);
+ pexit(n->msg, n->flag != NDebug);
+ }
+ if(!okaddr(PTR2UINT(up->notify), 1, 0)){
+ pprint("suicide: notify function address %#p\n", up->notify);
+ qunlock(&up->debug);
+ pexit("Suicide", 0);
+ }
+
+ sp = ureg->sp - sizeof(NFrame);
+ if(!okaddr(sp, sizeof(NFrame), 1)){
+ qunlock(&up->debug);
+ pprint("suicide: notify stack address %#p\n", sp);
+ pexit("Suicide", 0);
+ }
+
+ nf = UINT2PTR(sp);
+ memmove(&nf->ureg, ureg, sizeof(Ureg));
+ nf->old = up->ureg;
+ up->ureg = nf;
+ memmove(nf->msg, up->note[0].msg, ERRMAX);
+ nf->arg1 = nf->msg;
+ nf->arg0 = &nf->ureg;
+ nf->ip = 0;
+
+ ureg->sp = sp;
+ ureg->pc = PTR2UINT(up->notify);
+
+ up->notified = 1;
+ up->nnote--;
+ memmove(&up->lastnote, &up->note[0], sizeof(Note));
+ memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
+
+ qunlock(&up->debug);
+ splx(s);
+
+ l1cache->wb(); /* is this needed? */
+ return 1;
+}
+
+void
+syscall(Ureg* ureg)
+{
+ char *e;
+ u32int s;
+ ulong sp;
+ long ret;
+ int i, scallnr;
+ vlong startns, stopns;
+
+ if(!userureg(ureg))
+ panic("syscall: from kernel: pc %#lux r14 %#lux psr %#lux",
+ ureg->pc, ureg->r14, ureg->psr);
+
+ cycles(&up->kentry);
+
+ m->syscall++;
+ up->insyscall = 1;
+ up->pc = ureg->pc;
+ up->dbgreg = ureg;
+
+ scallnr = ureg->r0;
+ up->scallnr = scallnr;
+ if(scallnr == RFORK)
+ fpusysrfork(ureg);
+ spllo();
+ sp = ureg->sp;
+
+ if(up->procctl == Proc_tracesyscall){
+ /*
+ * Redundant validaddr. Do we care?
+ * Tracing syscalls is not exactly a fast path...
+ * Beware, validaddr currently does a pexit rather
+ * than an error if there's a problem; that might
+ * change in the future.
+ */
+ if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
+ validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+ syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
+ up->procctl = Proc_stopme;
+ procctl(up);
+ if (up->syscalltrace)
+ free(up->syscalltrace);
+ up->syscalltrace = nil;
+ }
+
+ up->nerrlab = 0;
+ ret = -1;
+ startns = todget(nil);
+
+ l1cache->wb(); /* system is more stable with this */
+ if(!waserror()){
+ if(scallnr >= nsyscall){
+ pprint("bad sys call number %d pc %#lux\n",
+ scallnr, ureg->pc);
+ postnote(up, 1, "sys: bad sys call", NDebug);
+ error(Ebadarg);
+ }
+
+ if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
+ validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+ up->s = *((Sargs*)(sp+BY2WD));
+ up->psstate = sysctab[scallnr];
+
+ /* iprint("%s: syscall %s\n", up->text, sysctab[scallnr]?sysctab[scallnr]:"huh?"); */
+
+ ret = systab[scallnr](up->s.args);
+ poperror();
+ }else{
+ /* failure: save the error buffer for errstr */
+ e = up->syserrstr;
+ up->syserrstr = up->errstr;
+ up->errstr = e;
+ }
+ if(up->nerrlab){
+ print("bad errstack [%d]: %d extra\n", scallnr, up->nerrlab);
+ for(i = 0; i < NERR; i++)
+ print("sp=%#p pc=%#p\n",
+ up->errlab[i].sp, up->errlab[i].pc);
+ panic("error stack");
+ }
+
+ /*
+ * Put return value in frame. On the x86 the syscall is
+ * just another trap and the return value from syscall is
+ * ignored. On other machines the return value is put into
+	 * the results register by the caller of syscall.
+ */
+ ureg->r0 = ret;
+
+ if(up->procctl == Proc_tracesyscall){
+ stopns = todget(nil);
+ up->procctl = Proc_stopme;
+ sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
+ s = splhi();
+ procctl(up);
+ splx(s);
+ if(up->syscalltrace)
+ free(up->syscalltrace);
+ up->syscalltrace = nil;
+ }
+
+ up->insyscall = 0;
+ up->psstate = 0;
+
+ if(scallnr == NOTED)
+ noted(ureg, *(ulong*)(sp+BY2WD));
+
+ splhi();
+ if(scallnr != RFORK && (up->procctl || up->nnote))
+ notify(ureg);
+
+ l1cache->wb(); /* system is more stable with this */
+
+ /* if we delayed sched because we held a lock, sched now */
+ if(up->delaysched){
+ sched();
+ splhi();
+ }
+ kexit(ureg);
+}
+
+long
+execregs(ulong entry, ulong ssize, ulong nargs)
+{
+ ulong *sp;
+ Ureg *ureg;
+
+ sp = (ulong*)(USTKTOP - ssize);
+ *--sp = nargs;
+
+ ureg = up->dbgreg;
+// memset(ureg, 0, 15*sizeof(ulong));
+ ureg->r13 = (ulong)sp;
+ ureg->pc = entry;
+//print("%lud: EXECREGS pc %#ux sp %#ux nargs %ld\n", up->pid, ureg->pc, ureg->r13, nargs);
+ allcache->wbse(ureg, sizeof *ureg); /* is this needed? */
+
+ /*
+ * return the address of kernel/user shared data
+ * (e.g. clock stuff)
+ */
+ return USTKTOP-sizeof(Tos);
+}
+
+void
+sysprocsetup(Proc* p)
+{
+ fpusysprocsetup(p);
+}
+
+/*
+ * Craft a return frame which will cause the child to pop out of
+ * the scheduler in user mode with the return register zero. Set
+ * pc to point to a l.s return function.
+ */
+void
+forkchild(Proc *p, Ureg *ureg)
+{
+ Ureg *cureg;
+
+ p->sched.sp = (ulong)p->kstack+KSTACK-sizeof(Ureg);
+ p->sched.pc = (ulong)forkret;
+
+ cureg = (Ureg*)(p->sched.sp);
+ memmove(cureg, ureg, sizeof(Ureg));
+
+ /* syscall returns 0 for child */
+ cureg->r0 = 0;
+
+ /* Things from bottom of syscall which were never executed */
+ p->psstate = 0;
+ p->insyscall = 0;
+
+ fpusysrforkchild(p, cureg, up);
+}
diff --git a/sys/src/9/teg2/trap.c b/sys/src/9/teg2/trap.c
new file mode 100644
index 000000000..35ab7dc04
--- /dev/null
+++ b/sys/src/9/teg2/trap.c
@@ -0,0 +1,1083 @@
+/*
+ * arm mpcore generic interrupt controller (gic) v1
+ * traps, exceptions, interrupts, system calls.
+ *
+ * there are two pieces: the interrupt distributor and the cpu interface.
+ *
+ * memset or memmove on any of the distributor registers generates an
+ * exception like this one:
+ * panic: external abort 0x28 pc 0xc048bf68 addr 0x50041800
+ *
+ * we use l1 and l2 cache ops to force vectors to be visible everywhere.
+ *
+ * apparently irqs 0—15 (SGIs) are always enabled.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ureg.h"
+#include "arm.h"
+
+#define ISSGI(irq) ((uint)(irq) < Nsgi)
+
+enum {
+ Debug = 0,
+
+ Nvec = 8, /* # of vectors at start of lexception.s */
+ Bi2long = BI2BY * sizeof(long),
+ Nirqs = 1024,
+ Nsgi = 16, /* software-generated (inter-processor) intrs */
+ Nppi = 32, /* sgis + other private peripheral intrs */
+};
+
+typedef struct Intrcpuregs Intrcpuregs;
+typedef struct Intrdistregs Intrdistregs;
+
+/*
+ * almost this entire register set is buggered.
+ * the distributor is supposed to be per-system, not per-cpu,
+ * yet some registers are banked per-cpu, as marked.
+ */
+struct Intrdistregs { /* distributor */
+ ulong ctl;
+ ulong ctlrtype;
+ ulong distid;
+ uchar _pad0[0x80 - 0xc];
+
+ /* botch: *[0] are banked per-cpu from here */
+ /* bit maps */
+ ulong grp[32]; /* in group 1 (non-secure) */
+ ulong setena[32]; /* forward to cpu interfaces */
+ ulong clrena[32];
+ ulong setpend[32];
+ ulong clrpend[32];
+ ulong setact[32]; /* active? */
+ ulong clract[32];
+ /* botch: *[0] are banked per-cpu until here */
+
+ uchar pri[1020]; /* botch: pri[0] — pri[7] are banked per-cpu */
+ ulong _rsrvd1;
+ /* botch: targ[0] through targ[7] are banked per-cpu and RO */
+ uchar targ[1020]; /* byte bit maps: cpu targets indexed by intr */
+ ulong _rsrvd2;
+ /* botch: cfg[1] is banked per-cpu */
+ ulong cfg[64]; /* bit pairs: edge? 1-N? */
+ ulong _pad1[64];
+ ulong nsac[64]; /* bit pairs (v2 only) */
+
+ /* software-generated intrs (a.k.a. sgi) */
+ ulong swgen; /* intr targets */
+ uchar _pad2[0xf10 - 0xf04];
+ uchar clrsgipend[16]; /* bit map (v2 only) */
+ uchar setsgipend[16]; /* bit map (v2 only) */
+};
+
+enum {
+ /* ctl bits */
+ Forw2cpuif = 1,
+
+ /* ctlrtype bits */
+ Cpunoshft = 5,
+ Cpunomask = MASK(3),
+ Intrlines = MASK(5),
+
+ /* cfg bits */
+ Level = 0<<1,
+ Edge = 1<<1, /* edge-, not level-sensitive */
+ Toall = 0<<0,
+ To1 = 1<<0, /* vs. to all */
+
+ /* swgen bits */
+ Totargets = 0,
+ Tonotme = 1<<24,
+ Tome = 2<<24,
+};
+
+/* each cpu sees its own registers at the same base address (soc.intr) */
+struct Intrcpuregs {
+ ulong ctl;
+ ulong primask;
+
+ ulong binpt; /* group pri vs subpri split */
+ ulong ack;
+ ulong end;
+ ulong runpri;
+ ulong hipripend;
+
+ /* aliased regs (secure, for group 1) */
+ ulong alibinpt;
+ ulong aliack; /* (v2 only) */
+ ulong aliend; /* (v2 only) */
+ ulong alihipripend; /* (v2 only) */
+
+ uchar _pad0[0xd0 - 0x2c];
+ ulong actpri[4]; /* (v2 only) */
+ ulong nsactpri[4]; /* (v2 only) */
+
+	uchar	_pad1[0xfc - 0xf0];
+ ulong ifid; /* ro */
+
+	uchar	_pad2[0x1000 - 0x100];
+ ulong deact; /* wo (v2 only) */
+};
+
+enum {
+ /* ctl bits */
+ Enable = 1,
+ Eoinodeact = 1<<9, /* (v2 only) */
+
+ /* (ali) ack/end/hipriend/deact bits */
+ Intrmask = MASK(10),
+ Cpuidshift = 10,
+ Cpuidmask = MASK(3),
+
+ /* ifid bits */
+ Archversshift = 16,
+ Archversmask = MASK(4),
+};
+
+typedef struct Vctl Vctl;
+typedef struct Vctl {
+ Vctl* next; /* handlers on this vector */
+ char *name; /* of driver, xallocated */
+ void (*f)(Ureg*, void*); /* handler to call */
+ void* a; /* argument to call it with */
+} Vctl;
+
+static Lock vctllock;
+static Vctl* vctl[Nirqs];
+
+/*
+ * Layout at virtual address 0.
+ */
+typedef struct Vpage0 {
+ void (*vectors[Nvec])(void);
+ u32int vtable[Nvec];
+} Vpage0;
+
+enum
+{
+ Ntimevec = 20 /* number of time buckets for each intr */
+};
+ulong intrtimes[Nirqs][Ntimevec];
+
+uvlong ninterrupt;
+uvlong ninterruptticks;
+int irqtooearly = 1;
+
+static ulong shadena[32]; /* copy of enable bits, saved by intcmaskall */
+static Lock distlock, nintrlock;
+
+extern int notify(Ureg*);
+
+static void dumpstackwithureg(Ureg *ureg);
+
+void
+printrs(int base, ulong word)
+{
+ int bit;
+
+ for (bit = 0; word; bit++, word >>= 1)
+ if (word & 1)
+ iprint(" %d", base + bit);
+}
+
+void
+dumpintrs(char *what, ulong *bits)
+{
+ int i, first, some;
+ ulong word;
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ first = 1;
+ some = 0;
+ USED(idp);
+ for (i = 0; i < nelem(idp->setpend); i++) {
+ word = bits[i];
+ if (word) {
+ if (first) {
+ first = 0;
+ iprint("%s", what);
+ }
+ some = 1;
+ printrs(i * Bi2long, word);
+ }
+ }
+ if (!some)
+ iprint("%s none", what);
+ iprint("\n");
+}
+
+void
+dumpintrpend(void)
+{
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ iprint("\ncpu%d gic regs:\n", m->machno);
+ dumpintrs("group 1", idp->grp);
+ dumpintrs("enabled", idp->setena);
+ dumpintrs("pending", idp->setpend);
+ dumpintrs("active ", idp->setact);
+}
+
+/*
+ * keep histogram of interrupt service times
+ */
+void
+intrtime(Mach*, int vno)
+{
+ ulong diff;
+ ulong x;
+
+ x = perfticks();
+ diff = x - m->perf.intrts;
+ m->perf.intrts = x;
+
+ m->perf.inintr += diff;
+ if(up == nil && m->perf.inidle > diff)
+ m->perf.inidle -= diff;
+
+ if (m->cpumhz == 0)
+ return; /* don't divide by zero */
+ diff /= m->cpumhz*100; /* quantum = 100µsec */
+ if(diff >= Ntimevec)
+ diff = Ntimevec-1;
+ if ((uint)vno >= Nirqs)
+ vno = Nirqs-1;
+ intrtimes[vno][diff]++;
+}
+
+static ulong
+intack(Intrcpuregs *icp)
+{
+ return icp->ack & Intrmask;
+}
+
+static void
+intdismiss(Intrcpuregs *icp, ulong ack)
+{
+ icp->end = ack;
+ coherence();
+}
+
+static int
+irqinuse(uint irq)
+{
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ return idp->setena[irq / Bi2long] & (1 << (irq % Bi2long));
+}
+
+void
+intcunmask(uint irq)
+{
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ ilock(&distlock);
+ idp->setena[irq / Bi2long] = 1 << (irq % Bi2long);
+ iunlock(&distlock);
+}
+
+void
+intcmask(uint irq)
+{
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ ilock(&distlock);
+ idp->clrena[irq / Bi2long] = 1 << (irq % Bi2long);
+ iunlock(&distlock);
+}
+
+static void
+intcmaskall(Intrdistregs *idp) /* mask all intrs for all cpus */
+{
+ int i;
+
+ for (i = 0; i < nelem(idp->setena); i++)
+ shadena[i] = idp->setena[i];
+ for (i = 0; i < nelem(idp->clrena); i++)
+ idp->clrena[i] = ~0;
+ coherence();
+}
+
+static void
+intcunmaskall(Intrdistregs *idp) /* unused */
+{
+ int i;
+
+ for (i = 0; i < nelem(idp->setena); i++)
+ idp->setena[i] = shadena[i];
+ coherence();
+}
+
+static ulong
+permintrs(Intrdistregs *idp, int base, int r)
+{
+ ulong perms;
+
+ idp->clrena[r] = ~0; /* disable all */
+ coherence();
+ perms = idp->clrena[r];
+ if (perms) {
+ iprint("perm intrs:");
+ printrs(base, perms);
+ iprint("\n");
+ }
+ return perms;
+}
+
+static void
+intrcfg(Intrdistregs *idp)
+{
+ int i, cpumask;
+ ulong pat;
+
+ /* set up all interrupts as level-sensitive, to one cpu (0) */
+ pat = 0;
+ for (i = 0; i < Bi2long; i += 2)
+ pat |= (Level | To1) << i;
+
+ if (m->machno == 0) { /* system-wide & cpu0 cfg */
+ for (i = 0; i < nelem(idp->grp); i++)
+ idp->grp[i] = 0; /* secure */
+ for (i = 0; i < nelem(idp->pri); i++)
+ idp->pri[i] = 0; /* highest priority */
+ /* set up all interrupts as level-sensitive, to one cpu (0) */
+ for (i = 0; i < nelem(idp->cfg); i++)
+ idp->cfg[i] = pat;
+ /* first Nppi are read-only for SGIs and PPIs */
+ cpumask = 1<<0; /* just cpu 0 */
+ navailcpus = getncpus();
+ for (i = Nppi; i < sizeof idp->targ; i++)
+ idp->targ[i] = cpumask;
+ coherence();
+
+ intcmaskall(idp);
+ for (i = 0; i < nelem(idp->clrena); i++) {
+ // permintrs(idp, i * Bi2long, i);
+ idp->clrpend[i] = idp->clract[i] = idp->clrena[i] = ~0;
+ }
+ } else { /* per-cpu config */
+ idp->grp[0] = 0; /* secure */
+ for (i = 0; i < 8; i++)
+ idp->pri[i] = 0; /* highest priority */
+ /* idp->targ[0 through Nppi-1] are supposed to be read-only */
+ for (i = 0; i < Nppi; i++)
+ idp->targ[i] = 1<<m->machno;
+ idp->cfg[1] = pat;
+ coherence();
+
+ // permintrs(idp, i * Bi2long, i);
+ idp->clrpend[0] = idp->clract[0] = idp->clrena[0] = ~0;
+ /* on cpu1, irq Extpmuirq (118) is always pending here */
+ }
+ coherence();
+}
+
+void
+intrto(int cpu, int irq)
+{
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ /* first Nppi are read-only for SGIs and the like */
+ ilock(&distlock);
+ idp->targ[irq] = 1 << cpu;
+ iunlock(&distlock);
+}
+
+void
+intrsto(int cpu) /* unused */
+{
+ int i;
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ /* first Nppi are read-only for SGIs and the like */
+ for (i = Nppi; i < sizeof idp->targ; i++)
+ intrto(cpu, i);
+ USED(idp);
+}
+
+void
+intrcpu(int cpu)
+{
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ ilock(&distlock);
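+	/*
+	 * swgen: bits 25:24 select the filter, 23:16 the target cpus,
+	 * 3:0 the sgi id (the sender's cpu number doubles as the id here).
+	 */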
+ idp->swgen = Totargets | 1 << (cpu + 16) | m->machno;
+ iunlock(&distlock);
+}
+
+/*
+ * set up for exceptions
+ */
+void
+trapinit(void)
+{
+ int s;
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+ Intrcpuregs *icp = (Intrcpuregs *)soc.intr;
+ Vpage0 *vpage0;
+ enum { Vecsize = sizeof vpage0->vectors + sizeof vpage0->vtable, };
+
+ /*
+ * set up the exception vectors, high and low.
+ *
+ * we can't use cache ops on HVECTORS address, since they
+ * work on virtual addresses, and only those that have a
+ * physical address == PADDR(virtual).
+ */
+ if (m->machno == 0) {
+ vpage0 = (Vpage0*)HVECTORS;
+ memmove(vpage0->vectors, vectors, sizeof(vpage0->vectors));
+ memmove(vpage0->vtable, vtable, sizeof(vpage0->vtable));
+
+ vpage0 = (Vpage0*)KADDR(0);
+ memmove(vpage0->vectors, vectors, sizeof(vpage0->vectors));
+ memmove(vpage0->vtable, vtable, sizeof(vpage0->vtable));
+
+ allcache->wbse(vpage0, Vecsize);
+ cacheiinv();
+ }
+
+ /*
+ * set up the stack pointers for the exception modes for this cpu.
+ * they point to small `save areas' in Mach, not actual stacks.
+ */
+ s = splhi(); /* make these modes ignore intrs too */
+ setr13(PsrMfiq, m->sfiq);
+ setr13(PsrMirq, m->sirq);
+ setr13(PsrMmon, m->smon);
+ setr13(PsrMabt, m->sabt);
+ setr13(PsrMund, m->sund);
+ setr13(PsrMsys, m->ssys);
+ splx(s);
+
+ assert((idp->distid & MASK(12)) == 0x43b); /* made by arm */
+ assert((icp->ifid & MASK(12)) == 0x43b); /* made by arm */
+
+ ilock(&distlock);
+ idp->ctl = 0;
+ icp->ctl = 0;
+ coherence();
+
+ intrcfg(idp); /* some per-cpu cfg here */
+
+ icp->ctl = Enable;
+ icp->primask = (uchar)~0; /* let all priorities through */
+ coherence();
+
+ idp->ctl = Forw2cpuif;
+ iunlock(&distlock);
+}
+
+void
+intrsoff(void)
+{
+ ilock(&distlock);
+ intcmaskall((Intrdistregs *)soc.intrdist);
+ iunlock(&distlock);
+}
+
+void
+intrcpushutdown(void)
+{
+ Intrcpuregs *icp = (Intrcpuregs *)soc.intr;
+
+ icp->ctl = 0;
+ icp->primask = 0; /* let no priorities through */
+ coherence();
+}
+
+/* called from cpu0 after other cpus are shutdown */
+void
+intrshutdown(void)
+{
+ Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+ intrsoff();
+ idp->ctl = 0;
+ intrcpushutdown();
+}
+
+/*
+ * enable an irq interrupt
+ * note that the same private interrupt may be enabled on multiple cpus
+ */
+int
+irqenable(uint irq, void (*f)(Ureg*, void*), void* a, char *name)
+{
+ Vctl *v;
+
+ if(irq >= nelem(vctl))
+ panic("irqenable irq %d", irq);
+
+ if (irqtooearly) {
+ iprint("irqenable for %d %s called too early\n", irq, name);
+ return -1;
+ }
+ /*
+ * if in use, could be a private interrupt on a secondary cpu,
+ * so don't add anything to the vector chain. irqs should
+ * otherwise be one-to-one with devices.
+ */
+ if(!ISSGI(irq) && irqinuse(irq)) {
+ lock(&vctllock);
+ if (vctl[irq] == nil) {
+ dumpintrpend();
+ panic("non-sgi irq %d in use yet no Vctl allocated", irq);
+ }
+ unlock(&vctllock);
+ }
+ /* could be 1st use of this irq or could be an sgi (always in use) */
+ else if (vctl[irq] == nil) {
+ v = malloc(sizeof(Vctl));
+ if (v == nil)
+ panic("irqenable: malloc Vctl");
+ v->f = f;
+ v->a = a;
+ v->name = malloc(strlen(name)+1);
+ if (v->name == nil)
+ panic("irqenable: malloc name");
+ strcpy(v->name, name);
+
+ lock(&vctllock);
+ if (vctl[irq] != nil) {
+ /* allocation race: someone else did it first */
+ free(v->name);
+ free(v);
+ } else {
+ v->next = vctl[irq];
+ vctl[irq] = v;
+ }
+ unlock(&vctllock);
+ }
+ intcunmask(irq);
+ return 0;
+}
+
+/*
+ * disable an irq interrupt
+ */
+int
+irqdisable(uint irq, void (*f)(Ureg*, void*), void* a, char *name)
+{
+ Vctl **vp, *v;
+
+ if(irq >= nelem(vctl))
+ panic("irqdisable irq %d", irq);
+
+ lock(&vctllock);
+ for(vp = &vctl[irq]; v = *vp; vp = &v->next)
+ if (v->f == f && v->a == a && strcmp(v->name, name) == 0){
+ print("irqdisable: remove %s\n", name);
+ *vp = v->next;
+ free(v->name);
+ free(v);
+ break;
+ }
+
+ if(v == nil)
+ print("irqdisable: irq %d, name %s not enabled\n", irq, name);
+ if(vctl[irq] == nil){
+ print("irqdisable: clear icmr bit %d\n", irq);
+ intcmask(irq);
+ }
+ unlock(&vctllock);
+
+ return 0;
+}
+
+/*
+ * called by trap to handle access faults
+ */
+static void
+faultarm(Ureg *ureg, uintptr va, int user, int read)
+{
+ int n, insyscall;
+
+ if(up == nil) {
+ dumpstackwithureg(ureg);
+ panic("faultarm: cpu%d: nil up, %sing %#p at %#p",
+ m->machno, (read? "read": "writ"), va, ureg->pc);
+ }
+ insyscall = up->insyscall;
+ up->insyscall = 1;
+
+ n = fault(va, read); /* goes spllo */
+ splhi();
+ if(n < 0){
+ char buf[ERRMAX];
+
+ if(!user){
+ dumpstackwithureg(ureg);
+ panic("fault: cpu%d: kernel %sing %#p at %#p",
+ m->machno, read? "read": "writ", va, ureg->pc);
+ }
+ /* don't dump registers; programs suicide all the time */
+ snprint(buf, sizeof buf, "sys: trap: fault %s va=%#p",
+ read? "read": "write", va);
+ postnote(up, 1, buf, NDebug);
+ }
+ up->insyscall = insyscall;
+}
+
+/*
+ * called by trap to handle interrupts.
+ * returns true iff a clock interrupt, thus maybe reschedule.
+ */
+static int
+irq(Ureg* ureg)
+{
+ int clockintr, ack;
+ uint irqno, handled, t, ticks;
+ Intrcpuregs *icp = (Intrcpuregs *)soc.intr;
+ Vctl *v;
+
+ ticks = perfticks();
+ handled = 0;
+ ack = intack(icp);
+ irqno = ack & Intrmask;
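+	/* for sgis, ack also identifies the signalling cpu; intdismiss writes the whole ack back */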
+
+ if (irqno >= nelem(vctl)) {
+ iprint("trap: irq %d >= # vectors (%d)\n", irqno, nelem(vctl));
+ intdismiss(icp, ack);
+ return 0;
+ }
+
+ if (irqno == Loctmrirq) /* this is a clock intr? */
+ m->inclockintr++; /* yes, count nesting */
+ if(m->machno && m->inclockintr > 1) {
+ iprint("cpu%d: nested clock intrs\n", m->machno);
+ m->inclockintr--;
+ intdismiss(icp, ack);
+ return 0;
+ }
+
+ for(v = vctl[irqno]; v != nil; v = v->next)
+ if (v->f) {
+ if (islo())
+ panic("trap: pl0 before trap handler for %s",
+ v->name);
+ v->f(ureg, v->a);
+ if (islo())
+ panic("trap: %s lowered pl", v->name);
+// splhi(); /* in case v->f lowered pl */
+ handled++;
+ }
+	if(!handled){
+		if (irqno >= 1022)
+			iprint("cpu%d: ignoring spurious interrupt\n", m->machno);
+		else {
+			intcmask(irqno);
+			iprint("cpu%d: unexpected interrupt %d, now masked\n",
+				m->machno, irqno);
+		}
+	}
+ t = perfticks();
+ if (0) { /* left over from another port? */
+ ilock(&nintrlock);
+ ninterrupt++;
+ if(t < ticks)
+ ninterruptticks += ticks-t;
+ else
+ ninterruptticks += t-ticks;
+ iunlock(&nintrlock);
+ }
+ USED(t, ticks);
+ clockintr = m->inclockintr == 1;
+ if (irqno == Loctmrirq)
+ m->inclockintr--;
+
+ intdismiss(icp, ack);
+ intrtime(m, irqno);
+ return clockintr;
+}
+
+/*
+ * returns 1 if the instruction writes memory, 0 otherwise
+ */
+int
+writetomem(ulong inst)
+{
+ /* swap always write memory */
+ if((inst & 0x0FC00000) == 0x01000000)
+ return 1;
+
+ /* loads and stores are distinguished by bit 20 */
+ if(inst & (1<<20))
+ return 0;
+
+ return 1;
+}
+
+static void
+datafault(Ureg *ureg, int user)
+{
+ int x;
+ ulong inst, fsr;
+ uintptr va;
+
+ va = farget();
+
+ if (m->probing && !user) {
+ if (m->trapped++ > 0) {
+ dumpstackwithureg(ureg);
+ panic("trap: recursive probe %#lux", va);
+ }
+ ureg->pc += 4; /* continue after faulting instr'n */
+ return;
+ }
+
+ inst = *(ulong*)(ureg->pc);
+ /* bits 12 and 10 have to be concatenated with status */
+ x = fsrget();
+ fsr = (x>>7) & 0x20 | (x>>6) & 0x10 | x & 0xf;
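+	/* i.e. fsr bit 12 becomes 0x20, bit 10 becomes 0x10, plus the low 4 status bits */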
+ switch(fsr){
+ default:
+ case 0xa: /* ? was under external abort */
+ panic("unknown data fault, 6b fsr %#lux", fsr);
+ break;
+ case 0x0:
+ panic("vector exception at %#lux", ureg->pc);
+ break;
+ case 0x1: /* alignment fault */
+ case 0x3: /* access flag fault (section) */
+ if(user){
+ char buf[ERRMAX];
+
+ snprint(buf, sizeof buf,
+ "sys: alignment: pc %#lux va %#p\n",
+ ureg->pc, va);
+ postnote(up, 1, buf, NDebug);
+ } else {
+ dumpstackwithureg(ureg);
+ panic("kernel alignment: pc %#lux va %#p", ureg->pc, va);
+ }
+ break;
+ case 0x2:
+ panic("terminal exception at %#lux", ureg->pc);
+ break;
+ case 0x4: /* icache maint fault */
+ case 0x6: /* access flag fault (page) */
+ case 0x8: /* precise external abort, non-xlat'n */
+ case 0x28:
+ case 0x16: /* imprecise ext. abort, non-xlt'n */
+ case 0x36:
+ panic("external non-translation abort %#lux pc %#lux addr %#p",
+ fsr, ureg->pc, va);
+ break;
+ case 0xc: /* l1 translation, precise ext. abort */
+ case 0x2c:
+ case 0xe: /* l2 translation, precise ext. abort */
+ case 0x2e:
+ panic("external translation abort %#lux pc %#lux addr %#p",
+ fsr, ureg->pc, va);
+ break;
+ case 0x1c: /* l1 translation, precise parity err */
+ case 0x1e: /* l2 translation, precise parity err */
+ case 0x18: /* imprecise parity or ecc err */
+ panic("translation parity error %#lux pc %#lux addr %#p",
+ fsr, ureg->pc, va);
+ break;
+ case 0x5: /* translation fault, no section entry */
+ case 0x7: /* translation fault, no page entry */
+ faultarm(ureg, va, user, !writetomem(inst));
+ break;
+ case 0x9:
+ case 0xb:
+ /* domain fault, accessing something we shouldn't */
+ if(user){
+ char buf[ERRMAX];
+
+ snprint(buf, sizeof buf,
+ "sys: access violation: pc %#lux va %#p\n",
+ ureg->pc, va);
+ postnote(up, 1, buf, NDebug);
+ } else
+ panic("kernel access violation: pc %#lux va %#p",
+ ureg->pc, va);
+ break;
+ case 0xd:
+ case 0xf:
+ /* permission error, copy on write or real permission error */
+ faultarm(ureg, va, user, !writetomem(inst));
+ break;
+ }
+}
+
+/*
+ * here on all exceptions other than syscall (SWI) and reset
+ */
+void
+trap(Ureg *ureg)
+{
+ int clockintr, user, rem;
+ uintptr va, ifar, ifsr;
+
+ splhi(); /* paranoia */
+ if(up != nil)
+ rem = ((char*)ureg)-up->kstack;
+ else
+ rem = ((char*)ureg)-((char*)m+sizeof(Mach));
+ if(rem < 1024) {
+ iprint("trap: %d stack bytes left, up %#p ureg %#p m %#p cpu%d at pc %#lux\n",
+ rem, up, ureg, m, m->machno, ureg->pc);
+ dumpstackwithureg(ureg);
+ panic("trap: %d stack bytes left, up %#p ureg %#p at pc %#lux",
+ rem, up, ureg, ureg->pc);
+ }
+
+ m->perf.intrts = perfticks();
+ user = (ureg->psr & PsrMask) == PsrMusr;
+ if(user){
+ up->dbgreg = ureg;
+ cycles(&up->kentry);
+ }
+
+ /*
+ * All interrupts/exceptions should be resumed at ureg->pc-4,
+ * except for Data Abort which resumes at ureg->pc-8.
+ */
+ if(ureg->type == (PsrMabt+1))
+ ureg->pc -= 8;
+ else
+ ureg->pc -= 4;
+
+ clockintr = 0; /* if set, may call sched() before return */
+ switch(ureg->type){
+ default:
+ panic("unknown trap; type %#lux, psr mode %#lux", ureg->type,
+ ureg->psr & PsrMask);
+ break;
+ case PsrMirq:
+ m->intr++;
+ clockintr = irq(ureg);
+ if(0 && up && !clockintr)
+ preempted(); /* this causes spurious suicides */
+ break;
+ case PsrMabt: /* prefetch (instruction) fault */
+ va = ureg->pc;
+ ifsr = cprdsc(0, CpFSR, 0, CpIFSR);
+ ifsr = (ifsr>>7) & 0x8 | ifsr & 0x7;
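+		/* i.e. ifsr bit 10 becomes 0x8, plus the low 3 status bits */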
+ switch(ifsr){
+ case 0x02: /* instruction debug event (BKPT) */
+ if(user)
+ postnote(up, 1, "sys: breakpoint", NDebug);
+ else{
+ iprint("kernel bkpt: pc %#lux inst %#ux\n",
+ va, *(u32int*)va);
+ panic("kernel bkpt");
+ }
+ break;
+ default:
+ ifar = cprdsc(0, CpFAR, 0, CpIFAR);
+ if (va != ifar)
+ iprint("trap: cpu%d: i-fault va %#p != ifar %#p\n",
+ m->machno, va, ifar);
+ faultarm(ureg, va, user, 1);
+ break;
+ }
+ break;
+ case PsrMabt+1: /* data fault */
+ datafault(ureg, user);
+ break;
+ case PsrMund: /* undefined instruction */
+ if(!user) {
+ if (ureg->pc & 3) {
+ iprint("rounding fault pc %#lux down to word\n",
+ ureg->pc);
+ ureg->pc &= ~3;
+ }
+ if (Debug)
+ iprint("mathemu: cpu%d fpon %d instr %#8.8lux at %#p\n",
+ m->machno, m->fpon, *(ulong *)ureg->pc,
+ ureg->pc);
+ dumpstackwithureg(ureg);
+ panic("cpu%d: undefined instruction: pc %#lux inst %#ux",
+ m->machno, ureg->pc, ((u32int*)ureg->pc)[0]);
+ } else if(seg(up, ureg->pc, 0) != nil &&
+ *(u32int*)ureg->pc == 0xD1200070)
+ postnote(up, 1, "sys: breakpoint", NDebug);
+ else if(fpuemu(ureg) == 0){ /* didn't find any FP instrs? */
+ char buf[ERRMAX];
+
+ snprint(buf, sizeof buf,
+ "undefined instruction: pc %#lux instr %#8.8lux\n",
+ ureg->pc, *(ulong *)ureg->pc);
+ postnote(up, 1, buf, NDebug);
+ }
+ break;
+ }
+ splhi();
+
+ /* delaysched set because we held a lock or because our quantum ended */
+ if(up && up->delaysched && clockintr){
+ sched(); /* can cause more traps */
+ splhi();
+ }
+
+ if(user){
+ if(up->procctl || up->nnote)
+ notify(ureg);
+ kexit(ureg);
+ }
+}
+
+/*
+ * Fill in enough of Ureg to get a stack trace, and call a function.
+ * Used by debugging interface rdb.
+ */
+void
+callwithureg(void (*fn)(Ureg*))
+{
+ Ureg ureg;
+
+ memset(&ureg, 0, sizeof ureg);
+ ureg.pc = getcallerpc(&fn);
+ ureg.sp = PTR2UINT(&fn);
+ fn(&ureg);
+}
+
+static void
+dumpstackwithureg(Ureg *ureg)
+{
+ int x;
+ uintptr l, v, i, estack;
+ char *s;
+
+ dumpregs(ureg);
+ if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
+ iprint("dumpstack disabled\n");
+ return;
+ }
+ delay(1000);
+ iprint("dumpstack\n");
+
+ x = 0;
+ x += iprint("ktrace /kernel/path %#.8lux %#.8lux %#.8lux # pc, sp, link\n",
+ ureg->pc, ureg->sp, ureg->r14);
+ delay(20);
+ i = 0;
+ if(up
+ && (uintptr)&l >= (uintptr)up->kstack
+ && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
+ estack = (uintptr)up->kstack+KSTACK;
+ else if((uintptr)&l >= (uintptr)m->stack
+ && (uintptr)&l <= (uintptr)m+MACHSIZE)
+ estack = (uintptr)m+MACHSIZE;
+ else
+ return;
+ x += iprint("estackx %p\n", estack);
+
+ for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
+ v = *(uintptr*)l;
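+		/* print words that look like kernel text addresses, plus everything within 32 bytes of the stack top */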
+ if((KTZERO < v && v < (uintptr)etext) || estack-l < 32){
+ x += iprint("%.8p ", v);
+ delay(20);
+ i++;
+ }
+ if(i == 8){
+ i = 0;
+ x += iprint("\n");
+ delay(20);
+ }
+ }
+ if(i)
+ iprint("\n");
+ delay(3000);
+}
+
+void
+dumpstack(void)
+{
+ callwithureg(dumpstackwithureg);
+}
+
+/*
+ * dump system control coprocessor registers
+ */
+static void
+dumpscr(void)
+{
+ iprint("0:\t%#8.8ux id\n", cpidget());
+ iprint("\t%8.8#ux ct\n", cpctget());
+ iprint("1:\t%#8.8ux control\n", controlget());
+ iprint("2:\t%#8.8ux ttb\n", ttbget());
+ iprint("3:\t%#8.8ux dac\n", dacget());
+ iprint("4:\t(reserved)\n");
+ iprint("5:\t%#8.8ux fsr\n", fsrget());
+ iprint("6:\t%#8.8ux far\n", farget());
+ iprint("7:\twrite-only cache\n");
+ iprint("8:\twrite-only tlb\n");
+ iprint("13:\t%#8.8ux pid\n", pidget());
+ delay(10);
+}
+
+/*
+ * dump general registers
+ */
+static void
+dumpgpr(Ureg* ureg)
+{
+ if(up != nil)
+ iprint("cpu%d: registers for %s %lud\n",
+ m->machno, up->text, up->pid);
+ else
+ iprint("cpu%d: registers for kernel\n", m->machno);
+
+ delay(20);
+ iprint("%#8.8lux\tr0\n", ureg->r0);
+ iprint("%#8.8lux\tr1\n", ureg->r1);
+ iprint("%#8.8lux\tr2\n", ureg->r2);
+ delay(20);
+ iprint("%#8.8lux\tr3\n", ureg->r3);
+ iprint("%#8.8lux\tr4\n", ureg->r4);
+ iprint("%#8.8lux\tr5\n", ureg->r5);
+ delay(20);
+ iprint("%#8.8lux\tr6\n", ureg->r6);
+ iprint("%#8.8lux\tr7\n", ureg->r7);
+ iprint("%#8.8lux\tr8\n", ureg->r8);
+ delay(20);
+ iprint("%#8.8lux\tr9 (up)\n", ureg->r9);
+ iprint("%#8.8lux\tr10 (m)\n", ureg->r10);
+ iprint("%#8.8lux\tr11 (loader temporary)\n", ureg->r11);
+ iprint("%#8.8lux\tr12 (SB)\n", ureg->r12);
+ delay(20);
+ iprint("%#8.8lux\tr13 (sp)\n", ureg->r13);
+ iprint("%#8.8lux\tr14 (link)\n", ureg->r14);
+ iprint("%#8.8lux\tr15 (pc)\n", ureg->pc);
+ delay(20);
+ iprint("%10.10lud\ttype\n", ureg->type);
+ iprint("%#8.8lux\tpsr\n", ureg->psr);
+ delay(500);
+}
+
+void
+dumpregs(Ureg* ureg)
+{
+ dumpgpr(ureg);
+ dumpscr();
+}
+
+vlong
+probeaddr(uintptr addr)
+{
+ vlong v;
+
+ ilock(&m->probelock);
+ m->trapped = 0;
+ m->probing = 1;
+ coherence();
+
+ v = *(ulong *)addr; /* this may cause a fault */
+ coherence();
+
+ m->probing = 0;
+ if (m->trapped)
+ v = -1;
+ iunlock(&m->probelock);
+ return v;
+}
diff --git a/sys/src/9/teg2/ts b/sys/src/9/teg2/ts
new file mode 100644
index 000000000..c8530e9da
--- /dev/null
+++ b/sys/src/9/teg2/ts
@@ -0,0 +1,89 @@
+# trimslice dual-core cortex-a9
+dev
+ root
+ cons
+ env
+ pipe
+ proc
+ mnt
+ srv
+ shr
+ dup
+ arch
+ ssl
+ tls
+ bridge log
+ sdp thwack unthwack
+ cap
+ kprof
+# aoe
+# sd
+ fs
+# flash
+
+ ether netif
+ ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno
+
+# draw screen
+# dss
+# mouse
+
+ uart
+# usb
+
+link
+ archtegra
+ ethermedium
+# flashtegra ecc
+ loopbackmedium
+ netdevmedium
+
+ ether8169 ethermii
+# usbohci
+# usbehci usbehcitegra
+
+ip
+ tcp
+ udp
+ ipifc
+ icmp
+ icmp6
+ ipmux
+ gre
+ esp
+
+misc
+ pci
+ rdb
+ coproc
+ v7-arch
+ caches
+ caches-v7
+ cache-l2-pl310
+# mouse
+# sdaoe sdscsi
+ syscall
+ syscallfmt
+ uarti8250
+ ucalloc
+ ucallocb
+# include vfp3 to use hardware fp, otherwise include softfpu
+ vfp3
+# softfpu
+# emulated arm7500 fp
+ fpi
+ fpiarm
+ fpimem
+
+port
+ int cpuserver = 1;
+ int i8250freq = 3686000;
+
+boot cpu
+ tcp
+
+bootdir
+ boot$CONF.out boot
+ /$objtype/bin/paqfs
+ /$objtype/bin/auth/factotum
+ bootfs.paq
diff --git a/sys/src/9/teg2/uarti8250.c b/sys/src/9/teg2/uarti8250.c
new file mode 100644
index 000000000..1b83573fc
--- /dev/null
+++ b/sys/src/9/teg2/uarti8250.c
@@ -0,0 +1,819 @@
+/*
+ * 8250-like UART
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+enum { /* registers */
+ Rbr = 0, /* Receiver Buffer (RO) */
+ Thr = 0, /* Transmitter Holding (WO) */
+ Ier = 1, /* Interrupt Enable */
+ Iir = 2, /* Interrupt Identification (RO) */
+ Fcr = 2, /* FIFO Control (WO) */
+ Lcr = 3, /* Line Control */
+ Mcr = 4, /* Modem Control */
+ Lsr = 5, /* Line Status */
+ Msr = 6, /* Modem Status */
+ Scr = 7, /* Scratch Pad */
+ Mdr = 8, /* Mode Def'n (omap rw) */
+// Usr = 31, /* Uart Status Register; missing in omap? */
+ Dll = 0, /* Divisor Latch LSB */
+ Dlm = 1, /* Divisor Latch MSB */
+};
+
+enum { /* Usr */
+ Busy = 0x01,
+};
+
+enum { /* Ier */
+ Erda = 0x01, /* Enable Received Data Available */
+ Ethre = 0x02, /* Enable Thr Empty */
+ Erls = 0x04, /* Enable Receiver Line Status */
+ Ems = 0x08, /* Enable Modem Status */
+};
+
+enum { /* Iir */
+ Ims = 0x00, /* Ms interrupt */
+ Ip = 0x01, /* Interrupt Pending (not) */
+ Ithre = 0x02, /* Thr Empty */
+ Irda = 0x04, /* Received Data Available */
+ Irls = 0x06, /* Receiver Line Status */
+ Ictoi = 0x0C, /* Character Time-out Indication */
+ IirMASK = 0x3F,
+ Ifena = 0xC0, /* FIFOs enabled */
+};
+
+enum { /* Fcr */
+ FIFOena = 0x01, /* FIFO enable */
+ FIFOrclr = 0x02, /* clear Rx FIFO */
+ FIFOtclr = 0x04, /* clear Tx FIFO */
+// FIFOdma = 0x08,
+ FIFO1 = 0x00, /* Rx FIFO trigger level 1 byte */
+ FIFO4 = 0x40, /* 4 bytes */
+ FIFO8 = 0x80, /* 8 bytes */
+ FIFO14 = 0xC0, /* 14 bytes */
+};
+
+enum { /* Lcr */
+ Wls5 = 0x00, /* Word Length Select 5 bits/byte */
+ Wls6 = 0x01, /* 6 bits/byte */
+ Wls7 = 0x02, /* 7 bits/byte */
+ Wls8 = 0x03, /* 8 bits/byte */
+ WlsMASK = 0x03,
+ Stb = 0x04, /* 2 stop bits */
+ Pen = 0x08, /* Parity Enable */
+ Eps = 0x10, /* Even Parity Select */
+ Stp = 0x20, /* Stick Parity */
+ Brk = 0x40, /* Break */
+ Dlab = 0x80, /* Divisor Latch Access Bit */
+};
+
+enum { /* Mcr */
+ Dtr = 0x01, /* Data Terminal Ready */
+ Rts = 0x02, /* Ready To Send */
+ Out1 = 0x04, /* no longer in use */
+// Ie = 0x08, /* IRQ Enable (cd_sts_ch on omap) */
+ Dm = 0x10, /* Diagnostic Mode loopback */
+};
+
+enum { /* Lsr */
+ Dr = 0x01, /* Data Ready */
+ Oe = 0x02, /* Overrun Error */
+ Pe = 0x04, /* Parity Error */
+ Fe = 0x08, /* Framing Error */
+ Bi = 0x10, /* Break Interrupt */
+ Thre = 0x20, /* Thr Empty */
+ Temt = 0x40, /* Transmitter Empty */
+ FIFOerr = 0x80, /* error in receiver FIFO */
+};
+
+enum { /* Msr */
+ Dcts = 0x01, /* Delta Cts */
+ Ddsr = 0x02, /* Delta Dsr */
+ Teri = 0x04, /* Trailing Edge of Ri */
+ Ddcd = 0x08, /* Delta Dcd */
+ Cts = 0x10, /* Clear To Send */
+ Dsr = 0x20, /* Data Set Ready */
+ Ri = 0x40, /* Ring Indicator */
+ Dcd = 0x80, /* Carrier Detect */
+};
+
+enum { /* Mdr */
+ Modemask = 7,
+ Modeuart = 0,
+};
+
+
+typedef struct Ctlr {
+ u32int* io;
+ int irq;
+ int tbdf;
+ int iena;
+ int poll;
+
+ uchar sticky[Scr+1];
+
+ Lock;
+ int hasfifo;
+ int checkfifo;
+ int fena;
+} Ctlr;
+
+extern PhysUart i8250physuart;
+
+static Ctlr i8250ctlr[] = {
+{ .io = (u32int*)PHYSCONS,
+ .irq = Uartirq,
+ .tbdf = -1,
+ .poll = 0, },
+};
+
+static Uart i8250uart[] = {
+{ .regs = &i8250ctlr[0], /* not [2] */
+ .name = "COM3",
+ .freq = 3686000, /* Not used, we use the global i8250freq */
+ .phys = &i8250physuart,
+ .console= 1,
+ .next = nil, },
+};
+
+#define csr8r(c, r) ((c)->io[r])
+#define csr8w(c, r, v) ((c)->io[r] = (c)->sticky[r] | (v), coherence())
+#define csr8o(c, r, v) ((c)->io[r] = (v), coherence())
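+/* sticky[r] holds bits that are or'd into every csr8w write of register r */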
+
+static long
+i8250status(Uart* uart, void* buf, long n, long offset)
+{
+ char *p;
+ Ctlr *ctlr;
+ uchar ier, lcr, mcr, msr;
+
+ ctlr = uart->regs;
+ p = malloc(READSTR);
+ mcr = ctlr->sticky[Mcr];
+ msr = csr8r(ctlr, Msr);
+ ier = ctlr->sticky[Ier];
+ lcr = ctlr->sticky[Lcr];
+ snprint(p, READSTR,
+ "b%d c%d d%d e%d l%d m%d p%c r%d s%d i%d\n"
+ "dev(%d) type(%d) framing(%d) overruns(%d) "
+ "berr(%d) serr(%d)%s%s%s%s\n",
+
+ uart->baud,
+ uart->hup_dcd,
+ (msr & Dsr) != 0,
+ uart->hup_dsr,
+ (lcr & WlsMASK) + 5,
+ (ier & Ems) != 0,
+ (lcr & Pen) ? ((lcr & Eps) ? 'e': 'o'): 'n',
+ (mcr & Rts) != 0,
+ (lcr & Stb) ? 2: 1,
+ ctlr->fena,
+
+ uart->dev,
+ uart->type,
+ uart->ferr,
+ uart->oerr,
+ uart->berr,
+ uart->serr,
+ (msr & Cts) ? " cts": "",
+ (msr & Dsr) ? " dsr": "",
+ (msr & Dcd) ? " dcd": "",
+ (msr & Ri) ? " ring": ""
+ );
+ n = readstr(offset, buf, n, p);
+ free(p);
+
+ return n;
+}
+
+static void
+i8250fifo(Uart* uart, int level)
+{
+ Ctlr *ctlr;
+
+ ctlr = uart->regs;
+ if(ctlr->hasfifo == 0)
+ return;
+
+ /*
+ * Changing the FIFOena bit in Fcr flushes data
+ * from both receive and transmit FIFOs; there's
+ * no easy way to guarantee not losing data on
+ * the receive side, but it's possible to wait until
+ * the transmitter is really empty.
+ */
+ ilock(ctlr);
+ while(!(csr8r(ctlr, Lsr) & Temt))
+ ;
+
+ /*
+ * Set the trigger level, default is the max.
+ * value.
+ * Some UARTs require FIFOena to be set before
+ * other bits can take effect, so set it twice.
+ */
+ ctlr->fena = level;
+ switch(level){
+ case 0:
+ break;
+ case 1:
+ level = FIFO1|FIFOena;
+ break;
+ case 4:
+ level = FIFO4|FIFOena;
+ break;
+ case 8:
+ level = FIFO8|FIFOena;
+ break;
+ default:
+ level = FIFO14|FIFOena;
+ break;
+ }
+ csr8w(ctlr, Fcr, level);
+ csr8w(ctlr, Fcr, level);
+ iunlock(ctlr);
+}
+
+static void
+i8250dtr(Uart* uart, int on)
+{
+ Ctlr *ctlr;
+
+ /*
+ * Toggle DTR.
+ */
+ ctlr = uart->regs;
+ if(on)
+ ctlr->sticky[Mcr] |= Dtr;
+ else
+ ctlr->sticky[Mcr] &= ~Dtr;
+ csr8w(ctlr, Mcr, 0);
+}
+
+static void
+i8250rts(Uart* uart, int on)
+{
+ Ctlr *ctlr;
+
+ /*
+ * Toggle RTS.
+ */
+ ctlr = uart->regs;
+ if(on)
+ ctlr->sticky[Mcr] |= Rts;
+ else
+ ctlr->sticky[Mcr] &= ~Rts;
+ csr8w(ctlr, Mcr, 0);
+}
+
+static void
+i8250modemctl(Uart* uart, int on)
+{
+ Ctlr *ctlr;
+
+ ctlr = uart->regs;
+ ilock(&uart->tlock);
+ if(on){
+ ctlr->sticky[Ier] |= Ems;
+ csr8w(ctlr, Ier, 0);
+ uart->modem = 1;
+ uart->cts = csr8r(ctlr, Msr) & Cts;
+ }
+ else{
+ ctlr->sticky[Ier] &= ~Ems;
+ csr8w(ctlr, Ier, 0);
+ uart->modem = 0;
+ uart->cts = 1;
+ }
+ iunlock(&uart->tlock);
+
+ /* modem needs fifo */
+ (*uart->phys->fifo)(uart, on);
+}
+
+static int
+i8250parity(Uart* uart, int parity)
+{
+ int lcr;
+ Ctlr *ctlr;
+
+ ctlr = uart->regs;
+ lcr = ctlr->sticky[Lcr] & ~(Eps|Pen);
+
+ switch(parity){
+ case 'e':
+ lcr |= Eps|Pen;
+ break;
+ case 'o':
+ lcr |= Pen;
+ break;
+ case 'n':
+ break;
+ default:
+ return -1;
+ }
+ ctlr->sticky[Lcr] = lcr;
+ csr8w(ctlr, Lcr, 0);
+
+ uart->parity = parity;
+
+ return 0;
+}
+
+static int
+i8250stop(Uart* uart, int stop)
+{
+ int lcr;
+ Ctlr *ctlr;
+
+ ctlr = uart->regs;
+ lcr = ctlr->sticky[Lcr] & ~Stb;
+
+ switch(stop){
+ case 1:
+ break;
+ case 2:
+ lcr |= Stb;
+ break;
+ default:
+ return -1;
+ }
+ ctlr->sticky[Lcr] = lcr;
+ csr8w(ctlr, Lcr, 0);
+
+ uart->stop = stop;
+
+ return 0;
+}
+
+static int
+i8250bits(Uart* uart, int bits)
+{
+ int lcr;
+ Ctlr *ctlr;
+
+ ctlr = uart->regs;
+ lcr = ctlr->sticky[Lcr] & ~WlsMASK;
+
+ switch(bits){
+ case 5:
+ lcr |= Wls5;
+ break;
+ case 6:
+ lcr |= Wls6;
+ break;
+ case 7:
+ lcr |= Wls7;
+ break;
+ case 8:
+ lcr |= Wls8;
+ break;
+ default:
+ return -1;
+ }
+ ctlr->sticky[Lcr] = lcr;
+ csr8w(ctlr, Lcr, 0);
+
+ uart->bits = bits;
+
+ return 0;
+}
+
+static int
+i8250baud(Uart* uart, int baud)
+{
+#ifdef notdef /* don't change the speed */
+ ulong bgc;
+ Ctlr *ctlr;
+ extern int i8250freq; /* In the config file */
+
+ /*
+ * Set the Baud rate by calculating and setting the Baud rate
+ * Generator Constant. This will work with fairly non-standard
+ * Baud rates.
+ */
+ if(i8250freq == 0 || baud <= 0)
+ return -1;
+ bgc = (i8250freq+8*baud-1)/(16*baud);
+
+ ctlr = uart->regs;
+ while(csr8r(ctlr, Usr) & Busy)
+ delay(1);
+ csr8w(ctlr, Lcr, Dlab); /* begin kludge */
+ csr8o(ctlr, Dlm, bgc>>8);
+ csr8o(ctlr, Dll, bgc);
+ csr8w(ctlr, Lcr, 0);
+#endif
+ uart->baud = baud;
+ return 0;
+}
+
+static void
+i8250break(Uart* uart, int ms)
+{
+ Ctlr *ctlr;
+
+ if (up == nil)
+ panic("i8250break: nil up");
+ /*
+ * Send a break.
+ */
+ if(ms <= 0)
+ ms = 200;
+
+ ctlr = uart->regs;
+ csr8w(ctlr, Lcr, Brk);
+ tsleep(&up->sleep, return0, 0, ms);
+ csr8w(ctlr, Lcr, 0);
+}
+
+static void
+emptyoutstage(Uart *uart, int n)
+{
+ _uartputs((char *)uart->op, n);
+ uart->op = uart->oe = uart->ostage;
+}
+
+static void
+i8250kick(Uart* uart)
+{
+ int i;
+ Ctlr *ctlr;
+
+ if(/* uart->cts == 0 || */ uart->blocked)
+ return;
+
+ if(!normalprint) { /* early */
+ if (uart->op < uart->oe)
+ emptyoutstage(uart, uart->oe - uart->op);
+ while ((i = uartstageoutput(uart)) > 0)
+ emptyoutstage(uart, i);
+ return;
+ }
+
+ /* nothing more to send? then disable xmit intr */
+ ctlr = uart->regs;
+ if (uart->op >= uart->oe && qlen(uart->oq) == 0 &&
+ csr8r(ctlr, Lsr) & Temt) {
+ ctlr->sticky[Ier] &= ~Ethre;
+ csr8w(ctlr, Ier, 0);
+ return;
+ }
+
+ /*
+ * 128 here is an arbitrary limit to make sure
+ * we don't stay in this loop too long. If the
+ * chip's output queue is longer than 128, too
+ * bad -- presotto
+ */
+ for(i = 0; i < 128; i++){
+ if(!(csr8r(ctlr, Lsr) & Thre))
+ break;
+ if(uart->op >= uart->oe && uartstageoutput(uart) == 0)
+ break;
+ csr8o(ctlr, Thr, *uart->op++); /* start tx */
+ ctlr->sticky[Ier] |= Ethre;
+ csr8w(ctlr, Ier, 0); /* intr when done */
+ }
+}
+
+void
+serialkick(void)
+{
+ uartkick(&i8250uart[CONSOLE]);
+}
+
+static void
+i8250interrupt(Ureg*, void* arg)
+{
+ Ctlr *ctlr;
+ Uart *uart;
+ int iir, lsr, old, r;
+
+ uart = arg;
+ ctlr = uart->regs;
+ for(iir = csr8r(ctlr, Iir); !(iir & Ip); iir = csr8r(ctlr, Iir)){
+ switch(iir & IirMASK){
+ case Ims: /* Ms interrupt */
+ r = csr8r(ctlr, Msr);
+ if(r & Dcts){
+ ilock(&uart->tlock);
+ old = uart->cts;
+ uart->cts = r & Cts;
+ if(old == 0 && uart->cts)
+ uart->ctsbackoff = 2;
+ iunlock(&uart->tlock);
+ }
+ if(r & Ddsr){
+ old = r & Dsr;
+ if(uart->hup_dsr && uart->dsr && !old)
+ uart->dohup = 1;
+ uart->dsr = old;
+ }
+ if(r & Ddcd){
+ old = r & Dcd;
+ if(uart->hup_dcd && uart->dcd && !old)
+ uart->dohup = 1;
+ uart->dcd = old;
+ }
+ break;
+ case Ithre: /* Thr Empty */
+ uartkick(uart);
+ break;
+ case Irda: /* Received Data Available */
+ case Irls: /* Receiver Line Status */
+ case Ictoi: /* Character Time-out Indication */
+ /*
+ * Consume any received data.
+ * If the received byte came in with a break,
+ * parity or framing error, throw it away;
+ * overrun is an indication that something has
+ * already been tossed.
+ */
+ while((lsr = csr8r(ctlr, Lsr)) & Dr){
+ if(lsr & (FIFOerr|Oe))
+ uart->oerr++;
+ if(lsr & Pe)
+ uart->perr++;
+ if(lsr & Fe)
+ uart->ferr++;
+ r = csr8r(ctlr, Rbr);
+ if(!(lsr & (Bi|Fe|Pe)))
+ uartrecv(uart, r);
+ }
+ break;
+
+ default:
+ iprint("weird uart interrupt type %#2.2uX\n", iir);
+ break;
+ }
+ }
+}
+
+static void
+i8250disable(Uart* uart)
+{
+ Ctlr *ctlr;
+
+ /*
+ * Turn off DTR and RTS, disable interrupts and fifos.
+ */
+ (*uart->phys->dtr)(uart, 0);
+ (*uart->phys->rts)(uart, 0);
+ (*uart->phys->fifo)(uart, 0);
+
+ ctlr = uart->regs;
+ ctlr->sticky[Ier] = 0;
+ csr8w(ctlr, Ier, 0);
+
+ if(ctlr->iena != 0){
+ if(irqdisable(ctlr->irq, i8250interrupt, uart, uart->name) == 0)
+ ctlr->iena = 0;
+ }
+}
+
+static void
+i8250enable(Uart* uart, int ie)
+{
+ int mode;
+ Ctlr *ctlr;
+
+ if (up == nil)
+ return; /* too soon */
+
+ ctlr = uart->regs;
+
+ /* omap only: set uart/irda/cir mode to uart */
+ mode = csr8r(ctlr, Mdr);
+ csr8o(ctlr, Mdr, (mode & ~Modemask) | Modeuart);
+
+ ctlr->sticky[Lcr] = Wls8; /* no parity */
+ csr8w(ctlr, Lcr, 0);
+
+ /*
+ * Check if there is a FIFO.
+ * Changing the FIFOena bit in Fcr flushes data
+ * from both receive and transmit FIFOs; there's
+ * no easy way to guarantee not losing data on
+ * the receive side, but it's possible to wait until
+ * the transmitter is really empty.
+ * Also, reading the Iir outwith i8250interrupt()
+ * can be dangerous, but this should only happen
+ * once, before interrupts are enabled.
+ */
+ ilock(ctlr);
+ if(!ctlr->checkfifo){
+ /*
+ * Wait until the transmitter is really empty.
+ */
+ while(!(csr8r(ctlr, Lsr) & Temt))
+ ;
+ csr8w(ctlr, Fcr, FIFOena);
+ if(csr8r(ctlr, Iir) & Ifena)
+ ctlr->hasfifo = 1;
+ csr8w(ctlr, Fcr, 0);
+ ctlr->checkfifo = 1;
+ }
+ iunlock(ctlr);
+
+ /*
+ * Enable interrupts and turn on DTR and RTS.
+	 * Be careful, if this is called early on to set up a polled
+	 * serial line, not to enable interrupts: the interrupt-enabling
+	 * mechanisms might not be set up yet.
+ */
+ if(ie){
+ if(ctlr->iena == 0 && !ctlr->poll){
+ irqenable(ctlr->irq, i8250interrupt, uart, uart->name);
+ ctlr->iena = 1;
+ }
+ ctlr->sticky[Ier] = Erda;
+// ctlr->sticky[Mcr] |= Ie; /* not on omap */
+ ctlr->sticky[Mcr] = 0;
+ }
+ else{
+ ctlr->sticky[Ier] = 0;
+ ctlr->sticky[Mcr] = 0;
+ }
+ csr8w(ctlr, Ier, 0);
+ csr8w(ctlr, Mcr, 0);
+
+ (*uart->phys->dtr)(uart, 1);
+ (*uart->phys->rts)(uart, 1);
+
+ /*
+ * During startup, the i8259 interrupt controller is reset.
+ * This may result in a lost interrupt from the i8250 uart.
+ * The i8250 thinks the interrupt is still outstanding and does not
+ * generate any further interrupts. The workaround is to call the
+ * interrupt handler to clear any pending interrupt events.
+ * Note: this must be done after setting Ier.
+ */
+ if(ie)
+ i8250interrupt(nil, uart);
+}
+
+static Uart*
+i8250pnp(void)
+{
+ return i8250uart;
+}
+
+static int
+i8250getc(Uart* uart)
+{
+ Ctlr *ctlr;
+
+ ctlr = uart->regs;
+ while(!(csr8r(ctlr, Lsr) & Dr))
+ delay(1);
+ return csr8r(ctlr, Rbr);
+}
+
+static void
+i8250putc(Uart* uart, int c)
+{
+ int i;
+ Ctlr *ctlr;
+
+ if (!normalprint) { /* too early; use brute force */
+ int s = splhi();
+
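+		/*
+		 * the console uart's registers are laid out one per word
+		 * (4 bytes apart), hence indexing PHYSCONS as a ulong
+		 * array by register number.
+		 */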
+ while (!(((ulong *)PHYSCONS)[Lsr] & Thre))
+ ;
+ ((ulong *)PHYSCONS)[Thr] = c;
+ coherence();
+ splx(s);
+ return;
+ }
+
+ ctlr = uart->regs;
+ for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++)
+ delay(1);
+ csr8o(ctlr, Thr, (uchar)c);
+ for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++)
+ delay(1);
+}
+
+void
+serialputc(int c)
+{
+ i8250putc(&i8250uart[CONSOLE], c);
+}
+
+void
+serialputs(char* s, int n)
+{
+ _uartputs(s, n);
+}
+
+#ifdef notdef
+static void
+i8250poll(Uart* uart)
+{
+ Ctlr *ctlr;
+
+ /*
+ * If PhysUart has a non-nil .poll member, this
+ * routine will be called from the uartclock timer.
+ * If the Ctlr .poll member is non-zero, when the
+ * Uart is enabled interrupts will not be enabled
+ * and the result is polled input and output.
+ * Not very useful here, but ports to new hardware
+ * or simulators can use this to get serial I/O
+ * without setting up the interrupt mechanism.
+ */
+ ctlr = uart->regs;
+ if(ctlr->iena || !ctlr->poll)
+ return;
+ i8250interrupt(nil, uart);
+}
+#endif
+
+PhysUart i8250physuart = {
+ .name = "i8250",
+ .pnp = i8250pnp,
+ .enable = i8250enable,
+ .disable = i8250disable,
+ .kick = i8250kick,
+ .dobreak = i8250break,
+ .baud = i8250baud,
+ .bits = i8250bits,
+ .stop = i8250stop,
+ .parity = i8250parity,
+ .modemctl = i8250modemctl,
+ .rts = i8250rts,
+ .dtr = i8250dtr,
+ .status = i8250status,
+ .fifo = i8250fifo,
+ .getc = i8250getc,
+ .putc = i8250putc,
+// .poll = i8250poll, /* only in 9k, not 9 */
+};
+
+static void
+i8250dumpregs(Ctlr* ctlr)
+{
+ int dlm, dll;
+ int _uartprint(char*, ...);
+
+ csr8w(ctlr, Lcr, Dlab);
+ dlm = csr8r(ctlr, Dlm);
+ dll = csr8r(ctlr, Dll);
+ csr8w(ctlr, Lcr, 0);
+
+ _uartprint("dlm %#ux dll %#ux\n", dlm, dll);
+}
+
+Uart* uartenable(Uart *p);
+
+/* must call this from a process's context */
+int
+i8250console(void)
+{
+ Uart *uart = &i8250uart[CONSOLE];
+
+ if (up == nil)
+ return -1; /* too early */
+
+ if(uartenable(uart) != nil /* && uart->console */){
+ // iprint("i8250console: enabling console uart\n");
+ serialoq = uart->oq;
+ uart->opens++;
+ consuart = uart;
+ }
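+	/* 115200 baud, 8 data bits, no parity, RTS on, 1 stop bit, fifos on */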
+ uartctl(uart, "b115200 l8 pn r1 s1 i1");
+ return 0;
+}
+
+void
+_uartputs(char* s, int n)
+{
+ char *e;
+
+ for(e = s+n; s < e; s++){
+ if(*s == '\n')
+ i8250putc(&i8250uart[CONSOLE], '\r');
+ i8250putc(&i8250uart[CONSOLE], *s);
+ }
+}
+
+int
+_uartprint(char* fmt, ...)
+{
+ int n;
+ va_list arg;
+ char buf[PRINTSIZE];
+
+ va_start(arg, fmt);
+ n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+ va_end(arg);
+ _uartputs(buf, n);
+
+ return n;
+}
diff --git a/sys/src/9/teg2/usbehci.h b/sys/src/9/teg2/usbehci.h
new file mode 100644
index 000000000..b43d4774b
--- /dev/null
+++ b/sys/src/9/teg2/usbehci.h
@@ -0,0 +1,104 @@
+/* override default macros from ../port/usb.h */
+#undef dprint
+#undef ddprint
+#undef deprint
+#undef ddeprint
+#define dprint if(ehcidebug)print
+#define ddprint if(ehcidebug>1)print
+#define deprint if(ehcidebug || ep->debug)print
+#define ddeprint if(ehcidebug>1 || ep->debug>1)print
+
+typedef struct Ctlr Ctlr;
+typedef struct Eopio Eopio;
+typedef struct Isoio Isoio;
+typedef struct Poll Poll;
+typedef struct Qh Qh;
+typedef struct Qtree Qtree;
+
+#pragma incomplete Ctlr;
+#pragma incomplete Eopio;
+#pragma incomplete Isoio;
+#pragma incomplete Poll;
+#pragma incomplete Qh;
+#pragma incomplete Qtree;
+
+struct Poll
+{
+ Lock;
+ Rendez;
+ int must;
+ int does;
+};
+
+struct Ctlr
+{
+ Rendez; /* for waiting to async advance doorbell */
+ Lock; /* for ilock. qh lists and basic ctlr I/O */
+ QLock portlck; /* for port resets/enable... (and doorbell) */
+ int active; /* in use or not */
+ Ecapio* capio; /* Capability i/o regs */
+ Eopio* opio; /* Operational i/o regs */
+
+ int nframes; /* 1024, 512, or 256 frames in the list */
+ ulong* frames; /* periodic frame list (hw) */
+ Qh* qhs; /* async Qh circular list for bulk/ctl */
+ Qtree* tree; /* tree of Qhs for the periodic list */
+ int ntree; /* number of dummy qhs in tree */
+ Qh* intrqhs; /* list of (not dummy) qhs in tree */
+ Isoio* iso; /* list of active Iso I/O */
+ ulong load;
+ ulong isoload;
+ int nintr; /* number of interrupts attended */
+ int ntdintr; /* number of intrs. with something to do */
+ int nqhintr; /* number of async td intrs. */
+ int nisointr; /* number of periodic td intrs. */
+ int nreqs;
+ Poll poll;
+};
+
+/*
+ * Operational registers (hw)
+ */
+struct Eopio
+{
+ ulong cmd; /* 00 command */
+ ulong sts; /* 04 status */
+ ulong intr; /* 08 interrupt enable */
+ ulong frno; /* 0c frame index */
+ ulong seg; /* 10 bits 63:32 of EHCI datastructs (unused) */
+ ulong frbase; /* 14 frame list base addr, 4096-byte boundary */
+ ulong link; /* 18 link for async list */
+ uchar d2c[0x40-0x1c]; /* 1c dummy */
+ ulong config; /* 40 1: all ports default-routed to this HC */
+ ulong portsc[3]; /* 44 Port status and control, one per port */
+
+ /* defined for omap35 ehci at least */
+ uchar _pad0[0x80 - 0x50];
+ ulong insn[6]; /* implementation-specific */
+};
+
+typedef struct Uhh Uhh;
+struct Uhh {
+ ulong revision; /* ro */
+ uchar _pad0[0x10-0x4];
+ ulong sysconfig;
+ ulong sysstatus; /* ro */
+
+ uchar _pad1[0x40-0x18];
+ ulong hostconfig;
+ ulong debug_csr;
+};
+
+enum {
+ /* hostconfig bits */
+ P1ulpi_bypass = 1<<0, /* utmi if set; else ulpi */
+};
+
+extern Ecapio *ehcidebugcapio;
+extern int ehcidebugport;
+
+extern int ehcidebug;
+
+void ehcilinkage(Hci *hp);
+void ehcimeminit(Ctlr *ctlr);
+void ehcirun(Ctlr *ctlr, int on);
diff --git a/sys/src/9/teg2/v7-arch.c b/sys/src/9/teg2/v7-arch.c
new file mode 100644
index 000000000..40b1e09fa
--- /dev/null
+++ b/sys/src/9/teg2/v7-arch.c
@@ -0,0 +1,51 @@
+/*
+ * arm arch v7 routines other than cache-related ones.
+ *
+ * calling this arch-v7.c would confuse the mk scripts,
+ * to which a filename arch*.c is magic.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+#include "arm.h"
+
+/*
+ * these routines should be cheap enough that there will
+ * be no hesitation to use them.
+ *
+ * once 5c in-lines vlong ops, just use the vlong versions.
+ */
+
+/* see Hacker's Delight if this isn't obvious */
+#define ISPOW2(i) (((i) & ((i) - 1)) == 0)
+
+int
+ispow2(uvlong uvl)
+{
+ /* see Hacker's Delight if this isn't obvious */
+ return ISPOW2(uvl);
+}
+
+static int
+isulpow2(ulong ul) /* temporary speed hack */
+{
+ return ISPOW2(ul);
+}
+
+/*
+ * return exponent of smallest power of 2 ≥ n
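+ * (e.g. log2(1) == 0, log2(4) == 2, log2(5) == 3, log2(4096) == 12)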
+ */
+int
+log2(ulong n)
+{
+ int i;
+
+ i = BI2BY*BY2WD - 1 - clz(n);
+ if (n == 0 || !ISPOW2(n))
+ i++;
+ return i;
+}
diff --git a/sys/src/9/teg2/vfp3.c b/sys/src/9/teg2/vfp3.c
new file mode 100644
index 000000000..16d45490e
--- /dev/null
+++ b/sys/src/9/teg2/vfp3.c
@@ -0,0 +1,516 @@
+/*
+ * VFPv2 or VFPv3 floating point unit
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "ureg.h"
+#include "arm.h"
+
+/* subarchitecture code in m->havefp */
+enum {
+ VFPv2 = 2,
+ VFPv3 = 3,
+};
+
+/* fp control regs. most are read-only */
+enum {
+ Fpsid = 0,
+ Fpscr = 1, /* rw */
+ Mvfr1 = 6,
+ Mvfr0 = 7,
+ Fpexc = 8, /* rw */
+ Fpinst= 9, /* optional, for exceptions */
+ Fpinst2=10,
+};
+enum {
+ /* Fpexc bits */
+ Fpex = 1u << 31,
+ Fpenabled = 1 << 30,
+ Fpdex = 1 << 29, /* defined synch exception */
+// Fp2v = 1 << 28, /* Fpinst2 reg is valid */
+// Fpvv = 1 << 27, /* if Fpdex, vecitr is valid */
+// Fptfv = 1 << 26, /* trapped fault is valid */
+// Fpvecitr = MASK(3) << 8,
+ /* FSR bits appear here */
+ Fpmbc = Fpdex, /* bits exception handler must clear */
+
+ /* Fpscr bits; see u.h for more */
+ Stride = MASK(2) << 20,
+ Len = MASK(3) << 16,
+ Dn= 1 << 25,
+ Fz= 1 << 24,
+ /* trap exception enables (not allowed in vfp3) */
+ FPIDNRM = 1 << 15, /* input denormal */
+ Alltraps = FPIDNRM | FPINEX | FPUNFL | FPOVFL | FPZDIV | FPINVAL,
+ /* pending exceptions */
+ FPAIDNRM = 1 << 7, /* input denormal */
+ Allexc = FPAIDNRM | FPAINEX | FPAUNFL | FPAOVFL | FPAZDIV | FPAINVAL,
+ /* condition codes */
+ Allcc = MASK(4) << 28,
+};
+enum {
+ /* CpCPaccess bits */
+ Cpaccnosimd = 1u << 31,
+ Cpaccd16 = 1 << 30,
+};
+
+static char *
+subarch(int impl, uint sa)
+{
+ static char *armarchs[] = {
+ "VFPv1 (unsupported)",
+ "VFPv2",
+ "VFPv3+ with common VFP subarch v2",
+ "VFPv3+ with null subarch",
+ "VFPv3+ with common VFP subarch v3",
+ };
+
+ if (impl != 'A' || sa >= nelem(armarchs))
+ return "GOK";
+ else
+ return armarchs[sa];
+}
+
+static char *
+implement(uchar impl)
+{
+ if (impl == 'A')
+ return "arm";
+ else
+ return "unknown";
+}
+
+static int
+havefp(void)
+{
+ int gotfp;
+ ulong acc, sid;
+
+ if (m->havefpvalid)
+ return m->havefp;
+
+ m->havefp = 0;
+ gotfp = 1 << CpFP | 1 << CpDFP;
+ cpwrsc(0, CpCONTROL, 0, CpCPaccess, MASK(28));
+ acc = cprdsc(0, CpCONTROL, 0, CpCPaccess);
+ if ((acc & (MASK(2) << (2*CpFP))) == 0) {
+ gotfp &= ~(1 << CpFP);
+ print("fpon: no single FP coprocessor\n");
+ }
+ if ((acc & (MASK(2) << (2*CpDFP))) == 0) {
+ gotfp &= ~(1 << CpDFP);
+ print("fpon: no double FP coprocessor\n");
+ }
+ if (!gotfp) {
+ print("fpon: no FP coprocessors\n");
+ m->havefpvalid = 1;
+ return 0;
+ }
+ m->fpon = 1; /* don't panic */
+ sid = fprd(Fpsid);
+ m->fpon = 0;
+ switch((sid >> 16) & MASK(7)){
+ case 0: /* VFPv1 */
+ break;
+ case 1: /* VFPv2 */
+ m->havefp = VFPv2;
+ m->fpnregs = 16;
+ break;
+ default: /* VFPv3 or later */
+ m->havefp = VFPv3;
+ m->fpnregs = (acc & Cpaccd16) ? 16 : 32;
+ break;
+ }
+ if (m->machno == 0)
+ print("fp: %d registers,%s simd\n", m->fpnregs,
+ (acc & Cpaccnosimd? " no": ""));
+ m->havefpvalid = 1;
+ return 1;
+}
+
+/*
+ * these can be called to turn the fpu on or off for user procs,
+ * not just at system start up or shutdown.
+ */
+
+void
+fpoff(void)
+{
+ if (m->fpon) {
+ fpwr(Fpexc, 0);
+ m->fpon = 0;
+ }
+}
+
+void
+fpononly(void)
+{
+ if (!m->fpon && havefp()) {
+ /* enable fp. must be first operation on the FPUs. */
+ fpwr(Fpexc, Fpenabled);
+ m->fpon = 1;
+ }
+}
+
+static void
+fpcfg(void)
+{
+ int impl;
+ ulong sid;
+ static int printed;
+
+ /* clear pending exceptions; no traps in vfp3; all v7 ops are scalar */
+ m->fpscr = Dn | Fz | FPRNR | (FPINVAL | FPZDIV | FPOVFL) & ~Alltraps;
+ fpwr(Fpscr, m->fpscr);
+ m->fpconfiged = 1;
+
+ if (printed)
+ return;
+ sid = fprd(Fpsid);
+ impl = sid >> 24;
+ print("fp: %s arch %s; rev %ld\n", implement(impl),
+ subarch(impl, (sid >> 16) & MASK(7)), sid & MASK(4));
+ printed = 1;
+}
+
+void
+fpinit(void)
+{
+ if (havefp()) {
+ fpononly();
+ fpcfg();
+ }
+}
+
+void
+fpon(void)
+{
+ if (havefp()) {
+ fpononly();
+ if (m->fpconfiged)
+ fpwr(Fpscr, (fprd(Fpscr) & Allcc) | m->fpscr);
+ else
+ fpcfg(); /* 1st time on this fpu; configure it */
+ }
+}
+
+void
+fpclear(void)
+{
+// ulong scr;
+
+ fpon();
+// scr = fprd(Fpscr);
+// m->fpscr = scr & ~Allexc;
+// fpwr(Fpscr, m->fpscr);
+
+ fpwr(Fpexc, fprd(Fpexc) & ~Fpmbc);
+}
+
+
+/*
+ * Called when a note is about to be delivered to a
+ * user process, usually at the end of a system call.
+ * Note handlers are not allowed to use the FPU so
+ * the state is marked (after saving if necessary) and
+ * checked in the Device Not Available handler.
+ */
+void
+fpunotify(Ureg*)
+{
+ if(up->fpstate == FPactive){
+ fpsave(&up->fpsave);
+ up->fpstate = FPinactive;
+ }
+ up->fpstate |= FPillegal;
+}
+
+/*
+ * Called from sysnoted() via the machine-dependent
+ * noted() routine.
+ * Clear the flag set above in fpunotify().
+ */
+void
+fpunoted(void)
+{
+ up->fpstate &= ~FPillegal;
+}
+
+/*
+ * Called early in the non-interruptible path of
+ * sysrfork() via the machine-dependent syscall() routine.
+ * Save the state so that it can be easily copied
+ * to the child process later.
+ */
+void
+fpusysrfork(Ureg*)
+{
+ if(up->fpstate == FPactive){
+ fpsave(&up->fpsave);
+ up->fpstate = FPinactive;
+ }
+}
+
+/*
+ * Called later in sysrfork() via the machine-dependent
+ * sysrforkchild() routine.
+ * Copy the parent FPU state to the child.
+ */
+void
+fpusysrforkchild(Proc *p, Ureg *, Proc *up)
+{
+ /* don't penalize the child, it hasn't done FP in a note handler. */
+ p->fpstate = up->fpstate & ~FPillegal;
+}
+
+/* should only be called if p->fpstate == FPactive */
+void
+fpsave(FPsave *fps)
+{
+ int n;
+
+ fpon();
+ fps->control = fps->status = fprd(Fpscr);
+ assert(m->fpnregs);
+ for (n = 0; n < m->fpnregs; n++)
+ fpsavereg(n, (uvlong *)fps->regs[n]);
+ fpoff();
+}
+
+static void
+fprestore(Proc *p)
+{
+ int n;
+
+ fpon();
+ fpwr(Fpscr, p->fpsave.control);
+ m->fpscr = fprd(Fpscr) & ~Allcc;
+ assert(m->fpnregs);
+ for (n = 0; n < m->fpnregs; n++)
+ fprestreg(n, *(uvlong *)p->fpsave.regs[n]);
+}
+
+/*
+ * Called from sched() and sleep() via the machine-dependent
+ * procsave() routine.
+ * About to go in to the scheduler.
+ * If the process wasn't using the FPU
+ * there's nothing to do.
+ */
+void
+fpuprocsave(Proc *p)
+{
+ if(p->fpstate == FPactive){
+ if(p->state == Moribund)
+ fpclear();
+ else{
+ /*
+ * Fpsave() stores without handling pending
+			 * unmasked exceptions. Postnote() can't be called
+ * here as sleep() already has up->rlock, so
+ * the handling of pending exceptions is delayed
+ * until the process runs again and generates an
+ * emulation fault to activate the FPU.
+ */
+ fpsave(&p->fpsave);
+ }
+ p->fpstate = FPinactive;
+ }
+}
+
+/*
+ * The process has been rescheduled and is about to run.
+ * Nothing to do here right now. If the process tries to use
+ * the FPU again it will cause a Device Not Available
+ * exception and the state will then be restored.
+ */
+void
+fpuprocrestore(Proc *)
+{
+}
+
+/*
+ * Disable the FPU.
+ * Called from sysexec() via sysprocsetup() to
+ * set the FPU for the new process.
+ */
+void
+fpusysprocsetup(Proc *p)
+{
+ p->fpstate = FPinit;
+ fpoff();
+}
+
+static void
+mathnote(void)
+{
+ ulong status;
+ char *msg, note[ERRMAX];
+
+ status = up->fpsave.status;
+
+ /*
+ * Some attention should probably be paid here to the
+ * exception masks and error summary.
+ */
+ if (status & FPAINEX)
+ msg = "inexact";
+ else if (status & FPAOVFL)
+ msg = "overflow";
+ else if (status & FPAUNFL)
+ msg = "underflow";
+ else if (status & FPAZDIV)
+ msg = "divide by zero";
+ else if (status & FPAINVAL)
+ msg = "bad operation";
+ else
+ msg = "spurious";
+ snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=%#lux",
+ msg, up->fpsave.pc, status);
+ postnote(up, 1, note, NDebug);
+}
+
+static void
+mathemu(Ureg *)
+{
+ switch(up->fpstate){
+ case FPemu:
+ error("illegal instruction: VFP opcode in emulated mode");
+ case FPinit:
+ fpinit();
+ up->fpstate = FPactive;
+ break;
+ case FPinactive:
+ /*
+ * Before restoring the state, check for any pending
+ * exceptions. There's no way to restore the state without
+ * generating an unmasked exception.
+ * More attention should probably be paid here to the
+ * exception masks and error summary.
+ */
+ if(up->fpsave.status & (FPAINEX|FPAUNFL|FPAOVFL|FPAZDIV|FPAINVAL)){
+ mathnote();
+ break;
+ }
+ fprestore(up);
+ up->fpstate = FPactive;
+ break;
+ case FPactive:
+ error("illegal instruction: bad vfp fpu opcode");
+ break;
+ }
+ fpclear();
+}
+
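+/*
+ * notice a process stuck re-faulting on the same FP instruction:
+ * panic if the same pid faults repeatedly at the same pc.
+ */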
+void
+fpstuck(uintptr pc)
+{
+ if (m->fppc == pc && m->fppid == up->pid) {
+ m->fpcnt++;
+ if (m->fpcnt > 4)
+ panic("fpuemu: cpu%d stuck at pid %ld %s pc %#p "
+ "instr %#8.8lux", m->machno, up->pid, up->text,
+ pc, *(ulong *)pc);
+ } else {
+ m->fppid = up->pid;
+ m->fppc = pc;
+ m->fpcnt = 0;
+ }
+}
+
+enum {
+ N = 1<<31,
+ Z = 1<<30,
+ C = 1<<29,
+ V = 1<<28,
+ REGPC = 15,
+};
+
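+/*
+ * evaluate ARM condition field c (bits 31-28 of an instruction)
+ * against the N, Z, C and V flags in the saved psr value cc.
+ */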
+static int
+condok(int cc, int c)
+{
+ switch(c){
+ case 0: /* Z set */
+ return cc&Z;
+ case 1: /* Z clear */
+ return (cc&Z) == 0;
+ case 2: /* C set */
+ return cc&C;
+ case 3: /* C clear */
+ return (cc&C) == 0;
+ case 4: /* N set */
+ return cc&N;
+ case 5: /* N clear */
+ return (cc&N) == 0;
+ case 6: /* V set */
+ return cc&V;
+ case 7: /* V clear */
+ return (cc&V) == 0;
+ case 8: /* C set and Z clear */
+ return cc&C && (cc&Z) == 0;
+ case 9: /* C clear or Z set */
+ return (cc&C) == 0 || cc&Z;
+ case 10: /* N set and V set, or N clear and V clear */
+ return (~cc&(N|V))==0 || (cc&(N|V)) == 0;
+ case 11: /* N set and V clear, or N clear and V set */
+ return (cc&(N|V))==N || (cc&(N|V))==V;
+ case 12: /* Z clear, and either N set and V set or N clear and V clear */
+ return (cc&Z) == 0 && ((~cc&(N|V))==0 || (cc&(N|V))==0);
+ case 13: /* Z set, or N set and V clear or N clear and V set */
+ return (cc&Z) || (cc&(N|V))==N || (cc&(N|V))==V;
+ case 14: /* always */
+ return 1;
+ case 15: /* never (reserved) */
+ return 0;
+ }
+ return 0; /* not reached */
+}
+
+/* only called to deal with user-mode instruction faults */
+int
+fpuemu(Ureg* ureg)
+{
+ int s, nfp, cop, op;
+ uintptr pc;
+
+ if(waserror()){
+ postnote(up, 1, up->errstr, NDebug);
+ return 1;
+ }
+
+ if(up->fpstate & FPillegal)
+ error("floating point in note handler");
+
+ nfp = 0;
+ pc = ureg->pc;
+ validaddr(pc, 4, 0);
+ if(!condok(ureg->psr, *(ulong*)pc >> 28))
+ iprint("fpuemu: conditional instr shouldn't have got here\n");
+ op = (*(ulong *)pc >> 24) & MASK(4);
+ cop = (*(ulong *)pc >> 8) & MASK(4);
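+	/* op is instr bits 27-24 (class); cop is bits 11-8 (coprocessor number) */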
+ if(m->fpon)
+ fpstuck(pc); /* debugging; could move down 1 line */
+ if (ISFPAOP(cop, op)) { /* old arm 7500 fpa opcode? */
+// iprint("fpuemu: fpa instr %#8.8lux at %#p\n", *(ulong *)pc, pc);
+// error("illegal instruction: old arm 7500 fpa opcode");
+ s = spllo();
+ if(waserror()){
+ splx(s);
+ nexterror();
+ }
+ nfp = fpiarm(ureg); /* advances pc past emulated instr(s) */
+ if (nfp > 1) /* could adjust this threshold */
+ m->fppc = m->fpcnt = 0;
+ splx(s);
+ poperror();
+ } else if (ISVFPOP(cop, op)) { /* if vfp, fpu must be off */
+ mathemu(ureg); /* enable fpu & retry */
+ nfp = 1;
+ }
+
+ poperror();
+ return nfp;
+}
diff --git a/sys/src/9/teg2/words b/sys/src/9/teg2/words
new file mode 100644
index 000000000..52b24fef3
--- /dev/null
+++ b/sys/src/9/teg2/words
@@ -0,0 +1,60 @@
+this is a plan 9 port to the Trimslice with tegra2 soc: dual-core,
+dual-issue 1GHz Cortex-A9 system (v7a arch).
+
+dram is 1GB at 0.
+linux believes that u-boot runs in the bottom 4MB.
+the l2 cache is a non-architectural bag nailed on the side.
+mp arm systems have a generic interrupt controller; this one is gic v1(!).
+vfp 3 floating-point is present.
+
+section numbers (§) are in the tegra 2 tech. ref. man.
+for a minimal cpu server, need these devices to work:
+ clock signals §5 (leave to u-boot),
+ pad mux + gpio crap §8, §11 and §18 (leave to u-boot),
+☑ 1 cpu §13,
+☑ uart (16[45]50) §22,
+☑ gic (gic.v1.pdf),
+☑ clock §6—7,
+☑ ether8169 via pcie §31.
+then add these:
+☑ 2nd cpu (cortex.a9.mpcore.pdf),
+☑ l2 cache (l2cache.pl310.pdf, errata),
+☑ fpu (cortex.a9.fp.pdf),
+☑ user profiling,
+ kprof,
+ in-line 64-bit arithmetic,
+eventually might want:
+ usb (e.g., for sata) §26,
+ nor flash §17,
+ video §29,
+and the really horrid ones:
+ nand flash §16,
+ mmc §25.
+
+physical memory map
+
+0 1GB ram
+
+40000000 256K iram (audio/video memory)
+50000000 cortex-a9 cpu regs, periphbase, intr distrib, memsel,
+ l2 cache
+54000000 graphics regs
+58000000 gart (graphics window)
+60000000 256MB ppsb bus dev regs, including semas, intr ctlr, dma,
+ arm7 cache, gpio, except. vects
+70000000 256MB apc bus regs, including uarts, nand, nor, spi, rtc
+
+80000000 1GB ahb extern mem, pcie for cpu only
+90000000-97ffffff pcie 0 mem(?)
+a0000000-a7ffffff pcie 0 prefetch mem, includes rtl8111dl ether(?)
+a0020000 ether region 4
+a0024000 ether region 2
+
+c0000000 256MB ahb bus virtual b0000000
+c3000000-c80007ff 81MB ide, usb, sata, mmc
+d0000000 256MB nor flash virtual 40000000
+f000f000 4K mmu tlb
+fff00000 48K irom boot code
+ffff0000 64K high vectors
+
+use 0xc0000000 as KZERO.
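+
+a minimal sketch of the kernel/physical address translation that KZERO
+implies, given ram at physical 0 (names and exact definitions assumed
+here; the port's mem.h is authoritative):
+
+	#define KZERO		0xC0000000
+	#define KADDR(pa)	((void*)((uintptr)(pa)+KZERO))	/* phys -> kernel virtual */
+	#define PADDR(va)	((uintptr)(va)-KZERO)		/* kernel virtual -> phys */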