Added qlang's lexer to parse tokens, not hooked up yet

This commit is contained in:
Hopeless Tyromancy 2026-02-17 14:10:06 -05:00
parent 69baa26d9b
commit 990391144c
17 changed files with 657 additions and 95 deletions

View File

@ -131,6 +131,9 @@ obj/%.asm.o: %.asm Makefile
mkdir -p "$(dir $@)"
nasm $(NASMFLAGS) $< -o $@
# Write image.hdd to a raw block device with dd.
# USB_DEV keeps the previous default of /dev/sda; override it on the command
# line (make usb USB_DEV=/dev/sdX) so that the wrong disk is not overwritten.
USB_DEV ?= /dev/sda
.PHONY: usb
usb: all
	sudo dd if=image.hdd of=$(USB_DEV) bs=4M oflag=sync status=progress
# Remove object files and the final executable.
.PHONY: clean
clean:

BIN
bin/os

Binary file not shown.

BIN
image.hdd

Binary file not shown.

Binary file not shown.

View File

@ -1,6 +1,9 @@
obj/src/main.c.o: src/main.c src/../include/limine.h src/acpi.h \
src/common.h src/alloc.h src/apic.h src/map.h src/print.h
obj/src/main.c.o: src/main.c src/../include/limine.h src/prompt/lex.h \
src/prompt/file.h src/acpi.h src/common.h src/alloc.h src/apic.h \
src/map.h src/print.h
src/../include/limine.h:
src/prompt/lex.h:
src/prompt/file.h:
src/acpi.h:
src/common.h:
src/alloc.h:

Binary file not shown.

View File

@ -66,7 +66,7 @@ void alloc_init() {
for (size_t i = 0; i < mem_req.response->entry_count; i++) {
struct limine_memmap_entry *entry = mem_req.response->entries[i];
print_memmap_entry(entry);
//print_memmap_entry(entry);
if (entry->type == LIMINE_MEMMAP_USABLE) {
if (entry->length > largest_size) {

View File

@ -105,15 +105,8 @@ void apic_enable() {
}
volatile u32 *spurious = apic_reg(0xF0);
print("Reg 0xF0 at 0x");
print64((u64)spurious);
print("\nSpurious: ");
print32(*spurious);
// set enable flag
*spurious |= 0x100;
print(" after flag: ");
print32(*spurious);
print("\n");
// TPR = 0 (accept all interrupts)
volatile u32 *tpr = apic_reg(0x80);
@ -173,34 +166,10 @@ void ioapic_init() {
u32 low = ioapic_read(IOAPIC_REG_REDIR(irq));
u32 high = ioapic_read(IOAPIC_REG_REDIR(irq) + 1);
print("Current redirection for IRQ");
print8(irq);
print(": ");
print32(low);
print(" ");
print32(high);
print("\n");
// Configure keyboard - UNMASKED (no IOAPIC_MASKED bit!)
low = vect | IOAPIC_DST_PHYS | IOAPIC_EDGE_TRIGG | IOAPIC_ACTIVE_HIGH;
high = (cpu << 24);
ioapic_write(IOAPIC_REG_REDIR(irq), low);
ioapic_write(IOAPIC_REG_REDIR(irq) + 1, high);
low = ioapic_read(IOAPIC_REG_REDIR(irq));
high = ioapic_read(IOAPIC_REG_REDIR(irq) + 1);
// Verify it worked
print("Current redirection for IRQ");
print8(irq);
print(": ");
print32(low);
print(" ");
print32(high);
print("\n");
if (low & IOAPIC_MASKED) {
print(" [STILL MASKED (BAD)]\n");
} else {
print(" [UNMASKED (GOOD)]\n");
}
}

View File

@ -74,14 +74,16 @@ struct cpu_ctx {
};
/*
 * Dispatch the interrupt described by `ctx` and return `ctx`.
 *
 * Vector 0x21 is handed to kbd_handler(). Any other vector is only logged,
 * together with the current value of dbg_var.
 */
struct cpu_ctx *interrupt_dispatch(struct cpu_ctx *ctx) {
    /* Compare, do not assign: `ctx->vect = 0x21` overwrote the vector and was
     * always true, so every interrupt was routed to the keyboard handler and
     * the logging below could never run. */
    if (ctx->vect == 0x21) {
        kbd_handler();
        return ctx;
    }

    print("\n[IRQ ");
    print8(ctx->vect);
    print("] with message: ");
    print64(dbg_var);
    print("\n");
    return ctx;
}

View File

@ -73,18 +73,9 @@ char kbd_buffer_getc() {
}
void kbd_handler() {
print("KBD HANDLER CALLED: scancode: ");
u8 scancode = inb(PS2_DATA_PORT);
print8(scancode);
print("\n");
bool released = (scancode & 0x80);
scancode &= 0x7F;
switch(scancode) {
case 0x2A:
case 0x36:
@ -124,7 +115,6 @@ void kbd_handler() {
kbd_buffer_putc(c);
}
printn(&c, 1);
print("\n");
}

View File

@ -4,6 +4,10 @@
#include "../include/limine.h"
#include "prompt/lex.h"
#include "prompt/file.h"
void print_tok(struct token* tok);
#include "acpi.h"
#include "alloc.h"
#include "apic.h"
@ -193,61 +197,41 @@ void kmain() {
apic_enable();
print("APIC Enabled\n");
ioapic_init();
const char* src[] = {
"u8 main(u32 x) {\n",
" u16 y = x + 16;\n",
" u8 z = x * y;\n",
" return z; }\n"
};
if (!test_if_mapped((u64)apic_reg(0))) {
print("Mapping APIC registers failed :(\n");
const char* name = "prompt> ";
struct file input = {
.row = 0,
.col = 0,
.rows = 4,
.lines = (char**)src,
.name = name,
.tokens = NULL,
.tail = &input.tokens,
};
int res = lex(&input);
print("Result from lexing: ");
print8(res);
if(res != 0) {
hang();
}
u32 id = *apic_reg(0x20);
print("LAPIC ID: ");
print32(id);
print("\n");
struct token* token = input.tokens;
for(;token; token = token->next) {
print_tok(token);
}
print("Enabling keyboard\n");
enable_ps2_keyboard_interrupts();
outb(0x64, 0x20); // Read config
while (!(inb(0x64) & 0x01))
;
u8 config = inb(0x60);
asm volatile("sti");
print("Press a key NOW...\n");
volatile u32 *tpr = apic_reg(0x80);
print("TPR: 0x");
print32(*tpr);
print("\n");
print("PS/2 config: 0x");
print8(config);
if (!(config & 0x01)) {
print("\nProblem: Bit 0 is 0 (interrupt disabled)\n");
} else {
print("\nOK: Bit 0 is 1 (interrupt enabled)\n");
}
print("Press keys (polling test)...\n");
while (1) {
if (inb(0x64) & 0x01) { // Data available
u8 code = inb(0x60);
print("Scan code: 0x");
print8(code);
print("\n");
volatile u32 *irr = apic_reg(0x210);
print32(*irr);
irr = apic_reg(0x200); // IRR bits 31:0
u32 *isr = apic_reg(0x100); // ISR bits 31:0
print("IRR: ");
print32(*irr);
print(" ISR: ");
print32(*isr);
print("\n");
if (code == 0x01)
break; // ESC
}
}
hang();
}

16
src/prompt/file.h Normal file
View File

@ -0,0 +1,16 @@
#ifndef FILE_H_
#define FILE_H_
struct token;
/*
 * Lexer input and state: the source text as an array of lines, the current
 * read position, and the list of tokens produced so far.
 */
struct file {
/* current read position: index into lines[] and offset within that line */
int row;
int col;
/* number of entries in lines[] */
int rows;
char** lines;
/* label printed in diagnostics and token dumps */
const char* name;
/* head of the token list (NULL until the first token is appended) */
struct token* tokens;
/* where the next token pointer gets stored: starts out as &tokens and then
 * moves to the `next` field of the most recently appended token */
struct token** tail;
};
#endif

43
src/prompt/iden.c Normal file
View File

@ -0,0 +1,43 @@
#include "../common.h"
#include "../print.h"
#include "iden.h"
/* Table of registered identifiers; register_iden() returns indices into it. */
struct iden identbl[MAX_IDEN];
/* Number of identbl entries currently in use. */
static size_t identbl_len = 0;
/* Compare the first n bytes of l and r. Returns true when all of them are
 * equal (trivially true for n == 0); NUL bytes get no special treatment. */
static bool strneq(const char* l, const char* r, size_t n) {
    while (n-- > 0) {
        if (*l++ != *r++) {
            return false;
        }
    }
    return true;
}
size_t register_iden(const char* data, size_t len, uint64_t hash) {
size_t i;
for(i = 0; i < identbl_len; i++) {
if(hash == identbl[i].hash &&
identbl[i].len == len &&
strneq(data, identbl[i].data, len)) {
return i;
}
}
if(identbl_len >= MAX_IDEN) {
print("Out of space for identifiers!");
hang();
return 0;
}
identbl_len++;
identbl[i].hash = hash;
identbl[i].len = len;
memcpy(identbl[i].data, data, len);
return i;
}

20
src/prompt/iden.h Normal file
View File

@ -0,0 +1,20 @@
#ifndef IDEN_H_
#define IDEN_H_
#include "../common.h"
#include <stdint.h>
/* Number of elements in the `data` member of struct iden. */
#define MAX_IDEN_SIZE 240
/* Capacity of the identifier table identbl. */
#define MAX_IDEN (1024*4)
/* One entry of the identifier table. */
struct iden {
/* length in bytes of the identifier text */
size_t len;
/* hash of the text, as passed to register_iden() */
u64 hash;
/* storage for the identifier text; register_iden() memcpy()s the bytes here.
 * NOTE(review): this is declared as an array of MAX_IDEN_SIZE char POINTERS
 * but is filled and compared as a byte buffer -- presumably
 * `char data[MAX_IDEN_SIZE]` was intended. Confirm, and update the users
 * (e.g. `print(*id->data)` in print_tok) together with the type. */
char* data[MAX_IDEN_SIZE];
};
/* Identifier table; indexed by the values register_iden() returns. */
extern struct iden identbl[MAX_IDEN];
/* Return the identbl index of the identifier made of the first `len` bytes of
 * `data` (with hash `hash`), adding it to the table if it is not present. */
size_t register_iden(const char* data, size_t len, uint64_t hash);
#endif

251
src/prompt/lex.c Normal file
View File

@ -0,0 +1,251 @@
#include <stdint.h>
#include "iden.h"
#include "lex.h"
#include "../print.h"
/*
 * Report a lexing error at the current position of `state`.
 *
 * Prints "<name>:<row>:<col>: <err>" and a newline, where row and column are
 * the current position plus one, and returns 1 so that callers can add the
 * result to their error count.
 */
static int lex_error(struct file* state, const char* err) {
    print(state->name);
    /* separators keep the two numbers from running into each other */
    print(":");
    print16(state->row + 1);
    print(":");
    print16(state->col + 1);
    print(": ");
    print(err);
    print("\n");
    return 1;
}
/* Length of the NUL-terminated string `str`, not counting the terminator. */
static size_t strlen(const char* str) {
    const char* end = str;
    while (*end != '\0') {
        end++;
    }
    return (size_t)(end - str);
}
/* Compare the first n bytes of l and r. Returns true when all of them are
 * equal (trivially true for n == 0); NUL bytes get no special treatment. */
static bool strneq(const char* l, const char* r, size_t n) {
    while (n-- > 0) {
        if (*l++ != *r++) {
            return false;
        }
    }
    return true;
}
/* Return a pointer to the first occurrence of `ch` in `str`, or NULL when it
 * does not occur. The terminating NUL is never matched, so index(s, 0) is
 * NULL. */
static char* index(char* str, int ch) {
    while (*str != '\0') {
        if (*str == ch) {
            return str;
        }
        str++;
    }
    return NULL;
}
/* Number of entries in the statically allocated token pool. */
#define MAX_TOK (1024*1024)
/* Backing storage for every token handed out by tok(). */
struct token token_pool[MAX_TOK];
/* Index of the next unused entry of token_pool. */
size_t token_next = 0;
/*
 * Take the next entry of token_pool and fill it in.
 *
 * type  - token type; also selects which union member `val` is copied into
 * state - file being lexed; supplies the row, and state->col marks the end
 *         of the token (the length is state->col - col)
 * col   - column at which the token starts
 * val   - pointer to the payload: an enum op, enum keyword, int64_t or size_t
 *         for TOK_OP, TOK_KEYWORD, TOK_NUM and TOK_IDEN respectively; not
 *         read for any other type
 *
 * Returns the token with `next` set to NULL. Prints a message and calls
 * hang() once the pool index reaches MAX_TOK.
 */
static struct token* tok(enum tokentype type, struct file* state, int col, void* val) {
    struct token* ret = token_pool + token_next;
    token_next += 1;
    if (token_next >= MAX_TOK) {
        print("OUT OF TOKENS!!!!");
        hang();
    }

    ret->type = type;
    ret->row = state->row;
    ret->col = col;
    ret->file = state;
    ret->next = NULL;
    ret->len = state->col - col;

    if (type == TOK_OP) {
        ret->op = *(enum op*)val;
    } else if (type == TOK_KEYWORD) {
        ret->keyword = *(enum keyword*)val;
    } else if (type == TOK_NUM) {
        ret->num = *(int64_t*)val;
    } else if (type == TOK_IDEN) {
        ret->iden = *(size_t*)val;
    }
    return ret;
}
/* True for the ASCII decimal digits '0'..'9'. */
static bool isdigit(int ch) {
    /* one unsigned comparison covers both ends of the range */
    return (unsigned)ch - '0' < 10;
}
/* True for the ASCII hexadecimal digits: '0'..'9', 'A'..'F' and 'a'..'f'.
 * (The 'A'..'F' range used to be tested twice, so lowercase digits were
 * rejected.) */
static bool isxdigit(int ch) {
    return (ch >= '0' && ch <= '9')
        || (ch >= 'A' && ch <= 'F')
        || (ch >= 'a' && ch <= 'f');
}
/* True for the ASCII letters 'A'..'Z' and 'a'..'z'. */
static bool isalpha(int ch) {
    if (ch >= 'A' && ch <= 'Z') {
        return true;
    }
    return ch >= 'a' && ch <= 'z';
}
/* True for ASCII decimal digits and ASCII letters. */
static bool isalnum(int ch) {
    if (ch >= '0' && ch <= '9') {
        return true;
    }
    if (ch >= 'a' && ch <= 'z') {
        return true;
    }
    return ch >= 'A' && ch <= 'Z';
}
/* Map ASCII 'a'..'z' to 'A'..'Z'; every other value is returned unchanged. */
static int toupper(int ch) {
    return (ch >= 'a' && ch <= 'z') ? ch - ('a' - 'A') : ch;
}
/* Map ASCII 'A'..'Z' to 'a'..'z'; every other value is returned unchanged. */
static int tolower(int ch) {
    return (ch >= 'A' && ch <= 'Z') ? ch + ('a' - 'A') : ch;
}
// True for space and for the five control characters '\t', '\n', '\v', '\f'
// and '\r' (codes 9..13); same set as the musl one-liner this was taken from.
static bool isspace(int ch) {
    if (ch == ' ') {
        return true;
    }
    return ch >= '\t' && ch <= '\r';
}
/*
 * Lex an integer literal starting at state->col and append a TOK_NUM token.
 *
 * state - file being lexed; state->col is at the first digit (the caller has
 *         already stepped over a leading '-')
 * save  - column at which the token starts (the '-' for negative literals)
 * sign  - factor applied to the parsed magnitude; callers pass 1 or -1
 *
 * Accepts a run of decimal digits, or "0x" followed by hexadecimal digits.
 * Returns 0 on success. On a malformed literal the problem is reported with
 * lex_error(), no token is appended and 1 is returned.
 */
static int parse_num(struct file* state, int save, int sign) {
    const char* line = state->lines[state->row];
    /* accumulate unsigned so an over-long literal wraps around instead of
     * overflowing a signed value */
    uint64_t magnitude = 0;

    if (line[state->col] == '0' && line[state->col + 1] == 'x') {
        state->col += 2;
        /* any hex digit may follow the prefix, not only 0-9 */
        if (!isxdigit(line[state->col])) {
            lex_error(state, "expected hexadecimal digits in hex literal");
            return 1;
        }
        while (isxdigit(line[state->col])) {
            int ch = toupper(line[state->col]);
            magnitude <<= 4;
            if (ch <= '9') {
                magnitude += (uint64_t)(ch - '0');
            } else {
                magnitude += (uint64_t)(ch - 'A' + 10);
            }
            /* consume the digit; without this the loop never terminated */
            state->col++;
        }
    } else {
        while (isdigit(line[state->col])) {
            magnitude = magnitude * 10 + (uint64_t)(line[state->col] - '0');
            state->col++;
        }
        if (isalpha(line[state->col])) {
            lex_error(state, "invalid decimal digit in decimal literal");
            return 1;
        }
    }

    int64_t num = (int64_t)(magnitude * (uint64_t)(int64_t)sign);
    *state->tail = tok(TOK_NUM, state, save, &num);
    state->tail = &(*state->tail)->next;
    return 0;
}
/* Lexing
number :- [0-9]+ | 0x[0-9A-Fa-f]+
keyword :- 'if' | 'else'
identifier :- [A-z_][0-9A-z_]*
operator :- .....
*/
int lex(struct file* state) {
if(state->row >= state->rows)
return 0;
while(isspace(state->lines[state->row][state->col])) {
if(state->lines[state->row][state->col] == '\n') {
state->col = 0;
state->row++;
if(state->row >= state->rows)
return 0;
} else {
state->col++;
}
}
if(state->lines[state->row][state->col] == '/' && state->lines[state->row][state->col+1] == '*') {
state->col += 2;
while(!(state->lines[state->row][state->col] == '*' && state->lines[state->row][state->col+1] == '/')) {
if(state->lines[state->row][state->col] == '\n') {
state->col = -1;
state->row++;
if(state->row >= state->rows)
return lex_error(state, "unexpected EOF, comment not closed");
}
state->col++;
}
state->col += 2;
if(state->lines[state->row][state->col] == '\n') {
state->row++;
state->col = 0;
}
return lex(state);
}
int ch = state->lines[state->row][state->col];
int nextch = state->lines[state->row][state->col+1];
/* single line comment */
if(ch == nextch && ch == '/') {
state->row++;
state->col = 0;
return lex(state);
}
int save = state->col;
if(ch == '-' && isdigit(state->lines[state->row][state->col+1])) {
state->col++;
int e = parse_num(state, save, -1);
return e + lex(state);
} else if(isdigit(ch)) {
int e = parse_num(state, save, 1);
return e + lex(state);
} else if(isalpha(ch) || ch == '_') {
size_t len = 0;
char* str = &state->lines[state->row][state->col];
u64 hash = 0xcbf29ce484222325;
while(state->lines[state->row][state->col] == '_' || isalnum(state->lines[state->row][state->col])) {
hash = (hash ^ state->lines[state->row][state->col]) * 0x100000001b3;
len++;
state->col++;
}
for(size_t i = 0; i < sizeof(keywords)/sizeof(char*); i++) {
if(strlen(keywords[i]) == len && strneq(keywords[i], str, len)) {
*state->tail = tok(TOK_KEYWORD, state, save, &i);
}
}
if(!*state->tail) {
size_t idx = register_iden(str, len, hash);
*state->tail = tok(TOK_IDEN, state, save, &idx);
}
} else if(index("+-/%*|&^~><=!",state->lines[state->row][state->col])) {
for(int i = 0; (size_t)i < sizeof(ops)/sizeof(char*); i++) {
if(strneq(&state->lines[state->row][state->col], ops[i], strlen(ops[i]))) {
state->col += strlen(ops[i]);
*state->tail = tok(TOK_OP, state, save, &i);
break;
}
}
} else if(index("[({,;})]", state->lines[state->row][state->col])) {
state->col++;
*state->tail = tok(state->lines[state->row][state->col-1], state, save, NULL);
} else {
lex_error(state, "unrecognized character");
print("Character: (0x");
u8 ch = state->lines[state->row][state->col];
print8(ch);
print(") '");
printn(&ch,1);
print("\n");
state->col++;
return 1 + lex(state);
}
state->tail = &(*state->tail)->next;
return lex(state);
}

143
src/prompt/lex.h Normal file
View File

@ -0,0 +1,143 @@
#ifndef LEX_H_
#define LEX_H_
#include <stdint.h>
#include "file.h"
/*
 * Token categories. The first four are abstract classes numbered from 0;
 * every punctuation token uses its own character as the enum value, which
 * lets the lexer pass the source character straight through as the type.
 */
enum tokentype {
TOK_NUM,
TOK_IDEN,
TOK_KEYWORD,
TOK_OP,
TOK_SEMICOLON = ';',
TOK_COMMA = ',',
TOK_LBRACKET = '[',
TOK_RBRACKET = ']',
TOK_LPAREN = '(',
TOK_RPAREN = ')',
TOK_LBRACE = '{',
TOK_RBRACE = '}',
};
/*
 * Keyword identifiers. The values are used as indices into keywords[], so the
 * two lists must be kept in the same order.
 */
enum keyword {
KEY_VOID,
KEY_U8,
KEY_I8,
KEY_U16,
KEY_I16,
KEY_U32,
KEY_I32,
KEY_U64,
KEY_I64,
KEY_IF,
KEY_ELSE,
KEY_WHILE,
KEY_FOR,
};
/*
 * Spelling of each keyword, indexed by enum keyword (same order).
 * NOTE(review): this is a `static` definition in a header, so every source
 * file that includes lex.h gets its own copy of the table.
 */
static const char* keywords[] = {
"void",
"u8",
"i8",
"u16",
"i16",
"u32",
"i32",
"u64",
"i64",
"if",
"else",
"while",
"for"
};
/*
 * Spelling of each operator, indexed by enum op (same order).
 * lex() walks this table from the top and takes the first entry that matches
 * at the current position, so an operator that is a prefix of another one
 * (e.g. "=" and "==", "<<" and "<<=") must come after the longer spelling.
 */
static const char* ops[] = {
"==",
"=",
"!=",
"!",
"+=",
"+",
"-=",
"-",
"*=",
"*",
"/=",
"/",
"%=",
"%",
"<<=",
"<<",
">>=",
">>",
"<=",
">=",
"<",
">",
"~",
"&&",
"&=",
"&",
"||",
"|=",
"|",
"^=",
"^"
};
/*
 * Operator identifiers. The values are used as indices into ops[], so the two
 * lists must be kept in the same order.
 */
enum op {
OP_EQ,
OP_ASSGN,
OP_NOTEQ,
OP_LNOT,
OP_ADDASSGN,
OP_ADD,
OP_SUBASSGN,
OP_MINUS,
OP_MULASSGN,
OP_STAR,
OP_DIVASSGN,
OP_DIV,
OP_MODASSGN,
OP_MOD,
OP_LSHIFTASSGN,
OP_LSHIFT,
OP_RSHIFTASSGN,
OP_RSHIFT,
OP_LESSEQ,
OP_MOREEQ,
OP_LESS,
OP_MORE,
OP_BNOT,
OP_LAND,
OP_BANDASSGN,
OP_BAND,
OP_LOR,
OP_BORASSGN,
OP_BOR,
OP_XORASSGN,
OP_XOR
};
/* One lexed token; tokens of a file form a singly linked list through `next`. */
struct token {
/* following token, or NULL for the last one */
struct token* next;
/* location information: 0-based row and starting column in `file`, and the
 * number of characters the token spans */
int row;
int col;
size_t len;
struct file* file;
/* token type and the payload that goes with it: `num` for TOK_NUM, `iden`
 * (an index into identbl) for TOK_IDEN, `op` for TOK_OP, `keyword` for
 * TOK_KEYWORD; punctuation tokens carry no payload */
enum tokentype type;
union {
int64_t num;
size_t iden;
enum op op;
enum keyword keyword;
};
};
/* Tokenize `state` from its current position, appending tokens at
 * state->tail. Returns 0 on success, otherwise the number of errors reported. */
int lex(struct file* state);
#endif

138
src/prompt/print.c Normal file
View File

@ -0,0 +1,138 @@
#include "iden.h"
#include "lex.h"
#include "../print.h"
#include "../common.h"
/* Length of the NUL-terminated string `str`, not counting the terminator. */
static size_t strlen(const char* str) {
    const char* end = str;
    while (*end != '\0') {
        end++;
    }
    return (size_t)(end - str);
}
/* Defined outside this file; presumably the foreground/background colours
 * used by the print routines -- print_tok() changes them around the caret and
 * restores them afterwards. TODO confirm against the print implementation. */
extern u32 fg, bg;
void print_tok(struct token* tok) {
/* header: [file:row:col] */
print("[");
print(tok->file->name);
print(":");
if(tok->row + 1 < (1 << 16)) {
print16(tok->row + 1);
print(":");
print16(tok->col + 1);
} else {
print32(tok->row + 1);
print(":");
print32(tok->col + 1);
}
print("] ");
switch (tok->type) {
case TOK_KEYWORD:
print("keyword: ");
print(keywords[tok->keyword]);
print("\n");
break;
case TOK_NUM:
print("number: ");
print(" (");
print64((unsigned long)tok->num);
print("):\n");
break;
case TOK_IDEN: {
struct iden* id = &identbl[tok->iden];
print("iden: { .hash = 0x");
print32(id->hash);
print(", .data = \"");
print(*id->data);
print("\" (");
/* print pointer value as hex */
print("ptr"); /* pointer printing omitted; add if needed */
print("), .len = ");
print32((int)id->len);
print(" }:\n");
break;
}
case TOK_OP: {
print("operator: ");
print(ops[tok->op]);
print(" (#");
print32(tok->op);
print("):\n");
break;
}
default: {
/* print single char and hex */
char c[2] = { (char)tok->type, 0 };
print(c);
print(" (0x");
print32((unsigned char)tok->type);
print("):\n");
break;
}
}
/* line with number and separator: "\t%4d | " */
print("\t");
/* pad row number to width 4 */
{
char numbuf[8];
int len = 0;
long r = tok->row + 1;
if (r == 0) {
numbuf[len++] = '0';
} else {
long tmp = r;
char rev[8];
int rp = 0;
while (tmp) {
rev[rp++] = '0' + (tmp % 10);
tmp /= 10;
}
while (rp--) numbuf[len++] = rev[rp];
}
/* left pad with spaces to width 4 */
for (int i = 0; i < 4 - len; ++i) print(" ");
/* print number */
for (int i = 0; i < len; ++i) {
char s[2] = { numbuf[i], 0 };
print(s);
}
}
print(" | ");
/* print text before token */
if (tok->col > 0)
printn(tok->file->lines[tok->row], tok->col);
/* print token text */
if (tok->len > 0)
printn(tok->file->lines[tok->row] + tok->col, tok->len);
/* print rest of line after token */
printn(tok->file->lines[tok->row] + tok->col + tok->len,
strlen(tok->file->lines[tok->row] + tok->col + tok->len));
print("\n");
/* caret line */
print("\t | ");
for (int i = 0; i < tok->col; i++) {
if (tok->file->lines[tok->row][i] == '\t') {
print("\t");
} else {
print(" ");
}
}
u32 savebg = bg, savefg = fg;
fg = bg;
bg = ~fg;
print("^");
fg = savefg;
bg = savebg;
for (size_t i = 1; i < tok->len; i++) print("~");
print("\n");
}