diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 1a0337e..2813cbf 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -2,7 +2,7 @@ ## Project Overview -This is a Python implementation of LODA - an assembly language designed for integer sequences. The project enables reading, writing, evaluating, and generating LODA programs using machine learning techniques to discover new integer sequence programs. +This is a Python implementation of LODA - an assembly language designed for integer sequences. The project enables reading, writing, and evaluating LODA programs, and searching for matches in the OEIS database. ## Core Concepts @@ -15,10 +15,6 @@ This is a Python implementation of LODA - an assembly language designed for inte - **Operations**: `mov`, `add`, `sub`, `mul`, `div`, `dif`, `mod`, `pow`, `gcd`, `bin`, `cmp`, `min`, `max`, `lpb`, `lpe` - **Loops**: `lpb $n` starts loop, `lpe` ends loop (counter-based termination) -### Token Encoding for ML -Each operation becomes 3 tokens: `[operation_type, target_operand, source_operand]` -Example: `mov $1,5` → `["mov", "$1", "5"]` - ## Source Code Structure ### Core Language (`loda/lang/`) @@ -36,9 +32,8 @@ Example: `mov $1,5` → `["mov", "$1", "5"]` - **`program_cache.py`**: `ProgramCache` manages filesystem loading/caching - **`prefix_index.py`**: `PrefixIndex` enables sequence matching by prefix patterns -### Machine Learning (`loda/ml/`) +### Utilities (`loda/ml/`) - **`util.py`**: Token conversion utilities (program ↔ tokens, merging) -- **`keras/program_generation_rnn.py`**: RNN model for program generation using TensorFlow ### Mining (`loda/mine/`) - **`miner.py`**: `Miner` searches for programs matching OEIS sequences @@ -68,15 +63,6 @@ elif operand.type == OperandType.INDIRECT: value = memory[memory[operand.value]] ``` -### When working with ML tokens: -```python -# Convert programs to tokens for ML -from loda.ml.util import program_to_tokens, 
tokens_to_program - -tokens = program_to_tokens(program) -reconstructed = tokens_to_program(tokens) -``` - ### When working with sequences: ```python # Always specify term count and handle evaluation errors @@ -112,11 +98,11 @@ program = program_cache.get_program(sequence_id) ### Token Conversion Pattern: ```python -# ML workflow -tokens = program_to_tokens(program) -# Process with ML model -new_tokens = model.generate(tokens) -new_program = tokens_to_program(new_tokens) +# Token conversion utilities +from loda.ml.util import program_to_tokens, tokens_to_program + +tokens, vocab = program_to_tokens(program) +reconstructed = tokens_to_program(tokens) ``` ## Testing Conventions @@ -138,13 +124,11 @@ Always set appropriate limits: - Programs: `A######.asm` format (OEIS sequence numbers) - B-files: `b######.txt` format for sequence terms -- Models: Use descriptive names with hyperparameters - Use relative paths from project root ## Integration Points - OEIS database integration via sequence IDs -- TensorFlow/Keras for neural networks - File system caching for performance - CSV parsing for test data diff --git a/README.md b/README.md index dfd918c..585d5b6 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,16 @@ This Python package contains an implementation of the [LODA Language](https://lo an assembly language and computational model for finding integer sequence programs. This Python package allows you to read and write LODA programs, to evaluate -them to integer sequences, to search for matches in the -[OEIS](https://www.oeis.org/) database, -and to use machine learning tools from [Tensorflow](https://www.tensorflow.org/) -to find new integer sequence programs. +them to integer sequences, and to search for matches in the +[OEIS](https://www.oeis.org/) database. ## Getting Started You need Python 3.7 or higher. 
To install the dependencies for LODA, run these commands: ```bash -python3 -m venv env -source env/bin/activate +python3 -m venv ./venv +source ./venv/bin/activate pip install -r requirements.txt ``` diff --git a/loda/documentation.md b/loda/documentation.md index 48b0ec3..eb22e4e 100644 --- a/loda/documentation.md +++ b/loda/documentation.md @@ -1,8 +1,6 @@ This Python package allows you to read and write LODA programs, to evaluate -them to integer sequences, to search for matches in the -[OEIS](https://www.oeis.org/) database, -and to use machine learning from [Tensorflow](https://www.tensorflow.org/) -to generate new integer sequence programs. +them to integer sequences, and to search for matches in the +[OEIS](https://www.oeis.org/) database. ## Installation diff --git a/requirements.txt b/requirements.txt index 63841f2..bbeb417 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,12 +2,3 @@ argparse nose2 parameterized requests -tensorflow; sys_platform != 'darwin' -tensorflow-macos; sys_platform == 'darwin' - -# LLM Dependencies -torch>=1.9.0 -transformers>=4.20.0 -datasets>=2.0.0 -tqdm>=4.62.0 -scikit-learn>=1.0.0 diff --git a/sample.py b/sample.py index 219edf1..6fe68ae 100644 --- a/sample.py +++ b/sample.py @@ -4,7 +4,6 @@ from loda.oeis import ProgramCache, Sequence from loda.runtime import Evaluator, Interpreter from loda.mine import Miner -from loda.ml.keras.program_generation_rnn import load_model, train_model, Generator class SampleLODA: diff --git a/tests/test_ml.py b/tests/test_ml.py index c527db4..a801c8b 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -1,28 +1,12 @@ # -*- coding: utf-8 -*- from unittest import TestCase -from loda.ml.keras.program_generation_rnn import * from loda.oeis import ProgramCache from loda.ml import util from tests.helpers import PROGRAMS_TEST_DIR -#class ProgramGenerationRNNTests(TestCase): -# -# def setUp(self): -# self.program_cache = ProgramCache(PROGRAMS_TEST_DIR) -# -# def test_model(self): -# model = 
train_model(self.program_cache) -# model.save("test_model") -# loaded = load_model("test_model") -# loaded.summary() -# generator = Generator(loaded, num_lanes=10) -# for _ in range(10): -# generator() - - class UtilTests(TestCase): def setUp(self):