Restructure proof (#1)

vana-com · Sep 9, 2024 · c44e35c · c44e35c
1 parent ceb8628
commit c44e35c
Show file tree

Hide file tree

Showing 11 changed files with 142 additions and 92 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,5 @@ __pycache__/
 
 demo/*
 !demo/input/
+
+.idea/
diff --git a/Dockerfile b/Dockerfile
@@ -9,4 +9,7 @@ WORKDIR /app
 
 COPY . /app
 
-CMD ["python", "-m", "my_proof"]
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+CMD ["python", "-m", "my_proof"]
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
-# Vana Satya Proof of Contribution Python Template
+# Vana Satya Proof of Contribution - Python Template
 
-This repository serves as a template for creating proof tasks using Python and Gramine for secure computation.
+This repository serves as a template for creating proof of contribution tasks using Python and Gramine for secure computation.
 
 ## Overview
 
@@ -35,12 +35,11 @@ To use this template:
 
 ## Customizing the Proof Logic
 
-The main proof logic is implemented in `my_proof/proof.py`. To customize it, update the `generate_proof` function to change how input files are processed.
+The main proof logic is implemented in `my_proof/proof.py`. To customize it, update the `Proof.generate()` function to change how input files are processed.
 
 The proof can be configured using environment variables:
 
-- `MY_PROOF_EXPECTED_WORDS`: Comma-separated list of words to search for in the input data (default: "hello,world")
-- `MY_PROOF_RANDOM_THRESHOLD`: Float value between 0 and 1 for the random number check threshold (default: 0.5)
+- `USER_EMAIL`: The email address of the data contributor, to verify data ownership
 
 If you want to use a language other than Python, you can modify the Dockerfile to install the necessary dependencies and build the proof task in the desired language.
 
@@ -55,8 +54,7 @@ docker run \
 --volume $(pwd)/demo/sealed:/sealed \
 --volume $(pwd)/demo/input:/input \
 --volume $(pwd)/demo/output:/output \
---env MY_PROOF_EXPECTED_WORDS=world \
---env MY_PROOF_RANDOM_THRESHOLD=0.1 \
+--env USER_EMAIL=user123@gmail.com \
 my-proof
 ```
 
@@ -117,8 +115,7 @@ docker run \
 --device /dev/sgx_enclave:/dev/sgx_enclave \
 --volume /var/run/aesmd:/var/run/aesmd \
 --volume /mnt/gsc-my-proof/sealed:/sealed \
---env MY_PROOF_EXPECTED_WORDS=world \
---env MY_PROOF_RANDOM_THRESHOLD=0.1 \
+--env USER_EMAIL=user123@gmail.com \
 gsc-my-proof
 ```
 
@@ -143,4 +140,4 @@ If you have suggestions for improving this template, please open an issue or sub
 
 ## License
 
-[MIT License](LICENSE)
+[MIT License](LICENSE)
diff --git a/demo/input/account.json b/demo/input/account.json
@@ -0,0 +1,6 @@
+{
+  "name": "user123",
+  "email": "user123@gmail.com",
+  "score": 25,
+  "data": "hello world"
+}
diff --git a/demo/input/activity.json b/demo/input/activity.json
@@ -0,0 +1,10 @@
+[
+  {
+    "score": 0.23,
+    "timestamp": 1725893454951
+  },
+  {
+    "score": 0.27,
+    "timestamp": 1725893454952
+  }
+]
diff --git a/demo/input/user123.json b/demo/input/user123.json
diff --git a/demo/input/user456.json b/demo/input/user456.json
diff --git a/my_proof/__main__.py b/my_proof/__main__.py
@@ -1,3 +1,50 @@
-from .proof import generate_proof
+import json
+import logging
+import os
+import sys
+import traceback
+from typing import Dict, Any
 
-generate_proof()
+from my_proof.proof import Proof
+
+INPUT_DIR, OUTPUT_DIR, SEALED_DIR = '/input', '/output', '/sealed'
+
+logging.basicConfig(level=logging.INFO, format='%(message)s')
+
+
+def load_config() -> Dict[str, Any]:
+    """Assemble the proof settings, log them as JSON, and return them.
+
+    `use_sealing` is True when SEALED_DIR exists as a directory, and
+    `user_email` is None when the USER_EMAIL environment variable is unset.
+    """
+    settings: Dict[str, Any] = {'dlp_id': 1234}  # Set your own DLP ID here
+    settings['use_sealing'] = os.path.isdir(SEALED_DIR)
+    settings['input_dir'] = INPUT_DIR
+    settings['user_email'] = os.environ.get('USER_EMAIL')
+
+    pretty = json.dumps(settings, indent=2)
+    logging.info(f"Using config: {pretty}")
+    return settings
+
+
+def run() -> None:
+    """Run the proof over INPUT_DIR and write the response to OUTPUT_DIR/results.json.
+
+    Raises:
+        FileNotFoundError: if INPUT_DIR is not a directory or has no entries.
+    """
+    settings = load_config()
+
+    # Guard clause: bail out unless the input directory exists and is non-empty.
+    if not os.path.isdir(INPUT_DIR) or not os.listdir(INPUT_DIR):
+        raise FileNotFoundError(f"No input files found in {INPUT_DIR}")
+
+    result = Proof(settings).generate()
+
+    results_file = os.path.join(OUTPUT_DIR, "results.json")
+    with open(results_file, 'w') as out:
+        json.dump(result.dict(), out, indent=2)
+    logging.info(f"Proof generation complete: {result}")
+
+
+if __name__ == "__main__":
+    # Entry point: report any failure with its traceback, then exit non-zero.
+    exit_status = 0
+    try:
+        run()
+    except Exception as err:
+        logging.error(f"Error during proof generation: {err}")
+        traceback.print_exc()
+        exit_status = 1
+    if exit_status:
+        sys.exit(exit_status)
diff --git a/my_proof/models/proof_response.py b/my_proof/models/proof_response.py
@@ -0,0 +1,14 @@
+from typing import Dict, Optional, Any
+
+from pydantic import BaseModel
+
+
+class ProofResponse(BaseModel):
+    """Result record produced by a proof run.
+
+    Only `dlp_id` is required; `valid` defaults to False and every score
+    defaults to 0.0 until the proof fills them in.
+    """
+
+    # Identifier of the DLP this proof is generated for (required).
+    dlp_id: int
+    # Overall verdict on the contribution.
+    valid: bool = False
+    # Overall score for the contribution.
+    score: float = 0.0
+    # Component scores, one per proof-of-contribution dimension.
+    authenticity: float = 0.0
+    ownership: float = 0.0
+    quality: float = 0.0
+    uniqueness: float = 0.0
+    # Free-form extra metadata attached to the proof.
+    attributes: Optional[Dict[str, Any]] = {}
diff --git a/my_proof/proof.py b/my_proof/proof.py
@@ -1,90 +1,66 @@
-import os
 import json
-import requests
 import logging
-from typing import Dict, Any, List, Callable
-
-INPUT_DIR, OUTPUT_DIR, SEALED_DIR = '/input', '/output', '/sealed'
-
-logging.basicConfig(level=logging.INFO, format='%(message)s')
-
-def load_config() -> Dict[str, Any]:
-    """Load configuration from environment variables."""
-    config = {
-        'expected_words': os.environ.get('MY_PROOF_EXPECTED_WORDS', 'hello,world').split(','),
-        'random_threshold': float(os.environ.get('MY_PROOF_RANDOM_THRESHOLD', '0.5'))
-    }
-    logging.info(f"Config: expected words = {config['expected_words']}, threshold = {config['random_threshold']}")
-    return config
+import os
+from typing import Dict, Any
 
-def fetch_random_number() -> float:
-    """Demonstrate HTTP requests by fetching a random number from random.org."""
-    try:
-        response = requests.get('https://www.random.org/decimal-fractions/?num=1&dec=2&col=1&format=plain&rnd=new')
-        return float(response.text.strip())
-    except requests.RequestException as e:
-        logging.warning(f"Error fetching random number: {e}. Using local random.")
-        return __import__('random').random()
+import requests
 
-def find_words_with_sealing(data: str, target_words: List[str], sealed_file: str) -> int:
-    """Find target words in data, writing results to a sealed file."""
-    with open(sealed_file, 'w') as sf:
-        for word in target_words:
-            if word in data:
-                sf.write(f"{word}\n")
+from my_proof.models.proof_response import ProofResponse
 
-    with open(sealed_file, 'r') as sf:
-        return sum(1 for _ in sf)
 
-def find_words_in_memory(data: str, target_words: List[str]) -> int:
-    """Find target words in data, keeping results in memory."""
-    return sum(word in data for word in target_words)
+class Proof:
+    """Computes a proof-of-contribution response from the files in the input directory."""
+
+    def __init__(self, config: Dict[str, Any]):
+        """Store the config and start from a default ProofResponse for config['dlp_id']."""
+        self.config = config
+        self.proof_response = ProofResponse(dlp_id=config['dlp_id'])
 
-def process_input(input_file: str, config: Dict[str, Any], find_words: Callable) -> Dict[str, Any]:
-    """Process a single input file and generate the proof result."""
-    with open(input_file, 'r') as f:
-        input_data = json.load(f).get('data', '')
+    def generate(self) -> ProofResponse:
+        """Generate proofs for all input files.
+
+        Every file in config['input_dir'] is parsed as JSON. The account email
+        is taken from account.json and the summed scores from activity.json;
+        the result is written into self.proof_response, which is returned.
+        """
+        logging.info("Starting proof generation")
 
-    found_count = find_words(input_data, config['expected_words'])
-    random_value = fetch_random_number()
-    is_valid = (found_count == len(config['expected_words']) and
-                random_value >= config['random_threshold'])
+        # Iterate through files and calculate data validity
+        account_email = None
+        total_score = 0
+        for input_filename in os.listdir(self.config['input_dir']):
+            input_file = os.path.join(self.config['input_dir'], input_filename)
+            with open(input_file, 'r') as f:
+                input_data = json.load(f)
 
-    logging.info(f"File: {os.path.basename(input_file)}")
-    logging.info(f"  Words found: {found_count}/{len(config['expected_words'])}")
-    logging.info(f"  Random value: {random_value:.2f} (threshold: {config['random_threshold']})")
-    logging.info(f"  Valid contribution: {'Yes' if is_valid else 'No'}")
+                if input_filename == 'account.json':
+                    account_email = input_data.get('email', None)
+                    continue
 
-    return {
-        "valid_contribution": is_valid,
-        "random_value": random_value,
-        "found_keywords": found_count,
-        "threshold": config['random_threshold']
-    }
+                elif input_filename == 'activity.json':
+                    total_score = sum(item['score'] for item in input_data)
+                    continue
 
-def generate_proof() -> None:
-    """Generate proofs for all input files."""
-    logging.info("Starting proof generation")
-    config = load_config()
-    use_sealing = os.path.isdir(SEALED_DIR)
-    logging.info(f"Using sealed storage: {'Yes' if use_sealing else 'No'}")
+        # Ownership requires an email to actually be present: without this guard a
+        # missing USER_EMAIL and a missing account email compare equal (None == None).
+        email_matches = bool(account_email) and self.config['user_email'] == account_email
+        score_threshold = fetch_random_number()
 
-    for input_filename in os.listdir(INPUT_DIR):
-        input_path = os.path.join(INPUT_DIR, input_filename)
-        output_path = os.path.join(OUTPUT_DIR, f"{input_filename}_result.json")
+        # Calculate proof-of-contribution scores: https://docs.vana.org/vana/core-concepts/key-elements/proof-of-contribution/example-implementation
+        self.proof_response.ownership = 1.0 if email_matches else 0.0  # Does the data belong to the user? Or is it fraudulent?
+        if score_threshold > 0:
+            self.proof_response.quality = max(0, min(total_score / score_threshold, 1.0))  # How high quality is the data?
+        else:
+            # A threshold of 0 would divide by zero; a total that meets it gets full quality
+            self.proof_response.quality = 1.0 if total_score >= score_threshold else 0.0
+        self.proof_response.authenticity = 0  # How authentic is the data (ie: not tampered with)? (Not implemented here)
+        self.proof_response.uniqueness = 0  # How unique is the data relative to other datasets? (Not implemented here)
 
-        if use_sealing:
-            sealed_file = os.path.join(SEALED_DIR, f'{input_filename}_sealed.txt')
-            find_words = lambda data, target_words: find_words_with_sealing(data, target_words, sealed_file)
-        else:
-            find_words = find_words_in_memory
+        # Calculate overall score and validity
+        self.proof_response.score = 0.6 * self.proof_response.quality + 0.4 * self.proof_response.ownership
+        self.proof_response.valid = email_matches and total_score >= score_threshold
 
-        result = process_input(input_path, config, find_words)
+        # Add additional metadata to the proof - these are public attributes about the data
+        self.proof_response.attributes = {
+            'total_score': total_score,
+            'score_threshold': score_threshold,
+            'email_verified': email_matches,
+        }
 
-        with open(output_path, 'w') as f:
-            json.dump(result, f, indent=2)
+        return self.proof_response
 
-    logging.info(f"Proof generation complete. Results in {OUTPUT_DIR}")
 
-if __name__ == "__main__":
-    generate_proof()
+def fetch_random_number() -> float:
+    """Demonstrate HTTP requests by fetching a random number from random.org.
+
+    Returns:
+        The number parsed from the response body, or a value from the local
+        `random.random()` when the request fails, returns an HTTP error
+        status, or the body is not a number.
+    """
+    import random  # local fallback source, imported here to keep the block self-contained
+
+    try:
+        response = requests.get(
+            'https://www.random.org/decimal-fractions/?num=1&dec=2&col=1&format=plain&rnd=new',
+            timeout=10,  # requests waits indefinitely unless a timeout is given
+        )
+        response.raise_for_status()  # an HTTP error page is not a random number
+        return float(response.text.strip())
+    except (requests.RequestException, ValueError) as e:
+        # ValueError covers a response body that cannot be parsed as a float
+        logging.warning(f"Error fetching random number: {e}. Using local random.")
+        return random.random()
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+pydantic
Original file line number	Diff line number	Diff line change
Expand Up		@@ -5,3 +5,5 @@ __pycache__/

		demo/*
		!demo/input/

		.idea/