Skip to content

Commit

Permalink
run new experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
CShorten committed Jan 22, 2025
1 parent 704dcee commit cc98abe
Show file tree
Hide file tree
Showing 22 changed files with 384,733 additions and 20,083 deletions.
3,654 changes: 1,989 additions & 1,665 deletions app/backend/synthetic-weaviate-queries-with-results.json

Large diffs are not rendered by default.

18 changes: 17 additions & 1 deletion app/frontend/src/components/QueryVisualizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ const QueryVisualizer = () => {
<span className="text-xl mt-2 block">{currentItem.query.corresponding_natural_language_query}</span>
</p>
)}
<h2 className="font-semibold text-2xl text-[#1c1468]">Query APIs utilized</h2>
<h2 className="font-semibold text-2xl text-[#1c1468]">Query APIs Utilized</h2>
{currentItem.query.target_collection && (
<p>
<span className="font-semibold">Collection:</span>{' '}
Expand Down Expand Up @@ -854,6 +854,22 @@ const QueryVisualizer = () => {
</p>
)}
</div>

<div className="mt-6">
<h2 className="font-semibold text-2xl text-[#1c1468]">Query Validation</h2>
<div className="mt-2">
<p className="font-semibold">
LLM-as-Judge Query Assessment:{' '}
<span className={currentItem.is_valid ? 'text-green-600' : 'text-red-600'}>
{currentItem.is_valid ? 'Valid' : 'Invalid'}
</span>
</p>
<p className="mt-2">
{currentItem.verification_rationale}
</p>
</div>
</div>

{renderQueryResult(currentItem.ground_truth_query_result)}
</>
)}
Expand Down
15,345 changes: 8,545 additions & 6,800 deletions data/OLD-synthetic-weaviate-queries-with-results.json

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions data/analyze-queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import json
from collections import Counter, defaultdict
from typing import Dict, List, Set
import itertools

def analyze_operator_distribution(data: List[Dict]) -> None:
    """
    Print a report on how query operators are distributed across ``data``.

    The report is written to stdout and has four parts: overall totals,
    per-operator counts, per-combination counts, and a completeness check
    listing every expected operator combination that never occurs.

    Args:
        data: Query records. Each record is indexed with the keys
            ``'is_valid'`` (truthy when the query counts as valid),
            ``'database_schema'`` (any JSON-serializable value) and
            ``'ground_truth_operators'`` (an iterable of operator-name
            strings). An empty list is accepted and yields a report in
            which every percentage is 0.0.

    Raises:
        KeyError: If a record lacks one of the keys listed above.
    """
    total_queries = len(data)
    valid_queries = sum(1 for item in data if item['is_valid'])
    operator_counts = Counter()
    operator_combinations = Counter()
    schemas_covered = set()

    def percent_of_total(count: int) -> float:
        # Guard the division: an empty dataset would otherwise raise
        # ZeroDivisionError before any of the report is usable.
        if not total_queries:
            return 0.0
        return (count / total_queries) * 100

    # Count operator occurrences and combinations
    for item in data:
        # Serialize with sorted keys so schemas that differ only in key
        # order collapse to the same string.
        schemas_covered.add(json.dumps(item['database_schema'], sort_keys=True))

        # De-duplicate so an operator repeated within one query counts once.
        operators = set(item['ground_truth_operators'])
        operator_counts.update(operators)

        # Sort so the combination key does not depend on operator order.
        operator_combinations[tuple(sorted(operators))] += 1

    # Print results
    print("\n=== Query Generation Analysis ===")
    print(f"\nTotal Queries: {total_queries}")
    print(f"Valid Queries: {valid_queries} ({percent_of_total(valid_queries):.1f}%)")
    print(f"Unique Schemas Used: {len(schemas_covered)}")

    print("\n=== Individual Operator Distribution ===")
    for operator, count in sorted(operator_counts.items()):
        print(f"{operator}: {count} ({percent_of_total(count):.1f}%)")

    print("\n=== Operator Combination Distribution ===")
    # Largest combinations first, ties broken alphabetically.
    for combo, count in sorted(operator_combinations.items(), key=lambda x: (-len(x[0]), x[0])):
        print(f"{' + '.join(combo)}: {count} ({percent_of_total(count):.1f}%)")

    # Verify completeness of combinations
    print("\n=== Completeness Analysis ===")
    operator_types = {
        'search': ['search_query'],
        'filter': ['integer_property_filter', 'text_property_filter', 'boolean_property_filter'],
        'aggregation': ['integer_property_aggregation', 'text_property_aggregation', 'boolean_property_aggregation'],
        'group': ['groupby_property'],
    }

    # Expected combinations: pick any non-empty subset of operator types,
    # then exactly one operator from each chosen type.
    all_possible_combinations = set()
    for r in range(1, len(operator_types) + 1):
        for type_combo in itertools.combinations(operator_types.keys(), r):
            type_operators = [operator_types[t] for t in type_combo]
            for op_combo in itertools.product(*type_operators):
                all_possible_combinations.add(tuple(sorted(op_combo)))

    # Check which combinations are missing
    actual_combinations = set(operator_combinations)
    missing_combinations = all_possible_combinations - actual_combinations

    print(f"\nFound {len(actual_combinations)} unique operator combinations")
    print(f"Expected {len(all_possible_combinations)} possible combinations")

    if missing_combinations:
        print("\nMissing combinations:")
        # Smallest combinations first, ties broken alphabetically.
        for combo in sorted(missing_combinations, key=lambda x: (len(x), x)):
            print(f"- {' + '.join(combo)}")
    else:
        print("\nAll possible operator combinations are present!")

def main():
    """
    Load the generated queries from the current directory and print the
    operator-distribution report.

    Reads ``synthetic-weaviate-queries-with-results.json`` relative to the
    working directory. Failures are reported on stdout instead of being
    raised: a missing file, malformed JSON, and any other error each
    produce their own message.
    """
    # Load the generated queries
    try:
        # JSON interchange files are UTF-8; decode explicitly so the result
        # does not depend on the platform's locale encoding.
        with open('synthetic-weaviate-queries-with-results.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
        analyze_operator_distribution(data)
    except FileNotFoundError:
        print("Error: Could not find the queries file. Make sure it's in the current directory.")
    except json.JSONDecodeError:
        print("Error: Could not parse the JSON file. Make sure it's properly formatted.")
    except Exception as e:
        # Top-level boundary of a command-line script: report and exit
        # normally rather than show a traceback.
        print(f"An unexpected error occurred: {str(e)}")


if __name__ == "__main__":
    main()
3,644 changes: 1,984 additions & 1,660 deletions data/synthetic-weaviate-queries-with-results.json

Large diffs are not rendered by default.

3,564 changes: 1,944 additions & 1,620 deletions data/synthetic-weaviate-queries-with-schemas.json

Large diffs are not rendered by default.

14 changes: 4 additions & 10 deletions notebooks/anthropic-connection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,27 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import anthropic\n",
"import os\n",
"lm_client = anthropic.Anthropic(\n",
" api_key = \"\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[TextBlock(text='Hello! How can I help you today?', type='text')]\n"
"[TextBlock(text=\"Hi! I'm Claude. How can I help you today?\", type='text')]\n"
]
}
],
Expand All @@ -44,13 +45,6 @@
"source": [
"lm_client.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Loading

0 comments on commit cc98abe

Please sign in to comment.