class EnhancedPythonToRConverter:
    """
    Enhanced Python to R converter with Gemini AI validation.

    The conversion is purely textual: import lines are mapped to R
    ``library()`` calls, known function names are renamed, and a set of
    regular expressions rewrites common pandas / matplotlib / Python syntax.
    The result is a starting point for a manual port, not guaranteed-valid R.
    """

    def __init__(self, gemini_api_key: str = None):
        """Build the mapping tables and the Gemini validator.

        Args:
            gemini_api_key: Key handed to ``GeminiValidator``; may be None.
        """
        self.validator = GeminiValidator(gemini_api_key)

        # Python module -> R library() lines emitted in its place.
        self.import_mappings = {
            'pandas': 'library(dplyr)\nlibrary(tidyr)\nlibrary(readr)',
            'numpy': 'library(base)',
            'matplotlib.pyplot': 'library(ggplot2)',
            'seaborn': 'library(ggplot2)\nlibrary(RColorBrewer)',
            'scipy.stats': 'library(stats)',
            'sklearn': 'library(caret)\nlibrary(randomForest)\nlibrary(e1071)',
            'statsmodels': 'library(stats)\nlibrary(lmtest)',
            'plotly': 'library(plotly)',
        }

        # Python callable (as commonly spelled in scripts) -> R function name.
        self.function_mappings = {
            'pd.DataFrame': 'data.frame',
            'pd.read_csv': 'read.csv',
            'pd.read_excel': 'read_excel',
            'df.head': 'head',
            'df.tail': 'tail',
            'df.shape': 'dim',
            'df.info': 'str',
            'df.describe': 'summary',
            'df.mean': 'mean',
            'df.median': 'median',
            'df.std': 'sd',
            'df.var': 'var',
            'df.sum': 'sum',
            'df.count': 'length',
            'df.groupby': 'group_by',
            'df.merge': 'merge',
            'df.drop': 'select',
            'df.dropna': 'na.omit',
            'df.fillna': 'replace_na',
            'df.sort_values': 'arrange',
            'df.value_counts': 'table',
            'np.array': 'c',
            'np.mean': 'mean',
            'np.median': 'median',
            'np.std': 'sd',
            'np.var': 'var',
            'np.sum': 'sum',
            'np.min': 'min',
            'np.max': 'max',
            'np.sqrt': 'sqrt',
            'np.log': 'log',
            'np.exp': 'exp',
            'np.random.normal': 'rnorm',
            'np.random.uniform': 'runif',
            'np.linspace': 'seq',
            'np.arange': 'seq',
            'plt.figure': 'ggplot',
            'plt.plot': 'geom_line',
            'plt.scatter': 'geom_point',
            'plt.hist': 'geom_histogram',
            'plt.bar': 'geom_bar',
            'plt.boxplot': 'geom_boxplot',
            'plt.show': 'print',
            'sns.scatterplot': 'geom_point',
            'sns.histplot': 'geom_histogram',
            'sns.boxplot': 'geom_boxplot',
            'sns.heatmap': 'geom_tile',
            'scipy.stats.ttest_ind': 't.test',
            'scipy.stats.chi2_contingency': 'chisq.test',
            'scipy.stats.pearsonr': 'cor.test',
            'scipy.stats.spearmanr': 'cor.test',
            'scipy.stats.normaltest': 'shapiro.test',
            'stats.ttest_ind': 't.test',
            'sklearn.linear_model.LinearRegression': 'lm',
            'sklearn.ensemble.RandomForestRegressor': 'randomForest',
            'sklearn.model_selection.train_test_split': 'sample',
        }

        # (regex, replacement) pairs applied in order by apply_syntax_patterns.
        self.syntax_patterns = [
            (r'\bTrue\b', 'TRUE'),
            (r'\bFalse\b', 'FALSE'),
            (r'\bNone\b', 'NULL'),
            (r'\blen\(', 'length('),
            (r'range\((\d+)\)', r'1:\1'),
            (r'range\((\d+),\s*(\d+)\)', r'\1:\2'),
            (r'\.split\(', '.strsplit('),
            (r'\.strip\(\)', '.str_trim()'),
            (r'\.lower\(\)', '.str_to_lower()'),
            (r'\.upper\(\)', '.str_to_upper()'),
            (r'\[0\]', '[1]'),
            (r'f"([^"]*)"', r'paste0("\1")'),
            (r"f'([^']*)'", r"paste0('\1')"),
        ]

    def _libraries_for(self, module: str):
        """Return the R library lines mapped to *module*, or None if unmapped."""
        # Walk up the dotted path so 'sklearn.ensemble' resolves via 'sklearn'.
        while module:
            if module in self.import_mappings:
                return self.import_mappings[module]
            module = module.rpartition('.')[0]
        return None

    def convert_imports(self, code: str) -> str:
        """Convert Python import statements to R library statements.

        Every line is stripped of surrounding whitespace. Import lines are
        kept as an R comment, followed by the mapped ``library()`` calls when
        the module is known. Aliased ``from ... import ... as ...`` lines are
        only commented, with a note to handle them manually.

        Args:
            code: Python source text.

        Returns:
            The text with import lines rewritten, joined by newlines.
        """
        converted_lines = []
        for raw_line in code.split('\n'):
            line = raw_line.strip()
            if not line.startswith(('import ', 'from ')):
                converted_lines.append(line)
                continue

            is_from_import = line.startswith('from ')
            # Must be tested before the plain alias case, which it would
            # otherwise be swallowed by (every such line also has ' as ').
            if is_from_import and ' as ' in line:
                converted_lines.append(
                    f"# {line} # Deal with particular imports manually"
                )
                continue

            if is_from_import:
                module = line[len('from '):].split(' import ')[0].strip()
            else:
                module = line[len('import '):].split(' as ')[0].strip()

            libraries = self._libraries_for(module)
            if libraries is None:
                converted_lines.append(f"# {line} # No direct R equivalent")
            else:
                converted_lines.append(f"# {line}")
                converted_lines.append(libraries)
        return '\n'.join(converted_lines)

    def convert_functions(self, code: str) -> str:
        """Convert Python function calls to R equivalents.

        Names from ``self.function_mappings`` are replaced only where they
        appear as whole dotted identifiers, longest name first, so that
        ``df.dropna`` is not clobbered by the shorter ``df.drop`` entry.

        Args:
            code: Source text to rewrite.

        Returns:
            The text with mapped function names replaced.
        """
        for py_func in sorted(self.function_mappings, key=len, reverse=True):
            r_func = self.function_mappings[py_func]
            pattern = r'(?<![\w.])' + re.escape(py_func) + r'(?!\w)'
            # A callable replacement keeps r_func literal (no escape handling).
            code = re.sub(pattern, lambda _match, repl=r_func: repl, code)
        return code

    def apply_syntax_patterns(self, code: str) -> str:
        """Apply ``self.syntax_patterns`` in order to turn Python syntax into R."""
        for pattern, replacement in self.syntax_patterns:
            code = re.sub(pattern, replacement, code)
        return code

    def convert_pandas_operations(self, code: str) -> str:
        """Convert common pandas operations to dplyr/tidyr equivalents.

        Handles three shapes on a frame literally named ``df``: boolean row
        filters (``df[df['c'] > v]``), column access by string key
        (``df['c']``) and attribute access (``df.c``).

        Args:
            code: Source text to rewrite.

        Returns:
            The rewritten text.
        """
        # The filter rewrite has to run first: the column rewrite below would
        # otherwise consume the inner df['c'] and the filter could never match.
        code = re.sub(
            r'df\[df\[[\'"](.*?)[\'"]\]\s*([><=!]+)\s*([^\]]+)\]',
            r'df %>% filter(\1 \2 \3)',
            code,
        )
        code = re.sub(r'df\[[\'"](.*?)[\'"]\]', r'df$\1', code)
        code = re.sub(r'df\.(\w+)', r'df$\1', code)
        return code

    def convert_plotting(self, code: str) -> str:
        """Convert matplotlib/seaborn plotting to ggplot2.

        Args:
            code: Source text to rewrite.

        Returns:
            The text with figure, title, axis-label, legend and grid calls
            replaced by ggplot2 layers or a comment.
        """
        conversions = [
            (r'plt\.figure\(figsize=\((\d+),\s*(\d+)\)\)',
             r'# Set figure size in ggplot theme'),
            (r'plt\.title\([\'"](.*?)[\'"]\)', r'+ ggtitle("\1")'),
            (r'plt\.xlabel\([\'"](.*?)[\'"]\)', r'+ xlab("\1")'),
            (r'plt\.ylabel\([\'"](.*?)[\'"]\)', r'+ ylab("\1")'),
            (r'plt\.legend\(\)', r'+ theme(legend.position="right")'),
            (r'plt\.grid\(True\)', r'+ theme(panel.grid.main = element_line())'),
        ]
        for pattern, replacement in conversions:
            code = re.sub(pattern, replacement, code)
        return code

    def add_r_context(self, code: str) -> str:
        """Prefix *code* with a comment header naming the packages to install."""
        r_header = (
            '# R Statistical Evaluation Code\n'
            '# Transformed from Python utilizing Enhanced Converter with Gemini AI Validation\n'
            '# Set up required packages: install.packages(c("dplyr", "ggplot2", "tidyr", "readr"))\n'
        )
        return r_header + code

    def convert_code(self, python_code: str) -> str:
        """Main conversion method that applies all transformations.

        Args:
            python_code: Python source text.

        Returns:
            The converted R text, including the comment header.
        """
        code = python_code.strip()
        code = self.convert_imports(code)
        # Plotting runs before the generic renames; otherwise 'plt.figure'
        # becomes 'ggplot' and the figsize pattern can no longer match.
        code = self.convert_plotting(code)
        code = self.convert_functions(code)
        code = self.convert_pandas_operations(code)
        code = self.apply_syntax_patterns(code)
        code = self.add_r_context(code)
        return code

    def convert_and_validate(self, python_code: str, use_gemini: bool = True) -> Dict:
        """
        Convert Python code to R and validate with Gemini AI.

        Args:
            python_code: Python source text.
            use_gemini: When true and the validator has an API key, the
                conversion is sent to the validator.

        Returns:
            A dict with keys ``original_python``, ``converted_r``,
            ``validation`` and ``final_r_code``. ``final_r_code`` is the
            validator's ``improved_code`` when it supplies one, otherwise the
            locally converted code.
        """
        r_code = self.convert_code(python_code)
        result = {
            "original_python": python_code,
            "converted_r": r_code,
            "validation": None,
        }

        has_key = bool(self.validator.api_key)
        if use_gemini and has_key:
            print("🔍 Validating conversion with Gemini AI...")
            validation = self.validator.validate_conversion(python_code, r_code)
            result["validation"] = validation
            # An empty or missing improvement falls back to our own output.
            result["final_r_code"] = validation.get("improved_code") or r_code
        else:
            result["final_r_code"] = r_code
            if not has_key:
                result["validation"] = {
                    "observe": "Set GEMINI_API_KEY for AI validation"
                }
        return result

    def print_results(self, outcomes: Dict):
        """Pretty-print the conversion outcomes.

        Args:
            outcomes: A dict as returned by ``convert_and_validate``. The
                ``validation`` entry, when present, may carry
                ``validation_score``, ``summary``, ``issues_found`` and
                ``suggestions``; each is printed only if available.
        """
        rule = "=" * 80

        print(rule)
        print("🐍 ORIGINAL PYTHON CODE")
        print(rule)
        print(outcomes["original_python"])

        print("\n" + rule)
        print("📊 CONVERTED R CODE")
        print(rule)
        print(outcomes["final_r_code"])

        validation = outcomes.get("validation")
        if not validation:
            return

        print("\n" + rule)
        print("🤖 GEMINI AI VALIDATION")
        print(rule)

        score = validation.get("validation_score")
        if score is not None:  # a score of 0 is still worth showing
            print(f"📈 Rating: {score}/100")

        summary = validation.get("summary")
        if summary:
            print(f"📝 Abstract: {summary}")

        issues = validation.get("issues_found")
        if issues:
            print("\n⚠️ Points Discovered:")
            for issue in issues:
                print(f" • {issue}")

        suggestions = validation.get("suggestions")
        if suggestions:
            print("\n💡 Ideas:")
            for suggestion in suggestions:
                print(f" • {suggestion}")