Coverage for src/alprina_cli/agents/web3_auditor/gas_optimizer.py: 25%

168 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Gas Optimization Analyzer - Week 4 Day 3 

3 

4Analyzes Solidity smart contracts for gas efficiency and provides 

5optimization recommendations to reduce deployment and execution costs. 

6 

7Features: 

8- Pattern-based gas inefficiency detection 

9- Gas cost estimation for common operations 

10- Storage optimization suggestions 

11- Function optimization recommendations 

12- Dollar cost calculations based on gas prices 

13 

14Author: Alprina Development Team 

15Date: 2025-11-13 

16""" 

17 

18import re 

19from dataclasses import dataclass 

20from typing import List, Optional, Dict, Tuple 

21from enum import Enum 

22 

23 

class GasIssueType(Enum):
    """Categories of gas inefficiency that a finding can be filed under.

    The string value of each member is the identifier used when findings
    are grouped by type in a generated report.
    """

    # Layout of state variables across storage slots
    STORAGE_LAYOUT = "storage_layout"
    # The same value being read or computed more than once
    REDUNDANT_OPERATIONS = "redundant_operations"
    # Inefficient loop headers / loop bodies
    LOOP_OPTIMIZATION = "loop_optimization"
    # Function visibility that could be narrowed
    VISIBILITY = "visibility"
    # Integer widths that cost extra gas
    DATA_TYPES = "data_types"
    # Values worth holding in a local variable
    CACHING = "caching"
    # Ordering of operands in boolean conditions
    SHORT_CIRCUIT = "short_circuit"
    # Arithmetic that could sit in an unchecked block
    UNCHECKED_MATH = "unchecked_math"
    # State variables that could be declared immutable
    IMMUTABLE = "immutable"
    # State variables that could be declared constant
    CONSTANT = "constant"

36 

37 

@dataclass
class GasOptimization:
    """Represents a gas optimization opportunity.

    Besides the descriptive fields supplied by the caller, every instance
    carries a per-transaction financial estimate. That estimate is always
    recomputed from ``gas_saved`` in ``__post_init__``; any values passed in
    for ``eth_saved_per_tx`` / ``usd_saved_per_tx`` are overwritten.
    """

    # Pricing assumptions behind the financial estimate. They are plain
    # (un-annotated) class attributes, so they are NOT dataclass fields and
    # do not change the constructor signature; override them on a subclass
    # to price findings differently.
    GAS_PRICE_GWEI = 50           # assumed gas price
    ETH_PRICE_USD = 2000          # assumed ETH/USD rate
    GWEI_PER_ETH = 1_000_000_000  # 1 gwei = 0.000000001 ETH

    issue_type: GasIssueType
    severity: str  # "high", "medium", "low"
    title: str
    description: str
    file_path: str
    line_number: Optional[int]
    function_name: Optional[str]

    # Gas estimates
    current_gas_cost: int  # Estimated current cost
    optimized_gas_cost: int  # Estimated after optimization
    gas_saved: int  # Difference

    # Recommendations
    code_before: Optional[str] = None
    code_after: Optional[str] = None
    recommendation: str = ""

    # Financial impact (derived from gas_saved in __post_init__)
    eth_saved_per_tx: float = 0.0  # ETH saved per transaction
    usd_saved_per_tx: float = 0.0  # USD saved per transaction

    # Context
    confidence: str = "high"  # high, medium, low
    # None is accepted and normalised to a fresh empty list per instance,
    # which avoids sharing one mutable default between findings.
    references: Optional[List[str]] = None

    def __post_init__(self):
        """Normalise ``references`` and derive the ETH/USD savings."""
        if self.references is None:
            self.references = []

        self.eth_saved_per_tx = (self.gas_saved * self.GAS_PRICE_GWEI) / self.GWEI_PER_ETH
        self.usd_saved_per_tx = self.eth_saved_per_tx * self.ETH_PRICE_USD

79 

80 

class GasOptimizationAnalyzer:
    """
    Gas Optimization Analyzer

    Detects gas inefficiencies and provides optimization recommendations
    for Solidity smart contracts.

    All detectors are line-based textual heuristics (regular expressions and
    keyword checks); the source is never parsed into an AST, so findings are
    suggestions to review rather than proven facts. Gas figures are rough
    estimates taken from ``GAS_COSTS`` and fixed per-finding constants.
    """

    # Gas costs for common operations (approximate, based on Ethereum)
    GAS_COSTS = {
        "SLOAD": 2100,          # Storage read (cold)
        "SLOAD_WARM": 100,      # Storage read (warm)
        "SSTORE": 20000,        # Storage write (figure used for slot estimates)
        "SSTORE_NEW": 20000,    # Storage write (zero to non-zero)
        "SSTORE_DELETE": 5000,  # Storage write (non-zero to zero) + refund
        "MLOAD": 3,             # Memory read
        "MSTORE": 3,            # Memory write
        "CALL": 2600,           # External call (base)
        "ADD": 3,               # Addition
        "MUL": 5,               # Multiplication
        "DIV": 5,               # Division
        "LOG": 375,             # Event log (base)
        "CREATE": 32000,        # Contract creation
        "JUMPDEST": 1,          # Jump destination
    }

    # Loop findings cannot know the real trip count; estimates assume this.
    _ASSUMED_LOOP_ITERATIONS = 100

    # Types narrower than one 256-bit slot that are considered for packing.
    _SMALL_TYPES = ('uint8', 'uint16', 'uint32', 'uint64', 'uint128', 'bool')

    # Ordered (marker, bits) pairs for _get_type_size. 'intN' is a substring
    # of 'uintN', so a single marker covers both signed and unsigned types.
    _TYPE_WIDTH_MARKERS = (
        ('int8', 8),
        ('int16', 16),
        ('int32', 32),
        ('int64', 64),
        ('int128', 128),
        ('address', 160),
    )

    # Patterns are compiled once here instead of on every scanned line.
    _PACKABLE_DECL_RE = re.compile(
        r'(uint8|uint16|uint32|uint64|uint128|bool|address)\s+(?:public\s+|private\s+)?(\w+);'
    )
    _INDEX_ACCESS_RE = re.compile(r'(\w+)\[')
    # \b keeps identifiers such as "performance" or "before" from counting as loops.
    _FOR_LOOP_RE = re.compile(r'\bfor\s*\(')
    _POSTFIX_FOR_RE = re.compile(r'\bfor\s*\([^;]+;[^;]+;\s*\w+\+\+\s*\)')
    _FOR_HEADER_RE = re.compile(r'(for\s*\([^)]+\))')
    _FUNCTION_RE = re.compile(r'\bfunction\s+(\w+)\s*\(')
    _PUBLIC_RE = re.compile(r'\bpublic\b')
    _VIEW_OR_PURE_RE = re.compile(r'\b(?:view|pure)\b')
    _NARROW_MEMORY_RE = re.compile(r'(uint8|uint16)\s+memory\s+\w+')
    _REQUIRE_RE = re.compile(r'\brequire\s*\(')
    _IMMUTABLE_CANDIDATE_RE = re.compile(r'(address|uint256)\s+(?:public\s+|private\s+)?(\w+);')
    _CONSTANT_CANDIDATE_RE = re.compile(
        r'(uint256|string)\s+(?:public\s+|private\s+)?(\w+)\s*=\s*["\d]'
    )
    _CONSTRUCTOR_RE = re.compile(r'\bconstructor\s*\(')

    def __init__(self):
        """Create an analyzer with an empty list of findings."""
        self.optimizations: List[GasOptimization] = []

    def analyze_contract(self, source_code: str, file_path: str) -> List[GasOptimization]:
        """
        Analyze contract for gas optimization opportunities

        Findings from any previous call are discarded first.

        Args:
            source_code: Solidity source code
            file_path: Path to contract file (only recorded on findings)

        Returns:
            List of gas optimization opportunities, in detector order
        """
        self.optimizations = []
        lines = source_code.split('\n')

        detectors = (
            self._detect_storage_optimization,
            self._detect_redundant_operations,
            self._detect_loop_optimization,
            self._detect_visibility_optimization,
            self._detect_data_type_optimization,
            self._detect_caching_opportunities,
            self._detect_short_circuit_optimization,
            self._detect_unchecked_math,
            self._detect_immutable_optimization,
            self._detect_constant_optimization,
        )
        for detect in detectors:
            detect(lines, file_path)

        return self.optimizations

    def _detect_storage_optimization(self, lines: List[str], file_path: str) -> None:
        """Detect storage layout inefficiencies.

        Emits at most one finding for the whole file. The estimate assumes
        every small variable currently occupies its own slot, so it is an
        upper bound when some of them are already adjacent.
        """
        declared = []
        for idx, line in enumerate(lines):
            decl = self._PACKABLE_DECL_RE.search(line)
            if decl:
                declared.append((idx + 1, decl.group(1), decl.group(2)))

        small = [entry for entry in declared if entry[1] in self._SMALL_TYPES]
        if len(small) < 2:
            return

        slots_before = len(small)
        total_bits = sum(self._get_type_size(entry[1]) for entry in small)
        slots_after = (total_bits + 255) // 256  # ceiling division by slot width
        slots_saved = slots_before - slots_after
        if slots_saved <= 0:
            return

        write_cost = self.GAS_COSTS["SSTORE"]
        gas_saved = slots_saved * write_cost
        self.optimizations.append(GasOptimization(
            issue_type=GasIssueType.STORAGE_LAYOUT,
            severity="high" if gas_saved > 40000 else "medium",
            title="Storage Packing Optimization",
            description=f"Multiple small storage variables detected that could be packed into fewer storage slots. Currently using ~{slots_before} slots, could be optimized to ~{slots_after} slots.",
            file_path=file_path,
            line_number=small[0][0],
            function_name=None,
            current_gas_cost=slots_before * write_cost,
            optimized_gas_cost=slots_after * write_cost,
            gas_saved=gas_saved,
            code_before=f"// Current: {len(small)} separate declarations",
            code_after=f"// Optimized: Pack into {slots_after} storage slots by declaring sequentially",
            recommendation="Declare storage variables of smaller types sequentially to pack them into the same storage slots. Each storage slot is 32 bytes (256 bits).",
            references=[
                "https://docs.soliditylang.org/en/latest/internals/layout_in_storage.html"
            ]
        ))

    def _detect_redundant_operations(self, lines: List[str], file_path: str) -> None:
        """Detect lines that index the same array/mapping name more than once.

        Only the repeated accesses are counted as avoidable: each distinct
        name still needs one storage read after caching.
        """
        sload = self.GAS_COSTS["SLOAD"]
        mload = self.GAS_COSTS["MLOAD"]

        for idx, line in enumerate(lines):
            accessed = self._INDEX_ACCESS_RE.findall(line)
            repeats = len(accessed) - len(set(accessed))
            if repeats <= 0:
                continue

            distinct = len(accessed) - repeats
            self.optimizations.append(GasOptimization(
                issue_type=GasIssueType.REDUNDANT_OPERATIONS,
                severity="medium",
                title="Redundant Storage Access",
                description="Same storage location accessed multiple times in a single expression. Consider caching in memory.",
                file_path=file_path,
                line_number=idx + 1,
                function_name=self._extract_function_name(lines, idx),
                current_gas_cost=len(accessed) * sload,
                optimized_gas_cost=distinct * sload + repeats * mload,
                gas_saved=repeats * (sload - mload),
                code_before=line.strip(),
                code_after="// Cache storage value in memory variable first",
                recommendation="Cache the storage value in a memory variable and reuse it."
            ))

    def _detect_loop_optimization(self, lines: List[str], file_path: str) -> None:
        """Detect loop inefficiencies in single-line ``for (...)`` headers."""
        iterations = self._ASSUMED_LOOP_ITERATIONS
        sload = self.GAS_COSTS["SLOAD"]
        mload = self.GAS_COSTS["MLOAD"]

        for idx, line in enumerate(lines):
            if not self._FOR_LOOP_RE.search(line):
                continue

            # Pattern: for (uint i = 0; i < array.length; i++)
            if '.length' in line and '++' in line:
                uncached_cost = iterations * sload
                cached_cost = sload + iterations * mload
                self.optimizations.append(GasOptimization(
                    issue_type=GasIssueType.LOOP_OPTIMIZATION,
                    severity="high",
                    title="Loop Length Not Cached",
                    description="Array length is read from storage in every loop iteration. Cache it in a local variable.",
                    file_path=file_path,
                    line_number=idx + 1,
                    function_name=self._extract_function_name(lines, idx),
                    current_gas_cost=uncached_cost,
                    optimized_gas_cost=cached_cost,
                    # Kept equal to the difference of the two figures above.
                    gas_saved=uncached_cost - cached_cost,
                    code_before=line.strip(),
                    code_after="uint256 length = array.length; for (uint256 i = 0; i < length; ++i)",
                    recommendation="Cache array.length in a local variable before the loop. Use ++i instead of i++ for slightly lower gas.",
                    references=[
                        "https://github.com/crytic/slither/wiki/Detector-Documentation#costly-operations-inside-a-loop"
                    ]
                ))

            # Pattern: i++ vs ++i
            if self._POSTFIX_FOR_RE.search(line):
                header = self._FOR_HEADER_RE.search(line)
                self.optimizations.append(GasOptimization(
                    issue_type=GasIssueType.LOOP_OPTIMIZATION,
                    severity="low",
                    title="Use Prefix Increment (++i) Instead of Postfix (i++)",
                    description="Prefix increment (++i) is slightly cheaper than postfix (i++) in loops.",
                    file_path=file_path,
                    line_number=idx + 1,
                    function_name=self._extract_function_name(lines, idx),
                    current_gas_cost=iterations * 8,  # Approximate difference per iteration
                    optimized_gas_cost=iterations * 5,
                    gas_saved=iterations * 3,
                    code_before=header.group(1) if header else line.strip(),
                    code_after="for (uint256 i = 0; i < length; ++i)",
                    recommendation="Use ++i instead of i++ in loops to save gas."
                ))

    def _detect_visibility_optimization(self, lines: List[str], file_path: str) -> None:
        """Detect public functions whose name appears nowhere else in the file.

        Keywords and the function name are matched as whole words, so a
        function called ``set`` is not hidden by ``setOwner`` and a name like
        ``publicMint`` is left intact in the suggested replacement.
        """
        for idx, line in enumerate(lines):
            if not self._PUBLIC_RE.search(line) or self._VIEW_OR_PURE_RE.search(line):
                continue
            decl = self._FUNCTION_RE.search(line)
            if not decl:
                continue

            func_name = decl.group(1)
            name_re = re.compile(rf'\b{re.escape(func_name)}\b')
            mentions = sum(1 for text in lines if name_re.search(text))
            if mentions != 1:  # anything beyond the declaration line itself
                continue

            signature = line.strip()
            self.optimizations.append(GasOptimization(
                issue_type=GasIssueType.VISIBILITY,
                severity="low",
                title=f"Function '{func_name}' Can Be External",
                description="Function is marked 'public' but appears to never be called internally. Use 'external' to save gas.",
                file_path=file_path,
                line_number=idx + 1,
                function_name=func_name,
                current_gas_cost=1000,  # Approximate overhead for public
                optimized_gas_cost=600,
                gas_saved=400,
                code_before=signature,
                # Replace only the first 'public' keyword, never identifier text.
                code_after=self._PUBLIC_RE.sub('external', signature, count=1),
                recommendation="Change 'public' to 'external' for functions not called internally. External functions can read arguments from calldata instead of copying to memory."
            ))

    def _detect_data_type_optimization(self, lines: List[str], file_path: str) -> None:
        """Detect ``uint8``/``uint16`` declarations followed by ``memory``."""
        for idx, line in enumerate(lines):
            if not self._NARROW_MEMORY_RE.search(line):
                continue

            declaration = line.strip()
            self.optimizations.append(GasOptimization(
                issue_type=GasIssueType.DATA_TYPES,
                severity="low",
                title="Use uint256 for Memory Variables",
                description="Using uint8/uint16 for memory variables is less efficient than uint256. The EVM operates on 256-bit words.",
                file_path=file_path,
                line_number=idx + 1,
                function_name=self._extract_function_name(lines, idx),
                current_gas_cost=50,
                optimized_gas_cost=30,
                gas_saved=20,
                code_before=declaration,
                code_after=declaration.replace('uint8', 'uint256').replace('uint16', 'uint256'),
                recommendation="Use uint256 for memory variables and function parameters unless you need storage packing."
            ))

    def _detect_caching_opportunities(self, lines: List[str], file_path: str) -> None:
        """Detect lines that mention ``msg.sender`` more than once."""
        per_access = 100  # Approximate cost charged per msg.sender access
        cached_read = 3   # Approximate cost of reusing the cached local

        for idx, line in enumerate(lines):
            sender_count = line.count('msg.sender')
            if sender_count <= 1:
                continue

            uncached_cost = sender_count * per_access
            cached_cost = per_access + (sender_count - 1) * cached_read
            self.optimizations.append(GasOptimization(
                issue_type=GasIssueType.CACHING,
                severity="low",
                title="Cache msg.sender",
                description=f"msg.sender is accessed {sender_count} times. Cache it in a local variable.",
                file_path=file_path,
                line_number=idx + 1,
                function_name=self._extract_function_name(lines, idx),
                current_gas_cost=uncached_cost,
                optimized_gas_cost=cached_cost,
                # Kept equal to the difference of the two figures above.
                gas_saved=uncached_cost - cached_cost,
                code_before=line.strip(),
                code_after="address sender = msg.sender; // Cache and reuse",
                recommendation="Cache msg.sender in a local variable at the start of the function."
            ))

    def _detect_short_circuit_optimization(self, lines: List[str], file_path: str) -> None:
        """Detect ``require(...)`` lines that combine conditions with ``&&``.

        Which operand is cheaper cannot be determined here, so the finding
        is reported with medium confidence.
        """
        for idx, line in enumerate(lines):
            if '&&' not in line or not self._REQUIRE_RE.search(line):
                continue

            self.optimizations.append(GasOptimization(
                issue_type=GasIssueType.SHORT_CIRCUIT,
                severity="low",
                title="Optimize Condition Order",
                description="Order conditions from cheapest to most expensive in boolean expressions. Short-circuiting will skip expensive operations.",
                file_path=file_path,
                line_number=idx + 1,
                function_name=self._extract_function_name(lines, idx),
                current_gas_cost=1000,
                optimized_gas_cost=500,
                gas_saved=500,
                code_before=line.strip(),
                code_after="// Order: cheap check && expensive check",
                recommendation="Put the cheapest condition first in && expressions to take advantage of short-circuit evaluation.",
                confidence="medium"
            ))

    def _detect_unchecked_math(self, lines: List[str], file_path: str) -> None:
        """Detect loop increments that could use unchecked blocks (Solidity 0.8+)."""
        iterations = self._ASSUMED_LOOP_ITERATIONS

        for idx, line in enumerate(lines):
            if not self._FOR_LOOP_RE.search(line):
                continue
            if '++' not in line and '+=' not in line:
                continue

            self.optimizations.append(GasOptimization(
                issue_type=GasIssueType.UNCHECKED_MATH,
                severity="medium",
                title="Use Unchecked Block for Loop Counter",
                description="Loop counters in Solidity 0.8+ have overflow checks. Use unchecked{} block for loop increments to save gas.",
                file_path=file_path,
                line_number=idx + 1,
                function_name=self._extract_function_name(lines, idx),
                current_gas_cost=iterations * 50,
                optimized_gas_cost=iterations * 30,
                gas_saved=iterations * 20,
                code_before=line.strip(),
                code_after="for (uint256 i; i < length;) { ... unchecked { ++i; } }",
                recommendation="Wrap loop increments in unchecked{} blocks. Loop counters will never realistically overflow.",
                references=[
                    "https://docs.soliditylang.org/en/latest/control-structures.html#checked-or-unchecked-arithmetic"
                ]
            ))

    def _detect_immutable_optimization(self, lines: List[str], file_path: str) -> None:
        """Detect variables that are written in the constructor and nowhere else."""
        constructor_lines = self._constructor_line_indices(lines)
        sload = self.GAS_COSTS["SLOAD"]

        for idx, line in enumerate(lines):
            if 'immutable' in line or 'constant' in line:
                continue
            declaration = line.strip()
            decl = self._IMMUTABLE_CANDIDATE_RE.search(declaration)
            if not decl:
                continue

            var_name = decl.group(2)
            mutation_re = self._mutation_pattern(var_name)
            set_in_constructor = False
            set_elsewhere = False
            for j, text in enumerate(lines):
                if j == idx or not mutation_re.search(text):
                    continue
                if j in constructor_lines:
                    set_in_constructor = True
                else:
                    set_elsewhere = True
                    break

            if not set_in_constructor or set_elsewhere:
                continue

            self.optimizations.append(GasOptimization(
                issue_type=GasIssueType.IMMUTABLE,
                severity="medium",
                title=f"Variable '{var_name}' Can Be Immutable",
                description="Variable is only assigned in constructor. Mark it as immutable to save gas.",
                file_path=file_path,
                line_number=idx + 1,
                function_name=None,
                current_gas_cost=sload,
                optimized_gas_cost=3,  # Immutable is copied directly into bytecode
                gas_saved=sload - 3,
                code_before=declaration,
                # Solidity expects the keyword before the variable name.
                code_after=self._insert_keyword(declaration, decl.start(2), 'immutable'),
                recommendation="Mark as 'immutable' to embed value in contract bytecode. Saves 2100 gas per read."
            ))

    def _detect_constant_optimization(self, lines: List[str], file_path: str) -> None:
        """Detect variables initialised with a literal and never modified after."""
        sload = self.GAS_COSTS["SLOAD"]

        for idx, line in enumerate(lines):
            if 'constant' in line or 'immutable' in line:
                continue
            declaration = line.strip()
            decl = self._CONSTANT_CANDIDATE_RE.search(declaration)
            if not decl:
                continue

            # Take the name from the same match that recognised the literal
            # initialiser, so an earlier declaration on the line is not picked.
            var_name = decl.group(2)
            mutation_re = self._mutation_pattern(var_name)

            # A mutation later on the declaring line (e.g. the i++ of a
            # for-header) counts just like one on any other line.
            reassigned = bool(mutation_re.search(declaration[decl.end():])) or any(
                mutation_re.search(text)
                for j, text in enumerate(lines)
                if j != idx
            )
            if reassigned:
                continue

            self.optimizations.append(GasOptimization(
                issue_type=GasIssueType.CONSTANT,
                severity="medium",
                title=f"Variable '{var_name}' Can Be Constant",
                description="Variable has a fixed value and is never reassigned. Mark it as constant.",
                file_path=file_path,
                line_number=idx + 1,
                function_name=None,
                current_gas_cost=sload,
                optimized_gas_cost=0,  # Constants are free
                gas_saved=sload,
                code_before=declaration,
                # Solidity expects the keyword before the variable name.
                code_after=self._insert_keyword(declaration, decl.start(2), 'constant'),
                recommendation="Mark as 'constant' to replace storage access with direct value substitution. Saves 2100 gas per read."
            ))

    @staticmethod
    def _mutation_pattern(var_name: str):
        """Compile a regex matching a write to ``var_name``.

        Covers plain and compound assignment (``=``, ``+=``, ``<<=`` ...) and
        ``++`` / ``--`` in prefix or postfix position. The name is matched as
        a whole word and ``==`` comparisons are excluded.
        """
        name = re.escape(var_name)
        return re.compile(
            rf'(?:\+\+|--)\s*{name}\b'
            rf'|\b{name}\s*(?:\+\+|--|(?:[-+*/%&|^]|<<|>>)?=(?!=))'
        )

    @staticmethod
    def _insert_keyword(declaration: str, name_start: int, keyword: str) -> str:
        """Return ``declaration`` with ``keyword`` placed just before the variable name."""
        return f"{declaration[:name_start]}{keyword} {declaration[name_start:]}"

    def _constructor_line_indices(self, lines: List[str]) -> set:
        """Return the 0-based indices of lines belonging to a constructor.

        A constructor runs from its ``constructor(`` line to the line where
        the braces opened after it are balanced again. Braces are counted
        textually, so ones inside comments or strings are counted too.
        """
        inside = set()
        active = False   # currently within a constructor
        opened = False   # its body's opening brace has been seen
        depth = 0

        for idx, text in enumerate(lines):
            if not active and self._CONSTRUCTOR_RE.search(text):
                active, opened, depth = True, False, 0
            if not active:
                continue

            inside.add(idx)
            depth += text.count('{') - text.count('}')
            if '{' in text:
                opened = True
            if opened and depth <= 0:
                active = False

        return inside

    def _get_type_size(self, type_name: str) -> int:
        """Get size of type in bits (256 for anything not recognised)."""
        if type_name == 'bool':
            return 8
        for marker, bits in self._TYPE_WIDTH_MARKERS:
            if marker in type_name:
                return bits
        return 256

    def _extract_function_name(self, lines: List[str], current_line: int) -> Optional[str]:
        """Return the name of the nearest function declared at or above a line.

        Args:
            lines: Source split into lines
            current_line: 0-based index to start searching upwards from;
                an index past the end is clamped to the last line

        Returns:
            The function name, or None if no declaration precedes the line
        """
        start = min(current_line, len(lines) - 1)
        for idx in range(start, -1, -1):
            decl = self._FUNCTION_RE.search(lines[idx])
            if decl:
                return decl.group(1)
        return None

    def generate_report(self) -> Dict:
        """Summarise the findings of the most recent analysis.

        Returns:
            Dict with the number of findings, summed gas/ETH/USD savings,
            counts per severity ('high', 'medium', 'low' are always present),
            counts per issue type, and the findings themselves.
        """
        findings = self.optimizations

        severity_counts = {'high': 0, 'medium': 0, 'low': 0}
        type_counts = {}
        for finding in findings:
            if finding.severity in severity_counts:
                severity_counts[finding.severity] += 1
            type_key = finding.issue_type.value
            type_counts[type_key] = type_counts.get(type_key, 0) + 1

        return {
            'total_optimizations': len(findings),
            'total_gas_saved': sum(f.gas_saved for f in findings),
            'total_eth_saved_per_tx': sum(f.eth_saved_per_tx for f in findings),
            'total_usd_saved_per_tx': sum(f.usd_saved_per_tx for f in findings),
            'by_severity': severity_counts,
            'by_type': type_counts,
            'optimizations': findings
        }