emailsec._authentication_results

  1from dataclasses import dataclass
  2from pyparsing import (
  3    Word,
  4    alphas,
  5    alphanums,
  6    nums,
  7    Literal,
  8    Optional,
  9    Group,
 10    OneOrMore,
 11    QuotedString,
 12    ZeroOrMore,
 13    printables,
 14    Suppress,
 15    White,
 16    ParseException,
 17)
 18
 19from emailsec.arc import ARCChainStatus
 20
 21# RFC 8601 ABNF components following the specification
 22WSP = White(" \t", exact=1)
 23CRLF = White("\r", exact=1) + White("\n", exact=1)
 24FWS = Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)
 25
 26authserv_id = Word(alphanums + ".-_")
 27method = Word(alphas, alphanums + "-")
 28result = Word(alphas)
 29ptype = Word(alphas)
 30dot_atom_text = Word(alphanums + ".-_")
 31# More flexible value that can handle email addresses and other real-world content
 32value = Word(printables, exclude_chars=";") | QuotedString('"', escChar="\\")
 33
 34property = Group(
 35    Optional(FWS).suppress()
 36    + ptype
 37    + Literal(".").suppress()
 38    + dot_atom_text
 39    + Optional(FWS).suppress()
 40    + Literal("=").suppress()
 41    + Optional(FWS).suppress()
 42    + value
 43    + Optional(FWS).suppress()
 44)
 45
 46resinfo = Group(
 47    Optional(FWS).suppress()
 48    + method
 49    + Optional(FWS).suppress()
 50    + Literal("=").suppress()
 51    + Optional(FWS).suppress()
 52    + result
 53    + Optional(FWS).suppress()
 54    + ZeroOrMore(property)
 55)
 56
 57instance_tag = Suppress("i=") + Word(nums)
 58
 59arc_auth_results = (
 60    Optional(instance_tag)
 61    + Suppress(Optional(";"))
 62    + Optional(FWS).suppress()
 63    + authserv_id
 64    + Optional(FWS).suppress()
 65    + Suppress(";")
 66    + Optional(FWS).suppress()
 67    + OneOrMore(resinfo + Suppress(Optional(";")))
 68)
 69
 70
 71@dataclass
 72class AuthResult:
 73    """Single authentication method result"""
 74
 75    method: str  # spf, dkim, dmarc, arc
 76    result: str  # pass, fail, temperror, etc
 77    properties: dict[str, str]  # smtp.mailfrom, header.d, etc
 78
 79
 80@dataclass
 81class ARCAuthenticationResults:
 82    """Parsed ARC-Authentication-Results header (RFC 8617 Section 4.1.1)"""
 83
 84    instance: int
 85    authserv_id: str
 86    results: list[AuthResult]
 87
 88
 89def normalize_header(header_value: str) -> str:
 90    """
 91    Pre-process header value to normalize multi-line properties and remove problematic whitespace.
 92
 93    This handles real-world cases like:
 94    - Multi-line property values (e.g., smtp.mailfrom=value\ncontinued)
 95    - Comments in parentheses
 96    - Extra whitespace and newlines
 97    """
 98    if not header_value:
 99        return header_value
100
101    # Split into lines and process each line
102    lines = header_value.split("\n")
103    normalized_parts: list[str] = []
104
105    for line in lines:
106        line = line.strip()
107        if not line:
108            continue
109
110        # If this line starts with a property continuation (no method=result pattern)
111        if not ("=" in line and not line.startswith(" ") and not line.startswith("\t")):
112            # This might be a continuation of a previous property value
113            if normalized_parts and "=" in normalized_parts[-1]:
114                # Append to the last property value
115                normalized_parts[-1] += " " + line
116            else:
117                normalized_parts.append(line)
118        else:
119            # This is a new method=result or property
120            normalized_parts.append(line)
121
122    # Join all parts and clean up extra whitespace
123    normalized = " ".join(normalized_parts)
124
125    # Remove comments in parentheses (RFC 8601 doesn't specify these)
126    import re
127
128    normalized = re.sub(r"\([^)]*\)", "", normalized)
129
130    # Clean up extra whitespace around semicolons and equals
131    normalized = re.sub(r"\s*;\s*", ";", normalized)
132    normalized = re.sub(r"\s*=\s*", "=", normalized)
133
134    # Clean up multiple spaces
135    normalized = re.sub(r"\s+", " ", normalized)
136
137    return normalized.strip()
138
139
def parse_arc_authentication_results(header_value: str) -> ARCAuthenticationResults:
    """
    Parse ARC-Authentication-Results header per RFC 8617 Section 4.1.1.

    Format: i=1; authserv-id; method=result property=value
    Example: i=1; mx.example.com; spf=pass smtp.mailfrom=example.org;
             dkim=pass header.d=example.org

    The value is passed through normalize_header() and then matched against
    the module-level arc_auth_results grammar.

    Returns an ARCAuthenticationResults. The instance defaults to 1 when the
    header carries no "i=" tag. Property names are stored as
    "ptype.property" (e.g. "smtp.mailfrom").

    Raises ValueError (message prefixed with "Invalid ARC-Authentication-Results:")
    when the value is empty or cannot be normalized/parsed.
    """
    try:
        if not header_value or not header_value.strip():
            raise ValueError("Empty header value")

        # Pre-process the header to normalize multi-line properties
        normalized_header = normalize_header(header_value)

        # NOTE(review): parse_string is called without parse_all, so trailing
        # text the grammar cannot match is presumably ignored -- confirm.
        parsed = arc_auth_results.parse_string(normalized_header)
    except Exception as e:
        # Every failure on this path (grammar mismatch, bad input type, empty
        # value) is reported to callers as one ValueError with a common prefix.
        raise ValueError(f"Invalid ARC-Authentication-Results: {e}") from e

    # The grammar yields at most two top-level string tokens, in order: the
    # optional instance digits, then the authserv-id. Deciding by position
    # (rather than by "looks like a number") keeps an all-digit authserv-id
    # from being mistaken for the instance.
    leading_tokens = [token for token in parsed if isinstance(token, str)]
    server_id = leading_tokens[-1].strip() if leading_tokens else ""
    instance = int(leading_tokens[0]) if len(leading_tokens) > 1 else 1

    results: list[AuthResult] = []
    for token in parsed:
        if not hasattr(token, "as_list"):
            continue

        # This is a resinfo group: [method, result, [ptype, name, value], ...]
        resinfo_list = token.as_list()
        if len(resinfo_list) < 2:
            continue

        properties = {
            f"{prop[0]}.{prop[1]}": prop[2]
            for prop in resinfo_list[2:]
            if isinstance(prop, list) and len(prop) >= 3
        }
        results.append(
            AuthResult(
                method=resinfo_list[0],
                result=resinfo_list[1],
                properties=properties,
            )
        )

    return ARCAuthenticationResults(
        instance=instance, authserv_id=server_id, results=results
    )
203
204
def extract_original_auth_results(
    arc_chain_status: ARCChainStatus, aar_header: bytes
) -> dict[str, str] | None:
    """
    Pull the spf/dkim/dmarc verdicts out of an ARC-Authentication-Results header.

    Per RFC 8617 Section 7.2: "If the ARC chain validates, the Authentication-Results
    from the ARC-Authentication-Results header field SHOULD be considered equivalent
    to locally performed authentication checks."

    Returns None unless the chain status is ARCChainStatus.PASS, and also None
    when the header cannot be decoded or parsed. Otherwise returns a mapping of
    method name to result; if a method appears more than once, the last
    occurrence wins.
    """
    if arc_chain_status != ARCChainStatus.PASS:
        return None

    try:
        aar = parse_arc_authentication_results(aar_header.decode())

        # Keep only the key results for DMARC override consideration.
        # Could also extract properties like smtp.mailfrom, header.d
        return {
            entry.method: entry.result
            for entry in aar.results
            if entry.method in ["spf", "dkim", "dmarc"]
        }
    except Exception:
        # Best effort: any failure means there is nothing trustworthy to report.
        return None
WSP = <SP><TAB>
CRLF = {<CR> <LF>}
FWS = {[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...}
authserv_id = W:(-.0-9A-Z_a-z)
method = W:(A-Za-z, -0-9A-Za-z)
result = W:(A-Za-z)
ptype = W:(A-Za-z)
dot_atom_text = W:(-.0-9A-Z_a-z)
value = {W:(!-:<-~) | string enclosed in '"'}
property = Group:({{{{{{{Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...]) W:(A-Za-z)} Suppress:('.')} W:(-.0-9A-Z_a-z)} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} Suppress:('=')} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} {W:(!-:<-~) | string enclosed in '"'} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])})
resinfo = Group:({{{{{{{Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...]) W:(A-Za-z, -0-9A-Za-z)} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} Suppress:('=')} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} W:(A-Za-z)} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} [Group:({{{{{{{Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...]) W:(A-Za-z)} Suppress:('.')} W:(-.0-9A-Z_a-z)} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} Suppress:('=')} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} {W:(!-:<-~) | string enclosed in '"'} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])})]...})
instance_tag = {Suppress:('i=') W:(0-9)}
arc_auth_results = {{{{{{{[Suppress:('i=') W:(0-9)] Suppress:([';'])} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} W:(-.0-9A-Z_a-z)} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} Suppress:(';')} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} {{Group:({{{{{{{Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...]) W:(A-Za-z, -0-9A-Za-z)} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} Suppress:('=')} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} W:(A-Za-z)} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} [Group:({{{{{{{Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...]) W:(A-Za-z)} Suppress:('.')} W:(-.0-9A-Z_a-z)} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} Suppress:('=')} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])} {W:(!-:<-~) | string enclosed in '"'} Suppress:([[[<SP><TAB>]... {<CR> <LF>}] {<SP><TAB>}...])})]...}) Suppress:([';'])}}...}
@dataclass
class AuthResult:
@dataclass
class AuthResult:
    """
    Outcome reported for one authentication method.

    Fields:
    - method: name of the method, e.g. spf, dkim, dmarc, arc
    - result: reported outcome, e.g. pass, fail, temperror
    - properties: "ptype.property" -> value pairs, e.g. smtp.mailfrom, header.d
    """

    method: str
    result: str
    properties: dict[str, str]

Single authentication method result

AuthResult(method: str, result: str, properties: dict[str, str])
method: str
result: str
properties: dict[str, str]
@dataclass
class ARCAuthenticationResults:
@dataclass
class ARCAuthenticationResults:
    """
    Parsed ARC-Authentication-Results header (RFC 8617 Section 4.1.1).

    Fields:
    - instance: value of the "i=" instance tag
    - authserv_id: authentication service identifier from the header
    - results: one AuthResult per method=result entry, in header order
    """

    instance: int
    authserv_id: str
    results: list[AuthResult]

Parsed ARC-Authentication-Results header (RFC 8617 Section 4.1.1)

ARCAuthenticationResults( instance: int, authserv_id: str, results: list[AuthResult])
instance: int
authserv_id: str
results: list[AuthResult]
def normalize_header(header_value: str) -> str:
 90def normalize_header(header_value: str) -> str:
 91    """
 92    Pre-process header value to normalize multi-line properties and remove problematic whitespace.
 93
 94    This handles real-world cases like:
 95    - Multi-line property values (e.g., smtp.mailfrom=value\ncontinued)
 96    - Comments in parentheses
 97    - Extra whitespace and newlines
 98    """
 99    if not header_value:
100        return header_value
101
102    # Split into lines and process each line
103    lines = header_value.split("\n")
104    normalized_parts: list[str] = []
105
106    for line in lines:
107        line = line.strip()
108        if not line:
109            continue
110
111        # If this line starts with a property continuation (no method=result pattern)
112        if not ("=" in line and not line.startswith(" ") and not line.startswith("\t")):
113            # This might be a continuation of a previous property value
114            if normalized_parts and "=" in normalized_parts[-1]:
115                # Append to the last property value
116                normalized_parts[-1] += " " + line
117            else:
118                normalized_parts.append(line)
119        else:
120            # This is a new method=result or property
121            normalized_parts.append(line)
122
123    # Join all parts and clean up extra whitespace
124    normalized = " ".join(normalized_parts)
125
126    # Remove comments in parentheses (RFC 8601 doesn't specify these)
127    import re
128
129    normalized = re.sub(r"\([^)]*\)", "", normalized)
130
131    # Clean up extra whitespace around semicolons and equals
132    normalized = re.sub(r"\s*;\s*", ";", normalized)
133    normalized = re.sub(r"\s*=\s*", "=", normalized)
134
135    # Clean up multiple spaces
136    normalized = re.sub(r"\s+", " ", normalized)
137
138    return normalized.strip()

Pre-process header value to normalize multi-line properties and remove problematic whitespace.

This handles real-world cases like:
- Multi-line property values (e.g., a `smtp.mailfrom=value` that continues on the next line)
- Comments in parentheses
- Extra whitespace and newlines

def parse_arc_authentication_results( header_value: str) -> ARCAuthenticationResults:
def parse_arc_authentication_results(header_value: str) -> ARCAuthenticationResults:
    """
    Parse ARC-Authentication-Results header per RFC 8617 Section 4.1.1.

    Format: i=1; authserv-id; method=result property=value
    Example: i=1; mx.example.com; spf=pass smtp.mailfrom=example.org;
             dkim=pass header.d=example.org

    The value is passed through normalize_header() and then matched against
    the module-level arc_auth_results grammar.

    Returns an ARCAuthenticationResults. The instance defaults to 1 when the
    header carries no "i=" tag. Property names are stored as
    "ptype.property" (e.g. "smtp.mailfrom").

    Raises ValueError (message prefixed with "Invalid ARC-Authentication-Results:")
    when the value is empty or cannot be normalized/parsed.
    """
    try:
        if not header_value or not header_value.strip():
            raise ValueError("Empty header value")

        # Pre-process the header to normalize multi-line properties
        normalized_header = normalize_header(header_value)

        # NOTE(review): parse_string is called without parse_all, so trailing
        # text the grammar cannot match is presumably ignored -- confirm.
        parsed = arc_auth_results.parse_string(normalized_header)
    except Exception as e:
        # Every failure on this path (grammar mismatch, bad input type, empty
        # value) is reported to callers as one ValueError with a common prefix.
        raise ValueError(f"Invalid ARC-Authentication-Results: {e}") from e

    # The grammar yields at most two top-level string tokens, in order: the
    # optional instance digits, then the authserv-id. Deciding by position
    # (rather than by "looks like a number") keeps an all-digit authserv-id
    # from being mistaken for the instance.
    leading_tokens = [token for token in parsed if isinstance(token, str)]
    server_id = leading_tokens[-1].strip() if leading_tokens else ""
    instance = int(leading_tokens[0]) if len(leading_tokens) > 1 else 1

    results: list[AuthResult] = []
    for token in parsed:
        if not hasattr(token, "as_list"):
            continue

        # This is a resinfo group: [method, result, [ptype, name, value], ...]
        resinfo_list = token.as_list()
        if len(resinfo_list) < 2:
            continue

        properties = {
            f"{prop[0]}.{prop[1]}": prop[2]
            for prop in resinfo_list[2:]
            if isinstance(prop, list) and len(prop) >= 3
        }
        results.append(
            AuthResult(
                method=resinfo_list[0],
                result=resinfo_list[1],
                properties=properties,
            )
        )

    return ARCAuthenticationResults(
        instance=instance, authserv_id=server_id, results=results
    )

Parse ARC-Authentication-Results header per RFC 8617 Section 4.1.1.

Format: `i=1; authserv-id; method=result property=value`

Example: `i=1; mx.example.com; spf=pass smtp.mailfrom=example.org; dkim=pass header.d=example.org`

def extract_original_auth_results( arc_chain_status: emailsec.arc.ARCChainStatus, aar_header: bytes) -> dict[str, str] | None:
def extract_original_auth_results(
    arc_chain_status: ARCChainStatus, aar_header: bytes
) -> dict[str, str] | None:
    """
    Pull the spf/dkim/dmarc verdicts out of an ARC-Authentication-Results header.

    Per RFC 8617 Section 7.2: "If the ARC chain validates, the Authentication-Results
    from the ARC-Authentication-Results header field SHOULD be considered equivalent
    to locally performed authentication checks."

    Returns None unless the chain status is ARCChainStatus.PASS, and also None
    when the header cannot be decoded or parsed. Otherwise returns a mapping of
    method name to result; if a method appears more than once, the last
    occurrence wins.
    """
    if arc_chain_status != ARCChainStatus.PASS:
        return None

    try:
        aar = parse_arc_authentication_results(aar_header.decode())

        # Keep only the key results for DMARC override consideration.
        # Could also extract properties like smtp.mailfrom, header.d
        return {
            entry.method: entry.result
            for entry in aar.results
            if entry.method in ["spf", "dkim", "dmarc"]
        }
    except Exception:
        # Best effort: any failure means there is nothing trustworthy to report.
        return None

Extract trusted authentication results from ARC chain.

Per RFC 8617 Section 7.2: "If the ARC chain validates, the Authentication-Results from the ARC-Authentication-Results header field SHOULD be considered equivalent to locally performed authentication checks."