"""Document validation decision tree graph setup functions"""
from compass.common import (
setup_graph_no_nodes,
llm_response_starts_with_yes,
llm_response_starts_with_no,
)
[docs]
def setup_graph_correct_document_type(**kwargs):
    """Setup graph to check for correct document type in legal text

    The graph asks a chain of yes/no questions about the text (does it
    resemble a statute, is it a model law, a planning document, a
    presentation, a draft, a report, an article, or a court document)
    and ends at a "final" node that requests a JSON answer.

    Parameters
    ----------
    **kwargs
        Keyword-value pairs to add to graph. The optional
        ``doc_is_from_ocr`` entry (default ``False``) is removed from
        `kwargs` before the graph is created. When it is truthy, the
        draft-detection prompt additionally instructs the model to
        disregard formatting noise such as OCR artifacts.

    Returns
    -------
    nx.DiGraph
        Graph instance that can be used to initialize an
        `elm.tree.DecisionTree`.
    """
    # Pop the flag so it is not forwarded to the graph as an attribute
    doc_is_from_ocr = kwargs.pop("doc_is_from_ocr", False)

    # The draft prompt is assembled from pieces joined with "\n"; the
    # OCR bullet is only included for documents that were read via OCR
    is_draft_prompt = [
        "Does this text appear to be from a document that is currently "
        "being edited or formatted, such as a draft or work in progress?\n"
        "\n**Important**:\n"
    ]
    if doc_is_from_ocr:
        is_draft_prompt.append(
            "* Disregard formatting inconsistencies, typographical errors, or "
            "visual artifacts (such as OCR noise, broken lines, or unusual "
            "spacing). These do **not** indicate draft status unless "
            "supported by actual content-based cues."
        )
    is_draft_prompt.append(
        "* Do **not** assume that a document is a draft simply because it "
        "refers to amendments, revisions of law, or changing legal "
        "standards. Many finalized legal documents contain such "
        "references as part of their normal content.\n"
        "\nFocus instead on signs of incompleteness or active "
        "editing, such as (but not limited to):\n"
        "* Placeholder content (e.g., 'TBD', 'insert text here', etc.)\n"
        "* Comments or revision marks\n"
        "* Incomplete sentences or headings\n"
        "* Unfinished sections or abrupt endings\n"
        "* Explicit labels like 'draft', 'working version', or 'not "
        "final'\n\n"
        "Please begin your answer with **Yes** or **No**, and briefly "
        "explain your reasoning based only on these content-based signals."
    )
    is_draft_prompt = "\n".join(is_draft_prompt)

    G = setup_graph_no_nodes(**kwargs)  # noqa: N806

    # "{text}" is left as a literal placeholder in the prompt
    G.add_node(
        "init",
        prompt=(
            "Does the following text resemble an excerpt from a legal "
            "statute, such as an ordinance or code? "
            "Please start your response with either 'Yes' or 'No' and "
            "briefly explain your answer."
            '\n\n"""\n{text}\n"""'
        ),
    )

    # A "No" at "init" gets a second chance via "check_for_laws"
    G.add_edge("init", "check_for_laws", condition=llm_response_starts_with_no)
    G.add_node(
        "check_for_laws",
        prompt=(
            "Does the text excerpt detail in-effect legal statutes? "
            "Please start your response with either 'Yes' or 'No' and "
            "briefly explain your answer."
        ),
    )

    G.add_edge("init", "is_model", condition=llm_response_starts_with_yes)
    G.add_edge(
        "check_for_laws", "is_model", condition=llm_response_starts_with_yes
    )
    G.add_node(
        "is_model",
        prompt=(
            "Does this text appear to be from a model ordinance or other "
            "kind of model law? "
            "Please start your response with either 'Yes' or 'No' and briefly "
            "explain why you chose your answer."
        ),
    )

    # From here on each exclusion question only leads to the next one
    # through an edge conditioned on `llm_response_starts_with_no`
    G.add_edge("is_model", "is_pd", condition=llm_response_starts_with_no)
    G.add_node(
        "is_pd",
        prompt=(
            "Does this text appear to be from a project planning document? "
            "Please start your response with either 'Yes' or 'No' and briefly "
            "explain why you chose your answer."
        ),
    )

    G.add_edge("is_pd", "is_pres", condition=llm_response_starts_with_no)
    G.add_node(
        "is_pres",
        prompt=(
            "Does this text appear to be from a presentation? "
            "Please start your response with either 'Yes' or 'No' and briefly "
            "explain why you chose your answer."
        ),
    )

    G.add_edge("is_pres", "is_draft", condition=llm_response_starts_with_no)
    G.add_node("is_draft", prompt=is_draft_prompt)

    G.add_edge("is_draft", "is_report", condition=llm_response_starts_with_no)
    G.add_node(
        "is_report",
        prompt=(
            "Does this text appear to be from a report or summary "
            "document? "
            "Please start your response with either 'Yes' or 'No' and briefly "
            "explain why you chose your answer."
        ),
    )

    G.add_edge(
        "is_report", "is_article", condition=llm_response_starts_with_no
    )
    G.add_node(
        "is_article",
        prompt=(
            "Does this text appear to be from a news article or "
            "other media? "
            "Please start your response with either 'Yes' or 'No' and briefly "
            "explain why you chose your answer."
        ),
    )

    G.add_edge(
        "is_article", "is_court_doc", condition=llm_response_starts_with_no
    )
    G.add_node(
        "is_court_doc",
        prompt=(
            "Does this text appear to be from a lawsuit, legal complaint, "
            "application form, or other legal or court document that is not "
            "intended to detail specific laws, ordinances, and/or "
            "regulations? "
            "Please start your response with either 'Yes' or 'No' and briefly "
            "explain why you chose your answer."
        ),
    )

    # "{key}" is left as a literal placeholder in the final prompt
    G.add_edge("is_court_doc", "final", condition=llm_response_starts_with_no)
    G.add_node(
        "final",
        prompt=(
            "Respond based on our entire conversation so far. Return your "
            "answer as a dictionary in JSON format (not markdown). Your JSON "
            "file must include exactly three keys:\n\n"
            "1. **'summary'** (string) - A concise summary of the text.\n"
            "2. **'type'** (string) - The best-fitting category for the "
            "source of the text.\n"
            "3. **'{key}'** (boolean) -\n"
            "\t- `true` if the text is a **legally binding regulation**.\n"
            "\t- `false` if the text belongs to any other type of document or "
            "if you cannot tell for certain one way or another.\n\n"
        ),
    )
    return G
[docs]
def setup_graph_correct_jurisdiction_type(jurisdiction, **kwargs):
    """Setup graph to check for correct jurisdiction type in legal text

    The graph first asks whether the text identifies a jurisdiction
    type and name, then asks state-, county- and subdivision-level
    questions depending on which of ``jurisdiction.county`` and
    ``jurisdiction.subdivision_name`` are set, and ends at a "final"
    node that requests a JSON answer.

    Parameters
    ----------
    jurisdiction : compass.utilities.location.Jurisdiction
        Jurisdiction for which validation is being performed. The
        ``state``, ``county``, ``subdivision_name``, ``type``,
        ``full_name``, ``full_name_the_prefixed``,
        ``full_county_phrase`` and
        ``full_subdivision_phrase_the_prefixed`` attributes are read
        as needed to build the prompts.
    **kwargs
        Keyword-value pairs to add to graph.

    Returns
    -------
    nx.DiGraph
        Graph instance that can be used to initialize an
        `elm.tree.DecisionTree`.
    """
    G = setup_graph_no_nodes(**kwargs)  # noqa: N806

    # "{text}" is left as a literal placeholder in the prompt
    G.add_node(
        "init",
        prompt=(
            "Does the following legal text explicitly include enough "
            "information to reasonably conclude what type of "
            "jurisdiction it applies to? Common types of jurisdictions "
            "include 'state', 'county', 'city', 'township', 'borough', etc. "
            "Please start your response with either 'Yes' or 'No' and briefly "
            "explain your answer."
            '\n\n"""\n{text}\n"""'
        ),
    )

    names_we_want = _jurisdiction_names_to_extract(jurisdiction)
    G.add_edge("init", "has_name", condition=llm_response_starts_with_yes)
    G.add_node(
        "has_name",
        prompt=(
            "Does the legal text explicitly include enough information to "
            "reasonably determine the **full name** of the jurisdiction it "
            f"applies to? We want to know at least {names_we_want}. "
            "Please start your response with either 'Yes' or 'No' and briefly "
            "explain your answer."
        ),
    )

    G.add_edge("has_name", "is_state", condition=llm_response_starts_with_yes)
    G.add_node(
        "is_state",
        prompt=(
            "Based on the legal text, is it reasonable to conclude that the "
            "provisions within apply specifically to the entire state of "
            f"**{jurisdiction.state}**, either directly or through reference "
            "to a statewide statute, agency, or regulatory authority? If the "
            "text only applies to a county, municipality, or other local "
            f"subdivision within {jurisdiction.state}, or if the text applies "
            "to a different state entirely, or if there is no "
            "reasonable basis to infer statewide application, respond with "
            "'No'. Start your response with 'Yes' or 'No' and briefly explain."
        ),
    )

    # State-only jurisdiction: a state-wide text must also name the state
    if not jurisdiction.county and not jurisdiction.subdivision_name:
        G.add_edge(
            "is_state",
            "has_state_name",
            condition=llm_response_starts_with_yes,
        )
        G.add_edge("is_state", "final", condition=llm_response_starts_with_no)
        G.add_node(
            "has_state_name",
            prompt=(
                "Based on the legal text, is there clear and specific "
                "evidence that the ordinance applies specifically to "
                f"**{jurisdiction.full_name_the_prefixed}**? This could "
                f"include a direct mention of **{jurisdiction.state}**, a "
                "title, heading, or citation indicating it's an ordinance for "
                f"{jurisdiction.state} state, or other language that "
                f"reasonably ties the text to {jurisdiction.full_name} "
                "specifically. Generic references such as 'the state' or "
                "'State Zoning Administrator' are not sufficient on their own "
                "unless clearly linked to "
                f"{jurisdiction.full_name_the_prefixed}. "
                "Start your response with 'Yes' or 'No' and explain briefly."
            ),
        )
        # Unconditional edge: always continue to "final"
        G.add_edge("has_state_name", "final")

    # Tracks the most specific level added so far so that the next
    # (more local) level can be attached to it
    node_to_connect = "is_state"

    if jurisdiction.county:
        G.add_edge(
            node_to_connect, "is_county", condition=llm_response_starts_with_no
        )
        G.add_edge(
            node_to_connect, "final", condition=llm_response_starts_with_yes
        )
        G.add_node(
            "is_county",
            prompt=(
                "Based on the legal text, is it reasonable to conclude that "
                "the provisions within apply specifically to "
                f"**{jurisdiction.full_county_phrase}** "
                "(incorporated or unincorporated areas), either directly or "
                "through reference to a county-wide code, planning authority, "
                "commission, or joint resolution with other local "
                "governments? If the provisions within the text apply "
                "**only** to a **subdivision** of "
                f"{jurisdiction.full_county_phrase} (such as a city or "
                "township), or the text applies to a different county or "
                "borough entirely, or if the scope is unclear, respond with "
                "'No'. "
                "Start your answer with 'Yes' or 'No' and explain briefly."
            ),
        )

        if not jurisdiction.subdivision_name:
            # County is the target level: also require the county name
            G.add_edge(
                "is_county", "final", condition=llm_response_starts_with_no
            )
            G.add_edge(
                "is_county",
                "has_county_name",
                condition=llm_response_starts_with_yes,
            )
            G.add_node(
                "has_county_name",
                prompt=(
                    "Based on the legal text, is there clear and specific "
                    "evidence that the ordinance applies specifically to "
                    f"**{jurisdiction.full_name_the_prefixed}**? This could "
                    f"include a direct mention of **{jurisdiction.county}**, "
                    "a title, heading, or citation indicating it's an "
                    f"ordinance for {jurisdiction.county} "
                    f"{jurisdiction.type.casefold()}, or other language that "
                    f"reasonably ties the text to {jurisdiction.full_name} "
                    "specifically. Generic references such as 'the "
                    f"{jurisdiction.type.casefold()}' or "
                    f"'{jurisdiction.type} Zoning Administrator' are not "
                    "sufficient on their own unless clearly linked to "
                    f"{jurisdiction.full_name_the_prefixed}. "
                    "Start your response with 'Yes' or 'No' and explain "
                    "briefly."
                ),
            )
            G.add_edge("has_county_name", "final")
        else:
            # A subdivision is the target level, so a 'Yes' at the county
            # level goes straight to "final" (the subdivision branch
            # below adds this same edge again, which is harmless)
            G.add_edge(
                "is_county", "final", condition=llm_response_starts_with_yes
            )

        node_to_connect = "is_county"

    if jurisdiction.subdivision_name:
        G.add_edge(
            node_to_connect, "is_city", condition=llm_response_starts_with_no
        )
        G.add_edge(
            node_to_connect, "final", condition=llm_response_starts_with_yes
        )
        G.add_node(
            "is_city",
            prompt=(
                "Based on the legal text, is it reasonable to conclude that "
                "the provisions apply specifically to "
                f"**{jurisdiction.full_subdivision_phrase_the_prefixed}** "
                "(rather than a county, state, federal jurisdiction, or a "
                f"different {jurisdiction.type.casefold()})? If the text "
                "instead applies to a broader jurisdiction, or applies to "
                f"a different {jurisdiction.type.casefold()}, or does not "
                "provide a reasonable basis to infer that it is limited to "
                "municipal governance, respond with 'No'. "
                "Start your response with 'Yes' or 'No' and explain briefly."
            ),
        )

        G.add_edge("is_city", "final", condition=llm_response_starts_with_no)
        G.add_edge(
            "is_city", "has_city_name", condition=llm_response_starts_with_yes
        )
        G.add_node(
            "has_city_name",
            prompt=(
                "Based on the legal text, is there clear and specific "
                "evidence that the ordinance applies specifically to "
                f"**{jurisdiction.full_name_the_prefixed}**? This could "
                "include a direct mention of "
                f"**{jurisdiction.subdivision_name}**, "
                "a title, heading, or citation indicating it's an ordinance "
                f"for {jurisdiction.full_subdivision_phrase_the_prefixed}, "
                "or other language that reasonably ties the text to "
                f"{jurisdiction.full_name_the_prefixed} specifically. "
                "Generic references such as 'the "
                f"{jurisdiction.type.casefold()}' or "
                f"'{jurisdiction.type} Zoning Administrator' are not "
                "sufficient on their own unless clearly linked to "
                f"{jurisdiction.full_name_the_prefixed}. "
                "Start your response with 'Yes' or 'No' and explain "
                "briefly."
            ),
        )
        G.add_edge("has_city_name", "final")

    G.add_node(
        "final",
        prompt=(
            "Respond based on our entire conversation so far. Return your "
            "answer as a dictionary in JSON format (not markdown). Your JSON "
            "file must include exactly two keys. The keys are "
            "'correct_jurisdiction' and 'explanation'. The value of the "
            "'correct_jurisdiction' key should be a boolean that is set to "
            "`true` **only if** it is reasonable to conclude that the "
            "provisions within apply to the entire area (i.e. "
            f"{jurisdiction.type.casefold()}-wide) governed by "
            f"**{jurisdiction.full_name_the_prefixed}** "
            "(`false` otherwise). The value of the 'explanation' key should "
            "be a string containing a brief explanation for your choice. "
        ),
    )
    return G
[docs]
def setup_graph_correct_jurisdiction_from_url(jurisdiction, **kwargs):
    """Setup graph to check for correct jurisdiction in URL

    The graph asks whether the URL mentions the state, then (when set
    on `jurisdiction`) the county and the subdivision. Each question is
    linked to the next one by an edge conditioned on
    `llm_response_starts_with_yes`, and the chain ends at a "final"
    node that requests a JSON answer with one boolean key per question.

    Parameters
    ----------
    jurisdiction : compass.utilities.location.Jurisdiction
        Jurisdiction for which validation is being performed.
    **kwargs
        Keyword-value pairs to add to graph.

    Returns
    -------
    nx.DiGraph
        Graph instance that can be used to initialize an
        `elm.tree.DecisionTree`.
    """
    G = setup_graph_no_nodes(**kwargs)  # noqa: N806

    def _chain_node(parent, name, prompt):
        """Link `parent` to node `name` and return the new node's name"""
        G.add_edge(parent, name, condition=llm_response_starts_with_yes)
        G.add_node(name, prompt=prompt)
        return name

    # "{url}" is left as a literal placeholder in every prompt below
    G.add_node(
        "init",
        prompt=(
            f"Does the following URL explicitly mention {jurisdiction.state} "
            "state in some way (e.g. either by full name or abbreviation)? "
            "**Do not** answer based on auxiliary information like county or "
            "city names. "
            "Please start your response with either 'Yes' or 'No' and explain "
            "your answer."
            "\n\nURL: '{url}\n'"
        ),
    )
    last_node = "init"

    # Maps each JSON output key to the phrase used to describe it in
    # the final prompt; insertion order determines the prompt order
    json_keys = {"correct_state": f"{jurisdiction.state} state"}

    if jurisdiction.county:
        last_node = _chain_node(
            last_node,
            "mentions_county",
            (
                "Does the URL explicitly mention "
                f"{jurisdiction.full_county_phrase} in some way (e.g. either "
                "by full name or abbreviation)? **Do not** answer based on "
                "auxiliary information like state or city names. "
                "Please start your response with either 'Yes' or 'No' and "
                "briefly explain your answer."
                "\n\nURL: '{url}\n'"
            ),
        )
        json_keys["correct_county"] = f"{jurisdiction.full_county_phrase}"

    if jurisdiction.subdivision_name:
        last_node = _chain_node(
            last_node,
            "mentions_city",
            (
                "Does the URL explicitly mention "
                f"{jurisdiction.full_subdivision_phrase_the_prefixed} in "
                "some way (e.g. either by full name or abbreviation)? **Do "
                "not** answer based on auxiliary information like state or "
                "county names. "
                "Please start your response with either 'Yes' or 'No' and "
                "briefly explain your answer."
                "\n\nURL: '{url}\n'"
            ),
        )
        subdivision_key = f"correct_{jurisdiction.type.casefold()}"
        json_keys[subdivision_key] = f"{jurisdiction.full_subdivision_phrase}"

    _chain_node(last_node, "final", _compile_final_url_prompt(json_keys))
    return G
def _compile_final_url_prompt(keys_to_collect):
    """Compile final URL instruction prompt

    Parameters
    ----------
    keys_to_collect : dict
        Mapping of JSON output key to the phrase describing what that
        key refers to. Keys are listed in iteration order.

    Returns
    -------
    str
        Prompt asking for a JSON dictionary containing every key in
        `keys_to_collect` plus an 'explanation' key.
    """
    # One extra key is always requested: 'explanation'
    total_keys = len(keys_to_collect) + 1
    quoted_keys = ", ".join(f"'{key}'" for key in keys_to_collect)
    pieces = (
        "Respond based on our entire conversation so far. Return your "
        "answer as a dictionary in JSON format (not markdown). ",
        f"Your JSON file must include exactly {total_keys} keys. ",
        f"The keys are {quoted_keys} and 'explanation'. ",
        _compile_url_key_explain_text(keys_to_collect),
    )
    return "".join(pieces)
def _compile_url_key_explain_text(keys_to_collect):
"""Compile explanations ofr each output key"""
explain_text = []
for key, name in keys_to_collect.items():
explain_text.append(
f"The value of the '{key}' key should be a boolean that is set to "
f"`True` if the URL explicitly mentions {name} in some way "
"(`False` otherwise). "
)
choices = "choices" if len(keys_to_collect) > 1 else "choice"
explain_text.append(
"The value of the 'explanation' key should be a string containing a "
f"short explanation for your {choices}. "
)
return "".join(explain_text)
def _jurisdiction_names_to_extract(jurisdiction):
"""Determine whether jurisdiction name is required or not"""
if not jurisdiction.subdivision_name and not jurisdiction.county:
return "the state name"
return f"the state name and the {jurisdiction.type.casefold()} name"