83 changes: 65 additions & 18 deletions twikit/client/client.py
@@ -12,7 +12,7 @@

import filetype
import pyotp
-from httpx import AsyncClient, AsyncHTTPTransport, Response
+from httpx import AsyncClient, AsyncHTTPTransport, HTTPError, Response
from httpx._utils import URLPattern

from .._captcha import Capsolver
@@ -126,6 +126,7 @@ async def request(
url: str,
auto_unlock: bool = True,
raise_exception: bool = True,
+        check_user_state: bool = True,
**kwargs
) -> tuple[dict | Any, Response]:
':meta private:'
@@ -193,7 +194,11 @@
elif status_code == 408:
raise RequestTimeout(message, headers=response.headers)
elif status_code == 429:
-            if await self._get_user_state() == 'suspended':
+            # `check_user_state=False` when called recursively from
+            # `_get_user_state()` itself — otherwise a 429 on the nested
+            # user_state GET would re-enter this branch, call
+            # `_get_user_state()` again, and loop until RecursionError.
+            if check_user_state and await self._get_user_state() == 'suspended':
raise AccountSuspended(message, headers=response.headers)
raise TooManyRequests(message, headers=response.headers)
elif 500 <= status_code < 600:
@@ -1522,12 +1527,22 @@ async def _get_more_replies(
if tweet is not None:
results.append(tweet)

-        if entries[-1]['entryId'].startswith('cursor'):
-            next_cursor = entries[-1]['content']['itemContent']['value']
-            _fetch_next_result = partial(self._get_more_replies, tweet_id, next_cursor)
-        else:
-            next_cursor = None
-            _fetch_next_result = None
+        # Mirror the two-shape handling added to `get_tweet_by_id`: without it
+        # the first `await tweet.replies.next()` call would re-introduce the
+        # KeyError that the parent fix eliminated (X serves the trailing cursor
+        # as either `content.itemContent.value` or flat `content.value`).
+        next_cursor = None
+        _fetch_next_result = None
+        if entries and entries[-1].get('entryId', '').startswith('cursor'):
+            content = entries[-1].get('content') or {}
+            item_content = content.get('itemContent')
+            if isinstance(item_content, dict) and 'value' in item_content:
+                next_cursor = item_content['value']
+            elif 'value' in content:
+                next_cursor = content['value']
+        if next_cursor is not None:
+            _fetch_next_result = partial(
+                self._get_more_replies, tweet_id, next_cursor)

return Result(
results,
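
For reference, the shape-tolerant extraction this hunk shares with `get_tweet_by_id` below, as a standalone sketch. The entries and tokens are fabricated for illustration, and `extract_bottom_cursor` is a hypothetical helper, not a twikit API:

```python
# Fabricated entries showing both shapes; the tokens are made up and
# `extract_bottom_cursor` is a hypothetical helper, not twikit API.
legacy_entry = {
    'entryId': 'cursor-bottom-1',
    'content': {'itemContent': {'value': 'TOKEN_A'}},
}
flat_entry = {
    'entryId': 'cursor-bottom-2',
    'content': {'value': 'TOKEN_B'},
}

def extract_bottom_cursor(entries: list) -> str | None:
    # Same decision order as the hunk: nested path first, flat path
    # second, None when the last entry is no cursor or has no value.
    if not entries or not entries[-1].get('entryId', '').startswith('cursor'):
        return None
    content = entries[-1].get('content') or {}
    item_content = content.get('itemContent')
    if isinstance(item_content, dict) and 'value' in item_content:
        return item_content['value']
    if 'value' in content:
        return content['value']
    return None

assert extract_bottom_cursor([legacy_entry]) == 'TOKEN_A'
assert extract_bottom_cursor([flat_entry]) == 'TOKEN_B'
assert extract_bottom_cursor([]) is None
```
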
@@ -1630,14 +1645,24 @@ async def get_tweet_by_id(
if display_type and display_type[0] == 'SelfThread':
tweet.thread = [tweet_object, *replies]

-        if entries[-1]['entryId'].startswith('cursor'):
-            # if has more replies
-            reply_next_cursor = entries[-1]['content']['itemContent']['value']
-            _fetch_more_replies = partial(self._get_more_replies,
-                                          tweet_id, reply_next_cursor)
-        else:
-            reply_next_cursor = None
-            _fetch_more_replies = None
+        reply_next_cursor = None
+        _fetch_more_replies = None
+        if entries and entries[-1].get('entryId', '').startswith('cursor'):
+            # X has two shapes for the trailing cursor entry: the legacy
+            # `content.itemContent.value` and a newer, flatter `content.value`
+            # (TimelineTimelineCursor without an itemContent wrapper). Reading
+            # the old path unconditionally raises KeyError: 'itemContent' for
+            # any tweet served with the new shape, which breaks the whole
+            # `get_tweet_by_id` call — not just pagination of further replies.
+            content = entries[-1].get('content') or {}
+            item_content = content.get('itemContent')
+            if isinstance(item_content, dict) and 'value' in item_content:
+                reply_next_cursor = item_content['value']
+            elif 'value' in content:
+                reply_next_cursor = content['value']
+        if reply_next_cursor is not None:
+            _fetch_more_replies = partial(self._get_more_replies,
+                                          tweet_id, reply_next_cursor)

tweet.replies = Result(
replies_list,
@@ -4320,5 +4345,27 @@ async def _update_subscriptions(
return _payload_from_data(response)

async def _get_user_state(self) -> Literal['normal', 'bounced', 'suspended']:
-        response, _ = await self.v11.user_state()
-        return response['userState']
+        # `request()` calls this method whenever it receives a 429, to
+        # decide between `TooManyRequests` and `AccountSuspended`. But the
+        # call itself goes through `request()` as well, so if the
+        # user_state endpoint is ALSO rate-limited (very common — X rate
+        # limits the whole account, not per-endpoint), we re-enter this
+        # branch and recurse until Python raises `RecursionError`. That
+        # masks the real 429 with an unrelated crash.
+        #
+        # Pass `check_user_state=False` to the nested request so that if
+        # this user_state GET also 429s, `request()` raises `TooManyRequests`
+        # directly instead of re-entering this branch. That eliminates the
+        # recursion at the source — not just after N levels deep — so we
+        # don't burn through HTTP calls climbing back up the stack.
+        #
+        # We still trap the remaining failure modes: the expected
+        # `TooManyRequests` (now raised on the first nested check, not at
+        # the recursion limit), and any transport-level `HTTPError`. Anything
+        # else (unexpected JSON, auth issues, programming errors) keeps
+        # propagating so real bugs surface.
+        try:
+            response, _ = await self.v11.user_state(check_user_state=False)
+            return response['userState']
+        except (TooManyRequests, HTTPError):
+            return 'normal'
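
A minimal, self-contained sketch of the re-entrancy guard the hunks above add. All names are stand-ins for the real `Client.request` → `v11.user_state` → `base.get` chain, and the fake `request` pretends X answers every call with a 429:

```python
import asyncio

class TooManyRequests(Exception):
    pass

# Stand-ins for the real Client.request -> v11.user_state -> base.get
# chain; this fake pretends X answers every call with a 429.
class FakeClient:
    def __init__(self) -> None:
        self.calls = 0

    async def request(self, check_user_state: bool = True) -> None:
        self.calls += 1
        if check_user_state and await self._get_user_state() == 'suspended':
            raise RuntimeError('would raise AccountSuspended')
        raise TooManyRequests('Rate limit exceeded')

    async def _get_user_state(self) -> str:
        try:
            # The guard: the nested probe must not re-enter the
            # suspension check, or it recurses until RecursionError.
            await self.request(check_user_state=False)
        except TooManyRequests:
            pass
        return 'normal'

async def main() -> None:
    client = FakeClient()
    try:
        await client.request()
    except TooManyRequests:
        pass
    # Two calls total: the original request plus one probe. With
    # check_user_state=True on the probe it would never terminate.
    assert client.calls == 2

asyncio.run(main())
```
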
10 changes: 7 additions & 3 deletions twikit/client/gql.py
@@ -11,6 +11,7 @@
JOIN_COMMUNITY_FEATURES,
LIST_FEATURES,
NOTE_TWEET_FEATURES,
+    SEARCH_TIMELINE_FEATURES,
SIMILAR_POSTS_FEATURES,
TWEET_RESULT_BY_REST_ID_FEATURES,
TWEET_RESULTS_BY_REST_IDS_FEATURES,
Expand All @@ -31,7 +32,7 @@ class Endpoint:
def url(path):
return f'https://{DOMAIN}/i/api/graphql/{path}'

-    SEARCH_TIMELINE = url('flaR-PUMshxFWZWPNpq4zA/SearchTimeline')
+    SEARCH_TIMELINE = url('R0u1RWRf748KzyGBXvOYRA/SearchTimeline')
SIMILAR_POSTS = url('EToazR74i0rJyZYalfVEAQ/SimilarPosts')
CREATE_NOTE_TWEET = url('iCUB42lIfXf9qPKctjE5rQ/CreateNoteTweet')
CREATE_TWEET = url('SiM_cAu83R0wnrpmKQQSEw/CreateTweet')
@@ -152,11 +153,14 @@ async def search_timeline(
'rawQuery': query,
'count': count,
'querySource': 'typed_query',
-            'product': product
+            'product': product,
+            'withGrokTranslatedBio': True
}
if cursor is not None:
variables['cursor'] = cursor
-        return await self.gql_get(Endpoint.SEARCH_TIMELINE, variables, FEATURES)
+        return await self.gql_get(
+            Endpoint.SEARCH_TIMELINE, variables, SEARCH_TIMELINE_FEATURES
+        )

async def similar_posts(self, tweet_id: str):
variables = {'tweet_id': tweet_id}
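
The query id and the feature set travel together: X's GraphQL layer typically rejects a persisted query whose `features` don't match what the current web client sends, which is why the id bump above lands alongside the new `SEARCH_TIMELINE_FEATURES` in `twikit/constants.py` below. A sketch of the resulting request shape, assuming `gql_get` serializes `variables` and `features` as JSON-encoded query parameters (the builder function and domain here are illustrative, not twikit API):

```python
import json
from urllib.parse import urlencode

# Illustrative sketch: the endpoint id comes from the diff above, but
# `build_search_url` and the domain are assumptions, not twikit API.
SEARCH_TIMELINE = 'https://x.com/i/api/graphql/R0u1RWRf748KzyGBXvOYRA/SearchTimeline'

def build_search_url(query: str, count: int, features: dict,
                     cursor: str | None = None) -> str:
    variables = {
        'rawQuery': query,
        'count': count,
        'querySource': 'typed_query',
        'product': 'Latest',
        'withGrokTranslatedBio': True,
    }
    if cursor is not None:
        variables['cursor'] = cursor
    params = urlencode({
        'variables': json.dumps(variables, separators=(',', ':')),
        'features': json.dumps(features, separators=(',', ':')),
    })
    return f'{SEARCH_TIMELINE}?{params}'

# Real calls would pass the full SEARCH_TIMELINE_FEATURES dict from
# twikit/constants.py; one key stands in for it here.
url = build_search_url('twikit', 20, {'rweb_video_screen_enabled': False})
```
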
10 changes: 8 additions & 2 deletions twikit/client/v11.py
@@ -505,8 +505,14 @@ async def live_pipeline_update_subscriptions(self, session, subscribe, unsubscri
Endpoint.LIVE_PIPELINE_UPDATE_SUBSCRIPTIONS, data=data, headers=headers
)

-    async def user_state(self):
+    async def user_state(self, **kwargs):
+        # `**kwargs` is forwarded to `base.get` → `base.request`. The 429
+        # recovery path in `Client.request` calls `_get_user_state()`, which
+        # ends up back here; we need to pass `check_user_state=False` down
+        # so that if this nested call also returns 429 we don't retry the
+        # recovery check and loop.
return await self.base.get(
Endpoint.USER_STATE,
-            headers=self.base._base_headers
+            headers=self.base._base_headers,
+            **kwargs
)
40 changes: 40 additions & 0 deletions twikit/constants.py
@@ -228,6 +228,46 @@
'responsive_web_enhance_cards_enabled': False
}

+SEARCH_TIMELINE_FEATURES = {
+    'rweb_video_screen_enabled': False,
+    'rweb_cashtags_enabled': True,
+    'profile_label_improvements_pcf_label_in_post_enabled': True,
+    'responsive_web_profile_redirect_enabled': False,
+    'rweb_tipjar_consumption_enabled': False,
+    'verified_phone_label_enabled': False,
+    'creator_subscriptions_tweet_preview_api_enabled': True,
+    'responsive_web_graphql_timeline_navigation_enabled': True,
+    'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
+    'premium_content_api_read_enabled': False,
+    'communities_web_enable_tweet_community_results_fetch': True,
+    'c9s_tweet_anatomy_moderator_badge_enabled': True,
+    'responsive_web_grok_analyze_button_fetch_trends_enabled': False,
+    'responsive_web_grok_analyze_post_followups_enabled': True,
+    'responsive_web_jetfuel_frame': True,
+    'responsive_web_grok_share_attachment_enabled': True,
+    'responsive_web_grok_annotations_enabled': True,
+    'articles_preview_enabled': True,
+    'responsive_web_edit_tweet_api_enabled': True,
+    'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
+    'view_counts_everywhere_api_enabled': True,
+    'longform_notetweets_consumption_enabled': True,
+    'responsive_web_twitter_article_tweet_consumption_enabled': True,
+    'content_disclosure_indicator_enabled': True,
+    'content_disclosure_ai_generated_indicator_enabled': True,
+    'responsive_web_grok_show_grok_translated_post': True,
+    'responsive_web_grok_analysis_button_from_backend': True,
+    'post_ctas_fetch_enabled': True,
+    'freedom_of_speech_not_reach_fetch_enabled': True,
+    'standardized_nudges_misinfo': True,
+    'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
+    'longform_notetweets_rich_text_read_enabled': True,
+    'longform_notetweets_inline_media_enabled': False,
+    'responsive_web_grok_image_annotation_enabled': True,
+    'responsive_web_grok_imagine_annotation_enabled': True,
+    'responsive_web_grok_community_note_auto_translation_is_enabled': True,
+    'responsive_web_enhance_cards_enabled': False
+}

TWEET_RESULTS_BY_REST_IDS_FEATURES = {
'creator_subscriptions_tweet_preview_api_enabled': True,
'premium_content_api_read_enabled': False,
50 changes: 46 additions & 4 deletions twikit/x_client_transaction/transaction.py
@@ -12,8 +12,24 @@
from .rotation import convert_rotation_to_matrix
from .utils import float_to_hex, is_odd, base64_encode, handle_x_migration

+# The ondemand.s hash is no longer placed directly next to the
+# "ondemand.s" label in the webpack bundle. The current layout emits
+# the chunk id first, then the hash is listed against the same id
+# elsewhere on the page. Two-step lookup: find the id, then find the
+# hash that was shipped for that id.
+#
+# Leading boundary is `[,{]` (not just `,`) so we also match the entry
+# when `ondemand.s` happens to be the first key of the webpack chunk
+# map (`{123:"ondemand.s",...}`). Quote class accepts both single and
+# double quotes on both halves — X has shipped both styles depending
+# on the minifier run, and a mismatch caused the hash lookup to miss.
ON_DEMAND_FILE_REGEX = re.compile(
r"""['|\"]{1}ondemand\.s['|\"]{1}:\s*['|\"]{1}([\w]*)['|\"]{1}""", flags=(re.VERBOSE | re.MULTILINE))
r"""[,{](\d+):["']ondemand\.s["']""", flags=(re.VERBOSE | re.MULTILINE))
# `{{` / `}}` escape the literal braces so `str.format()` substitutes
# only the `{chunk_id}` placeholder. Otherwise the `{` at the start of
# the character class is parsed as an unnamed format field and raises
# `ValueError: unexpected '{' in field name`.
ON_DEMAND_HASH_PATTERN = r'[,{{]{chunk_id}:["\']([0-9a-f]+)["\']'
INDICES_REGEX = re.compile(
r"""(\(\w{1}\[(\d{1,2})\],\s*16\))+""", flags=(re.VERBOSE | re.MULTILINE))

@@ -42,10 +58,36 @@ async def get_indices(self, home_page_response, session, headers):
key_byte_indices = []
response = self.validate_response(
home_page_response) or self.home_page_response
-        on_demand_file = ON_DEMAND_FILE_REGEX.search(str(response))
+        response_text = str(response)
+        on_demand_file = ON_DEMAND_FILE_REGEX.search(response_text)
if on_demand_file:
on_demand_file_url = f"https://abs.twimg.com/responsive-web/client-web/ondemand.s.{on_demand_file.group(1)}a.js"
on_demand_file_response = await session.request(method="GET", url=on_demand_file_url, headers=headers)
# `on_demand_file.group(1)` is the webpack chunk id (a number
# like "123"), not the file hash itself. Look up the hash shipped
# for that id via a second regex. Previously we concatenated
# the match with 'a.js' directly, which matched the old bundle
# layout where `"ondemand.s":"HASH"` appeared together — that
# layout no longer exists, so the old path raised
# "Couldn't get KEY_BYTE indices" on every init.
chunk_id = on_demand_file.group(1)
hash_match = re.search(
ON_DEMAND_HASH_PATTERN.format(chunk_id=chunk_id),
response_text)
if not hash_match:
# Distinct failure mode from "couldn't get indices": we found
# the `ondemand.s` label but the hash mapping for that chunk
# id isn't in the page. Surface this separately so diagnosis
# doesn't conflate "page layout changed" with "hash missing"
# (they need different fixes — regex vs. re-capture).
raise Exception(
f"Couldn't find ondemand.s hash for chunk id {chunk_id!r} "
f"(page layout may have changed)"
)
on_demand_file_url = (
f"https://abs.twimg.com/responsive-web/client-web/"
f"ondemand.s.{hash_match.group(1)}a.js"
)
on_demand_file_response = await session.request(
method="GET", url=on_demand_file_url, headers=headers)
key_byte_indices_match = INDICES_REGEX.finditer(
str(on_demand_file_response.text))
for item in key_byte_indices_match:
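
Downstream of the hash fix, `INDICES_REGEX` still does the last step: pulling the key-byte indices out of the fetched `ondemand.s` bundle. A worked example against a fabricated minified excerpt, assuming the indices appear as `parseInt(<var>[<index>], 16)` calls, which is the shape the pattern implies:

```python
import re

INDICES_REGEX = re.compile(
    r"""(\(\w{1}\[(\d{1,2})\],\s*16\))+""", flags=(re.VERBOSE | re.MULTILINE))

# Fabricated minified excerpt; the real bundle is minified JS in which
# each key byte index shows up as a parseInt(<var>[<index>], 16) call.
js = "r=parseInt(x[4],16);s=parseInt(x[11], 16);"

indices = [int(m.group(2)) for m in INDICES_REGEX.finditer(js)]
assert indices == [4, 11]
```
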